Oct-24-2017, 08:23 PM
Dear community,
The following code ran like a charm in Python version 2.x - all was nice, very well.
But I guess that there are some other errors waiting at the fence ...
The following code ran like a charm in Python version 2.x - all was nice, very well.
import urllib
import urlparse
import re
url = "http://search.cpan.org/author/?W"
html = urllib.urlopen(url).read()
for lk, capname, name in re.findall('<a href="(/~.*?/)"><b>(.*?)</b></
a><br/><small>(.*?)</small>', html):
alk = urlparse.urljoin(url, lk)
data = { 'url':alk, 'name':name, 'cname':capname }
phtml = urllib.urlopen(alk).read()
memail = re.search('<a href="mailto:(.*?)">', phtml)
if memail:
data['email'] = memail.group(1)
print data
I got back the following:
IndentationError: Missing parentheses in call to 'print'
>>>
>>> import urllib
>>> import urllib.parse
>>> import re
>>>
>>> url = "http://search.cpan.org/author/?W"
>>> html = urllib.urlopen(url).read()
Traceback (innermost last):
File "<stdin>", line 1, in <module>
AttributeError: 'module' object has no attribute 'urlopen'
>>> for lk, capname, name in re.findall('<a href="(/~.*?/)"><b>(.*?)</b></
File "<stdin>", line 1
for lk, capname, name in re.findall('<a href="(/~.*?/)"><b>(.*?)</b></
^
SyntaxError: EOL while scanning string literal
>>> a><br/><small>(.*?)</small>', html):
File "<stdin>", line 1
a><br/><small>(.*?)</small>', html):
^
SyntaxError: invalid syntax
>>> alk = urlparse.urljoin(url, lk)
File "<stdin>", line 1
alk = urlparse.urljoin(url, lk)
^
IndentationError: unexpected indent
>>>
>>> data = { 'url':alk, 'name':name, 'cname':capname }
File "<stdin>", line 1
data = { 'url':alk, 'name':name, 'cname':capname }
^
IndentationError: unexpected indent
>>>
>>> phtml = urllib.urlopen(alk).read()
File "<stdin>", line 1
phtml = urllib.urlopen(alk).read()
^
IndentationError: unexpected indent
>>> memail = re.search('<a href="mailto:(.*?)">', phtml)
File "<stdin>", line 1
memail = re.search('<a href="mailto:(.*?)">', phtml)
^
IndentationError: unexpected indent
>>> if memail:
File "<stdin>", line 1
if memail:
^
IndentationError: unexpected indent
>>> data['email'] = memail.group(1)
File "<stdin>", line 1
data['email'] = memail.group(1)
^
IndentationError: unexpected indent
>>>
>>> print data
File "<stdin>", line 1
print data
^
IndentationError: Missing parentheses in call to 'print'
>>>
Okay - first of all I have to install the urllib.parse module, but I guess that there are some other errors waiting at the fence ...
