I am trying to build a web crawler to extract all the links on a webpage. I have created 2 python files. (class: scanner.py and object: vulnerability-scanner.py). When I run the script there is an error shows up. I am unable to find the error. Help me to solve this.
Source code
----------------------------------------------------------------------------
scanner.py
vulnerability-scanner.py
error
Source code
----------------------------------------------------------------------------
scanner.py
import requests
import re
import urllib.parse
class Scanner:
def __init__(self, url):
self.target_url = url
self.target_links = []
def extract_links_from(self, url):
response = requests.get(url)
return re.findall('"((http|ftp)s?://.*?)"', response.content.decode('utf-8'))
def crawl(self, url=None):
if url == None:
url = self.target_url
href_links = self.extract_links_from(url)
for link in href_links:
link = urllib.parse.urljoin(url, link)
if '#' in link:
link = link.split("#")[0]
if self.target_url in link and link not in self.target_links:
self.target_links.append(link)
print(link)
self.crawl(link)------------------------------------------------------------------------------------------vulnerability-scanner.py
import scanner target_url = "http://localhost/DVWA/" vul_scanner = scanner.Scanner(target_url) vul_scanner.crawl(target_url)-------------------------------------------------------------------------------------------
error
Error:Traceback (most recent call last):
File "C:/xampp/htdocs/WebVIM/vulnerability-scanner.py", line 5, in <module>
vul_scanner.crawl(target_url)
File "C:\xampp\htdocs\WebVIM\scanner.py", line 19, in crawl
link = urllib.parse.urljoin(url, link)
File "C:\Users\HouseMoNaRa\AppData\Local\Programs\Python\Python37-32\lib\urllib\parse.py", line 487, in urljoin
base, url, _coerce_result = _coerce_args(base, url)
File "C:\Users\HouseMoNaRa\AppData\Local\Programs\Python\Python37-32\lib\urllib\parse.py", line 120, in _coerce_args
raise TypeError("Cannot mix str and non-str arguments")
TypeError: Cannot mix str and non-str arguments
