Nov-17-2017, 12:30 PM
Hi Team
I have a program in which I want threads want to be run in parallel.
import requests
from queue import Queue
import time
from bs4 import BeautifulSoup
import threading
urlList = []
q = Queue()
def url_c(url):
try:
r = requests.get(url)
htmldoc = r.content
if r.status_code in [400,404,403,408,409,501,502,503]:print (str(r.status_code)+"-"+str(r.status_code)+"-->"+url)
else: print ("no problem in-->",url)
soup= BeautifulSoup(htmldoc,'html.parser')
links = []
links = soup.findAll('a')
if len(links)>0:
for link in links:
if link.get('href') not in urlList and link.get('href') is not None and len(link.get('href'))>10 and 'JavaScript' not in link.get('href'):
if 'http' not in link.get('href'):
urlList.append(url + link.get('href'))
else:
urlList.append(link.get('href'))
except:
print("ERROR ",url)
def threader():
while True:
url = q.get
url_c(url)
q.task_done()
# how many threads are we going to allow for
for x in range(10):
t = threading.Thread(target=threader)
# classifying as a daemon, so they will die when the main dies
t.daemon = True
# begins, must come after daemon definition
t.start()
def main():
global end
global start
end = 0
start =1
print('Enter the URL')
url = input()
url_c(url)
end = len(urlList)
while start != end:
end = len(urlList)
print(start)
url_c(urlList[start])
q.put(urlList[start])
print(end)
start +=1
i =1
for u in urlList:
print('length is ->',len(u),'-',u)
i +=1
print('There are ',len(urlList),' links.')
main()Please advise what to do?
