Good Day Everyone. I am having issues with web scaping as I am not sure why it does not want to scape. I am using xpath and also soup to gather the next URL to check if it works however it does not want to work. What am I doing wrong?
import requests
from lxml import etree
import html5lib
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import time, re
import csv
import time
start = time.time()
print('Starting Program')
base ="https://www.studylight.org/lexicons/eng/hebrew/1.html"
url = "https://www.studylight.org/lexicons/eng/hebrew/1.html"
while True:
request = requests.get(urljoin(base,url)) #Get URL server status
soup = BeautifulSoup(request.content, 'html5lib') #Pass url content to Soup
dom = etree.HTML(str(soup)) #Ini etree
url = dom.xpath('/html/body/div[1]/div[3]/div[2]/div[4]/form/div/div[3]/div[2]/a') #Find Next Page URL
url2 = urljoin(base,url)
urltest2 = soup.find_all("span", class_="greek-hebrew fs-21") #Find next url
print('Test First url', url2,' Test number 2 ' , urltest2)
# #for line in soup.find_all('a'):
# #print(urljoin(base,line.text))#.get('href'))
if url2 in 'https://www.studylight.org/lexicons/eng/hebrew/3.html': # Page to Stop
break # Break out of loop
print('Program Completed')
