Sep-22-2022, 01:29 AM
I need to control a website at which the links i interact with open in a new popup/tab. The href attribute of the links do me no good because using it in a separate request directs me back to the homepage rather than to the details they reference (is there a way to prevent this behaviour?). I am following code I copied from the web and using the context class of Playwright to obtain a handle on the new window but the code fails with the errors
Error:Traceback (most recent call last):
File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\ohio_miami.py", line 140, in <module>
miami('09/10/2022')
File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\ohio_miami.py", line 89, in miami
with context.expect_page() as window:
File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\venv\lib\site-packages\playwright\_impl\_sync_base.py", line 78, in __exit__
self._event.value
File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\venv\lib\site-packages\playwright\_impl\_sync_base.py", line 58, in value
raise exception
File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\ohio_miami.py", line 92, in miami
print(type(window.value))
File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\venv\lib\site-packages\playwright\_impl\_sync_base.py", line 58, in value
raise exception
playwright._impl._api_types.TimeoutError: Timeout 30000.0ms exceeded while waiting for event "page"import os.path
import requests
from playwright.sync_api import sync_playwright
import datetime
from time import sleep
import csv
from bs4 import BeautifulSoup
import re
def miami(start_date):
sd_obj_in = datetime.date(int(start_date[6:]), int(start_date[:2]), int(start_date[3:5]))
save_date_obj = sd_obj_in
with sync_playwright() as p:
# open site and manipulate search with playwright
browser = p.chromium.launch(headless=False, slow_mo=50)
context = browser.new_context()
page = context.new_page()
# page = browser.new_page()
page.goto('https://courts.miamicountyohio.gov/eservices/home.page.11')
page.wait_for_selector('.anchorButton')
page.locator('a.anchorButton').click() # disclaimer page click-button
# sleep(3)
page.wait_for_selector('#searchPageTabSection')
# search page
page.locator("xpath=//ul/li[5]/a").click() # click case type tab
sleep(3)
page.wait_for_selector('.hasDatepicker')
# search dates entry
page.query_selector('input.hasDatepicker').fill(start_date)
sleep(2)
end_date = datetime.date.today().strftime('%m/%d/%Y')
page.query_selector("xpath=//div[contains(@class,'endDate')]/input").fill(end_date)
sleep(2)
# case type select (traffic)
case_code = page.query_selector("xpath=//select[@name='caseCd']")
sleep(1)
option_tr = case_code.query_selector("xpath=option[starts-with(@value,'TR')]")
option_tr.scroll_into_view_if_needed()
option_tr.click()
sleep(2)
# status select (open)
status = page.query_selector("xpath=//select[@name='statCd']")
sleep(1)
option_open = status.query_selector("xpath=option[text()='Open']")
option_open.scroll_into_view_if_needed()
option_open.click()
sleep(2)
# party type select (defendant)
pty = page.query_selector("xpath=//select[@name='ptyCd']")
sleep(1)
option_def = pty.query_selector("xpath=option[text()='Defendant']")
option_def.scroll_into_view_if_needed()
option_def.click()
sleep(2)
# search button click
page.query_selector("xpath=//input[@value='Search']").click()
sleep(4)
data_path = 'c:\\working\\access\\oh\\ohio\\miami.csv'
header = ['Name', 'DOB', 'Case Number', 'File Date', 'Addr1', 'Addr2', 'Addr3', 'City', 'State', 'Zip',
'Statute', 'Degree', 'Description']
if os.path.isfile(data_path):
os.remove(data_path)
with open(data_path, 'w', newline='') as f:
export_file = csv.writer(f)
export_file.writerow(header)
for row in page.query_selector_all("xpath=//table[@id='grid']/tbody/tr"):
link = row.query_selector("xpath=/td[5]/span/a")
record = [
row.query_selector_all("td")[2].inner_text(), # name
row.query_selector_all("td")[4].inner_text(), # dob
row.query_selector_all("td")[5].inner_text(), # case number
row.query_selector_all("td")[6].inner_text(), # file date
]
print(record)
# page.wait_for_selector(".ptyContactInfo")
with context.expect_page() as window:
link.click()
sleep(2)
new_window = window.value
case_details = parse_details(new_window.content())
for detail in case_details['Charges']:
rec = record
rec.append(case_details['Addr1'])
rec.append(case_details['Addr2'])
rec.append(case_details['Addr3'])
rec.append(case_details['City'])
rec.append(case_details['State'])
rec.append(case_details['Zip'])
rec.append(detail['Statute'])
rec.append(detail['Degree'])
rec.append(detail['Desc'])
print(rec)
exit()
export_file.writerow(rec)
print('done')
def parse_details(content):
soup = BeautifulSoup(content, 'html.parser')
contact_info = soup.find("li", class_="ptyContactInfo")
print(contact_info)
docket = {'Addr1': contact_info.contents[0].get_text()}
docket['Addr2'] = contact_info.contents[1].get_text()
docket['Addr3'] = contact_info.contents[2].get_text()
docket['City'] = contact_info.contents[3].get_text()
docket['State'] = contact_info.contents[5].get_text()
docket['Zip'] = contact_info.contents[6].get_text()
charges = []
for case_info in soup.find_all('div', class_='chrg'):
charges.append({
'Statute': case_info.find('span', 'chgHeadActn'),
'Degree': case_info.find_all('span', 'chgHeadDeg'),
'Desc': case_info.find_all('span', 'chgHeadDscr')
})
docket['Charges'] = charges
return docket
miami('09/10/2022')Can anybody tell me why window.value raises an error?
