managing new windows

JonWayn · Sep-22-2022, 01:29 AM

I need to control a website where the links I interact with open in a new popup/tab. The href attribute of the links does me no good, because using it in a separate request directs me back to the homepage rather than to the details it references (is there a way to prevent this behaviour?). I am following code I copied from the web and using Playwright's context class to obtain a handle on the new window, but the code fails with the following error:

Error:Traceback (most recent call last):
  File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\ohio_miami.py", line 140, in <module>
    miami('09/10/2022')
  File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\ohio_miami.py", line 89, in miami
    with context.expect_page() as window:
  File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\venv\lib\site-packages\playwright\_impl\_sync_base.py", line 78, in __exit__
    self._event.value
  File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\venv\lib\site-packages\playwright\_impl\_sync_base.py", line 58, in value
    raise exception
  File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\ohio_miami.py", line 92, in miami
    print(type(window.value))
  File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\venv\lib\site-packages\playwright\_impl\_sync_base.py", line 58, in value
    raise exception
playwright._impl._api_types.TimeoutError: Timeout 30000.0ms exceeded while waiting for event "page"

import os.path
import requests
from playwright.sync_api import sync_playwright
import datetime
from time import sleep
import csv
from bs4 import BeautifulSoup
import re


def miami(start_date):
    """Scrape open traffic cases from the Miami County (Ohio) court site to CSV.

    Drives a visible Chromium window with Playwright: accepts the disclaimer,
    fills in the case-type search form (traffic / Open / Defendant, from
    ``start_date`` through today), then opens each result's detail page and
    writes one CSV row per charge to ``c:\\working\\access\\oh\\ohio\\miami.csv``.

    Args:
        start_date: First file date to search for, formatted ``MM/DD/YYYY``.

    Raises:
        ValueError: If ``start_date`` is not a valid ``MM/DD/YYYY`` date.
    """
    # Fail fast on a malformed date instead of after the browser has started.
    datetime.datetime.strptime(start_date, '%m/%d/%Y')

    data_path = 'c:\\working\\access\\oh\\ohio\\miami.csv'
    header = ['Name', 'DOB', 'Case Number', 'File Date', 'Addr1', 'Addr2', 'Addr3', 'City', 'State', 'Zip',
              'Statute', 'Degree', 'Description']
    address_keys = ('Addr1', 'Addr2', 'Addr3', 'City', 'State', 'Zip')

    with sync_playwright() as p:
        # open site and manipulate search with playwright
        browser = p.chromium.launch(headless=False, slow_mo=50)
        context = browser.new_context()
        page = context.new_page()
        page.goto('https://courts.miamicountyohio.gov/eservices/home.page.11')

        page.wait_for_selector('.anchorButton')
        page.locator('a.anchorButton').click()                              # disclaimer page click-button

        page.wait_for_selector('#searchPageTabSection')
        # search page
        page.locator("xpath=//ul/li[5]/a").click()                          # click case type tab
        sleep(3)

        page.wait_for_selector('.hasDatepicker')
        # search dates entry
        page.query_selector('input.hasDatepicker').fill(start_date)
        sleep(2)
        end_date = datetime.date.today().strftime('%m/%d/%Y')
        page.query_selector("xpath=//div[contains(@class,'endDate')]/input").fill(end_date)
        sleep(2)

        # case type select (traffic)
        case_code = page.query_selector("xpath=//select[@name='caseCd']")
        sleep(1)
        option_tr = case_code.query_selector("xpath=option[starts-with(@value,'TR')]")
        option_tr.scroll_into_view_if_needed()
        option_tr.click()
        sleep(2)

        # status select (open)
        status = page.query_selector("xpath=//select[@name='statCd']")
        sleep(1)
        option_open = status.query_selector("xpath=option[text()='Open']")
        option_open.scroll_into_view_if_needed()
        option_open.click()
        sleep(2)

        # party type select (defendant)
        pty = page.query_selector("xpath=//select[@name='ptyCd']")
        sleep(1)
        option_def = pty.query_selector("xpath=option[text()='Defendant']")
        option_def.scroll_into_view_if_needed()
        option_def.click()
        sleep(2)

        # search button click
        page.query_selector("xpath=//input[@value='Search']").click()
        sleep(4)

        # Mode 'w' truncates any previous export. The file must stay open for
        # the whole results loop, otherwise the writer targets a closed file.
        with open(data_path, 'w', newline='') as f:
            export_file = csv.writer(f)
            export_file.writerow(header)

            for row in page.query_selector_all("xpath=//table[@id='grid']/tbody/tr"):
                cells = row.query_selector_all("td")
                link = row.query_selector("xpath=/td[5]/span/a")
                if link is None or len(cells) < 7:
                    # Not a regular result row (no detail link / too few cells).
                    print('skipping row without case link')
                    continue

                record = [
                    cells[2].inner_text(),       # name
                    cells[4].inner_text(),       # dob
                    cells[5].inner_text(),       # case number
                    cells[6].inner_text(),       # file date
                ]
                print(record)

                # Only the action that triggers the popup belongs inside the
                # block; the new page is read from .value after the block exits.
                # NOTE(review): if the link does not open a new page in this
                # context, expect_page() will still time out - confirm on site.
                with context.expect_page() as window:
                    link.click()
                new_window = window.value
                new_window.wait_for_load_state()
                try:
                    case_details = parse_details(new_window.content())
                finally:
                    # Close the popup so tabs do not pile up, one per result.
                    new_window.close()

                address = [case_details[key] for key in address_keys]
                for detail in case_details['Charges']:
                    # Build a fresh list per charge; appending to `record`
                    # itself would carry one charge's fields into the next row.
                    rec = record + address + [detail['Statute'], detail['Degree'], detail['Desc']]
                    print(rec)
                    export_file.writerow(rec)

        browser.close()
        print('done')


def parse_details(content):
    """Extract the party contact address and the charges from a case-detail page.

    Args:
        content: HTML source of the case-detail page.

    Returns:
        A dict with the text values 'Addr1', 'Addr2', 'Addr3', 'City',
        'State' and 'Zip', plus 'Charges': a list of dicts, one per
        ``div.chrg`` element, each with text values 'Statute', 'Degree'
        and 'Desc' (empty string when the span is absent).

    Raises:
        ValueError: If the page has no ``li.ptyContactInfo`` element.
    """
    soup = BeautifulSoup(content, 'html.parser')
    contact_info = soup.find("li", class_="ptyContactInfo")
    if contact_info is None:
        raise ValueError("no 'li.ptyContactInfo' element found in page content")

    # NOTE(review): the positions below assume a fixed child layout inside the
    # <li>; index 4 is skipped - presumably a separator node. Confirm against
    # the live page.
    parts = contact_info.contents
    docket = {
        'Addr1': parts[0].get_text(),
        'Addr2': parts[1].get_text(),
        'Addr3': parts[2].get_text(),
        'City': parts[3].get_text(),
        'State': parts[5].get_text(),
        'Zip': parts[6].get_text(),
    }

    def joined_text(node, css_class):
        # Text of every matching span, so the CSV gets plain text rather than
        # the repr of a bs4 ResultSet.
        return ' '.join(span.get_text(strip=True) for span in node.find_all('span', css_class))

    charges = []
    for case_info in soup.find_all('div', class_='chrg'):
        statute = case_info.find('span', 'chgHeadActn')
        charges.append({
            'Statute': statute.get_text(strip=True) if statute is not None else '',
            'Degree': joined_text(case_info, 'chgHeadDeg'),
            'Desc': joined_text(case_info, 'chgHeadDscr'),
        })

    docket['Charges'] = charges
    return docket


# Run the scrape only when executed as a script, so importing this module
# does not launch a browser.
if __name__ == '__main__':
    miami('09/10/2022')

Can anybody tell me why window.value raises an error?

**Larz60+** · Sep-22-2022, 05:26 PM

Is what you call the 'anchor button' actually the 'Click Here' button?
If so, you need Selenium to wait for the CSS selector '#id3e' or, as an alternative, the XPath '//*[@id="id3e"]'.
To do so, you will need to use Selenium, not BeautifulSoup, or at least load the page with Selenium before using BeautifulSoup.
The link is embedded in a JavaScript script, so not visible on front page.

Possibly Related Threads…
Thread		Author	Replies	Views	Last Post
	Managing recursive tasks in celery.	vamix	1	1,419	Sep-04-2024, 06:36 PM Last Post: deanhystad
	Multiprocessing Managing Help	m7md_hka	0	834	Aug-22-2024, 09:57 AM Last Post: m7md_hka
	Managing Objects	JoeDainton123	1	2,673	May-15-2021, 03:18 PM Last Post: Yoriz
	Managing dependencies with pipenv	t4keheart	6	6,697	Aug-05-2020, 12:39 AM Last Post: t4keheart
	managing command codes for external controller box	Oolongtea	0	2,774	Sep-19-2019, 08:32 AM Last Post: Oolongtea
	Python what should be name of the module for managing data of users ?	harun2525	3	4,898	Dec-06-2017, 06:11 PM Last Post: nilamo
	managing modules/scripts dynamically	hbknjr	2	4,244	Oct-06-2017, 05:07 PM Last Post: hbknjr

managing new windows

User Panel Messages

Announcements