Apr-21-2021, 04:38 PM
Hi,
I am writing this code for scraping off of a website into an excel spreadsheet, i am having an issue where the website doesn't use a list of the same length and so it means that I get an attribute error for the find_next function, wondering if anyone knows of a workaround.
My coding is a bit of mess
I am writing this code for scraping off of a website into an excel spreadsheet, i am having an issue where the website doesn't use a list of the same length and so it means that I get an attribute error for the find_next function, wondering if anyone knows of a workaround.
My coding is a bit of mess
import requests
from bs4 import BeautifulSoup
import pandas as pd
page_number = 1
url = 'https://www.autotrader.co.uk/car-search?advertClassification=standard&postcode=la94py&onesearchad=Used&onesearchad=Nearly%20New&onesearchad=New&advertising-location=at_cars&is-quick-search=TRUE&include-delivery-option=on&page='
agent = {"User-Agent":'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'}
car_spec = []
car_age = []
car_style = []
car_mileage =[]
car_engine_size =[]
car_BHP = []
price_lst = []
car_detail = []
car_gearbox_style = []
car_fuel_type = []
car_next=[]
while page_number < 100:
all_car = []
page_number += 1
pg_no = str(page_number)
print(page_number)
url2= url+pg_no
response = requests.get(url2, headers=agent)
soup = BeautifulSoup(response.content, 'lxml')
car_elements = soup.find_all('div', class_='product-card-content__car-info')
for tag in car_elements:
price = tag.find('div', class_='product-card-pricing__price')
price_lst.append(price.text.strip())
for tag in car_elements:
car = tag.find('h3', class_='product-card-details__title')
car_detail.append(car.text.strip())
for tag in car_elements:
car = tag.find('li', class_='atc-type-picanto--medium')
if car is None:
car='0'
car_age.append(car)
else:
car_age.append(car.text)
car = tag.find('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium')
if car is None:
car='0'
car_style.append(car)
else:
car_style.append(car.text)
car = tag.find('li', class_ ='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium')
if car is None:
car='0'
car_mileage.append(car)
else:
car_mileage.append(car.text)
car = tag.find('li',class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium')
if car is None:
car='0'
car_engine_size.append(car)
else:
car_engine_size.append(car.text)
car= tag.find('li', class_ ='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium')
if car is None:
car='0'
car_gearbox_style.append(car)
else:
car_gearbox_style.append(car.text)
car = tag.find('li', class_ ='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium').find_next('li', class_='atc-type-picanto--medium')
if car is None:
car='0'
car_fuel_type.append(car)
else:
car_fuel_type.append(car.text)
all_car = zip(car_detail, price_lst, car_age,car_style,car_mileage,car_engine_size,car_gearbox_style,car_fuel_type)
# Create the pandas DataFrame
df = pd.DataFrame(all_car)
df.to_excel("car_info.xlsx", index=False, sheet_name='car_info')
