hi guys,
i am learning scraping and i currently i ve stopped in point of doing pagination in script below:
what should be my next step?
I would appreciate any kind of help/tip!
i am learning scraping and i currently i ve stopped in point of doing pagination in script below:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
import urllib
headers = {
'Sec-Fetch-Mode': 'cors',
'Referer': 'https://www.pararius.com/apartments/amsterdam',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
'Content-Type': 'text/plain',
}
data = '{"tags":[{"sizes":[{"width":728,"height":90},{"width":970,"height":250}],"primary_size":{"width":728,"height":90},"ad_types":["banner"],"uuid":"5f5a2718d3aa6d","id":11247563,"allow_smaller_sizes":false,"use_pmt_rule":false,"prebid":true,"disable_psa":true},{"sizes":[{"width":728,"height":90},{"width":970,"height":250}],"primary_size":{"width":728,"height":90},"ad_types":["banner"],"uuid":"66526a063a1a8c","id":11247564,"allow_smaller_sizes":false,"use_pmt_rule":false,"prebid":true,"disable_psa":true}],"sdk":{"source":"pbjs","version":"2.19.0-pre"},"gdpr_consent":{"consent_string":"BOmDsv2OmDsv2BQABBENCN-AAAAmd7_______9______5uz_Ov_v_f__33e8__9v_l_7_-___u_-3zd4-_1vf99yfm1-7etr3tp_87ues2_Xur__59__3z3_9phPrsk89ryw","consent_required":true},"referrer_detection":{"rd_ref":"https%3A%2F%2Fwww.pararius.com%2Fapartments%2Famsterdam","rd_top":true,"rd_ifs":1,"rd_stk":"https%3A%2F%2Fwww.pararius.com%2Fapartments%2Famsterdam,https%3A%2F%2Fwww.pararius.com%2Fapartments%2Famsterdam"}}'
page_number = 2
page = 'https://www.pararius.com/apartments/amsterdam/page-' + str(page_number)
r = requests.get(page, headers=headers, data=data)
content = (r.text)
soup = BeautifulSoup(content, 'html.parser')
for section in soup.find_all(class_='property-list-item-container'):
dlink = section.find('a').get('href')
type = section.find('span', {'class': 'type'}).text
neighborhood = section.find('a').text.strip().split()[1]
size = section.find('li', {'class': 'surface'}).text.strip().split()[0]
bedrooms = section.find('li', {'class': 'surface'}).text.strip().split()[2]
furniture = section.find('li', {'class': 'surface'}).text.strip().split()[4]
if furniture == 'upholstered':
furniture = "Unfurnished"
elif furniture == 'furnished or upholstered':
furniture = "Furnished & Unfurnished"
availablefrom = size = section.find('li', {'class': 'surface'}).text.strip().split()[6]
price = section.find('p', {'class': 'price '}).text.strip().split()[0]
curr = "EUR" if "€" in price else "other"
print(curr)
breakI have to add that it might happend that result from the site has let's say 50 pages, and it can happen that it has 30 only... how to deal with it?what should be my next step?
I would appreciate any kind of help/tip!
