I need some help with this code. It always returns the same (first) page over and over again instead of advancing to the next page, and I can't figure out why. I suspect the problem is in the loop.
import csv
import time
from bs4 import BeautifulSoup
def fetch_data(url, timeout=30):
    """Fetch ``url`` with HTTP GET and return its decoded JSON body.

    Args:
        url: Full URL to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script forever.

    Returns:
        The parsed JSON payload, or ``None`` when the request fails, the
        server answers with an HTTP error status, or the body is not valid
        JSON. The error is printed in every failure case.
    """
    # NOTE(review): this function relies on the ``requests`` package being
    # imported at module level; no such import is visible in this excerpt.
    try:
        # NOTE(review): verify=False disables TLS certificate verification.
        # Kept because it was explicitly requested, but it exposes the
        # request to man-in-the-middle attacks -- confirm it is still needed.
        response = requests.get(url, verify=False, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as err:
        print(f"HTTP error occurred: {err}")
    except Exception as err:
        # Covers connection errors, timeouts and JSON decoding failures.
        print(f"Other error occurred: {err}")
    return None
def process_all_pages(base_url, per_page=25, delay=1.0, fetch=None):
    """Collect the ``data`` items of every page of a paginated JSON endpoint.

    Pages are requested as ``{base_url}?page=N&per_page=M`` starting at
    page 1. Fetching stops when:

    * the fetcher returns nothing (error or empty response),
    * the response has no list under the ``'data'`` key,
    * a page holds fewer than ``per_page`` items (last page), or
    * a page is identical to the previous one, which means the server is
      ignoring the ``page`` parameter. Without this check the loop would
      re-download the first page forever.

    Args:
        base_url: Endpoint URL without a query string.
        per_page: Number of items requested per page.
        delay: Seconds to sleep between requests (rate limiting).
        fetch: Callable taking a URL and returning the decoded JSON payload
            or ``None``. Defaults to the module-level ``fetch_data``.

    Returns:
        A list with the items of all distinct pages, in page order.
    """
    if fetch is None:
        fetch = fetch_data

    all_data = []
    previous_items = None
    page = 1
    while True:
        url = f"{base_url}?page={page}&per_page={per_page}"
        print(f"Fetching data from: {url}")  # Debug statement
        data = fetch(url)

        if not data:
            print(f"No data received for page {page}. Exiting loop.")
            break
        print(f"Received response for page {page}: {data}")  # Raw response

        items = data.get('data') if isinstance(data, dict) else None
        if not isinstance(items, list):
            print(f"Page {page}: No valid data or end of data reached")
            break

        num_items = len(items)
        print(f"Page {page}: Received {num_items} items")  # Debug statement

        # A non-empty page identical to the previous one means pagination is
        # not taking effect; stop instead of looping on the same page.
        if items and items == previous_items:
            print(
                f"Page {page}: Same items as page {page - 1}; the server "
                "appears to ignore the 'page' parameter. Stopping."
            )
            break

        all_data.extend(items)
        previous_items = items

        # Fewer items than requested marks the last page.
        if num_items < per_page:
            print(f"Page {page}: Last page reached with {num_items} items.")
            break

        page += 1
        time.sleep(delay)  # Adjust based on the rate limit
    return all_data
# Fetch every page from the API endpoint and export selected fields to CSV.
json_url = "https://samviewer.digile.be/nl/sam/ampps.json"
all_data = process_all_pages(json_url)

if all_data:
    headers = ['CTI-extended', 'Naam', 'CNK publiek', 'Prijs publiek', 'Prijs af-fabriek', 'Vergunninghouder (verdeler)']
    # Item keys, listed in the same order as the header columns above.
    field_keys = ('cti_ext', 'name', 'cnk_p', 'price_pub', 'price_exf', 'company')
    rows_written = 0

    with open('output.csv', mode='w', newline='', encoding='utf-8') as file:
        csv_writer = csv.writer(file)
        csv_writer.writerow(headers)
        for item in all_data:
            # NOTE(review): extract_text_from_html is not defined in this
            # excerpt; presumably it strips HTML tags from the value --
            # confirm against its definition.
            row = [extract_text_from_html(item.get(key, '')) for key in field_keys]
            # Write the row only if at least one field has data.
            if any(row):
                csv_writer.writerow(row)
                rows_written += 1

    # Report the rows actually written; empty rows are skipped above, so
    # this can be lower than len(all_data).
    print(f"Data successfully written to output.csv. Total rows: {rows_written}")
else:
    print("No data retrieved from the API.")
Larz60+ write Jul-23-2024, 08:54 AM:
Please post all code, output and errors (in its entirety) between their respective tags. Refer to the BBCode help topic on how to post. Use the "Preview Post" button to make sure the code is presented as you expect before hitting the "Post Reply/Thread" button.
Tags have been added for you this time. Please use BBCode tags on future posts.
Please post all code, output and errors (in its entirety) between their respective tags. Refer to the BBCode help topic on how to post. Use the "Preview Post" button to make sure the code is presented as you expect before hitting the "Post Reply/Thread" button.
Tags have been added for you this time. Please use BBCode tags on future posts.
