Please can you advise or help me with my code? Printing to the screen and writing the CSV both work, but I would also like a JSON output file
that lists all the links (/07-feb-2020/R1park-1400, /07-feb-2020/R2park-1530, /07-feb-2020/R3park-1600) prefixed with www.race.com/, so that I can then import it into another part of my program to run the next part of the scrape.
Thank you.
that lists all the links (/07-feb-2020/R1park-1400, /07-feb-2020/R2park-1530, /07-feb-2020/R3park-1600) prefixed with www.race.com/, so that I can then import it into another part of my program to run the next part of the scrape.
Thank you.
import requests
from bs4 import BeautifulSoup
import csv
import json
#working links local
# Raw string: keeps the Windows backslashes literal. Without the r-prefix the
# "\r" in "\raceing" is interpreted as a carriage return and the path is wrong.
HTML_PATH = r'D:\@users\python\raceing.html'
# Site root prepended to every relative href. The scheme is included so the
# stored URLs can be handed straight to requests.get() by the next stage.
# NOTE(review): the question only says "www.race.com/" - confirm http vs https.
BASE_URL = 'https://www.race.com'
CSV_PATH = 'course.csv'
JSON_PATH = 'race.json'
#example html doc
#
#href="/07-feb-2020/R1park-1400" class="winning">2:00</a></td><td
#
#href="/07-feb-2020/R2park-1530" class="winning">3:30</a></td><td
#
#href="/07-feb-2020/R3park-1600" class="winning">4:00</a></td><td
#


def extract_winning_links(markup):
    """Return the href of every ``<a class="winning">`` found in *markup*.

    *markup* is anything BeautifulSoup accepts (str, bytes or an open file).
    Links are returned in page order with duplicates and anchors lacking an
    href removed, so the output files are deterministic between runs.
    """
    soup = BeautifulSoup(markup, 'html.parser')
    hrefs = (anchor.get('href')
             for anchor in soup.find_all('a', {'class': 'winning'}))
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(href for href in hrefs if href))


def make_absolute_urls(hrefs, base_url=BASE_URL):
    """Return *hrefs* converted to absolute URLs rooted at *base_url*.

    Relative links such as ``/07-feb-2020/R1park-1400`` are joined to
    *base_url* with exactly one slash between them; links that already carry
    an ``http://`` or ``https://`` scheme are passed through unchanged.
    """
    root = base_url.rstrip('/')
    urls = []
    for href in hrefs:
        if href.startswith(('http://', 'https://')):
            urls.append(href)
        else:
            urls.append(root + '/' + href.lstrip('/'))
    return urls


def write_links_csv(links, path=CSV_PATH):
    """Write *links* to *path* as a one-column CSV under a ``Links`` header."""
    # newline='' lets the csv module control line endings (avoids blank
    # rows on Windows).
    with open(path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Links'])
        writer.writerows([link] for link in links)


def write_links_json(urls, path=JSON_PATH):
    """Write *urls* to *path* as a JSON array of strings.

    The next stage can load it back with ``json.load(open(path))``.
    """
    with open(path, 'w') as out_file:
        json.dump(urls, out_file, indent=6)


def main():
    """Parse the saved race page, print its links and export CSV and JSON."""
    # Binary mode lets BeautifulSoup detect the document encoding itself.
    with open(HTML_PATH, 'rb') as html_doc:
        web_links = extract_winning_links(html_doc)

    for link in web_links:
        print(link)

    # The CSV keeps the links as they appear in the page (relative).
    write_links_csv(web_links)
    #working the json file where the output must be stored
    # The JSON holds the complete list of absolute URLs, not just the last one.
    write_links_json(make_absolute_urls(web_links))


if __name__ == '__main__':
    main()
