Hi. I've just started with Python and am trying to practice.
I parse a website and create a list of dictionaries, which works well; then I try to export it to an Excel file and get an error.
Full code
Answer
Fixed myself
I parse a website and create a list of dictionaries, which works well; then I try to export it to an Excel file and get an error.
Full code
import requests
from bs4 import BeautifulSoup as bs
import xlwt
# Search results page to scrape.
base_url = 'https://hh.ru/search/vacancy?search_period=3&area=1&text=python&page=0'

# HTTP request headers sent with the page request.
headers = {}
headers['accept'] = '*/*'
headers['user-agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.2 Safari/605.1.15'
def hh_parse(base_url, headers):
    """Scrape one vacancy search page and export the results to ``Export.xls``.

    Fetches ``base_url`` with the given ``headers``, collects the title,
    link, employer and snippet text of every vacancy card found on the page,
    prints the number of vacancies, and writes them to a sheet named
    "Export" with one vacancy per row. If the response status is not 200,
    prints ``'ERROR'`` and writes nothing.

    Args:
        base_url: URL of the search results page to fetch.
        headers: Mapping of HTTP headers sent with the request.

    Returns:
        None.
    """
    session = requests.Session()
    request = session.get(base_url, headers=headers)
    if request.status_code != 200:
        print('ERROR')
        return

    soup = bs(request.content, 'html.parser')
    jobs = []
    for div in soup.find_all('div', attrs={'data-qa': 'vacancy-serp__vacancy'}):
        # NOTE(review): find() returns None when a card lacks the element, so
        # the attribute access below raises in that case (same as before).
        # Look the title anchor up once; it supplies both the text and the link.
        title_link = div.find('a', attrs={'data-qa': 'vacancy-serp__vacancy-title'})
        company = div.find('a', attrs={'data-qa': 'vacancy-serp__vacancy-employer'}).text
        text1 = div.find('div', attrs={'data-qa': 'vacancy-serp__vacancy_snippet_responsibility'}).text
        text2 = div.find('div', attrs={'data-qa': 'vacancy-serp__vacancy_snippet_requirement'}).text
        jobs.append({
            'title': title_link.text,
            'href': title_link['href'],
            'company': company,
            'content': text1 + ' ' + text2,
        })
    print(len(jobs))

    wb = xlwt.Workbook()
    ws = wb.add_sheet("Export")
    columns = ('title', 'href', 'company', 'content')
    # Each job needs its own row: the old `next_string = 0 + 1` was always 1,
    # so the second job tried to overwrite row 1 and xlwt raised
    # "Attempt to overwrite cell". Rows and columns still start at index 1.
    for row, job in enumerate(jobs, start=1):
        for col, key in enumerate(columns, start=1):
            ws.write(row, col, job[key])
    # Save once, after all rows have been written.
    wb.save("Export.xls")
hh_parse(base_url, headers)
Answer
Output:
/Users/ak/Documents/Python/Practice/RusrekParse/mlsvenv/bin/python /Users/ak/Documents/Python/Practice/RusrekParse/rusrek_murr.py
16
Traceback (most recent call last):
File "/Users/ak/Documents/Python/Practice/RusrekParse/rusrek_murr.py", line 53, in <module>
hh_parse(base_url, headers)
File "/Users/ak/Documents/Python/Practice/RusrekParse/rusrek_murr.py", line 40, in hh_parse
ws.write(next_string, 1, i['title'])
File "/Users/ak/Documents/Python/Practice/RusrekParse/mlsvenv/lib/python3.7/site-packages/xlwt/Worksheet.py", line 1088, in write
self.row(r).write(c, label, style)
File "/Users/ak/Documents/Python/Practice/RusrekParse/mlsvenv/lib/python3.7/site-packages/xlwt/Row.py", line 235, in write
StrCell(self.__idx, col, style_index, self.__parent_wb.add_str(label))
File "/Users/ak/Documents/Python/Practice/RusrekParse/mlsvenv/lib/python3.7/site-packages/xlwt/Row.py", line 154, in insert_cell
raise Exception(msg)
Exception: Attempt to overwrite cell: sheetname='Export' rowx=1 colx=1
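Process finished with exit code 1
Fixed it myself: `next_string = 0 + 1` always evaluates to 1, so every job was written to row 1 and the second write tried to overwrite the same cell. The row index has to increase on each iteration (for example, with `enumerate`).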
