Sep-24-2018, 06:02 PM
hi,
I have some HTML links, and I want to find some particular text and the text that follows it. I am using regex, but I am receiving lots of empty lists.
These are links:
https://www.99acres.com/mailers/mmm_html...7-558.html https://www.99acres.com/mailers/mmm_html...-2016.html https://www.99acres.com/mailers/mmm_html...7-553.html
The text I am looking for is: "Area Range:" and the text after it; "Possession:" and the text after it (for example, "Possession: 2019"); "Price:" and the text after it.
Below is my code:
I have some HTML links, and I want to find some particular text and the text that follows it. I am using regex, but I am receiving lots of empty lists.
These are links:
https://www.99acres.com/mailers/mmm_html...7-558.html https://www.99acres.com/mailers/mmm_html...-2016.html https://www.99acres.com/mailers/mmm_html...7-553.html
The text I am looking for is: "Area Range:" and the text after it; "Possession:" and the text after it (for example, "Possession: 2019"); "Price:" and the text after it.
Below is my code:
import requests
from bs4 import BeautifulSoup
import csv
import json
import itertools
import re
# Module-level containers created once at import time.
file = dict()        # named in getresults()'s ``global`` statement, otherwise unused in the visible code
final_data = list()  # filled by getresults(), then handed to readfile() by main()
final = list()       # not referenced by any function in the visible code
textdata = list()    # not referenced by any function in the visible code
def readfile(alldata, filename):
    """Write the rows in ``alldata`` to ``./<filename>`` as comma-separated CSV.

    Despite its name, this function only writes; it never reads a file.
    An existing file of the same name is overwritten.

    Args:
        alldata: Iterable of rows; each row is an iterable of cell values.
        filename: Output file name, resolved relative to the current
            working directory.
    """
    # newline="" lets the csv module control line endings itself; without it
    # every row is followed by a blank line on Windows. The explicit UTF-8
    # encoding keeps non-ASCII scraped text from depending on the platform's
    # default encoding.
    with open("./" + filename, "w", newline="", encoding="utf-8") as handle:
        csv.writer(handle, delimiter=",").writerows(alldata)
def parsedata(url, values, timeout=30):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        values: Query-string parameters, forwarded to ``requests.get`` as
            ``params`` (the same slot the original positional call used).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single unresponsive host would block the scrape
            indefinitely.

    Returns:
        The decoded response body (``response.text``).

    Raises:
        requests.exceptions.RequestException: On connection failure or
            timeout.
    """
    response = requests.get(url, params=values, timeout=timeout)
    return response.text
def getresults():
    """Scrape the "Possession:" value from every page listed in Mailers.csv.

    Mailers.csv is read with its first row skipped as a header; the second
    column of each remaining row is used as the page URL. Each page is
    downloaded with ``parsedata`` and the text of every ``<td>`` element is
    searched for "Possession:" followed by a value.

    Only cells that actually contain a match are recorded. Appending the
    result for every cell regardless is what previously filled the output
    with empty lists.

    Returns:
        The module-level ``final_data`` list. Each entry appended here is
        ``[matches, link]``, where ``matches`` is a non-empty list of
        ``(label, separator, value)`` tuples and ``link`` is the page URL.
    """
    global final_data

    # Compiled once, outside the loops. Changes from the original
    # r'(Possession:)(.)(.+)':
    #   * \s* instead of a mandatory single character, so "Possession:2019"
    #     no longer loses its first digit to the separator group, and a value
    #     placed on the following line is still found;
    #   * \S.* so that the value starts at real text rather than at padding.
    # The three-group shape is kept so each match is still a 3-tuple.
    possession_re = re.compile(r'(Possession:)(\s*)(\S.*)')

    with open("Mailers.csv", "r", newline="") as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row; tolerate an empty file
        for row in reader:
            if len(row) < 2:
                # Blank or truncated line: there is no URL column to read.
                continue
            link = row[1]
            html = parsedata(link, {})
            soup = BeautifulSoup(html, "html.parser")
            # NOTE(review): if the pages nest tables, an outer <td> repeats
            # the text of its inner cells, so the same value may be recorded
            # more than once — confirm against the real markup.
            for cell in soup.find_all("td"):
                matches = possession_re.findall(cell.text)
                if matches:
                    final_data.append([matches, link])

    print(final_data)
    return final_data
def main():
    """Run getresults() to fill ``final_data``, then write it to Data.csv."""
    getresults()
    readfile(final_data, "Data.csv")


# Guarded so that importing this module does not start the scrape; running
# the file as a script behaves exactly as before.
if __name__ == "__main__":
    main()
