Jun-12-2019, 09:40 AM
Hi, I am doing web development using Flask and Python. However when i try to integrate the backend to the Flask, I am stuck at this def index(): return Google. It comes up in the backend coding program but I would like it to appear frontend on a website somehow. Appreciate any kind advice thanks.
@app.route('/')
@app.route('/index')
def index():
return Google
from bs4 import BeautifulSoup
import requests
import pandas as pd
from urllib.request import urlopen,urlparse, Request,HTTPError
import urllib
import re
import numpy as np
import csv
from http.client import BadStatusLine
import ssl
import json
#from googlesearch import search
class Google:
@classmethod
def search1(self, search):
url_list = [] #store all the extracted urls in a List
title_list = [] #store all the extracted titles in a List
description_list = [] #store all the extracted Description in a List
all_links = []
for start in range(0,10):
#page = requests.get('https://www.google.com/search?rlz=1C1CHBF_enSG851SG851&ei=Nib2XI6FEcmLvQS1xb-wBQ&q=site%3Alinkedin.com+inurl%3Ain+%7C+inurl%3Apub+%7C+inurl%3Aprofile+-inurl%3Adir+-inurl%3Atitle+-inurl%3Agroups+-inurl%3Acompany+-inurl%3Ajobs+-inurl%3Ajobs2+VP&oq=site%3Alinkedin.com+inurl%3Ain+%7C+inurl%3Apub+%7C+inurl%3Aprofile+-inurl%3Adir+-inurl%3Atitle+-inurl%3Agroups+-inurl%3Acompany+-inurl%3Ajobs+-inurl%3Ajobs2'+search+str(start*10), verify = False)
page = requests.get('http://www.google.com/search?q='+search+str(start*10), verify = False, timeout=5)
#page = requests.get('https://www.google.com/search?q='+search, verify = True)
soup = BeautifulSoup(page.content, "lxml")
#soup = BeautifulSoup(page.content)
for link in soup.find_all("a",href=re.compile("(?<=/url\?q=)(htt.*://.*)")): #original working code
a = (re.split(":(?=http)",link["href"].replace("/url?q=","")))
a = a[0].split("&")[0]
url_list.append(a)
#for tit in soup.find_all('title', attrs={'class':'r'}): #extract all Titles
#for tit in soup.find_all('title'):
#print(tit)
#title_list.append(tit)
#print(title_list)
#soup.find('h1').text
#company = soup.find('h1', {'class' : 'listing-name'})
titles = soup.find_all("title") #havent fix title "title"
#titles = soup.find("meta", attrs="title")
for title in titles:
print(title.contents)
title_list.append(title)
#for title in titles:
#title = ''
#if soup.title is not None:
# title = soup.title
#elif soup.find("h1") is not None:
# title = soup.find("h1")
#return title
#print(title.contents)
#title_list.append(title)
for descr in soup.find_all('div'): #extract all description
#abstract=soup.find("div").text.replace("\t", "").replace("\r", "").replace("\n", "")
#alldescr += descr
#description_list.append(descr)
#headers = map(lambda x:x[:-1], [[b.text for b in i.find_all('dt')] for i in soup.find_all('dl')][0])
#data = [[re.sub('\s{2,}', '', b.text) for b in i.find_all('dd')] for i in soup.find_all('dl')]
#print(data)
#final_data = [dict(zip(headers, i)) for i in data]
#print(final_data)
description_list.append(page.content)
#print(description_list)
record_list = [list(item) for item in list(zip(url_list, title_list, description_list))] #join all the lists
df = pd.DataFrame(data=record_list,columns=['URL','Title', 'Description'])
df.to_csv('result_url_topic_desc.csv', index=False, encoding="utf8")
with open('result_url_topic_desc.csv') as f:
reader = csv.DictReader(f)
rows = list(reader)
with open('result_url_topic_desc_JSON.json', 'w') as f:
json.dump(rows, f, sort_keys=False, indent=4, separators=(',', ': '))
user_input = input("Enter your search string : ")
Google.search1(user_input) # user search string
#Google.search1('cloud managed services') # user search string, it could be anything the user types
