Feb-03-2020, 06:27 AM
Hi,
I am new to Python and doing a web scraping project and trying to web scrape the data from restaurant aggregator site (https://www.zomato.com/mumbai) using Jupyter notebook and want to scrape different types of restaurants from each neighborhood in the city but I am getting IndexError: list index out of range -->buttons[0].click().
Below is my code.
Thank you all in advance.
I am new to Python and doing a web scraping project and trying to web scrape the data from restaurant aggregator site (https://www.zomato.com/mumbai) using Jupyter notebook and want to scrape different types of restaurants from each neighborhood in the city but I am getting IndexError: list index out of range -->buttons[0].click().
Below is my code.
import pandas as pd
!pip install -U selenium
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import time
chrome_path = r'C:\Users\Klsingh\Desktop\chromedriver.exe'
# Url for each neighborhood
url = 'https://www.zomato.com/mumbai/lower-parel-restaurants'
# neighborhood
city = "lower_parel_"
wd = webdriver.Chrome(executable_path=r'C:\Users\Klsingh\Desktop\chromedriver.exe')
wd.get(url)
rest_types = wd.find_element_by_xpath('//div[@class="search-filter-label pb5"]')
rest_types = rest_types.text.split("\n")
print(rest_types)
wd.quit()
# This function returns all the buttons for navigating each category
def rest_types_buttons():
buttons = wd.find_elements_by_xpath('//span[@class="zred"]')
return buttons
# This function returns the link, name and address of restaurant for each page
def name_link_add():
rest_link = []
rest_name = []
restaurants = wd.find_elements_by_xpath('//a[@class="result-title hover_feedback zred bold ln24 fontsize0 "]')
for name in restaurants:
rest_link.append(name.get_attribute('href'))
rest_name.append(name.text)
restaurants_address = wd.find_elements_by_xpath('//div[@class="col-m-16 search-result-address grey-text nowrap ln22"]')
rest_address = []
for rest_add in restaurants_address:
rest_address.append(rest_add.text)
return rest_link, rest_name, rest_address
# This function returns the all the data from an individual category (all pages combined)
def get_data_rest_type(rest_type):
try:
prev_link,prev_name,prev_add = None, None, None
rest_link, rest_name, rest_address = name_link_add()
link = []
name = []
address = []
while(prev_link != rest_link):
prev_link,prev_name,prev_add = rest_link, rest_name, rest_address
link = link + prev_link
name = name + prev_name
address = address + prev_add
next_page_button = wd.find_element_by_xpath('//i[@class="right angle icon"]')
next_page_button.click()
wd.switch_to_window(wd.window_handles[0])
rest_link, rest_name, rest_address = name_link_add()
# Below two if conditions are for debugging
if((len(rest_address) == len(rest_link) == len(rest_name)) == False):
print("need to see, name link address mismatch")
break
if(len(rest_name) == 0):
print("Empty found")
except:
print("unknown error")
return link, name, address
# For each of the category
type_ = ("_".join(rest_types[0].lower().split(' ')))
print(type_)
# Go inside the category by clicking on the button on the main page
wd = webdriver.Chrome(chrome_path)
wd.get(url)
wd.switch_to_window(wd.window_handles[0])
buttons = rest_types_buttons()
buttons[0].click()
wd.switch_to_window(wd.window_handles[0])This portion of the code Quote:# Go inside the category by clicking on the button on the main pageis giving me Index error as mentioned below.
Error:C:\Users\Klsingh\Anaconda3\lib\site-packages\ipykernel_launcher.py:4: DeprecationWarning: use driver.switch_to.window instead
after removing the cwd from sys.path.
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-12-422a6bba8955> in <module>
4 wd.switch_to_window(wd.window_handles[0])
5 button = rest_types_button()
----> 6 button[0].click()
7 driver.switch_to_window(wd.window_handles[0])
IndexError: list index out of rangePlease help. Thank you all in advance.
