Oct-16-2019, 09:38 AM
Hello everyone, I am an infant at web scraping I am trying to get some info from a website and I am at my wits end. Some assistance would really help me out big time.
import requests
from lxml import html
import csv
import pandas as pd
#visit the webpage and access the web contents
r = requests.get('https://www.basketball-reference.com/boxscores/201810160BOS.html')
data = html.fromstring(r.text)
#xpath: collect the specific data you're looking for
player = data.xpath("//tbody/tr/th[@data-stat='player']/a[@href]/text()")
mp = data.xpath("//tbody/tr/td[@data-stat='mp']/a[@href]/text()")
fg = data.xpath("//tbody/tr/td[@data-stat='fg']/a[@href]/text()")
fga = data.xpath("//tbody/tr/td[@data-stat='fga']/a[@href]/text()")
fg_pct = data.xpath("//tbody/tr/td[@data-stat='fg_pct']")
fg_pct = [i.text_content() for i in fg_pct]
fg3 = data.xpath("//tbody/tr/td[@data-stat='fg3']/a[@href]/text()")
fg3a = data.xpath("//tbody/tr/td[@data-stat='fg3a']/a[@href]/text()")
fg3_pct = data.xpath("//tbody/tr/td[@data-stat='fg3_pct']")
fg3_pct = [i.text_content() for i in fg3_pct]
ft = data.xpath("//tbody/tr/td[@data-stat='ft']/a[@href]/text()")
fta = data.xpath("//tbody/tr/td[@data-stat='fta']/a[@href]/text()")
ft_pct = data.xpath("//tbody/tr/td[@data-stat='ft_pct']")
ft_pct = [i.text_content() for i in ft_pct]
orb = data.xpath("//tbody/tr/td[@data-stat='orb']/a[@href]/text()")
drb = data.xpath("//tbody/tr/td[@data-stat='drb']/a[@href]/text()")
trb = data.xpath("//tbody/tr/td[@data-stat='trb']/a[@href]/text()")
ast = data.xpath("//tbody/tr/td[@data-stat='ast']/a[@href]/text()")
stl = data.xpath("//tbody/tr/td[@data-stat='stl']/a[@href]/text()")
blk = data.xpath("//tbody/tr/td[@data-stat='blk']/a[@href]/text()")
tov = data.xpath("//tbody/tr/td[@data-stat='tov']/a[@href]/text()")
pf = data.xpath("//tbody/tr/td[@data-stat='pf']/a[@href]/text()")
pts = data.xpath("//tbody/tr/td[@data-stat='pts']/a[@href]/text()")
plus_minus = data.xpath("//tbody/tr/td[@data-stat='plus_minus']/a[@href]/text()")
nbaboxstats = zip(player,
mp,
fg,
fga,
fg_pct,
fg3,
fg3a,
fg3_pct,
ft,
fta,
ft_pct,
orb,
drb,
trb,
ast,
stl,
blk,
tov,
pf,
pts,
plus_minus)
#organizing our data structure to a pandas dataframe
df = pd.DataFrame(nbaboxstats)
df.rename(columns = {0:'starter',
1:'mp',
2:'fg',
3:'fga',
4:'fg_pct',
5:'fg3',
6:'fg3a',
7:'fg3_pct',
8:'ft',
9:'fta',
10:'ft_pct',
11:'orb',
12:'drb',
13:'trb',
14:'ast',
15:'stl',
16:'blk',
17:'tov',
18:'pf',
19:'pts',
20:'plus_minus'})
df.head()

![[Image: Jnssvn.png]](https://imagizer.imageshack.com/v2/xq90/921/Jnssvn.png)