Python Forum
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Error 403 Scraping website
#1
I am trying to scrape data from the Forebet website:
https://www.forebet.com/en/live-football-tips

from bs4 import BeautifulSoup

import requests

cookies = {
    '_ga': 'GA1.1.1322032426.1697570692',
    'FCCDCF': '%5Bnull%2Cnull%2Cnull%2C%5B%22CPzyhEAPzyhEAEsACBITDbCoAP_AAEPAAAIwINJB7D7FbSFCwH57aLsAMAhXRsCAQqQAAASBAmABQAKQIAQCkkAYFESgBAACAAAAICZBIQIMCAgACUABQAAAAAEEAAAABAAIIAAAgAEAAAAIAAACAIAAEAAIAAAAEAAAmQhAAIIACAAAhAAAIAAAAAAAAAAAAgCAAAAAAAAAAAAAAAAAAQQaQD2F2K2kKFgPi2QWYAQBCijYEAhUAAAAkCBIAAgAUgQAgFIIAwAIFAAAAAAAAAQEgCQAAQABAAAIACgAAAAAAIAAAAAAAQQAAAAAIAAAAAAAAEAQAAAAAQAAAAIAABEhCAAQQAEAAAAAAAQAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAgAA%22%2C%221~2072.70.89.93.108.122.149.196.2253.2299.259.2357.311.317.323.2373.338.358.2415.415.2506.2526.482.486.494.495.2568.2571.2575.540.574.2624.609.2677.2779.827.864.981.1048.1051.1095.1097.1201.1205.1276.1301.1365.1415.1449.1570.1577.1651.1716.1735.1753.1765.1834.1870.1878.1889.1958%22%2C%22503DD04E-F3E9-4EFF-A1F2-CEAA5E025CB3%22%5D%2Cnull%2Cnull%2C%5B%5D%5D',
    'favgsfr': '',
    'jfcookie[lang]': 'en',
    'SRV': 'forebet1-7',
    'PHPSESSID': '176f15b91c648176dc0cafd0b8412a56',
    '__gpi': 'UID=00000c9bbd72df97:T=1697638730:RT=1697639114:S=ALNI_MZ98G6kcvSXHPivthgdMAIT30mK6Q',
    '__gads': 'ID=dd238352ed90c58d:T=1697638730:RT=1697639114:S=ALNI_Ma6O2eNHVvRgFzwd0gCbWD_JO-vXQ',
    '_ga_7LGKT05LES': 'GS1.1.1697638100.3.1.1697639116.0.0.0',
    'FCNEC': '%5B%5B%22AKsRol-2dRZj6DzhOMTHCc1M-wgGbQDLWbi4JDKm1GETmc1LZJ3_pP6lZgEWoy-pgp4EtjcF23Fc43KhI3C5U03KqucXWsqnhHZAKs5zwwu_xxfKQNcmE-BDmCV7uuDXrUAYODsKcuDfyFdoQtQFdqwsXh_FSvtMsQ%3D%3D%22%5D%2Cnull%2C%5B%5B5%2C%2255%22%5D%5D%5D',
}

headers = {
    'authority': 'www.forebet.com',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'accept-language': 'it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7,de;q=0.6',
    'cache-control': 'max-age=0',
    # 'cookie': '_ga=GA1.1.1322032426.1697570692; FCCDCF=%5Bnull%2Cnull%2Cnull%2C%5B%22CPzyhEAPzyhEAEsACBITDbCoAP_AAEPAAAIwINJB7D7FbSFCwH57aLsAMAhXRsCAQqQAAASBAmABQAKQIAQCkkAYFESgBAACAAAAICZBIQIMCAgACUABQAAAAAEEAAAABAAIIAAAgAEAAAAIAAACAIAAEAAIAAAAEAAAmQhAAIIACAAAhAAAIAAAAAAAAAAAAgCAAAAAAAAAAAAAAAAAAQQaQD2F2K2kKFgPi2QWYAQBCijYEAhUAAAAkCBIAAgAUgQAgFIIAwAIFAAAAAAAAAQEgCQAAQABAAAIACgAAAAAAIAAAAAAAQQAAAAAIAAAAAAAAEAQAAAAAQAAAAIAABEhCAAQQAEAAAAAAAQAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAgAA%22%2C%221~2072.70.89.93.108.122.149.196.2253.2299.259.2357.311.317.323.2373.338.358.2415.415.2506.2526.482.486.494.495.2568.2571.2575.540.574.2624.609.2677.2779.827.864.981.1048.1051.1095.1097.1201.1205.1276.1301.1365.1415.1449.1570.1577.1651.1716.1735.1753.1765.1834.1870.1878.1889.1958%22%2C%22503DD04E-F3E9-4EFF-A1F2-CEAA5E025CB3%22%5D%2Cnull%2Cnull%2C%5B%5D%5D; favgsfr=; jfcookie[lang]=en; SRV=forebet1-7; PHPSESSID=176f15b91c648176dc0cafd0b8412a56; __gpi=UID=00000c9bbd72df97:T=1697638730:RT=1697639114:S=ALNI_MZ98G6kcvSXHPivthgdMAIT30mK6Q; __gads=ID=dd238352ed90c58d:T=1697638730:RT=1697639114:S=ALNI_Ma6O2eNHVvRgFzwd0gCbWD_JO-vXQ; _ga_7LGKT05LES=GS1.1.1697638100.3.1.1697639116.0.0.0; FCNEC=%5B%5B%22AKsRol-2dRZj6DzhOMTHCc1M-wgGbQDLWbi4JDKm1GETmc1LZJ3_pP6lZgEWoy-pgp4EtjcF23Fc43KhI3C5U03KqucXWsqnhHZAKs5zwwu_xxfKQNcmE-BDmCV7uuDXrUAYODsKcuDfyFdoQtQFdqwsXh_FSvtMsQ%3D%3D%22%5D%2Cnull%2C%5B%5B5%2C%2255%22%5D%5D%5D',
    'referer': 'https://www.forebet.com/en/live-football-tips',
    'sec-ch-ua': '"Chromium";v="118", "Google Chrome";v="118", "Not=A?Brand";v="99"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
}

# Request the live tips page, sending the browser-captured headers and cookies.
response = requests.get(
    'https://www.forebet.com/en/live-football-tips',
    headers=headers,
    cookies=cookies,
)
# Printing the Response object shows only its status code, e.g. <Response [403]>.
print(response)
# Earlier attempt without cookies or headers:
# source = requests.get('https://www.forebet.com/en/live-football-tips').text
Error:
<Response [403]>
I also tried requests.get without curl, and the page source contained the error "your IP is blocked".
Reply
#2
Do it like this.
from bs4 import BeautifulSoup
import requests

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Language': 'en-US,en;q=0.9',
    'DNT': '1',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1'
}

# Fetch the live tips page. The timeout stops the script from hanging forever
# if the server never answers.
response = requests.get(
    'https://www.forebet.com/en/live-football-tips',
    headers=headers,
    timeout=10,
)
# Fail loudly on a 4xx/5xx answer (such as a 403 block page) instead of
# silently parsing an error page.
response.raise_for_status()

soup = BeautifulSoup(response.content, 'lxml')
first = soup.select_one('div.tnms > div > a')
if first is None:
    # select_one() returns None when nothing matches, so calling .text on it
    # would raise AttributeError.
    print('No match link found - the page layout may have changed')
else:
    print(first.text.strip())
Output:
SM Sanga Balende Simba(DRC) 18/10/2023 15:30
cartonics likes this post
Reply
#3
Thanks that's helped a lot.

I am trying to extract other data; some of the fields work:

from bs4 import BeautifulSoup
import requests
 
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Language': 'en-US,en;q=0.9',
    'DNT': '1',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1'
}
 
# Download the 1x2 predictions page and parse it with the lxml backend.
response = requests.get(
    'https://www.forebet.com/en/football-predictions/predictions-1x2',
    headers=headers,
)
soup = BeautifulSoup(response.content, 'lxml')

# Each <div class="rcnt"> is handled as one row of the predictions table.
rows = soup.find_all('div', class_='rcnt')
for r in rows:
    hometeam = r.find('span', class_='homeTeam').get_text()
    awayTeam = r.find('span', class_='awayTeam').get_text()
    pscore = r.find('span', class_='forepr').get_text()
    # The location is read from the "content" attribute of the <meta> tag
    # inside the itemprop="location" span.
    location = r.find('span', attrs={'itemprop': 'location'}).meta['content']
    # Stored as the element itself (no .text), so a row without this div
    # gives None here instead of raising. Weather, correct score text and
    # probability are not extracted yet.
    correctscore = r.select_one('div.ex_sc.tabonly')

    print(' - '.join((hometeam, awayTeam, location, pscore)))
while I am failing with these:
#weather = r.find('div', class_="prwth tabonly")
#correctscore = r.find('div', class_='ex_sc tabonly').text
correctscore = r.select_one('div.ex_sc.tabonly')
#Probability = r.find('span', class_='fprc').text
I need the probability for each of 1, X and 2, the average goals, and the correct score.

Error:
correctscore = r.find('div', class_='ex_sc tabonly').text AttributeError: 'NoneType' object has no attribute 'text'
I cannot understand why, because it is the same kind of lookup, except that the value is a number.
Reply
#4
There are two blank rows at the end; for these, find() returns None, so calling .text on the result raises AttributeError.
Here are a couple of ways to fix it:
for r in rows:
    # Look the element up once and reuse the result instead of searching twice.
    cell = r.find('div', class_='ex_sc tabonly')
    if cell is None:
        # find() returns None for rows without a correct-score cell
        # (e.g. the blank rows at the end); skip them.
        continue
    correctscore = cell.text
    print(correctscore)
for r in rows:
    # EAFP variant: attempt the lookup and let the missing-cell case surface
    # as AttributeError (None has no .text).
    try:
        correctscore = r.find('div', class_='ex_sc tabonly').text
    except AttributeError:
        # Row without a correct-score cell: nothing to print.
        continue
    print(correctscore)
cartonics likes this post
Reply
#5
Thanks, that worked, but sometimes it prints a value twice, and it is also the value of the next row.

using
# Print one summary line per row: teams, location, predicted score,
# correct score, average score and probability.
for r in rows:
    # These three lookups are unguarded: a row without the span raises
    # AttributeError here, because find() returns None.
    hometeam = r.find('span',class_= 'homeTeam').text
    awayTeam = r.find('span',class_= 'awayTeam').text
    pscore = r.find('span', class_='forepr').text
    try:
        correctscore = r.find('div', class_='ex_sc tabonly').text
        avgscore = r.find('div', class_='avg_sc tabonly').text
        probability = r.find('span', class_='fprc').text
        # Prints the average score on its own line, before the summary line below.
        print(avgscore)
    except AttributeError:
        # NOTE: when any lookup above fails, the names assigned on and after
        # the failing line are not updated. They keep the value from an
        # earlier row, or stay unbound if no earlier row succeeded, and the
        # final print below still runs.
        pass
    
    # The location is read from the "content" attribute of the <meta> tag
    # inside the itemprop="location" span.
    location = r.find('span', {'itemprop': 'location'}).meta['content']
    #weather = r.find('div', class_="prwth tabonly")
    
    print(hometeam + ' - ' + awayTeam + ' - ' + location + '  ' + pscore + ' / ' + correctscore + ' ' + avgscore + ' ' + probability)
for example here:
Quote:CA Douglas Haig - El Linqueño - Miguel Morales - 1 , 2 - 1
1 - 2
Club Cipolletti - Club Olimpo - Estadio La Visera de Cemento - 2 , 1 - 2
1 - 0
Sol de América - 9 de Julio Rafaela - Estadio Club Sol de América Formosa - 1 , 1 - 0
2 - 1
Unión Sunchales - Def. Pronunciamiento - Estadio Unión de Sunchales - 1 , 2 - 1
3 - 0

while with this:
Probability = r.find('span', class_='fprc').text
Error:
NameError: name 'probability' is not defined
Reply
#6
When you add new values, try to test only that single value in the loop;
you will then see that r.find('span', class_='fprc') only returns None.
It should be r.find('span', class_='fpr').text.
Also, use an f-string.
# Print one summary line per row. Optional cells that are missing from a row
# are shown as 'n/a' instead of silently reusing the previous row's value.
for r in rows:
    hometeam = r.find('span', class_='homeTeam').text
    awayTeam = r.find('span', class_='awayTeam').text
    pscore = r.find('span', class_='forepr').text
    location = r.find('span', {'itemprop': 'location'}).meta['content']

    # find() returns None when a row has no such element. Each cell is
    # resolved on its own, so one missing cell cannot leave the other names
    # stale from an earlier iteration or unbound on the first one.
    correct_tag = r.find('div', class_='ex_sc tabonly')
    avg_tag = r.find('div', class_='avg_sc tabonly')
    prob_tag = r.find('span', class_='fpr')

    correctscore = correct_tag.text if correct_tag is not None else 'n/a'
    avgscore = avg_tag.text if avg_tag is not None else 'n/a'
    probability = prob_tag.text if prob_tag is not None else 'n/a'

    print(f"{hometeam} - {awayTeam} - {location}  {pscore} / {correctscore} {avgscore} {probability}")
Reply
#7
(Oct-19-2023, 10:56 AM)snippsat Wrote: As it should be r.find('span', class_='fpr').text.
Also use f-string.
        probability = r.find('span', class_='fpr').text
    except AttributeError:
        pass
    print(f"{hometeam} - {awayTeam} - {location}  {pscore} / {correctscore} {avgscore} {probability}")

The problem is that I need all of them: the value for 1, the value for X and the value for 2, not only the fpr class, which is the "highest" one.
<div class="fprc"><span>29</span><span>21</span><span class="fpr">50</span></div>
Reply
#8
(Oct-19-2023, 11:29 AM)cartonics Wrote: the problem was that i need all of them: the valure for 1 value for x and value for 2 and not only fpr class that is the "highest"
Like this. It's a little tricky because of the way the HTML is structured, so I use a regex here to get the numbers.
from bs4 import BeautifulSoup
import requests
import re

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Language': 'en-US,en;q=0.9',
    'DNT': '1',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1'
}

response = requests.get('https://www.forebet.com/en/live-football-tips', headers=headers)
soup = BeautifulSoup(response.content, 'lxml')
rows = soup.find_all('div', {'class': 'rcnt'})

# Print the three probabilities (1, X, 2) found in each row's .fprc element.
for r in rows:
    fprc = r.select_one('.fprc')
    if fprc is None:
        # Row without a probability cell: skip it explicitly rather than
        # relying on a swallowed AttributeError.
        continue
    # The numbers sit in sibling <span> tags, so fprc.text would glue them
    # together; pull the digit runs out of the element's markup instead.
    pro = re.findall(r'\d+', fprc.decode())
    if len(pro) != 3:
        # Unpacking anything other than exactly three values would raise
        # ValueError; ignore cells that do not hold the 1/X/2 triple.
        continue
    p1, x, p2 = pro
    print(p1, x, p2)
Output:
22 35 43 61 30 9 15 54 32 20 35 45 28 30 43 25 43 32 11 46 44 64 20 16 29 51 20
cartonics likes this post
Reply
#9
in this page:
https://www.forebet.com/en/live-football-tips

for r in rows:
   try:
        l_min = r.find('span',class_= 'l_min').text
        l_score = r.select_one('div.lscr_td lResTdSmall > div > b').text
        l_odd = r.find('span',class_= 'lscrsp lcurodd')
        print(f"{l_score}{l_min}{l_odd}")
No errors but no values! :(
Reply
#10
(Oct-20-2023, 07:12 AM)cartonics Wrote: No errors but no values! :(
Do not test new values inside a try/except like this, because then you will not see new errors, or notice that a lookup just returns None (as is the case here).
There are several problems here.
lscrsp can be found, but not lscr_td (because it is not inside rows = soup.find_all('div', {'class': 'rcnt'}) at all).
>>> rows[0].select_one('.lscrsp')
<span class="lscrsp" onclick="return getHodd(this,1993942);">1.25</span>
>>> rows[1].select_one('.lscrsp')
<span class="lscrsp" onclick="return getHodd(this,1955491);">3.50</span>
For the other value, you have to start higher up in the HTML source.
rows = soup.select_one('.contentmiddle')
>>> rows.select('.lscr_td')[1]
<div class="lscr_td lResTdSmall">
<span class="lscrsp lscrlv" onclick="return getEvents(1993942,this,false)"><b class="l_scr">3 - 2</b></span><span class="ht_scr">(1 - 2)</span></div>
>>> rows.select('.lscr_td')[2]
<div class="lscr_td lResTdSmall">
<span class="lscrsp lscrlv" onclick="return getEvents(1994386,this,false)"><b class="l_scr">1 - 0</b></span><span class="ht_scr">(1 - 0)</span></div>
>>> rows.select('.lscr_td')[2].text
'\n1 - 0(1 - 0)'
from bs4 import BeautifulSoup
import requests
import re

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Language': 'en-US,en;q=0.9',
    'DNT': '1',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1'
}

# Fetch and parse the live tips page.
response = requests.get(
    'https://www.forebet.com/en/live-football-tips',
    headers=headers,
)
soup = BeautifulSoup(response.content, 'lxml')

# Search from the first .contentmiddle element instead of the div.rcnt rows
# (previously: rows = soup.find_all('div', {'class': 'rcnt'})).
rows = soup.select_one('.contentmiddle')
for r in rows.select('.l_scr'):
    # One line per .l_scr element found under the container.
    print(r.get_text())
Output:
1 - 0 0 - 1 0 - 0 0 - 1 1 - 3 ......
These sports betting tables can be tricky if you are new to this, so try to test single values before doing things in a loop.
cartonics likes this post
Reply


Possibly Related Threads…
Thread Author Replies Views Last Post
  Scraping Data from Website melkaray 3 2,472 Sep-22-2023, 12:41 PM
Last Post: melkaray

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020