I want to get the HTML of this site, https://www.forebet.com/en/football-predictions, after pressing the More[+] button enough times to load all games. Each time the More[+] button at the bottom of the page is pressed, the HTML changes and shows more football games. How do I request the page with all the football games loaded?
Python
x
32
32
1
from bs4 import BeautifulSoup
2
import requests
3
4
# Short league tags used to filter scraped games; only games whose tag is
# in this set are kept.
leagues = {
    "EPL",
    "UCL",
    "Es1",
    "De1",
    "Fr1",
    "Pt1",
    "It1",
    "UEL",
}
5
6
class ForeBet:
    """Scrape game predictions from the forebet.com football page."""

    def get_games_and_probs(self):
        """Return the games of the leagues listed in ``leagues``.

        Fetches the predictions page once and parses the rows present in
        that response. Each kept game is printed and returned as a string
        formatted as
        ``League|Date|Hour|Home Team|Away Team|Prob Home|Prob Tie|Prob Away``.

        Returns:
            list of str: one formatted line per matching game.
        """
        # NOTE(review): the URL is kept exactly as the original code had it;
        # confirm whether '/football-predictions' was intended.
        response = requests.get('https://www.forebet.com/en/football-prediction')
        soup = BeautifulSoup(response.text, 'html.parser')
        results = []

        # Game rows carry one of two class values; collect all rows of the
        # first kind followed by all rows of the second kind.
        rows = soup.find_all(class_='rcnt tr_0') + soup.find_all(class_='rcnt tr_1')

        for row in rows:
            tag = row.find(class_='shortTag')
            # A row without a league tag cannot match any league: skip it
            # instead of failing on ``None.text``.
            if tag is None or tag.text.strip() not in leagues:
                continue

            # The date cell text is split on spaces: first piece is used as
            # the date, second as the hour.
            date_parts = row.find(class_='date_bah').text.split(" ")

            # The three probabilities are the three elements that follow
            # the 'fprc' element in document order.
            prob_home = row.find(class_='fprc').find_next()
            prob_tie = prob_home.find_next()
            prob_away = prob_tie.find_next()

            # Use a separate name for the output line so the row element is
            # not overwritten while it is still being read.
            line = "|".join([
                tag.text,
                date_parts[0],
                date_parts[1],
                row.find(class_='homeTeam').text,
                row.find(class_='awayTeam').text,
                prob_home.text,
                prob_tie.text,
                prob_away.text,
            ])
            print(line)
            results.append(line)

        return results
32
Advertisement
Answer
As stated, requests and BeautifulSoup are used to fetch and parse data, not to interact with the site. To do that, you need Selenium.
Your other option is to see whether you can fetch the data directly, and whether there are parameters that let you make another request as if you had clicked the More[+] button. Does this do the trick for you?
Python
1
23
23
1
import pandas as pd
2
import requests
3
4
# Page through the JSON endpoint, accumulating every page into ``results``
# until a page repeats games that were already collected.
results = pd.DataFrame()
i = 0
url = 'https://m.forebet.com/scripts/getrs.php'
while True:
    print(i)
    payload = {
        'ln': 'en',
        'tp': '1x2',
        # Page selector: the first request sends 11, then it grows by one.
        'in': '%s' % (i + 11),
        'ord': '0'}

    jsonData = requests.get(url, params=payload).json()
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # supported way to stack the new page under the accumulated rows.
    results = pd.concat(
        [results, pd.DataFrame(jsonData[0])], sort=False
    ).reset_index(drop=True)

    # Keep requesting while every game id is still unique; the first
    # repeated id means the latest page overlapped earlier data, so
    # drop the duplicate rows and stop.
    if max(results['id'].value_counts()) <= 1:
        i += 1
    else:
        results = results.drop_duplicates()
        break
23
Output:
JavaScript
1
16
16
1
print(results)
2
id pr_under country full_name
3
0 1473708 31 England Isthmian League
4
1 1473713 35 England Isthmian League
5
2 1473745 28 England Isthmian League
6
3 1473710 35 England Isthmian League
7
4 1473033 28 England Premier League 2
8
..
9
515 1419208 47 Argentina Torneo Federal A
10
516 1419156 57 Argentina Torneo Federal A
11
517 1450589 50 Armenia Premier League
12
518 1450590 35 Armenia Premier League
13
519 1450591 52 Armenia Premier League
14
15
[518 rows x 73 columns]
16