I’m trying to collect and save all the links stored in the href attributes of the tags
that always sit on this path of elements:
JavaScript
x
3
1
<td class="score-time score">
2
<a href="/matches/2021/07/26/nc-america/concacaf-gold-cup/costa-rica/canada/3506351/" class=" ">0 - 2</a>
3
The script I’m using is this:
JavaScript
1
23
23
1
import datetime
2
import requests
3
from bs4 import BeautifulSoup
4
5
# Landing page whose score cells carry the match links.
url = "https://int.soccerway.com/"

# Browser-like User-Agent sent with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

# Download the page and collect every <td class="score-time score"> cell.
site = requests.get(url, headers=headers)
soup = BeautifulSoup(site.content, "html.parser")
jogos = soup.find_all("td", class_="score-time score")

# The file is only written, never read back, so plain "w" is sufficient.
with open("Lista_de_Links.csv", "w", newline="", encoding="UTF-8") as f:
    titlemenu = "label"
    # Terminate the header so the first link does not end up on the same line.
    f.write(titlemenu + "\n")
    for tag in jogos:
        anchor = tag.find("a", href=True)
        # find() returns None when the cell has no <a href=...>; skip such
        # cells instead of failing on None["href"].
        if anchor is None:
            continue
        linkslist = anchor["href"]

        # One link per line ("\n", not the literal letter "n").
        row = linkslist + "\n"
        f.write(row)
23
The problem is that with this approach I can’t get the links that are hidden, because they sit inside elements that must first be expanded by clicking a link:
I’m trying to write a script that clicks all the buttons to expand them and only then collects the links, but the button isn’t expanding even when I use .click().
I would like some help understanding how to use it so that this works.
Clickable element:
JavaScript
1
2
1
<tr class="group-head clickable ">
2
My script that tries to click:
JavaScript
1
12
12
1
from selenium import webdriver
2
from selenium.webdriver.common.keys import Keys
3
import time
4
5
# Location of the chromedriver executable; a raw string keeps the Windows
# path separators from being read as escape sequences.
PATH = r"C:\Users\Computador\Desktop\Python\chromedriver.exe"
driver = webdriver.Chrome(PATH)

driver.get(r"http://int.soccerway.com/")

# The row is marked up as class="group-head clickable " (note the trailing
# space), so comparing @class to the exact string 'group-head clickable'
# never matches. Test for each class token on the whitespace-normalized
# attribute instead; this also tolerates extra classes on the row.
# NOTE(review): find_element_by_xpath is kept to match the Selenium API this
# script already uses; confirm the installed Selenium version still offers it.
link = driver.find_element_by_xpath(
    "//tr[contains(concat(' ', normalize-space(@class), ' '), ' group-head ')"
    " and contains(concat(' ', normalize-space(@class), ' '), ' clickable ')]"
)
link.click()
12
Advertisement
Answer
JavaScript
1
17
17
1
import requests
2
from bs4 import BeautifulSoup
3
4
# Browser-like User-Agent sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0'
}


def main(url):
    """Print the match links found in the score cells of *url*.

    Downloads the page, selects every ``td.score-time.score`` cell and
    prints a list with the ``href`` of the first ``<a>`` in each cell.
    Cells without an anchor (or whose anchor has no ``href``) are skipped
    rather than raising ``TypeError``/``KeyError``.
    """
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, 'lxml')
    goal = []
    for cell in soup.select('td.score-time.score'):
        anchor = cell.a
        # cell.a is None when the cell contains no <a> tag.
        if anchor is not None and anchor.has_attr('href'):
            goal.append(anchor['href'])
    print(goal)


main('https://us.soccerway.com/')
17
Output:
JavaScript
1
2
1
['/matches/2021/07/26/nc-america/concacaf-gold-cup/costa-rica/canada/3506351/', '/matches/2021/07/26/nc-america/concacaf-gold-cup/united-states-of-america/jamaica/3506352/']
2
Updated Answer:
JavaScript
1
44
44
1
import requests
2
from bs4 import BeautifulSoup
3
import json
4
5
# Browser-like User-Agent applied to every request of the session.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0'
}


def get_soup(content):
    """Parse an HTML string with the lxml parser and return the tree."""
    return BeautifulSoup(content, 'lxml')


def main(url, date="2021-07-26"):
    """Print the match links of every competition row listed on *url*.

    The landing page is fetched once; each ``tr`` whose id starts with
    ``date_matches`` yields a competition id (the part of the id after the
    first ``-``) and a 1-based position. For each of them the
    ``/a/block_home_matches`` endpoint is queried and the ``href`` of the
    anchor in every ``td.score-time`` cell of the returned HTML fragment is
    printed as one list per competition.

    Args:
        url: Landing page to scrape. The AJAX endpoint is resolved against
            this URL, so the same host is used for both requests.
        date: Value sent as the ``date`` field of ``callback_params``.
            Defaults to the previously hard-coded ``"2021-07-26"``.
    """
    from urllib.parse import urljoin

    # Resolve the endpoint on the same host as the page that was requested
    # instead of always calling us.soccerway.com.
    endpoint = urljoin(url, '/a/block_home_matches')

    with requests.Session() as req:
        req.headers.update(headers)
        r = req.get(url)
        soup = get_soup(r.text)
        goal = [
            (num, x['id'].split('-', 1)[-1])
            for num, x in enumerate(
                soup.select('tr[id^=date_matches]'), start=1)
        ]

        for n, m in goal:
            params = {
                "block_id": "block_home_matches_31",
                "callback_params": json.dumps({
                    "block_service_id": "home_index_block_homematches",
                    "date": date,
                    "display": "all",
                    "stage-value": n
                }),
                "action": "showMatches",
                "params": json.dumps({
                    "competition_id": m
                })
            }
            response = req.get(endpoint, params=params)
            # The HTML fragment is embedded in the JSON reply.
            fragment = get_soup(
                response.json()['commands'][0]['parameters']['content'])
            res = []
            for cell in fragment.select('td.score-time'):
                anchor = cell.a
                # Skip cells without a link instead of failing on None.
                if anchor is not None and anchor.has_attr('href'):
                    res.append(anchor['href'])
            print(res)


main('https://us.soccerway.com/')
44
Output:
JavaScript
1
72
72
1
['/matches/2021/07/26/nc-america/concacaf-gold-cup/costa-rica/canada/3506351/', '/matches/2021/07/26/nc-america/concacaf-gold-cup/united-states-of-america/jamaica/3506352/']
2
['/matches/2021/07/26/africa/cecafa-senior-challenge-cup/ethiopia-u23/eritrea-under-23/3567660/', '/matches/2021/07/26/africa/cecafa-senior-challenge-cup/uganda-under-23/congo-dr-under-23/3567667/']
3
['/matches/2021/07/26/argentina/primera-division/estudiantes-de-la-plata/ca-independiente/3528747/', '/matches/2021/07/26/argentina/primera-division/ca-talleres-cordoba/arsenal-de-sarandi/3528749/']
4
['/matches/2021/07/26/argentina/prim-b-nacional/deportivo-maipu/asociacion-atletica-estudiantes/3564124/', '/matches/2021/07/26/argentina/prim-b-nacional/club-atetico-atlanta/club-atletico-chacarita-juniors/3564131/']
5
['/matches/2021/07/26/argentina/prim-d-metro/barracas-bolivar/argentino-de-rosario/3502592/', '/matches/2021/07/26/argentina/prim-d-metro/deportivo-yupanqui/defensores-de-cambaceres/3502595/']
6
['/matches/2021/07/26/argentina/reserve-league/velez-sarsfield-res/defensa-y-justicia-res/3565713/']
7
['/matches/2021/07/26/australia/victoria-division-one/kingston-city/manningham-united-blues/3435468/']
8
['/matches/2021/07/26/australia/victoria-womens-premier-league/australia-fc-bulleen-lions/senior-ntc/3435562/']
9
['/matches/2021/07/26/bangladesh/b-league/bangladesh-police/mohammedan-sc/3500557/', '/matches/2021/07/26/bangladesh/b-league/sheikh-jamal-dhanmondi-club/abahani-ltd/3500558/']
10
['/matches/2021/07/26/bolivia/lfpb/real-santa-cruz-bol/club-deportivo-san-jose/3544580/']
11
['/matches/2021/07/26/brazil/serie-a/sport-club-do-recife/ceara-sporting-club/3482032/', '/matches/2021/07/26/brazil/serie-a/esporte-clube-juventude/associacao-chapecoense-kindermannmastervet/3482034/']
12
['/matches/2021/07/26/brazil/serie-b/avai-futebol-clube/gremio-esportivo-brasil/3482998/', '/matches/2021/07/26/brazil/serie-b/sampaio-correa-futebol-clube/clube-de-regatas-brasil/3483000/']
13
['/matches/2021/07/26/brazil/mineiro-ii/esporte-clube-democrata/ipatinga-futebol-clube/3507261/']
14
['/matches/2021/07/26/brazil/cbf-brasileiro-u20/gremio-fb-porto-alegrense-u20/sc-internacional-u20/3520264/']
15
['/matches/2021/07/26/bulgaria/a-pfg/pfk-botev-vraca/fk-arda-kardzhali/3520723/']
16
['/matches/2021/07/26/bulgaria/b-pfg/bulgaria-fk-minyor-pernik/ludogorets-ii/3520734/', '/matches/2021/07/26/bulgaria/b-pfg/etar/levski-lom/3520735/', '/matches/2021/07/26/bulgaria/b-pfg/pfc-montana-1921/cska-1948-sofia-ii/3520739/']
17
['/matches/2021/07/26/canada/plsq/mont-royal-outremont/blainville/3528616/']
18
['/matches/2021/07/26/chile/primera-division/cd-universidad-catolica/ohiggins/3478519/']
19
['/matches/2021/07/26/chile/segunda-division/colina/club-deportivo-general-velasquez-s-a/3519280/']
20
['/matches/2021/07/26/china-pr/china-league-one/anhui-litian/beijing-hongdeng/3545188/', '/matches/2021/07/26/china-pr/china-league-one/beijing-institute-of-technology/zhejiang-lucheng/3545189/']
21
['/matches/2021/07/26/colombia/primera-a/corporacion-popular-deportiva-junior/envigado-futbol-club/3554518/', '/matches/2021/07/26/colombia/primera-a/deportivo-cali/corporacion-deportiva-independiente-medellin/3554517/']
22
['/matches/2021/07/26/colombia/primera-b/union-magdalena/juventud-soacha/3553299/']
23
['/matches/2021/07/26/colombia/colombia-liga-femenina/santa-fe/colombia-cd-la-equidad-seguros-sa/3530168/']
24
['/matches/2021/07/26/curacao/curacao-sekshon-paga/crksv-jong-holland/rkv-fc-sithoc/3521947/', '/matches/2021/07/26/curacao/curacao-sekshon-paga/rksv-centro-dominguito/sv-centro-social-deportivo-barber/3521948/']
25
['/matches/2021/07/26/denmark/superliga/sonderjyske/vejle-boldklub-elitefodbold-as/3510679/']
26
['/matches/2021/07/26/ecuador/primera-a/barcelona-sporting-club-guayaquil/manta-futbol-club/3454011/']
27
['/matches/2021/07/26/egypt/premier-league/asyouty-sport/national-bank-of-egypt/3501429/']
28
['/matches/2021/07/26/estonia/ii-liiga/kuressaare-ii/fc-kose-/3507923/']
29
['/matches/2021/07/26/faroe-islands/1-deild/b36-torshavn-ii/vikingur-gotu-ii/3466575/', '/matches/2021/07/26/faroe-islands/1-deild/skala-itrottarfelag/nsi-runavik-ii/3466576/', '/matches/2021/07/26/faroe-islands/1-deild/hb-torshavn-ii/b71-sandur/3466577/']
30
['/matches/2021/07/26/finland/kolmonen/njs-ii/malmin-ponnistajat/3489842/', '/matches/2021/07/26/finland/kolmonen/pave/fc-komeetat/3488566/']
31
['/matches/2021/07/26/france/ligue-2/dijon-football-cote-dor/fc-sochaux-montbeliard/3525214/']
32
['/matches/2021/07/26/germany/3-liga/sc-freiburg-ii/sv-wehen-1926-taunusstein-ev/3532622/']
33
['/matches/2021/07/26/grenada/premier-division/carib-hurricane/chantimelle/3501622/', '/matches/2021/07/26/grenada/premier-division/asoms-paradise/hard-rock/3501623/']
34
['/matches/2021/07/26/iceland/urvalsdeild/knattspyrnufelagid-reykjavik/ithrottarfelag-fylkir-reykjavik/3457972/']
35
['/matches/2021/07/26/iceland/4-deild/ka-asvellir/umf-alftanes/3462684/']
36
['/matches/2021/07/26/iceland/u19-league/kadalvikreynirmag-u19/fram--ulfarnir-u19/3469313/']
37
['/matches/2021/07/26/iceland/u19-cup/selfoss--sl-u19/ia--kari-u19/3526551/']
38
['/matches/2021/07/26/iceland/1-deild-women/hk/afturelding-mosfellsbaer/3457490/', '/matches/2021/07/26/iceland/1-deild-women/knattspyrnudeild-fh/augnablik/3457491/', '/matches/2021/07/26/iceland/1-deild-women/vikingur-reykjavik/haukar/3457492/', '/matches/2021/07/26/iceland/1-deild-women/grv/grotta/3457493/']
39
['/matches/2021/07/26/indonesia/super-liga/bhayangkara-surabaya-utd/pelita-jaya/3514569/']
40
['/matches/2021/07/26/iraq/iraqi-league/arbil/al-zawraa/3520679/', '/matches/2021/07/26/iraq/iraqi-league/alsinaat-alkahrabaiya/al-shorta/3520681/', '/matches/2021/07/26/iraq/iraqi-league/al-najaf/al-qasim/3520684/', '/matches/2021/07/26/iraq/iraqi-league/kahrba/al-talaba/3520685/', '/matches/2021/07/26/iraq/iraqi-league/al-simawa/al-diwaniya/3520686/']
41
['/matches/2021/07/26/jamaica/premier-league/waterhouse-fc/tivoli-gardens-fc/3520515/']
42
['/matches/2021/07/26/kazakhstan/womens-football-championship/tomiris-turan/biik-kazygurt/3498914/', '/matches/2021/07/26/kazakhstan/womens-football-championship/kyzyl-zhar/sdyusshor-8/3498915/', '/matches/2021/07/26/kazakhstan/womens-football-championship/okzhetpes/kyzyl-orda/3498916/']
43
['/matches/2021/07/26/kenya/premier-league/nairobi-city-stars/bidco-united/3440296/', '/matches/2021/07/26/kenya/premier-league/ulinzi-stars/gor-mahia/3425362/']
44
['/matches/2021/07/26/libya/premier-league/al-tahaddi/al-hilal/3581894/', '/matches/2021/07/26/libya/premier-league/darnes/al-akhdar/3581895/', '/matches/2021/07/26/libya/premier-league/al-ahli['/matches/2021/07/26/mexico/primera-division/club-de-futbol-monterrey/club-puebla-fc/3533016/', '/matches/2021/07/26/mexico/primera-division/club-tijuana-xoloitzcuintles-de-caliente/club-tigres-de-la-unl/3533017/']
45
['/matches/2021/07/26/mexico/u20-league/tijuana-u20/tigres-uanl-u20/3550364/', '/matches/2021/07/26/mexico/u20-league/cruz-azul-u20/mazatlan-u20/3550365/']
46
['/matches/2021/07/26/mexico/u18-league/cruz-azul-u18/mazatlan-u18/3557977/']
47
['/matches/2021/07/26/mexico/liga-mx-femenil/mexico-atletico-san-luis/pumas-unam/3542869/']
48
['/matches/2021/07/26/nicaragua/primera-division/walter-ferreti/real-madriz/3557364/', '/matches/2021/07/26/nicaragua/primera-division/jalapa/deportivo-ocotal/3557365/', '/matches/2021/07/26/nicaragua/primera-division/managua-fc/juventus-fc-managua/3557366/']
49
['/matches/2021/07/26/norway/nm-cupen-u19/molde-fk-u19/ranheim-u19/3580568/']
50
['/matches/2021/07/26/paraguay/division-profesional/cerro-porteno/club-libertad/3538214/']
51
['/matches/2021/07/26/peru/segunda-division/club-deportivo-union-comercio/club-atletico-grau/3502658/']
52
['/matches/2021/07/26/poland/ekstraklasa/wisla-krakow-ssa/zaglebie-lubin-ssa/3519411/']
53
['/matches/2021/07/26/portugal/league-cup/vitoria-guimaraes/leixoes/3549466/']
54
['/matches/2021/07/26/romania/liga-i/cf-chindia-targoviste/sepsi/3536874/', '/matches/2021/07/26/romania/liga-i/u-craiova-1948/fc-dinamo-1948-sa-bucuresti/3536872/']
55
['/matches/2021/07/26/russia/premier-league/shakhter-volga-olimpiets/fk-petrotrest/3516375/']
56
['/matches/2021/07/26/russia/lfl/fk-raspadskaya-mezhdurechensk/fk-radian-baykal-irkutsk/3573348/', '/matches/2021/07/28/russia/lfl/fk-astrakhan/uor-dagestan/3505624/']
57
['/matches/2021/07/26/serbia/super-liga/fk-kolubara-lazarevac/fk-radnik-surdulica/3521256/']
58
['/matches/2021/07/26/sweden/allsvenskan/varbergs-bois-fc/ildrottsforeningen-kamraterna-goteborg/3445202/', '/matches/2021/07/26/sweden/allsvenskan/orebro-sportklubb-fotboll/allmanna-idrottsklubben/3445203/', '/matches/2021/07/26/sweden/allsvenskan/ostersunds-fk/halmstads-bollklubb/3445204/']
59
['/matches/2021/07/26/sweden/superettan/goteborgs-atlet---ildrottssallskap/akropolis-if/3444940/', '/matches/2021/07/26/sweden/superettan/norrby-if/vasteras-sk-fk/3444944/']
60
['/matches/2021/07/26/ukraine/premier-league/fc-vorskla-poltava/dnipro-1/3536537/']
61
['/matches/2021/07/26/ukraine/persha-liga/alians-lypova-dolyna/metal-kharkiv/3551190/']
62
['/matches/2021/07/26/ukraine/druha-liga/rubikon/dinaz-vyshhorod/3574023/']
63
['/matches/2021/07/26/united-states/mls/new-york-city/orlando-city-fc/3481369/', '/matches/2021/07/26/united-states/mls/new-england-revolution/montreal-impacts/3481373/', '/matches/2021/07/26/united-states/mls/inter-miami/philadelphia-union/3481371/', '/matches/2021/07/26/united-states/mls/washington-district-of-columbia-united/new-york-red-bulls/3481370/', '/matches/2021/07/26/united-states/mls/seattle-sounders-fc/kansas-city-wizards/3481372/']
64
['/matches/2021/07/26/united-states/mls-reserve-league/la-galaxy-ii/oakland-roots/3485114/']
65
['/matches/2021/07/26/united-states/pdl/western-mass-pioneers/carolina-dynamo/3581622/', '/matches/2021/07/26/united-states/pdl/des-moines-menace/usa-portland-timbers-under-23/3581623/']
66
['/matches/2021/07/26/united-states/nisa-independent-cup/california-utd-strikerrs/fc-arizona/3530151/', '/matches/2021/07/26/united-states/nisa-independent-cup/magia/los-angeles-force/3530100/']
67
['/matches/2021/07/26/united-states/national-womens-soccer-league/racing-louisville/washington-spirit/3499690/']
68
['/matches/2021/07/26/united-states/wpsl/usa-davis-fc/san-francisco-nighthawks/3506548/']
69
['/matches/2021/07/26/venezuela/primera-division/guaros-de-lara-fc/yaracuyanos-fc/3573336/']
70
['/matches/2021/07/26/world/club-friendlies/genclerbirligi/fenerbahce-spor-kulubu/3581875/', '/matches/2021/07/26/world/club-friendlies/trabzonspor/kasimpasa-sk/3547898/', '/matches/2021/07/26/world/club-friendlies/uc-sampdoria/piacenza-calcio/3568664/', '/matches/2021/07/26/world/club-friendlies/union-deportiva-las-palmas/wolverhampton-wanderers-fc/3552906/', '/matches/2021/07/26/world/club-friendlies/crawley-town-football-club/west-ham-united-u23/3526283/', '/matches/2021/07/26/world/club-friendlies/k-sports/dover-athletic-fc/3564632/', '/matches/2021/07/26/world/club-friendlies/chelmsford-city-fc/kings-lynn-town-fc/3514003/', '/matches/2021/07/26/world/club-friendlies/oakland-stompers/el-farolito/3574768/', '/matches/2021/07/26/world/club-friendlies/oakland-stompers/sonoma-county-sol-/3547644/', '/matches/2021/07/26/world/club-friendlies/union-sandersdorf/germania-03-kothen/3581919/', '/matches/2021/07/26/world/club-friendlies/hendon-fc/hampton-and-richmond-borough-fc/3580595/', '/matches/2021/07/26/world/club-friendlies/winsford-united/mossley-afc/3580596/', '/matches/2021/07/26/world/club-friendlies/afc-dunstable/biggleswade-united-fc/3581920/', '/matches/2021/07/26/world/club-friendlies/highworth-town-football-club/brimscombe--thrupp/3582397/']
71
['/matches/2021/07/26/world/florida-cup/everton-football-club/club-deportivo-los-millonarios/3510980/']
72
Quicker Version:
JavaScript
1
67
67
1
import trio
2
import httpx
3
from bs4 import BeautifulSoup
4
import json
5
6
# Browser-like User-Agent applied to the shared async client.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0'
}

# Base URL used for the landing page, the AJAX endpoint and the final links.
mainurl = "https://us.soccerway.com"


async def get_soup(content):
    """Parse an HTML string with the lxml parser and return the tree."""
    return BeautifulSoup(content, 'lxml')


# Relative match links collected by all workers.
allin = []


async def worker(channel):
    """Process ``(client, n, m)`` jobs received on *channel*.

    For every job the ``/a/block_home_matches`` endpoint is requested with
    position ``n`` and competition id ``m``; the ``href`` of the anchor in
    each ``td.score-time`` cell of the returned fragment is appended to the
    module-level ``allin`` list.
    """
    async with channel:
        async for client, n, m in channel:
            callback_params = json.dumps({
                "block_service_id": "home_index_block_homematches",
                "date": "2021-07-26",
                "display": "all",
                "stage-value": n
            })
            competition = json.dumps({
                "competition_id": m
            })
            params = {
                "block_id": "block_home_matches_31",
                "callback_params": callback_params,
                "action": "showMatches",
                "params": competition
            }
            response = await client.get(
                mainurl + '/a/block_home_matches', params=params)
            # The HTML fragment is embedded in the JSON reply.
            html = response.json()['commands'][0]['parameters']['content']
            fragment = await get_soup(html)
            for cell in fragment.select('td.score-time'):
                allin.append(cell.a['href'])


async def main():
    """Fetch the landing page and fan the competition rows out to workers.

    Starts 100 ``worker`` tasks, each on its own clone of the receive end
    of an unbuffered memory channel, then sends one ``[client, n, m]`` job
    per ``tr`` whose id starts with ``date_matches``.
    """
    async with httpx.AsyncClient(timeout=None) as client, trio.open_nursery() as nurse:
        client.headers.update(headers)
        landing = await client.get(mainurl)
        page = await get_soup(landing.text)
        rows = page.select('tr[id^=date_matches]')

        sender, receiver = trio.open_memory_channel(0)

        async with receiver:
            for _ in range(100):
                nurse.start_soon(worker, receiver.clone())

            async with sender:
                for position, row in enumerate(rows, start=1):
                    # Competition id is the part of the row id after the
                    # first '-'.
                    competition_id = row['id'].split('-', 1)[-1]
                    await sender.send([client, position, competition_id])


if __name__ == "__main__":
    try:
        trio.run(main)
        # Turn the collected relative paths into absolute URLs.
        links = [mainurl + path for path in allin]
        print(links)
    except KeyboardInterrupt:
        exit('Bye!')
67
Note: install the dependencies first with pip install trio httpx