I’m trying to collect and save all the links stored in the href attribute of the <a> tags
that always sit on this path of elements:
<td class="score-time score"> <a href="/matches/2021/07/26/nc-america/concacaf-gold-cup/costa-rica/canada/3506351/" class=" ">0 - 2</a>
The script I’m using is this:
# Collect the match links shown on the soccerway home page and save them,
# one per row, in Lista_de_Links.csv.
import datetime
import requests
from bs4 import BeautifulSoup

url = "https://int.soccerway.com/"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

site = requests.get(url, headers=headers)
soup = BeautifulSoup(site.content, "html.parser")
# Every score cell is expected to wrap the anchor that points at the match page.
jogos = soup.find_all("td", class_="score-time score")

with open("Lista_de_Links.csv", "w+", newline="", encoding="UTF-8") as f:
    titlemenu = "label"
    # Terminate the header so the first link starts on its own row.
    f.write(titlemenu + "\n")
    for tag in jogos:
        anchor = tag.find("a", href=True)
        if anchor is None:
            # A score cell without a linked anchor has nothing to record.
            continue
        linkslist = anchor["href"]
        # "\n" (not the literal letter "n") ends each row.
        row = linkslist + "\n"
        f.write(row)
The problem is that with this approach I can’t get the links that are hidden inside collapsed rows, which have to be expanded by clicking before their content becomes accessible:
I’m trying to write a script that clicks every button to expand the rows and only then collects the links, but the row doesn’t expand even when I call .click().
I would like some help understanding how to use it so that this works.
Clickable element:
<tr class="group-head clickable ">
My script that tries to click it:
# Open the soccerway home page in Chrome and click the first expandable
# competition row.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time

# Raw string so the backslashes of the Windows path are kept verbatim.
PATH = r"C:\Users\Computador\Desktop\Python\chromedriver.exe"
driver = webdriver.Chrome(PATH)
driver.get(r"http://int.soccerway.com/")
# NOTE(review): @class='...' is an exact string comparison, while the sample
# row markup in the question has class "group-head clickable " (trailing
# space), so this locator may not match that row - confirm against the live
# page. find_element_* also returns only the first matching element.
link = driver.find_element_by_xpath("//tr[@class='group-head clickable']")
link.click()
Advertisement
Answer
import requests
from bs4 import BeautifulSoup

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0'
}


def main(url):
    """Download *url* and print the href of the anchor in every score cell."""
    response = requests.get(url, headers=headers)
    page = BeautifulSoup(response.text, 'lxml')
    # One href per <td class="score-time score"> cell, in document order.
    hrefs = [cell.a['href'] for cell in page.select('td.score-time.score')]
    print(hrefs)


main('https://us.soccerway.com/')
Output:
['/matches/2021/07/26/nc-america/concacaf-gold-cup/costa-rica/canada/3506351/', '/matches/2021/07/26/nc-america/concacaf-gold-cup/united-states-of-america/jamaica/3506352/']
Updated Answer:
import requests
from bs4 import BeautifulSoup
import json

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0'
}


def get_soup(content):
    """Parse an HTML string with the lxml parser."""
    return BeautifulSoup(content, 'lxml')


def main(url, date="2021-07-26"):
    """Print the match links of every competition row found on *url*.

    The page at *url* is scanned for ``tr`` elements whose id starts with
    ``date_matches``. For each one, the match block is requested from the
    ``/a/block_home_matches`` endpoint and the hrefs found in its
    ``td.score-time`` cells are printed as one list.

    Args:
        url: Home page to read the competition rows from.
        date: Value sent as ``date`` in the callback parameters. Defaults to
            the day the original answer was written for.
            NOTE(review): presumably this should match the day shown on the
            home page - confirm.
    """
    with requests.Session() as req:
        req.headers.update(headers)
        r = req.get(url)
        soup = get_soup(r.text)
        # (1-based row position, part of the row id after the first '-').
        goal = ((num, x['id'].split('-', 1)[-1]) for num, x in enumerate(
            soup.select('tr[id^=date_matches]'), start=1))
        for n, m in goal:
            params = {
                "block_id": "block_home_matches_31",
                "callback_params": json.dumps({
                    "block_service_id": "home_index_block_homematches",
                    "date": date,
                    "display": "all",
                    "stage-value": n
                }),
                "action": "showMatches",
                "params": json.dumps({
                    "competition_id": m
                })
            }
            r = req.get(
                'https://us.soccerway.com/a/block_home_matches', params=params)
            # The JSON reply carries the rendered HTML fragment of the block.
            soup = get_soup(r.json()['commands'][0]['parameters']['content'])
            # Skip cells that carry no linked anchor instead of failing on them.
            res = [
                x.a['href'] for x in soup.select('td.score-time')
                if x.a is not None and x.a.has_attr('href')
            ]
            print(res)


main('https://us.soccerway.com/')
Output:
['/matches/2021/07/26/nc-america/concacaf-gold-cup/costa-rica/canada/3506351/', '/matches/2021/07/26/nc-america/concacaf-gold-cup/united-states-of-america/jamaica/3506352/'] ['/matches/2021/07/26/africa/cecafa-senior-challenge-cup/ethiopia-u23/eritrea-under-23/3567660/', '/matches/2021/07/26/africa/cecafa-senior-challenge-cup/uganda-under-23/congo-dr-under-23/3567667/'] ['/matches/2021/07/26/argentina/primera-division/estudiantes-de-la-plata/ca-independiente/3528747/', '/matches/2021/07/26/argentina/primera-division/ca-talleres-cordoba/arsenal-de-sarandi/3528749/'] ['/matches/2021/07/26/argentina/prim-b-nacional/deportivo-maipu/asociacion-atletica-estudiantes/3564124/', '/matches/2021/07/26/argentina/prim-b-nacional/club-atetico-atlanta/club-atletico-chacarita-juniors/3564131/'] ['/matches/2021/07/26/argentina/prim-d-metro/barracas-bolivar/argentino-de-rosario/3502592/', '/matches/2021/07/26/argentina/prim-d-metro/deportivo-yupanqui/defensores-de-cambaceres/3502595/'] ['/matches/2021/07/26/argentina/reserve-league/velez-sarsfield-res/defensa-y-justicia-res/3565713/'] ['/matches/2021/07/26/australia/victoria-division-one/kingston-city/manningham-united-blues/3435468/'] ['/matches/2021/07/26/australia/victoria-womens-premier-league/australia-fc-bulleen-lions/senior-ntc/3435562/'] ['/matches/2021/07/26/bangladesh/b-league/bangladesh-police/mohammedan-sc/3500557/', '/matches/2021/07/26/bangladesh/b-league/sheikh-jamal-dhanmondi-club/abahani-ltd/3500558/'] ['/matches/2021/07/26/bolivia/lfpb/real-santa-cruz-bol/club-deportivo-san-jose/3544580/'] ['/matches/2021/07/26/brazil/serie-a/sport-club-do-recife/ceara-sporting-club/3482032/', '/matches/2021/07/26/brazil/serie-a/esporte-clube-juventude/associacao-chapecoense-kindermannmastervet/3482034/'] ['/matches/2021/07/26/brazil/serie-b/avai-futebol-clube/gremio-esportivo-brasil/3482998/', '/matches/2021/07/26/brazil/serie-b/sampaio-correa-futebol-clube/clube-de-regatas-brasil/3483000/'] 
['/matches/2021/07/26/brazil/mineiro-ii/esporte-clube-democrata/ipatinga-futebol-clube/3507261/'] ['/matches/2021/07/26/brazil/cbf-brasileiro-u20/gremio-fb-porto-alegrense-u20/sc-internacional-u20/3520264/'] ['/matches/2021/07/26/bulgaria/a-pfg/pfk-botev-vraca/fk-arda-kardzhali/3520723/'] ['/matches/2021/07/26/bulgaria/b-pfg/bulgaria-fk-minyor-pernik/ludogorets-ii/3520734/', '/matches/2021/07/26/bulgaria/b-pfg/etar/levski-lom/3520735/', '/matches/2021/07/26/bulgaria/b-pfg/pfc-montana-1921/cska-1948-sofia-ii/3520739/'] ['/matches/2021/07/26/canada/plsq/mont-royal-outremont/blainville/3528616/'] ['/matches/2021/07/26/chile/primera-division/cd-universidad-catolica/ohiggins/3478519/'] ['/matches/2021/07/26/chile/segunda-division/colina/club-deportivo-general-velasquez-s-a/3519280/'] ['/matches/2021/07/26/china-pr/china-league-one/anhui-litian/beijing-hongdeng/3545188/', '/matches/2021/07/26/china-pr/china-league-one/beijing-institute-of-technology/zhejiang-lucheng/3545189/'] ['/matches/2021/07/26/colombia/primera-a/corporacion-popular-deportiva-junior/envigado-futbol-club/3554518/', '/matches/2021/07/26/colombia/primera-a/deportivo-cali/corporacion-deportiva-independiente-medellin/3554517/'] ['/matches/2021/07/26/colombia/primera-b/union-magdalena/juventud-soacha/3553299/'] ['/matches/2021/07/26/colombia/colombia-liga-femenina/santa-fe/colombia-cd-la-equidad-seguros-sa/3530168/'] ['/matches/2021/07/26/curacao/curacao-sekshon-paga/crksv-jong-holland/rkv-fc-sithoc/3521947/', '/matches/2021/07/26/curacao/curacao-sekshon-paga/rksv-centro-dominguito/sv-centro-social-deportivo-barber/3521948/'] ['/matches/2021/07/26/denmark/superliga/sonderjyske/vejle-boldklub-elitefodbold-as/3510679/'] ['/matches/2021/07/26/ecuador/primera-a/barcelona-sporting-club-guayaquil/manta-futbol-club/3454011/'] ['/matches/2021/07/26/egypt/premier-league/asyouty-sport/national-bank-of-egypt/3501429/'] ['/matches/2021/07/26/estonia/ii-liiga/kuressaare-ii/fc-kose-/3507923/'] 
['/matches/2021/07/26/faroe-islands/1-deild/b36-torshavn-ii/vikingur-gotu-ii/3466575/', '/matches/2021/07/26/faroe-islands/1-deild/skala-itrottarfelag/nsi-runavik-ii/3466576/', '/matches/2021/07/26/faroe-islands/1-deild/hb-torshavn-ii/b71-sandur/3466577/'] ['/matches/2021/07/26/finland/kolmonen/njs-ii/malmin-ponnistajat/3489842/', '/matches/2021/07/26/finland/kolmonen/pave/fc-komeetat/3488566/'] ['/matches/2021/07/26/france/ligue-2/dijon-football-cote-dor/fc-sochaux-montbeliard/3525214/'] ['/matches/2021/07/26/germany/3-liga/sc-freiburg-ii/sv-wehen-1926-taunusstein-ev/3532622/'] ['/matches/2021/07/26/grenada/premier-division/carib-hurricane/chantimelle/3501622/', '/matches/2021/07/26/grenada/premier-division/asoms-paradise/hard-rock/3501623/'] ['/matches/2021/07/26/iceland/urvalsdeild/knattspyrnufelagid-reykjavik/ithrottarfelag-fylkir-reykjavik/3457972/'] ['/matches/2021/07/26/iceland/4-deild/ka-asvellir/umf-alftanes/3462684/'] ['/matches/2021/07/26/iceland/u19-league/kadalvikreynirmag-u19/fram--ulfarnir-u19/3469313/'] ['/matches/2021/07/26/iceland/u19-cup/selfoss--sl-u19/ia--kari-u19/3526551/'] ['/matches/2021/07/26/iceland/1-deild-women/hk/afturelding-mosfellsbaer/3457490/', '/matches/2021/07/26/iceland/1-deild-women/knattspyrnudeild-fh/augnablik/3457491/', '/matches/2021/07/26/iceland/1-deild-women/vikingur-reykjavik/haukar/3457492/', '/matches/2021/07/26/iceland/1-deild-women/grv/grotta/3457493/'] ['/matches/2021/07/26/indonesia/super-liga/bhayangkara-surabaya-utd/pelita-jaya/3514569/'] ['/matches/2021/07/26/iraq/iraqi-league/arbil/al-zawraa/3520679/', '/matches/2021/07/26/iraq/iraqi-league/alsinaat-alkahrabaiya/al-shorta/3520681/', '/matches/2021/07/26/iraq/iraqi-league/al-najaf/al-qasim/3520684/', '/matches/2021/07/26/iraq/iraqi-league/kahrba/al-talaba/3520685/', '/matches/2021/07/26/iraq/iraqi-league/al-simawa/al-diwaniya/3520686/'] ['/matches/2021/07/26/jamaica/premier-league/waterhouse-fc/tivoli-gardens-fc/3520515/'] 
['/matches/2021/07/26/kazakhstan/womens-football-championship/tomiris-turan/biik-kazygurt/3498914/', '/matches/2021/07/26/kazakhstan/womens-football-championship/kyzyl-zhar/sdyusshor-8/3498915/', '/matches/2021/07/26/kazakhstan/womens-football-championship/okzhetpes/kyzyl-orda/3498916/'] ['/matches/2021/07/26/kenya/premier-league/nairobi-city-stars/bidco-united/3440296/', '/matches/2021/07/26/kenya/premier-league/ulinzi-stars/gor-mahia/3425362/'] ['/matches/2021/07/26/libya/premier-league/al-tahaddi/al-hilal/3581894/', '/matches/2021/07/26/libya/premier-league/darnes/al-akhdar/3581895/', '/matches/2021/07/26/libya/premier-league/al-ahli['/matches/2021/07/26/mexico/primera-division/club-de-futbol-monterrey/club-puebla-fc/3533016/', '/matches/2021/07/26/mexico/primera-division/club-tijuana-xoloitzcuintles-de-caliente/club-tigres-de-la-unl/3533017/'] ['/matches/2021/07/26/mexico/u20-league/tijuana-u20/tigres-uanl-u20/3550364/', '/matches/2021/07/26/mexico/u20-league/cruz-azul-u20/mazatlan-u20/3550365/'] ['/matches/2021/07/26/mexico/u18-league/cruz-azul-u18/mazatlan-u18/3557977/'] ['/matches/2021/07/26/mexico/liga-mx-femenil/mexico-atletico-san-luis/pumas-unam/3542869/'] ['/matches/2021/07/26/nicaragua/primera-division/walter-ferreti/real-madriz/3557364/', '/matches/2021/07/26/nicaragua/primera-division/jalapa/deportivo-ocotal/3557365/', '/matches/2021/07/26/nicaragua/primera-division/managua-fc/juventus-fc-managua/3557366/'] ['/matches/2021/07/26/norway/nm-cupen-u19/molde-fk-u19/ranheim-u19/3580568/'] ['/matches/2021/07/26/paraguay/division-profesional/cerro-porteno/club-libertad/3538214/'] ['/matches/2021/07/26/peru/segunda-division/club-deportivo-union-comercio/club-atletico-grau/3502658/'] ['/matches/2021/07/26/poland/ekstraklasa/wisla-krakow-ssa/zaglebie-lubin-ssa/3519411/'] ['/matches/2021/07/26/portugal/league-cup/vitoria-guimaraes/leixoes/3549466/'] ['/matches/2021/07/26/romania/liga-i/cf-chindia-targoviste/sepsi/3536874/', 
'/matches/2021/07/26/romania/liga-i/u-craiova-1948/fc-dinamo-1948-sa-bucuresti/3536872/'] ['/matches/2021/07/26/russia/premier-league/shakhter-volga-olimpiets/fk-petrotrest/3516375/'] ['/matches/2021/07/26/russia/lfl/fk-raspadskaya-mezhdurechensk/fk-radian-baykal-irkutsk/3573348/', '/matches/2021/07/28/russia/lfl/fk-astrakhan/uor-dagestan/3505624/'] ['/matches/2021/07/26/serbia/super-liga/fk-kolubara-lazarevac/fk-radnik-surdulica/3521256/'] ['/matches/2021/07/26/sweden/allsvenskan/varbergs-bois-fc/ildrottsforeningen-kamraterna-goteborg/3445202/', '/matches/2021/07/26/sweden/allsvenskan/orebro-sportklubb-fotboll/allmanna-idrottsklubben/3445203/', '/matches/2021/07/26/sweden/allsvenskan/ostersunds-fk/halmstads-bollklubb/3445204/'] ['/matches/2021/07/26/sweden/superettan/goteborgs-atlet---ildrottssallskap/akropolis-if/3444940/', '/matches/2021/07/26/sweden/superettan/norrby-if/vasteras-sk-fk/3444944/'] ['/matches/2021/07/26/ukraine/premier-league/fc-vorskla-poltava/dnipro-1/3536537/'] ['/matches/2021/07/26/ukraine/persha-liga/alians-lypova-dolyna/metal-kharkiv/3551190/'] ['/matches/2021/07/26/ukraine/druha-liga/rubikon/dinaz-vyshhorod/3574023/'] ['/matches/2021/07/26/united-states/mls/new-york-city/orlando-city-fc/3481369/', '/matches/2021/07/26/united-states/mls/new-england-revolution/montreal-impacts/3481373/', '/matches/2021/07/26/united-states/mls/inter-miami/philadelphia-union/3481371/', '/matches/2021/07/26/united-states/mls/washington-district-of-columbia-united/new-york-red-bulls/3481370/', '/matches/2021/07/26/united-states/mls/seattle-sounders-fc/kansas-city-wizards/3481372/'] ['/matches/2021/07/26/united-states/mls-reserve-league/la-galaxy-ii/oakland-roots/3485114/'] ['/matches/2021/07/26/united-states/pdl/western-mass-pioneers/carolina-dynamo/3581622/', '/matches/2021/07/26/united-states/pdl/des-moines-menace/usa-portland-timbers-under-23/3581623/'] ['/matches/2021/07/26/united-states/nisa-independent-cup/california-utd-strikerrs/fc-arizona/3530151/', 
'/matches/2021/07/26/united-states/nisa-independent-cup/magia/los-angeles-force/3530100/'] ['/matches/2021/07/26/united-states/national-womens-soccer-league/racing-louisville/washington-spirit/3499690/'] ['/matches/2021/07/26/united-states/wpsl/usa-davis-fc/san-francisco-nighthawks/3506548/'] ['/matches/2021/07/26/venezuela/primera-division/guaros-de-lara-fc/yaracuyanos-fc/3573336/'] ['/matches/2021/07/26/world/club-friendlies/genclerbirligi/fenerbahce-spor-kulubu/3581875/', '/matches/2021/07/26/world/club-friendlies/trabzonspor/kasimpasa-sk/3547898/', '/matches/2021/07/26/world/club-friendlies/uc-sampdoria/piacenza-calcio/3568664/', '/matches/2021/07/26/world/club-friendlies/union-deportiva-las-palmas/wolverhampton-wanderers-fc/3552906/', '/matches/2021/07/26/world/club-friendlies/crawley-town-football-club/west-ham-united-u23/3526283/', '/matches/2021/07/26/world/club-friendlies/k-sports/dover-athletic-fc/3564632/', '/matches/2021/07/26/world/club-friendlies/chelmsford-city-fc/kings-lynn-town-fc/3514003/', '/matches/2021/07/26/world/club-friendlies/oakland-stompers/el-farolito/3574768/', '/matches/2021/07/26/world/club-friendlies/oakland-stompers/sonoma-county-sol-/3547644/', '/matches/2021/07/26/world/club-friendlies/union-sandersdorf/germania-03-kothen/3581919/', '/matches/2021/07/26/world/club-friendlies/hendon-fc/hampton-and-richmond-borough-fc/3580595/', '/matches/2021/07/26/world/club-friendlies/winsford-united/mossley-afc/3580596/', '/matches/2021/07/26/world/club-friendlies/afc-dunstable/biggleswade-united-fc/3581920/', '/matches/2021/07/26/world/club-friendlies/highworth-town-football-club/brimscombe--thrupp/3582397/'] ['/matches/2021/07/26/world/florida-cup/everton-football-club/club-deportivo-los-millonarios/3510980/']
Quicker Version:
import trio
import httpx
from bs4 import BeautifulSoup
import json

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0'
}
mainurl = "https://us.soccerway.com"
# Value sent as "date" in every match-block request.
MATCH_DATE = "2021-07-26"
# Number of worker tasks reading from the channel.
WORKER_COUNT = 100


async def get_soup(content):
    """Parse an HTML string with the lxml parser."""
    return BeautifulSoup(content, 'lxml')

# Relative match links gathered by all workers.
allin = []


async def worker(channel):
    """Fetch the match block for each (client, n, m) item received on *channel*.

    ``n`` is sent as ``stage-value`` and ``m`` as ``competition_id``; the hrefs
    found in the returned ``td.score-time`` cells are appended to ``allin``.
    """
    async with channel:
        async for client, n, m in channel:
            params = {
                "block_id": "block_home_matches_31",
                "callback_params": json.dumps({
                    "block_service_id": "home_index_block_homematches",
                    "date": MATCH_DATE,
                    "display": "all",
                    "stage-value": n
                }),
                "action": "showMatches",
                "params": json.dumps({
                    "competition_id": m
                })
            }
            r = await client.get(mainurl + '/a/block_home_matches', params=params)
            # The JSON reply carries the rendered HTML fragment of the block.
            soup = await get_soup(r.json()['commands'][0]['parameters']['content'])
            res = (x.a['href'] for x in soup.select('td.score-time'))
            allin.extend(res)


async def main():
    """Read the competition rows from the home page and fan them out to workers."""
    async with httpx.AsyncClient(timeout=None) as client, trio.open_nursery() as nurse:
        client.headers.update(headers)
        r = await client.get(mainurl)
        soup = await get_soup(r.text)
        # (1-based row position, part of the row id after the first '-').
        goal = ((num, x['id'].split('-', 1)[-1]) for num, x in enumerate(
            soup.select('tr[id^=date_matches]'), start=1))
        sender, receiver = trio.open_memory_channel(0)
        async with receiver:
            # Each worker gets its own clone of the receiving end.
            for _ in range(WORKER_COUNT):
                nurse.start_soon(worker, receiver.clone())
            async with sender:
                for n, m in goal:
                    await sender.send([client, n, m])


if __name__ == "__main__":
    try:
        trio.run(main)
    except KeyboardInterrupt:
        # SystemExit works without the site-provided exit() helper.
        raise SystemExit('Bye!') from None
    else:
        print([mainurl + x for x in allin])
Note: install the required packages first with: pip install trio httpx