Skip to content
Advertisement

Click to expand all clickable elements on a website with Python

I’m trying to collect and save all the links stored in the href attributes of the tags that always sit on this element path:

<td class="score-time score">
    <a href="/matches/2021/07/26/nc-america/concacaf-gold-cup/costa-rica/canada/3506351/" class=" ">0 - 2</a>

enter image description here

The script I’m using is this:

import datetime
import requests
from bs4 import BeautifulSoup

# Page whose score cells are scraped for match links.
url = "https://int.soccerway.com/"

# Request headers: a desktop Chrome User-Agent string.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

site = requests.get(url, headers=headers)
soup = BeautifulSoup(site.content, "html.parser")
# Every <td class="score-time score"> cell present in the downloaded HTML.
jogos = soup.find_all("td", class_="score-time score")

with open("Lista_de_Links.csv", "w+", newline="", encoding="UTF-8") as f:
    # Header row; it needs its own line terminator, otherwise the first link
    # is glued onto the header text.
    titlemenu = "label"
    f.write(titlemenu + "\n")
    for tag in jogos:
        anchor = tag.find("a", href=True)
        if anchor is None:
            # A score cell without a link has nothing to record.
            continue

        # One link per line ("\n", not the literal letter "n").
        row = anchor["href"] + "\n"
        f.write(row)

The problem is that with this approach I can’t get the hidden links, because they only become accessible after the elements that contain them are expanded by clicking:

enter image description here

I’m trying to write a script that clicks all the buttons to expand them and only then collects the links, but the button doesn’t expand even when I call .click(). I would like some help understanding how to use it so that it does what I need.

Element clickable:

<tr class="group-head  clickable ">

My script that tries to click it:

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time

# Location of the ChromeDriver executable; the raw string keeps the
# backslashes of the Windows path literal.
PATH = r"C:\Users\Computador\Desktop\Python\chromedriver.exe"
driver = webdriver.Chrome(PATH)

driver.get(r"http://int.soccerway.com/")

# The row's class attribute is "group-head  clickable " (with a trailing
# space), so an exact @class string comparison never matches. Test for each
# class token on the whitespace-normalized attribute instead.
link = driver.find_element_by_xpath(
    "//tr[contains(concat(' ', normalize-space(@class), ' '), ' group-head ')"
    " and contains(concat(' ', normalize-space(@class), ' '), ' clickable ')]"
)
link.click()

Advertisement

Answer

import requests
from bs4 import BeautifulSoup

# HTTP headers sent with the request: a desktop Firefox User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0'
}


def main(url):
    """Print the match links found in the score cells of the page at *url*.

    The page is fetched with the module-level ``headers`` and parsed with
    lxml. For every ``td.score-time.score`` cell, the ``href`` of its first
    link is collected; cells without a link are skipped. The resulting list
    is printed.

    Raises ``requests.HTTPError`` when the server answers with a 4xx/5xx
    status.
    """
    r = requests.get(url, headers=headers)
    # Stop on an HTTP error rather than parsing an error page.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'lxml')
    goal = []
    for cell in soup.select('td.score-time.score'):
        # A cell may carry no link; guard instead of subscripting None.
        anchor = cell.find('a', href=True)
        if anchor is not None:
            goal.append(anchor['href'])
    print(goal)


# Run the scraper against the site's root URL.
main('https://us.soccerway.com/')

Output:

['/matches/2021/07/26/nc-america/concacaf-gold-cup/costa-rica/canada/3506351/', '/matches/2021/07/26/nc-america/concacaf-gold-cup/united-states-of-america/jamaica/3506352/']

Updated Answer:

import requests
from bs4 import BeautifulSoup
import json

# HTTP headers applied to the session: a desktop Firefox User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0'
}


def get_soup(content):
    """Parse *content* with the lxml parser and return the BeautifulSoup tree."""
    parsed = BeautifulSoup(content, features='lxml')
    return parsed


def main(url, date="2021-07-26"):
    """Print the match links of every competition row on the page at *url*.

    The page is fetched once; each ``tr`` whose id starts with
    ``date_matches`` yields one request to the ``/a/block_home_matches``
    endpoint, and the ``href`` values found in the returned HTML fragment are
    printed as one list per competition.

    Parameters:
        url:  Address of the page to scrape; the AJAX endpoint is resolved
              against it.
        date: Value sent as ``date`` in ``callback_params``. Defaults to the
              date that was previously hard-coded.

    Raises ``requests.HTTPError`` when any response has a 4xx/5xx status.
    """
    from urllib.parse import urljoin

    # Derive the endpoint from *url* instead of hard-coding one host.
    endpoint = urljoin(url, '/a/block_home_matches')

    with requests.Session() as req:
        req.headers.update(headers)
        page = req.get(url)
        page.raise_for_status()
        soup = get_soup(page.text)
        # (1-based row position, text after the first '-' of the row id).
        goal = [(num, x['id'].split('-', 1)[-1]) for num, x in enumerate(
            soup.select('tr[id^=date_matches]'), start=1)]

        for n, m in goal:
            params = {
                "block_id": "block_home_matches_31",
                "callback_params": json.dumps({
                    "block_service_id": "home_index_block_homematches",
                    "date": date,
                    "display": "all",
                    "stage-value": n
                }),
                "action": "showMatches",
                "params": json.dumps({
                    "competition_id": m
                })
            }
            r = req.get(endpoint, params=params)
            r.raise_for_status()
            # The JSON reply carries the HTML fragment for this competition.
            fragment = get_soup(r.json()['commands'][0]['parameters']['content'])
            res = []
            for cell in fragment.select('td.score-time'):
                # Skip cells without a link instead of subscripting None.
                anchor = cell.find('a', href=True)
                if anchor is not None:
                    res.append(anchor['href'])
            print(res)


# Run the scraper against the site's root URL.
main('https://us.soccerway.com/')

Output:

['/matches/2021/07/26/nc-america/concacaf-gold-cup/costa-rica/canada/3506351/', '/matches/2021/07/26/nc-america/concacaf-gold-cup/united-states-of-america/jamaica/3506352/']
['/matches/2021/07/26/africa/cecafa-senior-challenge-cup/ethiopia-u23/eritrea-under-23/3567660/', '/matches/2021/07/26/africa/cecafa-senior-challenge-cup/uganda-under-23/congo-dr-under-23/3567667/']
['/matches/2021/07/26/argentina/primera-division/estudiantes-de-la-plata/ca-independiente/3528747/', '/matches/2021/07/26/argentina/primera-division/ca-talleres-cordoba/arsenal-de-sarandi/3528749/']
['/matches/2021/07/26/argentina/prim-b-nacional/deportivo-maipu/asociacion-atletica-estudiantes/3564124/', '/matches/2021/07/26/argentina/prim-b-nacional/club-atetico-atlanta/club-atletico-chacarita-juniors/3564131/']
['/matches/2021/07/26/argentina/prim-d-metro/barracas-bolivar/argentino-de-rosario/3502592/', '/matches/2021/07/26/argentina/prim-d-metro/deportivo-yupanqui/defensores-de-cambaceres/3502595/']
['/matches/2021/07/26/argentina/reserve-league/velez-sarsfield-res/defensa-y-justicia-res/3565713/']
['/matches/2021/07/26/australia/victoria-division-one/kingston-city/manningham-united-blues/3435468/']
['/matches/2021/07/26/australia/victoria-womens-premier-league/australia-fc-bulleen-lions/senior-ntc/3435562/']
['/matches/2021/07/26/bangladesh/b-league/bangladesh-police/mohammedan-sc/3500557/', '/matches/2021/07/26/bangladesh/b-league/sheikh-jamal-dhanmondi-club/abahani-ltd/3500558/']
['/matches/2021/07/26/bolivia/lfpb/real-santa-cruz-bol/club-deportivo-san-jose/3544580/']
['/matches/2021/07/26/brazil/serie-a/sport-club-do-recife/ceara-sporting-club/3482032/', '/matches/2021/07/26/brazil/serie-a/esporte-clube-juventude/associacao-chapecoense-kindermannmastervet/3482034/']
['/matches/2021/07/26/brazil/serie-b/avai-futebol-clube/gremio-esportivo-brasil/3482998/', '/matches/2021/07/26/brazil/serie-b/sampaio-correa-futebol-clube/clube-de-regatas-brasil/3483000/']
['/matches/2021/07/26/brazil/mineiro-ii/esporte-clube-democrata/ipatinga-futebol-clube/3507261/']
['/matches/2021/07/26/brazil/cbf-brasileiro-u20/gremio-fb-porto-alegrense-u20/sc-internacional-u20/3520264/']
['/matches/2021/07/26/bulgaria/a-pfg/pfk-botev-vraca/fk-arda-kardzhali/3520723/']
['/matches/2021/07/26/bulgaria/b-pfg/bulgaria-fk-minyor-pernik/ludogorets-ii/3520734/', '/matches/2021/07/26/bulgaria/b-pfg/etar/levski-lom/3520735/', '/matches/2021/07/26/bulgaria/b-pfg/pfc-montana-1921/cska-1948-sofia-ii/3520739/']
['/matches/2021/07/26/canada/plsq/mont-royal-outremont/blainville/3528616/']
['/matches/2021/07/26/chile/primera-division/cd-universidad-catolica/ohiggins/3478519/']
['/matches/2021/07/26/chile/segunda-division/colina/club-deportivo-general-velasquez-s-a/3519280/']
['/matches/2021/07/26/china-pr/china-league-one/anhui-litian/beijing-hongdeng/3545188/', '/matches/2021/07/26/china-pr/china-league-one/beijing-institute-of-technology/zhejiang-lucheng/3545189/']
['/matches/2021/07/26/colombia/primera-a/corporacion-popular-deportiva-junior/envigado-futbol-club/3554518/', '/matches/2021/07/26/colombia/primera-a/deportivo-cali/corporacion-deportiva-independiente-medellin/3554517/']
['/matches/2021/07/26/colombia/primera-b/union-magdalena/juventud-soacha/3553299/']
['/matches/2021/07/26/colombia/colombia-liga-femenina/santa-fe/colombia-cd-la-equidad-seguros-sa/3530168/']
['/matches/2021/07/26/curacao/curacao-sekshon-paga/crksv-jong-holland/rkv-fc-sithoc/3521947/', '/matches/2021/07/26/curacao/curacao-sekshon-paga/rksv-centro-dominguito/sv-centro-social-deportivo-barber/3521948/']
['/matches/2021/07/26/denmark/superliga/sonderjyske/vejle-boldklub-elitefodbold-as/3510679/']
['/matches/2021/07/26/ecuador/primera-a/barcelona-sporting-club-guayaquil/manta-futbol-club/3454011/']
['/matches/2021/07/26/egypt/premier-league/asyouty-sport/national-bank-of-egypt/3501429/']
['/matches/2021/07/26/estonia/ii-liiga/kuressaare-ii/fc-kose-/3507923/']
['/matches/2021/07/26/faroe-islands/1-deild/b36-torshavn-ii/vikingur-gotu-ii/3466575/', '/matches/2021/07/26/faroe-islands/1-deild/skala-itrottarfelag/nsi-runavik-ii/3466576/', '/matches/2021/07/26/faroe-islands/1-deild/hb-torshavn-ii/b71-sandur/3466577/']
['/matches/2021/07/26/finland/kolmonen/njs-ii/malmin-ponnistajat/3489842/', '/matches/2021/07/26/finland/kolmonen/pave/fc-komeetat/3488566/']
['/matches/2021/07/26/france/ligue-2/dijon-football-cote-dor/fc-sochaux-montbeliard/3525214/']
['/matches/2021/07/26/germany/3-liga/sc-freiburg-ii/sv-wehen-1926-taunusstein-ev/3532622/']
['/matches/2021/07/26/grenada/premier-division/carib-hurricane/chantimelle/3501622/', '/matches/2021/07/26/grenada/premier-division/asoms-paradise/hard-rock/3501623/']
['/matches/2021/07/26/iceland/urvalsdeild/knattspyrnufelagid-reykjavik/ithrottarfelag-fylkir-reykjavik/3457972/']
['/matches/2021/07/26/iceland/4-deild/ka-asvellir/umf-alftanes/3462684/']
['/matches/2021/07/26/iceland/u19-league/kadalvikreynirmag-u19/fram--ulfarnir-u19/3469313/']
['/matches/2021/07/26/iceland/u19-cup/selfoss--sl-u19/ia--kari-u19/3526551/']
['/matches/2021/07/26/iceland/1-deild-women/hk/afturelding-mosfellsbaer/3457490/', '/matches/2021/07/26/iceland/1-deild-women/knattspyrnudeild-fh/augnablik/3457491/', '/matches/2021/07/26/iceland/1-deild-women/vikingur-reykjavik/haukar/3457492/', '/matches/2021/07/26/iceland/1-deild-women/grv/grotta/3457493/']
['/matches/2021/07/26/indonesia/super-liga/bhayangkara-surabaya-utd/pelita-jaya/3514569/']
['/matches/2021/07/26/iraq/iraqi-league/arbil/al-zawraa/3520679/', '/matches/2021/07/26/iraq/iraqi-league/alsinaat-alkahrabaiya/al-shorta/3520681/', '/matches/2021/07/26/iraq/iraqi-league/al-najaf/al-qasim/3520684/', '/matches/2021/07/26/iraq/iraqi-league/kahrba/al-talaba/3520685/', '/matches/2021/07/26/iraq/iraqi-league/al-simawa/al-diwaniya/3520686/']
['/matches/2021/07/26/jamaica/premier-league/waterhouse-fc/tivoli-gardens-fc/3520515/']
['/matches/2021/07/26/kazakhstan/womens-football-championship/tomiris-turan/biik-kazygurt/3498914/', '/matches/2021/07/26/kazakhstan/womens-football-championship/kyzyl-zhar/sdyusshor-8/3498915/', '/matches/2021/07/26/kazakhstan/womens-football-championship/okzhetpes/kyzyl-orda/3498916/']
['/matches/2021/07/26/kenya/premier-league/nairobi-city-stars/bidco-united/3440296/', '/matches/2021/07/26/kenya/premier-league/ulinzi-stars/gor-mahia/3425362/']
['/matches/2021/07/26/libya/premier-league/al-tahaddi/al-hilal/3581894/', '/matches/2021/07/26/libya/premier-league/darnes/al-akhdar/3581895/', '/matches/2021/07/26/libya/premier-league/al-ahli… (output truncated here in the original post)
['/matches/2021/07/26/mexico/primera-division/club-de-futbol-monterrey/club-puebla-fc/3533016/', '/matches/2021/07/26/mexico/primera-division/club-tijuana-xoloitzcuintles-de-caliente/club-tigres-de-la-unl/3533017/']
['/matches/2021/07/26/mexico/u20-league/tijuana-u20/tigres-uanl-u20/3550364/', '/matches/2021/07/26/mexico/u20-league/cruz-azul-u20/mazatlan-u20/3550365/']
['/matches/2021/07/26/mexico/u18-league/cruz-azul-u18/mazatlan-u18/3557977/']
['/matches/2021/07/26/mexico/liga-mx-femenil/mexico-atletico-san-luis/pumas-unam/3542869/']
['/matches/2021/07/26/nicaragua/primera-division/walter-ferreti/real-madriz/3557364/', '/matches/2021/07/26/nicaragua/primera-division/jalapa/deportivo-ocotal/3557365/', '/matches/2021/07/26/nicaragua/primera-division/managua-fc/juventus-fc-managua/3557366/']
['/matches/2021/07/26/norway/nm-cupen-u19/molde-fk-u19/ranheim-u19/3580568/']
['/matches/2021/07/26/paraguay/division-profesional/cerro-porteno/club-libertad/3538214/']
['/matches/2021/07/26/peru/segunda-division/club-deportivo-union-comercio/club-atletico-grau/3502658/']
['/matches/2021/07/26/poland/ekstraklasa/wisla-krakow-ssa/zaglebie-lubin-ssa/3519411/']
['/matches/2021/07/26/portugal/league-cup/vitoria-guimaraes/leixoes/3549466/']
['/matches/2021/07/26/romania/liga-i/cf-chindia-targoviste/sepsi/3536874/', '/matches/2021/07/26/romania/liga-i/u-craiova-1948/fc-dinamo-1948-sa-bucuresti/3536872/']
['/matches/2021/07/26/russia/premier-league/shakhter-volga-olimpiets/fk-petrotrest/3516375/']
['/matches/2021/07/26/russia/lfl/fk-raspadskaya-mezhdurechensk/fk-radian-baykal-irkutsk/3573348/', '/matches/2021/07/28/russia/lfl/fk-astrakhan/uor-dagestan/3505624/']
['/matches/2021/07/26/serbia/super-liga/fk-kolubara-lazarevac/fk-radnik-surdulica/3521256/']
['/matches/2021/07/26/sweden/allsvenskan/varbergs-bois-fc/ildrottsforeningen-kamraterna-goteborg/3445202/', '/matches/2021/07/26/sweden/allsvenskan/orebro-sportklubb-fotboll/allmanna-idrottsklubben/3445203/', '/matches/2021/07/26/sweden/allsvenskan/ostersunds-fk/halmstads-bollklubb/3445204/']
['/matches/2021/07/26/sweden/superettan/goteborgs-atlet---ildrottssallskap/akropolis-if/3444940/', '/matches/2021/07/26/sweden/superettan/norrby-if/vasteras-sk-fk/3444944/']
['/matches/2021/07/26/ukraine/premier-league/fc-vorskla-poltava/dnipro-1/3536537/']
['/matches/2021/07/26/ukraine/persha-liga/alians-lypova-dolyna/metal-kharkiv/3551190/']
['/matches/2021/07/26/ukraine/druha-liga/rubikon/dinaz-vyshhorod/3574023/']
['/matches/2021/07/26/united-states/mls/new-york-city/orlando-city-fc/3481369/', '/matches/2021/07/26/united-states/mls/new-england-revolution/montreal-impacts/3481373/', '/matches/2021/07/26/united-states/mls/inter-miami/philadelphia-union/3481371/', '/matches/2021/07/26/united-states/mls/washington-district-of-columbia-united/new-york-red-bulls/3481370/', '/matches/2021/07/26/united-states/mls/seattle-sounders-fc/kansas-city-wizards/3481372/']
['/matches/2021/07/26/united-states/mls-reserve-league/la-galaxy-ii/oakland-roots/3485114/']
['/matches/2021/07/26/united-states/pdl/western-mass-pioneers/carolina-dynamo/3581622/', '/matches/2021/07/26/united-states/pdl/des-moines-menace/usa-portland-timbers-under-23/3581623/']       
['/matches/2021/07/26/united-states/nisa-independent-cup/california-utd-strikerrs/fc-arizona/3530151/', '/matches/2021/07/26/united-states/nisa-independent-cup/magia/los-angeles-force/3530100/']
['/matches/2021/07/26/united-states/national-womens-soccer-league/racing-louisville/washington-spirit/3499690/']
['/matches/2021/07/26/united-states/wpsl/usa-davis-fc/san-francisco-nighthawks/3506548/']
['/matches/2021/07/26/venezuela/primera-division/guaros-de-lara-fc/yaracuyanos-fc/3573336/']
['/matches/2021/07/26/world/club-friendlies/genclerbirligi/fenerbahce-spor-kulubu/3581875/', '/matches/2021/07/26/world/club-friendlies/trabzonspor/kasimpasa-sk/3547898/', '/matches/2021/07/26/world/club-friendlies/uc-sampdoria/piacenza-calcio/3568664/', '/matches/2021/07/26/world/club-friendlies/union-deportiva-las-palmas/wolverhampton-wanderers-fc/3552906/', '/matches/2021/07/26/world/club-friendlies/crawley-town-football-club/west-ham-united-u23/3526283/', '/matches/2021/07/26/world/club-friendlies/k-sports/dover-athletic-fc/3564632/', '/matches/2021/07/26/world/club-friendlies/chelmsford-city-fc/kings-lynn-town-fc/3514003/', '/matches/2021/07/26/world/club-friendlies/oakland-stompers/el-farolito/3574768/', '/matches/2021/07/26/world/club-friendlies/oakland-stompers/sonoma-county-sol-/3547644/', '/matches/2021/07/26/world/club-friendlies/union-sandersdorf/germania-03-kothen/3581919/', '/matches/2021/07/26/world/club-friendlies/hendon-fc/hampton-and-richmond-borough-fc/3580595/', '/matches/2021/07/26/world/club-friendlies/winsford-united/mossley-afc/3580596/', '/matches/2021/07/26/world/club-friendlies/afc-dunstable/biggleswade-united-fc/3581920/', '/matches/2021/07/26/world/club-friendlies/highworth-town-football-club/brimscombe--thrupp/3582397/']
['/matches/2021/07/26/world/florida-cup/everton-football-club/club-deportivo-los-millonarios/3510980/']

Quicker Version:

import trio
import httpx
from bs4 import BeautifulSoup
import json

# HTTP headers applied to the async client: a desktop Firefox User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0'
}

# Site root: fetched as the start page, used as the prefix of the AJAX
# endpoint, and prepended to the scraped relative links.
mainurl = "https://us.soccerway.com"


async def get_soup(content):
    """Parse *content* with the lxml parser and return the BeautifulSoup tree.

    Declared as a coroutine, so callers must ``await`` it.
    """
    parsed = BeautifulSoup(content, features='lxml')
    return parsed

# Module-level accumulator: workers extend it with the hrefs they scrape.
allin = []


async def worker(channel, date="2021-07-26"):
    """Consume ``(client, n, m)`` jobs from *channel* and collect match links.

    For each job, the ``/a/block_home_matches`` endpoint under ``mainurl`` is
    queried with ``n`` as ``stage-value`` and ``m`` as ``competition_id``;
    the hrefs found in the returned HTML fragment are appended to the
    module-level ``allin`` list.

    Parameters:
        channel: Receive channel yielding ``(client, n, m)`` triples; it is
                 closed when the worker finishes.
        date:    Value sent as ``date`` in ``callback_params``. Defaults to
                 the date that was previously hard-coded.
    """
    async with channel:
        async for client, n, m in channel:
            params = {
                "block_id": "block_home_matches_31",
                "callback_params": json.dumps({
                    "block_service_id": "home_index_block_homematches",
                    "date": date,
                    "display": "all",
                    "stage-value": n
                }),
                "action": "showMatches",
                "params": json.dumps({
                    "competition_id": m
                })
            }
            r = await client.get(mainurl + '/a/block_home_matches', params=params)
            # Surface HTTP errors directly instead of failing on the JSON lookup.
            r.raise_for_status()
            soup = await get_soup(r.json()['commands'][0]['parameters']['content'])
            for cell in soup.select('td.score-time'):
                # Skip cells without a link instead of subscripting None.
                anchor = cell.find('a', href=True)
                if anchor is not None:
                    allin.append(anchor['href'])


async def main():
    """Fetch the start page and hand one job per competition row to the workers.

    Opens the shared HTTP client and a nursery, reads every ``tr`` whose id
    starts with ``date_matches``, starts 100 ``worker`` tasks on clones of the
    receive channel, and sends each ``[client, position, competition_id]`` job
    through an unbuffered (capacity 0) memory channel.
    """
    async with httpx.AsyncClient(timeout=None) as client:
        async with trio.open_nursery() as nurse:
            client.headers.update(headers)
            home = await client.get(mainurl)
            page = await get_soup(home.text)
            rows = page.select('tr[id^=date_matches]')
            # (1-based row position, text after the first '-' of the row id).
            jobs = (
                (position, row['id'].split('-', 1)[-1])
                for position, row in enumerate(rows, start=1)
            )

            sender, receiver = trio.open_memory_channel(0)

            async with receiver:
                worker_count = 100
                for _ in range(worker_count):
                    # Each worker gets its own clone of the receive end.
                    nurse.start_soon(worker, receiver.clone())

                # Closing the sender after the last job ends the workers' loops.
                async with sender:
                    for position, competition in jobs:
                        await sender.send([client, position, competition])


if __name__ == "__main__":
    try:
        trio.run(main)
    except KeyboardInterrupt:
        # Raise SystemExit directly: the bare exit() helper is injected by the
        # site module and is intended for interactive use, not for programs.
        raise SystemExit('Bye!')
    else:
        # Workers store site-relative paths; prefix them to get absolute URLs.
        links = [mainurl + x for x in allin]
        print(links)

Note: pip install trio httpx beautifulsoup4 lxml

Advertisement