Skip to content
Advertisement

Python requests.get called in a for loop always returns the content of the first link

I’m trying to loop over a list of links, opening each one and retrieving data from it. The problem is that on every iteration it retrieves the same page (the first one), even though the link changes each time.

def getListCardFromExpansion(expansion):
    """Download the Cardmarket singles listing for ``expansion`` and print its size.

    Builds the listing URL from ``expansion``, fetches it, searches the page
    for text nodes containing "Risultati", and prints the first token of each
    match followed by the page count at 20 results per page.

    Relies on ``requests``, ``BeautifulSoup``, ``re`` and ``math`` being
    imported elsewhere. Returns nothing; the results are only printed.
    """
    baseURL = 'https://www.cardmarket.com/it/YuGiOh/Products/Singles/'
    # NOTE(review): idExpansion=1178 is fixed in the query string, so the same
    # expansion id is sent on every call no matter what `expansion` is --
    # presumably this is why each iteration returns the same page; confirm
    # against the site.
    filterURL = '?idCategory=5&idExpansion=1178&idRarity=0&sortBy=collectorsnumber_asc&perSite=20'
    URL = baseURL + expansion + filterURL
    print("URL: " + URL)
    # Send a browser-like User-Agent with the request.
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246'}

    resp = requests.get(url=URL, headers=HEADERS)
    #resp = requests.get(URL, headers={'Cookie': 'PHPSESSID=notimportant'})
    soup = BeautifulSoup(resp.content, 'html.parser')
    #print(soup)
    # Every text node that contains the word "Risultati".
    num_string = soup.find_all(text=re.compile('Risultati'))
    # Keep only the first whitespace-separated token of each matching node.
    numOfCards = [score_string.split()[0] for score_string in num_string]
    # 20 matches perSite=20 in the query string; raises IndexError if nothing matched.
    numOfPage = math.ceil(int(numOfCards[0])/20)
    print(numOfCards)
    print(numOfPage)


# Script entry point: run the lookup once per expansion name.
if __name__ == '__main__':
    # Expansion names as they are appended to the listing URL path.
    listNewExp = ['Stardust-Overdrive', 'Power-of-the-Elements', 'Darkwing-Blast']
    for exp in listNewExp:
        # Start/end markers bracket the output of each expansion.
        print("Inizio giro per espansione: " + exp)
        getListCardFromExpansion(exp)
        print("Fine giro per espansione: " + exp)

Advertisement

Answer

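The filterURL hard-codes idExpansion=1178, which apparently takes precedence over the expansion name in the URL path, so every request returns the same expansion. Try removing idCategory=5&idExpansion=1178 from the filterURL: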

import math
import re

import requests
from bs4 import BeautifulSoup



def getListCardFromExpansion(expansion, per_site=20, timeout=10):
    """Print and return the number of result pages for a Cardmarket expansion.

    Fetches the Italian Cardmarket singles listing for ``expansion``, reads
    the result counter from the text node containing "Risultati", and derives
    the page count from it. The URL, the list of counters found and the page
    count are printed, in that order.

    Args:
        expansion: Expansion name as it appears in the URL path,
            e.g. ``"Darkwing-Blast"``.
        per_site: Results per page. Sent as ``perSite`` in the query string
            and used as the divisor for the page count, so the two cannot
            drift apart.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The number of result pages as an ``int``.

    Raises:
        ValueError: If ``per_site`` is not positive, or the counter text does
            not start with an integer.
        requests.HTTPError: If the server answers with an error status.
        IndexError: If the page contains no "Risultati" counter.
    """
    if per_site <= 0:
        raise ValueError("per_site must be a positive integer")

    base_url = "https://www.cardmarket.com/it/YuGiOh/Products/Singles/"
    # No idCategory/idExpansion in the query string: the expansion is chosen
    # by the URL path alone, so each call asks for a different listing.
    filter_url = f"?idRarity=0&sortBy=collectorsnumber_asc&perSite={per_site}"
    url = base_url + expansion + filter_url
    print("URL: " + url)
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246"
    }

    # Without a timeout requests may block forever on an unresponsive server.
    resp = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail here on 4xx/5xx instead of parsing an error page further down.
    resp.raise_for_status()

    soup = BeautifulSoup(resp.content, "html.parser")
    # `string=` is the current name of the deprecated `text=` filter.
    counter_nodes = soup.find_all(string=re.compile("Risultati"))
    numOfCards = [node.split()[0] for node in counter_nodes]
    if not numOfCards:
        # Same exception type the bare numOfCards[0] lookup used to raise,
        # but with enough context to see which page was missing the counter.
        raise IndexError("no 'Risultati' counter found at " + url)

    numOfPage = math.ceil(int(numOfCards[0]) / per_site)
    print(numOfCards)
    print(numOfPage)
    return numOfPage


# Script entry point: look up each expansion in turn.
if __name__ == "__main__":
    # Expansion names as they are appended to the listing URL path.
    listNewExp = [
        "Stardust-Overdrive",
        "Power-of-the-Elements",
        "Darkwing-Blast",
    ]
    for exp in listNewExp:
        # Start/end markers bracket the output of each expansion.
        print(f"Inizio giro per espansione: {exp}")
        getListCardFromExpansion(exp)
        print(f"Fine giro per espansione: {exp}")

Prints:

Inizio giro per espansione: Stardust-Overdrive
URL: https://www.cardmarket.com/it/YuGiOh/Products/Singles/Stardust-Overdrive?idRarity=0&sortBy=collectorsnumber_asc&perSite=20
['116']
6
Fine giro per espansione: Stardust-Overdrive
Inizio giro per espansione: Power-of-the-Elements
URL: https://www.cardmarket.com/it/YuGiOh/Products/Singles/Power-of-the-Elements?idRarity=0&sortBy=collectorsnumber_asc&perSite=20
['106']
6
Fine giro per espansione: Power-of-the-Elements
Inizio giro per espansione: Darkwing-Blast
URL: https://www.cardmarket.com/it/YuGiOh/Products/Singles/Darkwing-Blast?idRarity=0&sortBy=collectorsnumber_asc&perSite=20
['106']
6
Fine giro per espansione: Darkwing-Blast
User contributions licensed under: CC BY-SA
1 person found this helpful
Advertisement