I'm trying to loop over a list of links, open each one, and retrieve data from it. The problem is that on every iteration it retrieves the same page (the first one), even though I change the link each time.
def getListCardFromExpansion(expansion):
    """Fetch the singles listing for *expansion* and print its result and page counts.

    Prints the requested URL, the list of result-count tokens found next to
    the "Risultati" label, and the number of 20-item pages they imply.

    NOTE: the query string pins ``idCategory=5&idExpansion=1178``. This is the
    snippet exactly as posted in the question, which reports getting the same
    page on every iteration.
    """
    query = '?idCategory=5&idExpansion=1178&idRarity=0&sortBy=collectorsnumber_asc&perSite=20'
    page_url = 'https://www.cardmarket.com/it/YuGiOh/Products/Singles/' + expansion + query
    print("URL: " + page_url)

    # Browser-like User-Agent sent with the request.
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246'}
    response = requests.get(url=page_url, headers=browser_headers)
    document = BeautifulSoup(response.content, 'html.parser')

    # Keep the first whitespace-separated token of every text node that
    # mentions "Risultati".
    result_counts = []
    for label in document.find_all(text=re.compile('Risultati')):
        result_counts.append(label.split()[0])

    # 20 matches the perSite=20 value in the query string.
    page_count = math.ceil(int(result_counts[0]) / 20)
    print(result_counts)
    print(page_count)


if __name__ == '__main__':
    listNewExp = ['Stardust-Overdrive', 'Power-of-the-Elements', 'Darkwing-Blast']
    for exp in listNewExp:
        print("Inizio giro per espansione: " + exp)
        getListCardFromExpansion(exp)
        print("Fine giro per espansione: " + exp)
Advertisement
Answer
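Try removing `idCategory=5&idExpansion=1178` from `filterURL`. The hard-coded `idExpansion` is presumably what makes every request return the same expansion, regardless of the expansion name in the path: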
import math
import re

import requests
from bs4 import BeautifulSoup

BASE_URL = "https://www.cardmarket.com/it/YuGiOh/Products/Singles/"
# Page size requested from the site; also used to derive the page count.
CARDS_PER_PAGE = 20
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246"
}


def getListCardFromExpansion(expansion):
    """Print and return the number of listing pages for one expansion.

    Requests the singles listing of *expansion*, reads the result count shown
    next to the "Risultati" label and derives how many pages of
    ``CARDS_PER_PAGE`` entries the listing spans. The URL, the raw count
    tokens and the page count are printed, as in the original snippet.

    Args:
        expansion: Expansion name as it appears in the site URL path,
            e.g. ``"Stardust-Overdrive"``.

    Returns:
        The number of listing pages as an ``int``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If no result count can be found or parsed on the page.
    """
    # No idCategory/idExpansion here: the expansion is selected by the URL
    # path alone, so each call really targets a different listing.
    filterURL = "?idRarity=0&sortBy=collectorsnumber_asc&perSite=" + str(CARDS_PER_PAGE)
    URL = BASE_URL + expansion + filterURL
    print("URL: " + URL)

    resp = requests.get(url=URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    resp.raise_for_status()
    soup = BeautifulSoup(resp.content, "html.parser")

    # `string=` is the current name of the deprecated `text=` argument.
    num_string = soup.find_all(string=re.compile("Risultati"))
    numOfCards = [score_string.split()[0] for score_string in num_string]
    if not numOfCards:
        raise ValueError("No 'Risultati' count found on page: " + URL)

    numOfPage = math.ceil(int(numOfCards[0]) / CARDS_PER_PAGE)
    print(numOfCards)
    print(numOfPage)
    return numOfPage


if __name__ == "__main__":
    listNewExp = ["Stardust-Overdrive", "Power-of-the-Elements", "Darkwing-Blast"]
    for exp in listNewExp:
        print("Inizio giro per espansione: " + exp)
        getListCardFromExpansion(exp)
        print("Fine giro per espansione: " + exp)
Prints:
Inizio giro per espansione: Stardust-Overdrive URL: https://www.cardmarket.com/it/YuGiOh/Products/Singles/Stardust-Overdrive?idRarity=0&sortBy=collectorsnumber_asc&perSite=20 ['116'] 6 Fine giro per espansione: Stardust-Overdrive Inizio giro per espansione: Power-of-the-Elements URL: https://www.cardmarket.com/it/YuGiOh/Products/Singles/Power-of-the-Elements?idRarity=0&sortBy=collectorsnumber_asc&perSite=20 ['106'] 6 Fine giro per espansione: Power-of-the-Elements Inizio giro per espansione: Darkwing-Blast URL: https://www.cardmarket.com/it/YuGiOh/Products/Singles/Darkwing-Blast?idRarity=0&sortBy=collectorsnumber_asc&perSite=20 ['106'] 6 Fine giro per espansione: Darkwing-Blast