I'm trying to write a `for` loop that opens a series of links and retrieves data from each one. The problem is that every iteration retrieves the same page (the first one), even though I change the link each time.
Python
x
26
26
1
def getListCardFromExpansion(expansion):
    """Download one expansion's singles listing and print its card and page counts.

    The listing URL is built from ``expansion`` (the expansion's URL slug)
    plus a fixed query string. The total is taken from the first text node
    containing "Risultati" and converted to a page count at 20 cards per
    page (matching ``perSite=20``). Nothing is returned; the URL, the raw
    totals and the page count are printed.

    NOTE(review): ``filterURL`` hard-codes ``idExpansion=1178``. This looks
    like the reason every call reports the same listing whatever
    ``expansion`` is passed; it is left untouched because it is the
    behaviour being asked about.
    """
    baseURL = 'https://www.cardmarket.com/it/YuGiOh/Products/Singles/'
    filterURL = '?idCategory=5&idExpansion=1178&idRarity=0&sortBy=collectorsnumber_asc&perSite=20'
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246'}

    URL = baseURL + expansion + filterURL
    print("URL: " + URL)

    page = requests.get(url=URL, headers=request_headers)
    soup = BeautifulSoup(page.content, 'html.parser')

    # Keep the first whitespace-separated token of every text node that
    # mentions "Risultati"; the first of those is used as the card total.
    card_totals = []
    for node in soup.find_all(text=re.compile('Risultati')):
        card_totals.append(node.split()[0])

    page_count = math.ceil(int(card_totals[0]) / 20)
    print(card_totals)
    print(page_count)
18
19
20
# Script entry point: query each expansion in turn, bracketing every run
# with a start and an end marker so the output can be matched to its slug.
if __name__ == '__main__':
    listNewExp = [
        'Stardust-Overdrive',
        'Power-of-the-Elements',
        'Darkwing-Blast',
    ]
    for exp in listNewExp:
        print(f"Inizio giro per espansione: {exp}")
        getListCardFromExpansion(exp)
        print(f"Fine giro per espansione: {exp}")
26
Advertisement
Answer
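Try removing `idCategory=5&idExpansion=1178` from `filterURL`. With a fixed `idExpansion` in the query string, every request asks for the same expansion no matter which name is in the URL path; without it, the expansion is selected by the path alone: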
Python
1
31
31
1
import math
import re

import requests
from bs4 import BeautifulSoup
4
5
6
7
def getListCardFromExpansion(expansion, *, per_site=20):
    """Fetch one expansion's singles listing and report how many pages it spans.

    The URL, the extracted totals and the page count are printed, as before;
    the page count is now also returned so callers can use it.

    Args:
        expansion: URL slug of the expansion, e.g. ``"Darkwing-Blast"``.
        per_site: Results per page. It is sent as ``perSite`` and used as
            the divisor for the page count, so the two cannot drift apart.
            NOTE(review): only 20 was used originally — confirm which
            values the site accepts before passing anything else.

    Returns:
        The number of result pages (total cards / ``per_site``, rounded up).

    Raises:
        requests.HTTPError: The server answered with an error status.
        requests.Timeout: The server did not answer within 30 seconds.
        IndexError: No text containing "Risultati" was found in the page.
        ValueError: The first token of the "Risultati" text is not a number.
    """
    baseURL = "https://www.cardmarket.com/it/YuGiOh/Products/Singles/"
    # Deliberately no idCategory/idExpansion filter: a hard-coded expansion id
    # made every request return the same listing regardless of the path.
    filterURL = f"?idRarity=0&sortBy=collectorsnumber_asc&perSite={per_site}"
    url = baseURL + expansion + filterURL
    print("URL: " + url)
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246"
    }

    # Without a timeout requests can block forever on an unresponsive server.
    resp = requests.get(url=url, headers=headers, timeout=30)
    # Fail here on 4xx/5xx instead of later while parsing an error page.
    resp.raise_for_status()

    soup = BeautifulSoup(resp.content, "html.parser")
    # `string=` is the current name of the deprecated `text=` argument.
    result_strings = soup.find_all(string=re.compile("Risultati"))
    card_counts = [result.split()[0] for result in result_strings]
    if not card_counts:
        # Same exception type the bare card_counts[0] lookup used to raise,
        # but with a message that says what actually went wrong.
        raise IndexError(f"no 'Risultati' counter found in page {url}")

    num_pages = math.ceil(int(card_counts[0]) / per_site)
    print(card_counts)
    print(num_pages)
    return num_pages
23
24
25
# Script entry point: process the expansions one after another and print a
# start/end marker around each call.
if __name__ == "__main__":
    listNewExp = ["Stardust-Overdrive", "Power-of-the-Elements", "Darkwing-Blast"]
    for exp in listNewExp:
        print("Inizio giro per espansione: {}".format(exp))
        getListCardFromExpansion(exp)
        print("Fine giro per espansione: {}".format(exp))
31
Prints:
Text
1
16
16
1
Inizio giro per espansione: Stardust-Overdrive
2
URL: https://www.cardmarket.com/it/YuGiOh/Products/Singles/Stardust-Overdrive?idRarity=0&sortBy=collectorsnumber_asc&perSite=20
3
['116']
4
6
5
Fine giro per espansione: Stardust-Overdrive
6
Inizio giro per espansione: Power-of-the-Elements
7
URL: https://www.cardmarket.com/it/YuGiOh/Products/Singles/Power-of-the-Elements?idRarity=0&sortBy=collectorsnumber_asc&perSite=20
8
['106']
9
6
10
Fine giro per espansione: Power-of-the-Elements
11
Inizio giro per espansione: Darkwing-Blast
12
URL: https://www.cardmarket.com/it/YuGiOh/Products/Singles/Darkwing-Blast?idRarity=0&sortBy=collectorsnumber_asc&perSite=20
13
['106']
14
6
15
Fine giro per espansione: Darkwing-Blast
16