This is the code I wrote for web scraping.
I want to collect all the data in a dictionary and then load that dictionary into a DataFrame.
During the loop, each iteration stores its list in the dictionary, but once the loop has finished, all the lists (the values of my dictionary) are empty. How can I fix that?
JavaScript
x
35
35
1
# Scrape the Sky Sports Premier League table for each season from 2011 to 2020
# and store every season's points list in league_data, keyed by the year.
i=2011
# NOTE: league_data is not created in this snippet (the line below is
# commented out), so it must already exist when the loop runs.
#league_data={}
team_names=[]
team_points=[]
while i<2021:
    print(i)
    url="https://www.skysports.com/premier-league-table/"+str(i)
    page=requests.get(url)
    #print(page.status_code)
    soup= BeautifulSoup(page.text,'html.parser')
    league=soup.find('table',class_ ='standing-table__table')
    league_table = league.find_all('tbody')
    for league_teams in league_table:
        rows = league_teams.find_all('tr')
        for row in rows:
            # Team names are collected only while processing the first season.
            if i==2011:
                team_name = row.find('td', class_ ='standing-table__cell standing-table__cell--name').text.strip()
                team_names.append(team_name)
            # The tenth cell of the row is taken as the team's points.
            team_point = row.find_all('td', class_ = 'standing-table__cell')[9].text.strip()
            team_points.append(team_point)

    print(team_points)

    # The value stored here is the team_points list object itself, not a copy.
    league_data[i]=team_points
    print(league_data)
    # Empties that same list in place before the next season is processed.
    team_points.clear()
    i=i+1

#print(team_names)
#print(len(team_names))

print(league_data)
35
This is the output. I print the state of the list and of the dictionary on each iteration.
JavaScript
1
32
32
1
2011
2
['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25']
3
{2011: ['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25'], 2012: [], 2013: [], 2014: [], 2015: [], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []}
4
2012
5
['89', '78', '75', '73', '72', '63', '61', '49', '46', '46', '44', '43', '42', '41', '41', '41', '39', '36', '28', '25']
6
{2011: ['89', '78', '75', '73', '72', '63', '61', '49', '46', '46', '44', '43', '42', '41', '41', '41', '39', '36', '28', '25'], 2012: ['89', '78', '75', '73', '72', '63', '61', '49', '46', '46', '44', '43', '42', '41', '41', '41', '39', '36', '28', '25'], 2013: [], 2014: [], 2015: [], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []}
7
2013
8
['86', '84', '82', '79', '72', '69', '64', '56', '50', '49', '45', '42', '40', '38', '38', '37', '36', '33', '32', '30']
9
{2011: ['86', '84', '82', '79', '72', '69', '64', '56', '50', '49', '45', '42', '40', '38', '38', '37', '36', '33', '32', '30'], 2012: ['86', '84', '82', '79', '72', '69', '64', '56', '50', '49', '45', '42', '40', '38', '38', '37', '36', '33', '32', '30'], 2013: ['86', '84', '82', '79', '72', '69', '64', '56', '50', '49', '45', '42', '40', '38', '38', '37', '36', '33', '32', '30'], 2014: [], 2015: [], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []}
10
2014
11
['87', '79', '75', '70', '64', '62', '60', '56', '54', '48', '47', '47', '44', '41', '39', '38', '38', '35', '33', '30']
12
{2011: ['87', '79', '75', '70', '64', '62', '60', '56', '54', '48', '47', '47', '44', '41', '39', '38', '38', '35', '33', '30'], 2012: ['87', '79', '75', '70', '64', '62', '60', '56', '54', '48', '47', '47', '44', '41', '39', '38', '38', '35', '33', '30'], 2013: ['87', '79', '75', '70', '64', '62', '60', '56', '54', '48', '47', '47', '44', '41', '39', '38', '38', '35', '33', '30'], 2014: ['87', '79', '75', '70', '64', '62', '60', '56', '54', '48', '47', '47', '44', '41', '39', '38', '38', '35', '33', '30'], 2015: [], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []}
13
2015
14
['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17']
15
{2011: ['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17'], 2012: ['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17'], 2013: ['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17'], 2014: ['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17'], 2015: ['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17'], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []}
16
2016
17
['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24']
18
{2011: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2012: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2013: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2014: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2015: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2016: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2017: [], 2018: [], 2019: [], 2020: []}
19
2017
20
['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31']
21
{2011: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2012: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2013: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2014: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2015: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2016: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2017: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2018: [], 2019: [], 2020: []}
22
2018
23
['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16']
24
{2011: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2012: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2013: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2014: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2015: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2016: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2017: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2018: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2019: [], 2020: []}
25
2019
26
['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21']
27
{2011: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2012: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2013: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2014: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2015: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2016: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2017: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2018: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2019: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2020: []}
28
2020
29
['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5']
30
{2011: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2012: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2013: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2014: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2015: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2016: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2017: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2018: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2019: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2020: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5']}
31
{2011: [], 2012: [], 2013: [], 2014: [], 2015: [], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []}
32
Advertisement
Answer
The problem is the line league_data[i]=team_points: after it executes, league_data[i] and team_points refer to the same list object (as you can see in my output, both have the same id), so calling team_points.clear() also empties the list stored in the dictionary.
JavaScript
1
38
38
1
# Diagnostic version of the scraping loop: it prints the id() of the list
# stored in league_data and the id() of team_points so the two can be compared.
i=2011
league_data={}
team_names=[]
team_points=[]
while i<2021:
    print(i)
    url="https://www.skysports.com/premier-league-table/"+str(i)
    page=requests.get(url)
    #print(page.status_code)
    soup= BeautifulSoup(page.text,'html.parser')
    league=soup.find('table',class_ ='standing-table__table')
    league_table = league.find_all('tbody')
    for league_teams in league_table:
        rows = league_teams.find_all('tr')
        for row in rows:
            # Team names are collected only while processing the first season.
            if i==2011:
                team_name = row.find('td', class_ ='standing-table__cell standing-table__cell--name').text.strip()
                team_names.append(team_name)
            # The tenth cell of the row is taken as the team's points.
            team_point = row.find_all('td', class_ = 'standing-table__cell')[9].text.strip()
            team_points.append(team_point)

    print(team_points)

    # The value stored here is the team_points list object itself, not a copy.
    league_data[i]=team_points

    print(league_data)
    # Equal ids mean both names refer to one and the same list object.
    print("Id of league_data[i]:", id(league_data[i]))
    print("Id of team_points :", id(team_points))
    # Clearing team_points therefore also empties league_data[i].
    team_points.clear()
    i=i+1
    # Stop after the first season; one iteration is enough for the demonstration.
    break
#print(team_names)
#print(len(team_names))

print(league_data)
38
JavaScript
1
8
1
2011
2
['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25']
3
{2011: ['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25']}
4
5
Id of league_data[i]: 140615373022336
6
Id of team_points : 140615373022336
7
{2011: []}
8
Solution:
Change the line league_data[i]=team_points to league_data[i]=team_points.copy(), so that the dictionary stores its own copy of the list instead of the list that is cleared afterwards. That solves the problem.
JavaScript
1
38
38
1
# Corrected version of the scraping loop: each season's points are stored in
# league_data as a copy of team_points, so clearing team_points afterwards
# leaves the stored list intact.
i=2011
league_data={}
team_names=[]
team_points=[]
while i<2021:
    print(i)
    url="https://www.skysports.com/premier-league-table/"+str(i)
    page=requests.get(url)
    #print(page.status_code)
    soup= BeautifulSoup(page.text,'html.parser')
    league=soup.find('table',class_ ='standing-table__table')
    league_table = league.find_all('tbody')
    for league_teams in league_table:
        rows = league_teams.find_all('tr')
        for row in rows:
            # Team names are collected only while processing the first season.
            if i==2011:
                team_name = row.find('td', class_ ='standing-table__cell standing-table__cell--name').text.strip()
                team_names.append(team_name)
            # The tenth cell of the row is taken as the team's points.
            team_point = row.find_all('td', class_ = 'standing-table__cell')[9].text.strip()
            team_points.append(team_point)

    print(team_points)

    # Store a separate copy, so the dictionary value is independent of team_points.
    league_data[i]=team_points.copy()

    print(league_data)
    # Different ids confirm that the stored list and team_points are two objects.
    print("Id of league_data[i]:", id(league_data[i]))
    print("Id of team_points :", id(team_points))
    # Only the working list is emptied; the copy in league_data keeps its items.
    team_points.clear()
    i=i+1
    # Stops after the first season to keep the demonstration short;
    # remove this break to process every season up to 2020.
    break
#print(team_names)
#print(len(team_names))

print(league_data)
38
JavaScript
1
7
1
2011
2
['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25']
3
{2011: ['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25']}
4
Id of league_data[i]: 140615375754176
5
Id of team_points : 140614558230912
6
{2011: ['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25']}
7