This is the code I wrote for web scraping purposes.
I want to save all the data in a dictionary and then load that dictionary into a dataframe.
Inside the loop the dictionary appears to be filled on every iteration, but once the loop has finished all the lists (the values of my dictionary) are empty. How can I fix that?
i=2011 #league_data={} team_names=[] team_points=[] while i<2021: print(i) url="https://www.skysports.com/premier-league-table/"+str(i) page=requests.get(url) #print(page.status_code) soup= BeautifulSoup(page.text,'html.parser') league=soup.find('table',class_ ='standing-table__table') league_table = league.find_all('tbody') for league_teams in league_table: rows = league_teams.find_all('tr') for row in rows: if i==2011: team_name = row.find('td', class_ ='standing-table__cell standing-table__cell--name').text.strip() team_names.append(team_name) team_point = row.find_all('td', class_ = 'standing-table__cell')[9].text.strip() team_points.append(team_point) print(team_points) league_data[i]=team_points print(league_data) team_points.clear() i=i+1 #print(team_names) #print(len(team_names)) print(league_data)
This is the output. I have printed the state of the list and of the dictionary in each iteration.
2011 ['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25'] {2011: ['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25'], 2012: [], 2013: [], 2014: [], 2015: [], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []} 2012 ['89', '78', '75', '73', '72', '63', '61', '49', '46', '46', '44', '43', '42', '41', '41', '41', '39', '36', '28', '25'] {2011: ['89', '78', '75', '73', '72', '63', '61', '49', '46', '46', '44', '43', '42', '41', '41', '41', '39', '36', '28', '25'], 2012: ['89', '78', '75', '73', '72', '63', '61', '49', '46', '46', '44', '43', '42', '41', '41', '41', '39', '36', '28', '25'], 2013: [], 2014: [], 2015: [], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []} 2013 ['86', '84', '82', '79', '72', '69', '64', '56', '50', '49', '45', '42', '40', '38', '38', '37', '36', '33', '32', '30'] {2011: ['86', '84', '82', '79', '72', '69', '64', '56', '50', '49', '45', '42', '40', '38', '38', '37', '36', '33', '32', '30'], 2012: ['86', '84', '82', '79', '72', '69', '64', '56', '50', '49', '45', '42', '40', '38', '38', '37', '36', '33', '32', '30'], 2013: ['86', '84', '82', '79', '72', '69', '64', '56', '50', '49', '45', '42', '40', '38', '38', '37', '36', '33', '32', '30'], 2014: [], 2015: [], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []} 2014 ['87', '79', '75', '70', '64', '62', '60', '56', '54', '48', '47', '47', '44', '41', '39', '38', '38', '35', '33', '30'] {2011: ['87', '79', '75', '70', '64', '62', '60', '56', '54', '48', '47', '47', '44', '41', '39', '38', '38', '35', '33', '30'], 2012: ['87', '79', '75', '70', '64', '62', '60', '56', '54', '48', '47', '47', '44', '41', '39', '38', '38', '35', '33', '30'], 2013: ['87', '79', '75', '70', '64', '62', '60', '56', '54', '48', '47', '47', '44', '41', '39', '38', '38', '35', '33', '30'], 2014: ['87', '79', '75', '70', '64', '62', '60', '56', '54', '48', '47', '47', '44', '41', '39', 
'38', '38', '35', '33', '30'], 2015: [], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []} 2015 ['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17'] {2011: ['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17'], 2012: ['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17'], 2013: ['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17'], 2014: ['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17'], 2015: ['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17'], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []} 2016 ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'] {2011: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2012: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2013: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2014: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2015: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2016: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2017: [], 2018: [], 2019: [], 2020: []} 2017 ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'] {2011: ['100', '81', '77', 
'75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2012: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2013: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2014: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2015: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2016: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2017: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2018: [], 2019: [], 2020: []} 2018 ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'] {2011: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2012: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2013: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2014: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2015: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2016: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2017: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2018: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', 
'50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2019: [], 2020: []} 2019 ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'] {2011: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2012: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2013: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2014: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2015: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2016: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2017: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2018: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2019: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2020: []} 2020 ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'] {2011: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2012: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2013: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2014: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', 
'5'], 2015: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2016: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2017: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2018: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2019: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2020: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5']} {2011: [], 2012: [], 2013: [], 2014: [], 2015: [], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []}
Advertisement
Answer
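The problem is in the line league_data[i]=team_points: after this line executes, league_data[i] and team_points refer to the same list object (as you can see in my output, both have the same id), so the later team_points.clear() also empties the list stored in the dictionary.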
# Diagnostic version of the scraping loop: prints the ids of the stored
# dictionary value and of team_points to show that they are the same object.
i=2011
league_data={}
team_names=[]
team_points=[]
while i<2021:
    print(i)
    url="https://www.skysports.com/premier-league-table/"+str(i)
    page=requests.get(url)
    #print(page.status_code)
    soup= BeautifulSoup(page.text,'html.parser')
    league=soup.find('table',class_ ='standing-table__table')
    league_table = league.find_all('tbody')
    for league_teams in league_table:
        rows = league_teams.find_all('tr')
        for row in rows:
            # Team names are collected only on the first year's pass.
            if i==2011:
                team_name = row.find('td', class_ ='standing-table__cell standing-table__cell--name').text.strip()
                team_names.append(team_name)
            team_point = row.find_all('td', class_ = 'standing-table__cell')[9].text.strip()
            team_points.append(team_point)
    print(team_points)
    # Stores a reference to team_points, not a copy.
    league_data[i]=team_points
    print(league_data)
    # Both prints show the same id: the dict value and team_points are one
    # and the same list object.
    print("Id of league_data[i]:", id(league_data[i]))
    print("Id of team_points :", id(team_points))
    # Clearing the shared list therefore also empties league_data[i].
    team_points.clear()
    i=i+1
    # Stop after the first year; the demonstration only needs one iteration.
    break
#print(team_names)
#print(len(team_names))
print(league_data)
2011 ['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25'] {2011: ['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25']} Id of league_data[i]: 140615373022336 Id of team_points : 140615373022336 {2011: []}
Solution:
Just change the line league_data[i]=team_points
to league_data[i]=team_points.copy()
so that the dictionary stores an independent copy of the list, and the problem is solved.
# Fixed version of the scraping loop: each year gets its own copy of the
# collected list, so clearing team_points no longer affects stored values.
i=2011
league_data={}
team_names=[]
team_points=[]
while i<2021:
    print(i)
    url="https://www.skysports.com/premier-league-table/"+str(i)
    page=requests.get(url)
    #print(page.status_code)
    soup= BeautifulSoup(page.text,'html.parser')
    league=soup.find('table',class_ ='standing-table__table')
    league_table = league.find_all('tbody')
    for league_teams in league_table:
        rows = league_teams.find_all('tr')
        for row in rows:
            # Team names are collected only on the first year's pass.
            if i==2011:
                team_name = row.find('td', class_ ='standing-table__cell standing-table__cell--name').text.strip()
                team_names.append(team_name)
            team_point = row.find_all('td', class_ = 'standing-table__cell')[9].text.strip()
            team_points.append(team_point)
    print(team_points)
    # Store a shallow copy: the dictionary value is now a separate list
    # object from team_points.
    league_data[i]=team_points.copy()
    print(league_data)
    # The two ids now differ, confirming the stored list is independent.
    print("Id of league_data[i]:", id(league_data[i]))
    print("Id of team_points :", id(team_points))
    # Only the working list is emptied; league_data[i] keeps its contents.
    team_points.clear()
    i=i+1
    # Stop after the first year; the demonstration only needs one iteration.
    break
#print(team_names)
#print(len(team_names))
print(league_data)
2011 ['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25'] {2011: ['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25']} Id of league_data[i]: 140615375754176 Id of team_points : 140614558230912 {2011: ['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25']}