Skip to content
Advertisement

Lists in a dictionary become empty after coming out of the loop

This is the code I wrote for web scraping purposes.

I want to save all data in the dictionary and then save that data into a dataframe.

Up to the last iteration, it saves the dictionary, but when coming out of the loop all lists (that are the values of my dictionary) are empty. How can one fix that?

# Scrape the Premier League table page on skysports.com for each season from
# 2011 through 2020 and store that season's list of points in league_data[year].
# NOTE(review): requests and BeautifulSoup are used below but are not imported
# in this snippet; presumably they were imported earlier in the session.
i=2011
# NOTE(review): the initialisation of league_data is commented out, so the
# dictionary must already exist (e.g. from an earlier run); otherwise the
# assignment league_data[i]=team_points below raises NameError.
#league_data={}
team_names=[]
team_points=[]
while i<2021:
    print(i)
    # One page per season: the year is appended to the base URL.
    url="https://www.skysports.com/premier-league-table/"+str(i)
    page=requests.get(url)
    #print(page.status_code)
    soup= BeautifulSoup(page.text,'html.parser')
    # NOTE(review): find() returns None when no matching table is present, in
    # which case the find_all() call on the next line raises AttributeError.
    league=soup.find('table',class_ ='standing-table__table')
    league_table = league.find_all('tbody')
    for league_teams in league_table: 
        rows = league_teams.find_all('tr')
        for row in rows:
            # Team names are collected only while processing the first season.
            if i==2011:
                team_name = row.find('td', class_ ='standing-table__cell standing-table__cell--name').text.strip()
                team_names.append(team_name)   
            # Text of the tenth 'standing-table__cell' cell in the row;
            # presumably the points column - verify against the page markup.
            team_point = row.find_all('td', class_ = 'standing-table__cell')[9].text.strip()
            team_points.append(team_point)
                
        # Printed once per <tbody>, after all of its rows have been read.
        print(team_points)
        
    
    # Stores a reference to the team_points list itself (no copy is made), so
    # league_data[i] and team_points are the same object from here on.
    league_data[i]=team_points
    print(league_data)
    # clear() empties that same list in place, which also empties the value
    # just stored in league_data[i]. Every key ends up sharing this one list,
    # which is why all values are [] once the loop has finished.
    team_points.clear()
    i=i+1

#print(team_names)
#print(len(team_names))
 

print(league_data)

This is the output. In it, I have printed the state of the list and of the dictionary in each iteration.

2011
['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25']
{2011: ['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25'], 2012: [], 2013: [], 2014: [], 2015: [], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []}
2012
['89', '78', '75', '73', '72', '63', '61', '49', '46', '46', '44', '43', '42', '41', '41', '41', '39', '36', '28', '25']
{2011: ['89', '78', '75', '73', '72', '63', '61', '49', '46', '46', '44', '43', '42', '41', '41', '41', '39', '36', '28', '25'], 2012: ['89', '78', '75', '73', '72', '63', '61', '49', '46', '46', '44', '43', '42', '41', '41', '41', '39', '36', '28', '25'], 2013: [], 2014: [], 2015: [], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []}
2013
['86', '84', '82', '79', '72', '69', '64', '56', '50', '49', '45', '42', '40', '38', '38', '37', '36', '33', '32', '30']
{2011: ['86', '84', '82', '79', '72', '69', '64', '56', '50', '49', '45', '42', '40', '38', '38', '37', '36', '33', '32', '30'], 2012: ['86', '84', '82', '79', '72', '69', '64', '56', '50', '49', '45', '42', '40', '38', '38', '37', '36', '33', '32', '30'], 2013: ['86', '84', '82', '79', '72', '69', '64', '56', '50', '49', '45', '42', '40', '38', '38', '37', '36', '33', '32', '30'], 2014: [], 2015: [], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []}
2014
['87', '79', '75', '70', '64', '62', '60', '56', '54', '48', '47', '47', '44', '41', '39', '38', '38', '35', '33', '30']
{2011: ['87', '79', '75', '70', '64', '62', '60', '56', '54', '48', '47', '47', '44', '41', '39', '38', '38', '35', '33', '30'], 2012: ['87', '79', '75', '70', '64', '62', '60', '56', '54', '48', '47', '47', '44', '41', '39', '38', '38', '35', '33', '30'], 2013: ['87', '79', '75', '70', '64', '62', '60', '56', '54', '48', '47', '47', '44', '41', '39', '38', '38', '35', '33', '30'], 2014: ['87', '79', '75', '70', '64', '62', '60', '56', '54', '48', '47', '47', '44', '41', '39', '38', '38', '35', '33', '30'], 2015: [], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []}
2015
['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17']
{2011: ['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17'], 2012: ['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17'], 2013: ['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17'], 2014: ['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17'], 2015: ['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17'], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []}
2016
['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24']
{2011: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2012: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2013: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2014: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2015: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2016: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2017: [], 2018: [], 2019: [], 2020: []}
2017
['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31']
{2011: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2012: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2013: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2014: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2015: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2016: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2017: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2018: [], 2019: [], 2020: []}
2018
['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16']
{2011: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2012: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2013: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2014: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2015: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2016: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2017: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2018: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2019: [], 2020: []}
2019
['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21']
{2011: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2012: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2013: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2014: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2015: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2016: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2017: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2018: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2019: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2020: []}
2020
['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5']
{2011: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2012: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2013: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2014: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2015: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2016: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2017: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2018: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2019: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2020: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5']}
{2011: [], 2012: [], 2013: [], 2014: [], 2015: [], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []}

Advertisement

Answer

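The problem is in the line league_data[i]=team_points: after it executes, league_data[i] and team_points refer to the same list object (as you can see in my output, both have the same id). Calling team_points.clear() afterwards therefore also empties the list stored in the dictionary.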

# Diagnostic version of the scraping loop: it prints the id() of
# league_data[i] and of team_points to show that both names refer to the same
# list object, and stops after the first season.
# NOTE(review): requests and BeautifulSoup are used below but are not imported
# in this snippet; presumably they were imported earlier in the session.
i=2011
league_data={}
team_names=[]
team_points=[]
while i<2021:
    print(i)
    # One page per season: the year is appended to the base URL.
    url="https://www.skysports.com/premier-league-table/"+str(i)
    page=requests.get(url)
    #print(page.status_code)
    soup= BeautifulSoup(page.text,'html.parser')
    league=soup.find('table',class_ ='standing-table__table')
    league_table = league.find_all('tbody')
    for league_teams in league_table: 
        rows = league_teams.find_all('tr')
        for row in rows:
            # Team names are collected only while processing the first season.
            if i==2011:
                team_name = row.find('td', class_ ='standing-table__cell standing-table__cell--name').text.strip()
                team_names.append(team_name)   
            # Text of the tenth 'standing-table__cell' cell in the row;
            # presumably the points column - verify against the page markup.
            team_point = row.find_all('td', class_ = 'standing-table__cell')[9].text.strip()
            team_points.append(team_point)
                
        print(team_points)
        
    
    # Stores a reference to the team_points list itself; no copy is made.
    league_data[i]=team_points

    print(league_data)
    # Both ids are identical because both expressions name the same list.
    print("Id of league_data[i]:", id(league_data[i]))
    print("Id of team_points   :",  id(team_points))
    # Emptying team_points in place therefore also empties league_data[i].
    team_points.clear()
    i=i+1
    # Unconditional break: only the first season (2011) is processed here.
    break
#print(team_names)
#print(len(team_names))
 

print(league_data)
Output:

2011
['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25']
{2011: ['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25']}

Id of league_data[i]: 140615373022336
Id of team_points   : 140615373022336
{2011: []}

Solution: change the line league_data[i]=team_points to league_data[i]=team_points.copy(), so that the dictionary stores its own copy of the list, and the problem is solved.

# Corrected version of the scraping loop: a copy of team_points is stored in
# league_data[i], so clearing team_points afterwards leaves the stored values
# intact.
# NOTE(review): requests and BeautifulSoup are used below but are not imported
# in this snippet; presumably they were imported earlier in the session.
i=2011
league_data={}
team_names=[]
team_points=[]
while i<2021:
    print(i)
    # One page per season: the year is appended to the base URL.
    url="https://www.skysports.com/premier-league-table/"+str(i)
    page=requests.get(url)
    #print(page.status_code)
    soup= BeautifulSoup(page.text,'html.parser')
    league=soup.find('table',class_ ='standing-table__table')
    league_table = league.find_all('tbody')
    for league_teams in league_table: 
        rows = league_teams.find_all('tr')
        for row in rows:
            # Team names are collected only while processing the first season.
            if i==2011:
                team_name = row.find('td', class_ ='standing-table__cell standing-table__cell--name').text.strip()
                team_names.append(team_name)   
            # Text of the tenth 'standing-table__cell' cell in the row;
            # presumably the points column - verify against the page markup.
            team_point = row.find_all('td', class_ = 'standing-table__cell')[9].text.strip()
            team_points.append(team_point)
                
        print(team_points)
        
    
    # copy() creates a new (shallow) list with the same elements, so the
    # dictionary value is a separate object from team_points.
    league_data[i]=team_points.copy()

    print(league_data)
    # The two ids now differ because the dictionary holds its own list.
    print("Id of league_data[i]:", id(league_data[i]))
    print("Id of team_points   :",  id(team_points))
    # Only the working list is emptied; the copy in league_data is unaffected.
    team_points.clear()
    i=i+1
    # NOTE(review): this unconditional break ends the loop after the first
    # season (2011); remove it to collect every season through 2020.
    break
#print(team_names)
#print(len(team_names))
 

print(league_data)
Output:

2011
['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25']
{2011: ['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25']}
Id of league_data[i]: 140615375754176
Id of team_points   : 140614558230912
{2011: ['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25']}
User contributions licensed under: CC BY-SA
4 people found this helpful
Advertisement