When I try to write the information to the CSV file, an error is thrown:
```
Traceback (most recent call last):
  File "sizeer.py", line 68, in <module>
    writer.writerow([name,color,price])
ValueError: I/O operation on closed file
```
```
import requests
import csv
from bs4 import BeautifulSoup

proxies = {
    "http":"http://195.189.60.97:3128",
    "http":"http://103.78.75.165:8080",
    "http":"http://212.87.220.2:3128",
    "http":"http://88.99.134.61:8080",
    "http":"http://103.102.139.178:8080",
    "http":"http://218.60.8.83:3129",
    "http":"http://124.121.105.193:8888",
    "http":"http://198.237.114.54:8080",
    "http":"http://36.67.106.58:8080",
    "http":"http://35.214.241.28:3128"
}

base_url = ...

page = requests.get(base_url, proxies=proxies)
if page.status_code != 200:
    exit("Page wasn't parsed")
soup = BeautifulSoup(page.content, 'lxml')

with open("result.csv", "w") as file:
    writer = csv.writer(file)
    writer.writerow(["Product","Color","Price"])

# Get categories
category_wrapper = soup.find_all(class_="m-menu_subItem")
categories = []
for cw in category_wrapper:
    anchor = cw.find("a", recursive=False)
    categories.append(anchor['href'])

# Iterate categories
for category in categories:
    cat_page = requests.get(base_url + category, proxies=proxies)
    cat_soup = BeautifulSoup(cat_page.content, 'lxml')
    products_wrapper = cat_soup.find(class_="b-productList")
    cat_pagination = products_wrapper.find(class_="m-pagination").find_all("span")
    max_page = [int(s) for s in cat_pagination[-1].text.split() if s.isdigit()][0]

    # Iterate category with pagination and get products
    for i in range(1, max_page+1):
        cat_pagination_page = requests.get(base_url+category+"/?sort=default&limit=60&page="+str(i), proxies=proxies)
        cat_pagination_page_soup = BeautifulSoup(cat_pagination_page.content, 'lxml')
        product_links = cat_pagination_page_soup.find_all(class_="b-itemList_photoLink")
        for link in product_links:
            # Get product data
            product_page = requests.get(base_url+link['href'], proxies=proxies)
            product_soup = BeautifulSoup(product_page.content, 'lxml')
            # Get product variations
            variations = product_soup.find_all(class_="m-productDescr_colorItem")
            # If there are variations
            if len(variations) > 0:
                for v in variations:
                    variation_page = requests.get(base_url+v['href'], proxies=proxies)
                    variation_soup = BeautifulSoup(variation_page.content, 'lxml')
                    price = variation_soup.find(class_="s-newPrice").text.strip().split(" ")[0]
                    name = variation_soup.find(class_="m-productDescr_headline").text.strip()
                    color = v['title']
                    print(name)
                    print(color)
                    print(price)
                    print("-------------")
                    # Save in csv
                    writer.writerow([name,color,price])

print("SCRAPING DONE")
```
How do I keep the file open through the whole script execution? Or do I have to open it every time I add content?

EDIT: In fact, the file is not even created.
Answer
```
with open("result.csv", "w") as file:
    writer = csv.writer(file)
    writer.writerow(["Product","Color","Price"])
The file closes at the end of the `with` block – that is the block's purpose.
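You can see the mechanism in isolation with a minimal sketch (the file name `demo.csv` here is just a placeholder) that reproduces the same `ValueError`:

```
import csv

with open("demo.csv", "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["inside", "the", "block"])  # fine: the file is still open

# `file` was closed the moment the `with` block ended,
# so any further write through `writer` fails:
writer.writerow(["outside", "the", "block"])  # ValueError: I/O operation on closed file
```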
You could put everything inside the block, but that would only make the existing problem worse: the code already reaches several levels of indentation, is long, and is becoming difficult to understand. This is why you use functions to organize the code. For example, if you move the big `for` loop into a function:
```
def do_stuff_with(categories, writer):
    for category in categories:
        # lots of logic here
        # use `writer.writerow` when needed

# Get everything else set up that doesn't need the file, first
categories = ... # do the BeautifulSoup input stuff

# then we can open the file and use the function:
with open("result.csv", "w") as file:
    writer = csv.writer(file)
    writer.writerow(["Product","Color","Price"])
    do_stuff_with(categories, writer)
```
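Note the design choice here: the function receives the `writer` as a parameter rather than opening the file itself, so the `with` block stays the single place that controls the file's lifetime. As a side note, the `csv` module's documentation recommends opening the file with `newline=""` (i.e. `open("result.csv", "w", newline="")`) so that `csv.writer` can handle line endings itself.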
Once you have that working, you can probably think of ways to apply the technique further. For example, pull out the innermost logic for handling the `variations` of a single product. Or you can have a function that handles the creation of the `categories` data and `return`s it.
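As a rough sketch of that further decomposition, built from the code in the question (the function names and signatures are illustrative, not part of the original script):

```
import requests
from bs4 import BeautifulSoup

def get_categories(soup):
    # Build and return the `categories` data from the menu
    categories = []
    for cw in soup.find_all(class_="m-menu_subItem"):
        anchor = cw.find("a", recursive=False)
        categories.append(anchor['href'])
    return categories

def handle_variations(variations, base_url, proxies, writer):
    # Innermost logic: write one row per variation of a single product
    for v in variations:
        variation_page = requests.get(base_url + v['href'], proxies=proxies)
        variation_soup = BeautifulSoup(variation_page.content, 'lxml')
        price = variation_soup.find(class_="s-newPrice").text.strip().split(" ")[0]
        name = variation_soup.find(class_="m-productDescr_headline").text.strip()
        writer.writerow([name, v['title'], price])
```

Each function now does one job, and only the pieces that actually write to the CSV ever see the `writer`.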