Given below is the code for importing a pipe-delimited CSV file into MongoDB.
Python
x
18
18
1
import csv
2
import json
3
from pymongo import MongoClient
4
5
# Connect to the local MongoDB server and select the Office.Customer collection.
url = "mongodb://localhost:27017"
client = MongoClient(url)
db = client["Office"]
customer = db["Customer"]

jsonArray = []

# Parse the pipe-delimited file; DictReader yields one dict per data row,
# keyed by the column names taken from the first line.
with open("Names.txt", "r") as csv_file:
    csv_reader = csv.DictReader(
        csv_file,
        dialect='excel',
        delimiter='|',
        quoting=csv.QUOTE_NONE,
    )
    jsonArray.extend(csv_reader)

# Every row is serialised into one JSON string and parsed back again, so the
# complete data set is held in memory several times over at this point.
jsonString = json.dumps(jsonArray, indent=1, separators=(",", ":"))
jsonfile = json.loads(jsonString)

# Insert all parsed rows with a single bulk call.
customer.insert_many(jsonfile)
18
Below is the error I get when running the above code.
JavaScript
1
9
1
Traceback (most recent call last):
2
File "E:\Anaconda Projects\Mongo Projects\Office Tool\csvtojson.py", line 16, in <module>
3
jsonString = json.dumps(jsonArray, indent=1, separators=(",", ":"))
4
File "C:\Users\Predator\anaconda3\lib\json\__init__.py", line 234, in dumps
5
return cls(
6
File "C:\Users\Predator\anaconda3\lib\json\encoder.py", line 201, in encode
7
chunks = list(chunks)
8
MemoryError
9
If I modify the code by indenting some of the lines so that they fall under the for loop, MongoDB keeps importing the same data over and over again without stopping.
Python
1
18
18
1
import csv
2
import json
3
from pymongo import MongoClient
4
5
# Connect to the local MongoDB server and select the Office.Customer collection.
url = "mongodb://localhost:27017"
client = MongoClient(url)
db = client["Office"]
customer = db["Customer"]

jsonArray = []

# Parse the pipe-delimited file; DictReader yields one dict per data row.
with open("Names.txt", "r") as csv_file:
    csv_reader = csv.DictReader(
        csv_file,
        dialect='excel',
        delimiter='|',
        quoting=csv.QUOTE_NONE,
    )
    for row in csv_reader:
        jsonArray.append(row)
        # NOTE(review): indentation was lost in this listing. The nesting of
        # the next three statements under the for loop is reconstructed from
        # the accompanying description -- confirm against the original script.
        # Nested like this, every pass serialises and inserts ALL rows read so
        # far, so earlier rows are written to the collection again and again.
        jsonString = json.dumps(jsonArray, indent=1, separators=(",", ":"))
        jsonfile = json.loads(jsonString)
        customer.insert_many(jsonfile)
18
Advertisement
Answer
The memory issue can be solved by inserting one record at a time.
Python
1
23
23
1
import csv
2
import json
3
4
from pymongo import MongoClient
5
6
# Connection settings and the pipe-delimited input file.
url_mongo = "mongodb://localhost:27017"
file_txt = "Text.txt"

client = MongoClient(url_mongo)
db = client.Office
customer = db.Customer

# Stream the file: each row is inserted as soon as it has been parsed, so no
# list of all rows (and no JSON copy of it) is ever built and memory use does
# not grow with the size of the input.
# newline="" leaves newline handling to the csv module, as its docs recommend.
with open(file_txt, "r", newline="") as txt_file:
    csv_reader = csv.DictReader(
        txt_file,
        dialect="excel",
        delimiter="|",
        quoting=csv.QUOTE_NONE,
        # Rows with more fields than the header store the surplus under this
        # key. "null" is what the previous json.dumps/json.loads round trip
        # turned DictReader's default None key into, so stored documents keep
        # the same shape (and the key is a string, as MongoDB requires).
        restkey="null",
    )
    for row in csv_reader:
        # DictReader already yields a plain mapping of column name -> value,
        # which insert_one accepts directly; one document per input row.
        customer.insert_one(row)

print("Finished")
23
Thank You All for Your Ideas