When I manually pass the JSON file's key and load it in the Python shell, it works fine. Code below:
JavaScript
x
18
18
1
import os
2
import json
3
import boto3
4
import io
5
import requests
6
import botocore
7
# Location of the single JSON object to load.
bucket_name = 'dev-data'
folder_name = 'raw/test/'
key_source = 'raw/test/extract_api_20200719.json'

# Download the object and decode its body as UTF-8 text.
s3_client = boto3.client('s3')
json_obj = s3_client.get_object(Bucket=bucket_name, Key=key_source)
raw_body = json_obj["Body"].read()
json_data = raw_body.decode('utf-8')
print("############################json_data####################### :", json_data)
print("############################json_data_type################## :", type(json_data))

# Parse the decoded text with the json module and show the result.
json_dict = json.loads(json_data)
print("############################json_dict####################### :", json_dict)
print("############################json_dict_type ################# :", type(json_dict))
18
However, when I use a for loop to read every JSON object from the S3 bucket, I get the following error:
JavaScript
1
20
20
1
import os
2
import json
3
import boto3
4
import io
5
import requests
6
import botocore
7
bucket_name = 'dev-data'
folder_name = 'raw/test/'

s3_resource = boto3.resource('s3')
bucket = s3_resource.Bucket(bucket_name)

# Visit every object whose key starts with the folder prefix.
for obj in bucket.objects.filter(Prefix=folder_name):
    print('Object to extract :', obj)
    print('obj key: ', obj.key)

    # Fetch the object and decode its body as UTF-8 text.
    s3_client = boto3.client('s3')
    json_obj = s3_client.get_object(Bucket=bucket_name, Key=obj.key)
    raw_body = json_obj["Body"].read()
    json_data = raw_body.decode('utf-8')

    # NOTE: json.loads raises json.JSONDecodeError ("Expecting value:
    # line 1 column 1 (char 0)") when json_data is an empty string.
    json_dict = json.loads(json_data)
18
error:
19
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
20
Advertisement
Answer
Some of the entries in bucket.objects
don’t have any JSON in the data (their body is empty, and json.loads("") raises exactly this "Expecting value" error), so check for that and skip them.
JavaScript
1
11
11
1
# Build the client once: it does not depend on the object being processed,
# so recreating it on every iteration is wasted work.
s3_client = boto3.client('s3')

# Visit every object whose key starts with the folder prefix.
for obj in bucket.objects.filter(Prefix=folder_name):
    print('Object to extract :', obj)
    print('obj key: ', obj.key)

    # Fetch the object and decode its body as UTF-8 text.
    json_obj = s3_client.get_object(Bucket=bucket_name, Key=obj.key)
    json_data = json_obj["Body"].read().decode('utf-8')

    # An empty (or whitespace-only) body is not valid JSON and would make
    # json.loads raise JSONDecodeError, so skip such objects.
    if not json_data.strip():
        print("Skipping empty", obj.key)
        continue

    json_dict = json.loads(json_data)
11