# Import libs
import pandas as pd
import requests
from bs4 import BeautifulSoup
import json

# Form data for passing to the request body
formdata = {'objid': '14'}
# URL of the endpoint that returns local-body data as JSON
url = "https://www.sec.kerala.gov.in/public/getalllbcmp/byd"

# Query: one POST per district id (1..14), each response saved to its own
# CSV file (1.csv .. 14.csv).
# NOTE: the loop body below was flattened in the original paste; it must be
# indented under the for statement, otherwise only the last request's data
# would ever be processed (and the script would not even be valid Python).
for i in range(1, 15):
    formdata["objid"] = str(i)
    response = requests.request("POST", url, data=formdata, timeout=1500)
    out = response.content
    # The endpoint returns JSON, so BeautifulSoup is only used here to get
    # the response text; json.loads does the actual parsing.
    soup = BeautifulSoup(out, "html.parser")
    bat = json.loads(soup.text)
    # "ops1" holds the list of records for this district id
    df = pd.DataFrame(bat["ops1"])
    df.to_csv(str(i) + ".csv")
Right now this script creates 14 separate CSV files. What I want instead is for the loop to drop each response's repeated column-header row and append the data to a single DataFrame created outside the for loop, so that I end up with one combined CSV file.
I am using BS and Pandas.
Advertisement
Answer
This is one way of achieving your goal:
# Import libs
import pandas as pd
import requests
from tqdm import tqdm  # if using jupyter: from tqdm.notebook import tqdm

# URL of the endpoint that returns local-body data as JSON
url = "https://www.sec.kerala.gov.in/public/getalllbcmp/byd"

# Query: fetch one JSON payload per district id (1..14), build a DataFrame
# from each, and concatenate ONCE at the end. Calling pd.concat inside the
# loop copies the accumulated frame every iteration (quadratic growth);
# collecting the pieces in a list and concatenating once is the idiomatic
# and linear-time approach.
frames = []
for i in tqdm(range(1, 15)):
    formdata = {'objid': i}
    # A finite timeout so an unresponsive server cannot hang the script forever.
    r = requests.post(url, data=formdata, timeout=30)
    # Fail loudly on HTTP errors instead of silently parsing an error page.
    r.raise_for_status()
    # "ops1" holds the list of records; json_normalize flattens it to rows.
    frames.append(pd.json_normalize(r.json()["ops1"]))

# Single concatenation; ignore_index renumbers rows 0..N-1 across all pages.
final_df = pd.concat(frames, axis=0, ignore_index=True)
final_df.to_csv('some_data_saved.csv')
print(final_df)
Data will be saved to a csv file, and also printed in terminal:
100% 14/14 [00:14<00:00, 1.05s/it] value text 0 8o7LEdvX2e G14001-Kumbadaje 1 jw2XOQyZ4K G14002-Bellur 2 0lMB1O4LbV G14003-Karadka 3 zodLro2Z39 G14004-Muliyar 4 dWxLYn8ZME G14005-Delampady ... ... ... 1029 Qy6Z09bBKE G01073-Ottoor 1030 ywoXG8wLxV M01001-Neyyattinkara 1031 Kk8Xvz7XO9 M01002-Nedumangad 1032 r7eXQYgX8m M01003-Attingal 1033 b3KXlO2B8g M01004-Varkala 1034 rows × 2 columns
Requests can return responses in JSON format (via `response.json()`), so you don't need to import bs4 & json.
For TQDM, please see https://pypi.org/project/tqdm/
For pandas documentation, visit https://pandas.pydata.org/docs/
Also for Requests: https://requests.readthedocs.io/en/latest/