I want to scrape exchange rate data for every day from July 1, 2021 to June 30, 2022 by iterating over the `exchangeDate` query parameter in the URL, and save the combined result to an Excel file.
Here is my code so far:
import requests
from bs4 import BeautifulSoup
import pandas as pd
# StringIO wraps the table markup so pandas parses it as a file-like object
# rather than a literal HTML string (which newer pandas versions deprecate).
from io import StringIO

# Set the URL for the website you want to scrape
url = "https://www.bot.go.tz/ExchangeRate/previous_rates?__RequestVerificationToken=P0qGKEy8P6ISFMLlu7mKvMi4YrMyeHc1aCz4ZuGQVyJ6mK9w6StV6QPyinF7ym_mAZG6yO6ShU1DuFm6teqBAxCcCrEQSjz7KtXzi2kbJH41&exchangeDate=04%2F05%2F2022"

# Send an HTTP request to the website and retrieve the HTML content.
# The timeout keeps the script from hanging on an unresponsive server, and
# raise_for_status() fails early on an HTTP error instead of parsing an
# error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
html = response.content

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(html, "html.parser")

# Find the table containing the data you want to scrape
table = soup.find("table", attrs={"class": "table"})
if table is None:
    # Without this check str(None) would be handed to pandas, which fails
    # with a much less helpful "No tables found" error.
    raise ValueError("No <table class='table'> found in the response for " + url)

# Extract the data from the table and save it to a Pandas DataFrame
df = pd.read_html(StringIO(str(table)))[0]

# Save the DataFrame to an Excel file
df.to_excel("exchange_Rate_data.xlsx", index=False)
How do I loop through all dates?
Advertisement
Answer
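You can generate every date in the range with `pd.date_range`, request the page for each date in a loop, and concatenate the resulting tables before writing them to Excel: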
import requests
from bs4 import BeautifulSoup
import pandas as pd
# StringIO wraps the table markup so pandas parses it as a file-like object
# rather than a literal HTML string (which newer pandas versions deprecate).
from io import StringIO

# Inclusive date range to scrape.
start = '2021-07-01'
end = '2022-06-30'

# Endpoint and verification token, split out of the original request URL so
# that only the exchangeDate parameter changes between requests.
base_url = "https://www.bot.go.tz/ExchangeRate/previous_rates"
token = "P0qGKEy8P6ISFMLlu7mKvMi4YrMyeHc1aCz4ZuGQVyJ6mK9w6StV6QPyinF7ym_mAZG6yO6ShU1DuFm6teqBAxCcCrEQSjz7KtXzi2kbJH41"

# Collect one DataFrame per date and concatenate once at the end; calling
# pd.concat inside the loop would re-copy all accumulated rows every time.
frames = []

# A single session reuses the underlying connection across all requests.
with requests.Session() as session:
    for day in pd.date_range(start, end, freq='D'):
        # Same MM/DD/YYYY layout as before; requests percent-encodes the
        # slashes as %2F when it builds the query string.
        exchange_date = day.strftime('%m/%d/%Y')

        # Send an HTTP request to the website and retrieve the HTML content.
        # The timeout prevents one stalled request from hanging the whole
        # run, and raise_for_status() surfaces HTTP errors immediately.
        response = session.get(
            base_url,
            params={
                "__RequestVerificationToken": token,
                "exchangeDate": exchange_date,
            },
            timeout=30,
        )
        response.raise_for_status()

        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.content, "html.parser")

        # Find the table containing the data you want to scrape
        table = soup.find("table", attrs={"class": "table"})
        if table is None:
            # Presumably some dates (e.g. non-business days) have no rates
            # table; skip them rather than aborting the whole scrape.
            print("No table found for {}, skipping".format(exchange_date))
            continue

        # Extract the data from the table into a Pandas DataFrame
        frames.append(pd.read_html(StringIO(str(table)))[0])

# pd.concat raises on an empty list, so fall back to an empty DataFrame when
# no date produced a table.
final_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Save the combined DataFrame to an Excel file
final_df.to_excel("exchange_Rate_data.xlsx", index=False)