I want to scrape exchange rate data for every day from July 1, 2021 to June 30, 2022 by iterating over the `exchangeDate` query parameter in the URL, and save the combined result to an Excel file.
Here is my code so far:
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Scrape the exchange-rate table for a single date and save it to Excel.

# Set the URL for the website you want to scrape. The exchangeDate query
# parameter (percent-encoded, "/" -> "%2F") selects the date.
url = "https://www.bot.go.tz/ExchangeRate/previous_rates?__RequestVerificationToken=P0qGKEy8P6ISFMLlu7mKvMi4YrMyeHc1aCz4ZuGQVyJ6mK9w6StV6QPyinF7ym_mAZG6yO6ShU1DuFm6teqBAxCcCrEQSjz7KtXzi2kbJH41&exchangeDate=04%2F05%2F2022"

# Send an HTTP request to the website and retrieve the HTML content.
# The timeout stops the script from hanging on a stalled connection, and
# raise_for_status() aborts on an HTTP error instead of parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
html = response.content

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(html, "html.parser")

# Find the table containing the data you want to scrape
table = soup.find("table", attrs={"class": "table"})
if table is None:
    # soup.find returns None when nothing matches; fail with a clear message
    # rather than handing the string "None" to pandas.
    raise SystemExit("No <table class='table'> found in the response.")

# Extract the data from the table and save it to a Pandas DataFrame.
# read_html is given a file-like object because passing literal HTML text
# directly is deprecated in recent pandas releases.
df = pd.read_html(StringIO(str(table)))[0]

# Save the DataFrame to an Excel file
df.to_excel("exchange_Rate_data.xlsx", index=False)
How do I loop through all dates?
Advertisement
Answer
You can use something like this:
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Scrape the exchange-rate table for every day in [start, end] and write the
# combined rows to a single Excel file.

start = '2021-07-01'
end = '2022-06-30'

# Endpoint and the fixed query parameter taken from the original URL.
# NOTE(review): the verification token looks like it was copied from a
# browser session and may expire -- confirm the site still accepts it.
base_url = "https://www.bot.go.tz/ExchangeRate/previous_rates"
token = "P0qGKEy8P6ISFMLlu7mKvMi4YrMyeHc1aCz4ZuGQVyJ6mK9w6StV6QPyinF7ym_mAZG6yO6ShU1DuFm6teqBAxCcCrEQSjz7KtXzi2kbJH41"

# One "MM/DD/YYYY" string per calendar day. The "/" is percent-encoded to
# "%2F" by requests when it is passed through `params`, so no manual
# string replacement is needed.
# NOTE(review): month-first order is kept from the original answer; confirm
# the site does not expect DD/MM/YYYY.
dates = pd.date_range(start, end, freq='D').strftime('%m/%d/%Y').tolist()

# Collect the per-day tables and concatenate once at the end; calling
# pd.concat inside the loop re-copies all accumulated rows on every pass.
frames = []

# A Session reuses the underlying connection across the daily requests.
with requests.Session() as session:
    for exchange_date in dates:
        # Send an HTTP request to the website and retrieve the HTML content.
        # The timeout prevents one stalled request from hanging the run.
        response = session.get(
            base_url,
            params={
                "__RequestVerificationToken": token,
                "exchangeDate": exchange_date,
            },
            timeout=30,
        )
        response.raise_for_status()

        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.content, "html.parser")

        # Find the table containing the data you want to scrape
        table = soup.find("table", attrs={"class": "table"})
        if table is None:
            # No matching table on this page: skip the date instead of
            # letting read_html fail and abort the entire range.
            continue

        # Extract the data from the table into a DataFrame. read_html gets a
        # file-like object because literal HTML input is deprecated.
        frames.append(pd.read_html(StringIO(str(table)))[0])

if not frames:
    # pd.concat raises on an empty list; report the real problem instead.
    raise SystemExit("No exchange-rate tables were found for the given dates.")

final_df = pd.concat(frames, ignore_index=True)
final_df.to_excel("exchange_Rate_data.xlsx", index=False)