The snippet below worked fine until the other day. Is there an easy way to extract all the data inside the `div class="row mb-4"` element? Ideally, the script should keep working even if further changes are made to the page.
import requests
from bs4 import BeautifulSoup
# Browser-like User-Agent sent with the token-page request below.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0'
}
url = "https://bscscan.com/token/"
token = "0x4ce1a5cb12151423ea479cfd0c52ec5021d108d8"
tokenurl = f"{url}{token}"

# `headers=` has to be passed by keyword: the second positional argument of
# requests.get() is `params`, which would append the User-Agent to the query
# string instead of sending it as an HTTP header.
contractpage = requests.get(tokenurl, headers=header)
ca = BeautifulSoup(contractpage.content, 'html.parser')

# Raw text of the element with id "ContentPlaceHolder1_tr_tokenHolders".
tokenholders = ca.find(id='ContentPlaceHolder1_tr_tokenHolders').get_text()

# str.strip(chars) removes any of the given *characters* from both ends (it is
# not a prefix/suffix match). The chain below relies on that to peel off the
# "Holders:" label and the trailing word around the number.
# NOTE(review): fragile - depends on the exact wording of the holders row.
tholders = tokenholders.strip().strip("Holders:").strip().strip(" a ").strip()
# Kept as a second name for the same cleaned value (both names existed before).
tokenholdersa = tholders

tokenaname = ca.find('span', class_='text-secondary small').get_text().strip()
def get_transfer_count(token: str) -> "str | None":
    """Return the transfer count string for *token* as embedded in the page, or None.

    The token page is fetched first and the value of its ``var sid = '...'``
    script variable is extracted. That value is then passed to the
    ``generic-tokentxns2`` endpoint, whose response is searched for
    ``var totaltxns = '...'``.

    Args:
        token: Contract address used to build both request URLs.

    Returns:
        The captured ``totaltxns`` text, or ``None`` when either variable is
        missing from the responses or the second request fails.
    """
    # Local import: `re` is not among the imports visible at the top of the script.
    import re

    with requests.Session() as s:
        s.headers = {'User-Agent':'Mozilla/5.0'}
        r = s.get(f'https://bscscan.com/token/{token}')

        sid_match = re.search(r"var sid = '(.*?)'", r.text)
        if sid_match is None:
            # Page layout changed or the request was blocked: nothing to look up.
            return None
        sid = sid_match.group(1)

        try:
            r = s.get(f'https://bscscan.com/token/generic-tokentxns2?m=normal&contractAddress={token}&a=&sid={sid}&p=1')
        except requests.RequestException:
            # Best-effort lookup: a failed follow-up request yields "no count".
            return None

        total_match = re.search(r"var totaltxns = '(.*?)'", r.text)
        return total_match.group(1) if total_match else None
# Look up the transfer count, then print one "label value" line per field.
transcount = get_transfer_count(token)
summary_rows = (
    ("Token: ", tokenaname),
    ("Holders: ", tholders),
    ("Transfers: ", transcount),
)
for label, value in summary_rows:
    print(label, value)
Previous Output:
Token: Binemon
Holders: 27,099
Transfers: 439,636
Wanted Improved Output:
Token: Binemon
PRICE: $0.01 @ 0.000037 BNB (-22.41%)
Fully Diluted Market Cap: $14,011,783.50
Total Supply: 975,000,000 BIN
Holders: 27,099 addresses
Transfers: 439,636
Contract: 0xe56842ed550ff2794f010738554db45e60730371
Decimals: 18
Official Site: https://binemon.io/
Social Profiles:
Tweets by BinemonNft
https://t.me/binemonchat
https://docs.binemon.io/
https://coinmarketcap.com/currencies/binemon/
https://www.coingecko.com/en/coins/binemon/
Advertisement
Answer
Try:
import requests
from bs4 import BeautifulSoup
# Browser-like User-Agent sent with the request.
header = {
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:92.0) Gecko/20100101 Firefox/92.0",
}
tokenurl = (
    "https://bscscan.com/token/0x7083609fce4d1d8dc0c979aab8c869ea2c873402"
)

contractpage = requests.get(tokenurl, headers=header)
ca = BeautifulSoup(contractpage.content, "html.parser")

# Headline values are taken from fixed selectors on the page.
name = ca.h1.span.get_text(strip=True)
price = ca.select_one(".card-body .d-block").get_text(strip=True)
cap = ca.select_one("#pricebutton").get_text(strip=True)

print("Token:", name)
print("PRICE:", price)
print("Fully Diluted Market Cap:", cap)
print()

# Each ".col-md-8" cell is treated as a value; its label is the text of the
# closest preceding ".col-md-4" element.
for c in ca.select(".row .col-md-8"):
    pt = c.find_previous(class_="col-md-4").get_text(strip=True)
    if pt == "Social Profiles:":
        # Only anchors that carry an href, so a bare <a> cannot raise KeyError.
        links = [a["href"].strip() for a in c.select("a[href]")]
        # One link per line, tab-indented under the label.
        print(pt, *links, sep="\n\t")
    else:
        # Keep only the text before the first "(".
        t = c.get_text(strip=True, separator=" ").split("(")[0]
        print(pt, t)
Prints:
Token: Binance-Peg Polkadot Token
PRICE: $30.35@ 0.079643 BNB(-10.39%)
Fully Diluted Market Cap: $485,657,455.49
Total Supply: 15,999,999.991309 DOT
Holders: 80,065 addresses
Transfers: -
Contract: 0x7083609fce4d1d8dc0c979aab8c869ea2c873402
Decimals: 18
Official Site: https://polkadot.network/
Social Profiles:
https://polkadot.network/blog
https://reddit.com/r/dot
Tweets by polkadotnetwork
https://github.com/w3f
https://polkadot.network/PolkaDotPaper.pdf
https://coinmarketcap.com/currencies/polkadot-new/
https://www.coingecko.com/en/coins/polkadot/