The snippet below worked fine until the other day. Is there an easy way to extract all the data inside the div class="row mb-4"? Ideally, the script should keep working even if further changes are made to the page.
import re
from typing import Optional

import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with the token page request.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0'
}
url = "https://bscscan.com/token/"
token = "0x4ce1a5cb12151423ea479cfd0c52ec5021d108d8"
tokenurl = str(url) + str(token)

# The headers must be passed by keyword: the second positional argument of
# requests.get() is `params`, which would send the dict as a query string
# instead of as HTTP headers.
contractpage = requests.get(tokenurl, headers=header)
ca = BeautifulSoup(contractpage.content, 'html.parser')

tokenholders = ca.find(id='ContentPlaceHolder1_tr_tokenHolders').get_text()
# Extract the first number (digits and thousands separators) from the row
# text.  str.strip("Holders:") removes a *set of characters* from both ends,
# not the literal label, so it only worked by coincidence.
holders_match = re.search(r"\d[\d,]*", tokenholders)
tholders = holders_match.group(0) if holders_match else tokenholders.strip()
tokenholdersa = tholders  # the original computed this same value twice
tokenaname = ca.find('span', class_='text-secondary small').get_text().strip()


def get_transfer_count(token: str) -> Optional[str]:
    """Return the total transfer count shown for *token*, or None.

    Loads the token page to read the ``sid`` value embedded in its
    JavaScript, then requests the transactions fragment with that ``sid``
    and reads ``totaltxns`` from the response.

    Args:
        token: Contract address of the token.

    Returns:
        The captured ``totaltxns`` string, or ``None`` when either value
        cannot be found or the second request fails.
    """
    with requests.Session() as s:
        s.headers = {'User-Agent': 'Mozilla/5.0'}
        r = s.get(f'https://bscscan.com/token/{token}')
        try:
            sid = re.search(r"var sid = '(.*?)'", r.text).group(1)
            r = s.get(f'https://bscscan.com/token/generic-tokentxns2?m=normal&contractAddress={token}&a=&sid={sid}&p=1')
            return re.search(r"var totaltxns = '(.*?)'", r.text).group(1)
        except (AttributeError, requests.RequestException):
            # AttributeError: re.search() returned None (pattern not found).
            # Stay best-effort like the original, but only for the expected
            # failures instead of swallowing every exception.
            return None


transcount = get_transfer_count(token)

print ("Token: ", tokenaname)
print ("Holders: ", tholders)
print ("Transfers: ", transcount)
Previous Output:
Token: Binemon Holders: 27,099 Transfers: 439,636
Wanted Improved Output:
Token: Binemon PRICE: $0.01 @ 0.000037 BNB (-22.41%) Fully Diluted Market Cap: $14,011,783.50 Total Supply: 975,000,000 BIN Holders: 27,099 addresses Transfers: 439,636 Contract: 0xe56842ed550ff2794f010738554db45e60730371 Decimals: 18 Official Site: https://binemon.io/ Social Profiles: Tweets by BinemonNft https://t.me/binemonchat https://docs.binemon.io/ https://coinmarketcap.com/currencies/binemon/ https://www.coingecko.com/en/coins/binemon/
Advertisement
Answer
Try:
import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with the request.
header = {
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:92.0) Gecko/20100101 Firefox/92.0",
}
tokenurl = (
    "https://bscscan.com/token/0x7083609fce4d1d8dc0c979aab8c869ea2c873402"
)

contractpage = requests.get(tokenurl, headers=header)
ca = BeautifulSoup(contractpage.content, "html.parser")

# Header values are located by CSS selectors.
name = ca.h1.span.get_text(strip=True)
price = ca.select_one(".card-body .d-block").get_text(strip=True)
cap = ca.select_one("#pricebutton").get_text(strip=True)

print("Token:", name)
print("PRICE:", price)
print("Fully Diluted Market Cap:", cap)
print()

# Each ".col-md-8" cell is paired with the nearest preceding "col-md-4"
# element, whose text is used as the label.  Looping over all such cells
# means newly added rows are printed without changing the script.
for c in ca.select(".row .col-md-8"):
    pt = c.find_previous(class_="col-md-4").get_text(strip=True)
    # Keep only the text before the first "(".
    t = c.get_text(strip=True, separator=" ").split("(")[0]
    if pt == "Social Profiles:":
        links = [a["href"].strip() for a in c.select("a")]
        # One link per line, indented under the label.  The separator had
        # lost its backslashes ("nt"), which printed the letters literally.
        print(pt, *links, sep="\n\t")
    else:
        print(pt, t)
Prints:
Token: Binance-Peg Polkadot Token PRICE: $30.35@ 0.079643 BNB(-10.39%) Fully Diluted Market Cap: $485,657,455.49 Total Supply: 15,999,999.991309 DOT Holders: 80,065 addresses Transfers: - Contract: 0x7083609fce4d1d8dc0c979aab8c869ea2c873402 Decimals: 18 Official Site: https://polkadot.network/ Social Profiles: https://polkadot.network/blog https://reddit.com/r/dot Tweets by polkadotnetwork https://github.com/w3f https://polkadot.network/PolkaDotPaper.pdf https://coinmarketcap.com/currencies/polkadot-new/ https://www.coingecko.com/en/coins/polkadot/