Skip to content
Advertisement

Grabbing all data fields from a div in python beautifulsoup

The snippet below worked fine until the other day. Is there an easy way to extract all the data inside the div with class="row mb-4"? My thinking is that if further changes are made to the page, the script should still not be affected.

import requests
from bs4 import BeautifulSoup

# Request headers sent with the token-page fetch (browser-like User-Agent).
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0'
}

url = "https://bscscan.com/token/"
token = "0x4ce1a5cb12151423ea479cfd0c52ec5021d108d8"
# Both parts are already strings, so plain concatenation is enough.
tokenurl = url + token

# ``headers`` must be passed by keyword: the second positional parameter of
# requests.get() is ``params``, so passing the dict positionally would send
# the User-Agent as a query-string argument instead of as an HTTP header.
contractpage = requests.get(tokenurl, headers=header)
ca = BeautifulSoup(contractpage.content, 'html.parser')
tokenholders = ca.find(id='ContentPlaceHolder1_tr_tokenHolders').get_text()
# NOTE: str.strip(chars) removes a *set* of characters from both ends, not a
# literal prefix/suffix. The chain is kept as written because the final value
# depends on that behaviour (presumably the cell reads something like
# "Holders: 27,099 addresses" -- verify against the live page).
tholders = tokenholders.strip().strip("Holders:").strip().strip(" a ").strip()
# Kept as an alias: the original computed the identical expression twice.
tokenholdersa = tholders
tokenaname = ca.find('span', class_='text-secondary small').get_text().strip()

def get_transfer_count(token: str) -> "str | None":
    """Return the transfer count that BscScan reports for *token*.

    Fetches the token page, reads the ``sid`` value embedded in its
    JavaScript, then requests the transactions listing with that ``sid`` and
    reads the ``totaltxns`` value from the response.

    Args:
        token: Contract address of the token, as used in the BscScan URL.

    Returns:
        The ``totaltxns`` value exactly as it appears in the page (a string),
        or ``None`` when either value cannot be found or the listing request
        fails.
    """
    # Local import: the surrounding script never imports ``re``; the original
    # bare ``except`` hid the resulting NameError and always returned None.
    import re

    with requests.Session() as s:
        s.headers = {'User-Agent': 'Mozilla/5.0'}
        r = s.get(f'https://bscscan.com/token/{token}')

        sid_match = re.search(r"var sid = '(.*?)'", r.text)
        if sid_match is None:
            return None
        sid = sid_match.group(1)

        try:
            r = s.get(f'https://bscscan.com/token/generic-tokentxns2?m=normal&contractAddress={token}&a=&sid={sid}&p=1')
        except requests.RequestException:
            # Best effort, as in the original: a failed listing request
            # yields "no count" rather than an exception.
            return None

        total_match = re.search(r"var totaltxns = '(.*?)'", r.text)
        return total_match.group(1) if total_match else None
transcount = get_transfer_count(token)

# Print each scraped field on its own line, label first; print() inserts its
# default single-space separator between the label and the value.
for label, value in (
    ("Token: ", tokenaname),
    ("Holders: ", tholders),
    ("Transfers: ", transcount),
):
    print(label, value)

Previous Output:

Token:     Binemon
Holders:   27,099
Transfers: 439,636

Wanted Improved Output:

Token:  Binemon
PRICE:  $0.01 @ 0.000037 BNB (-22.41%)
Fully Diluted Market Cap: $14,011,783.50

Total Supply:   975,000,000 BIN
Holders:        27,099 addresses
Transfers:      439,636
Contract:       0xe56842ed550ff2794f010738554db45e60730371
Decimals:       18
Official Site:  https://binemon.io/
Social Profiles:
    
    https://t.me/binemonchat
    https://docs.binemon.io/
    https://coinmarketcap.com/currencies/binemon/
    https://www.coingecko.com/en/coins/binemon/

Advertisement

Answer

Try:

import requests
from bs4 import BeautifulSoup

# Request headers sent with the token-page fetch (browser-like User-Agent).
header = {
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:92.0) Gecko/20100101 Firefox/92.0",
}
tokenurl = (
    "https://bscscan.com/token/0x7083609fce4d1d8dc0c979aab8c869ea2c873402"
)

contractpage = requests.get(tokenurl, headers=header)
ca = BeautifulSoup(contractpage.content, "html.parser")

# Headline values are read from fixed places in the page: the <span> inside
# the first <h1>, the first ".d-block" under ".card-body", and "#pricebutton".
name = ca.h1.span.get_text(strip=True)
price = ca.select_one(".card-body .d-block").get_text(strip=True)
cap = ca.select_one("#pricebutton").get_text(strip=True)

print("Token:", name)
print("PRICE:", price)
print("Fully Diluted Market Cap:", cap)
print()

# Every ".col-md-8" cell is a value; its label is the closest preceding
# ".col-md-4" element, so new rows are picked up without naming them here.
for c in ca.select(".row .col-md-8"):
    pt = c.find_previous(class_="col-md-4").get_text(strip=True)
    # Keep only the text before the first "(" in the cell.
    t = c.get_text(strip=True, separator=" ").split("(")[0]
    if pt == "Social Profiles:":
        # "a[href]" skips anchors without an href instead of raising KeyError.
        links = [a["href"].strip() for a in c.select("a[href]")]
        # Newline + tab puts each link on its own indented line under the label.
        print(pt, *links, sep="\n\t")
    else:
        print(pt, t)

Prints:

Token: Binance-Peg Polkadot Token
PRICE: $30.35@ 0.079643 BNB(-10.39%)
Fully Diluted Market Cap: $485,657,455.49

Total Supply: 15,999,999.991309 DOT 
Holders: 80,065 addresses
Transfers: -
Contract: 0x7083609fce4d1d8dc0c979aab8c869ea2c873402
Decimals: 18
Official Site: https://polkadot.network/
Social Profiles:
        https://polkadot.network/blog
        https://reddit.com/r/dot
        
        https://github.com/w3f
        https://polkadot.network/PolkaDotPaper.pdf
        https://coinmarketcap.com/currencies/polkadot-new/
        https://www.coingecko.com/en/coins/polkadot/
User contributions licensed under: CC BY-SA
10 people found this helpful
Advertisement