I wrote the following code in order to scrape the text of the element <h3 class="h4 mb-10">Total nodes: 1,587</h3>
from https://blockchair.com/dogecoin/nodes.
#!/usr/bin/python3
"""Scrape the "Total nodes" heading from blockchair.com's Dogecoin nodes page."""
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions

PATH = "/usr/local/bin/chromedriver"

driver = webdriver.Chrome(PATH)
driver.get("https://blockchair.com/dogecoin/nodes")


def scraping_fnd():
    """Print the text of the first element carrying both classes ``h4`` and ``mb-10``.

    Why the earlier attempts failed: ``"h4 mb-10"`` is TWO classes, and
    ``By.CLASS_NAME`` accepts only a single class name.  The CSS selector
    ``.h4 mb-10`` is likewise wrong (it means "a <mb-10> tag inside .h4").
    The compound selector ``.h4.mb-10`` matches an element that has both
    classes.  An explicit wait is still needed because the page renders the
    headings with JavaScript after load.
    """
    try:
        nodes = WebDriverWait(driver, 10).until(
            expected_conditions.presence_of_element_located(
                (By.CSS_SELECTOR, ".h4.mb-10")
            )
        )
        print(nodes.text)
    finally:
        driver.quit()  # closes the browser even when an exception or return occurs


scraping_fnd()
I’m aware that there are perhaps less bloated options than selenium to scrape the target in question, yet the said code is just a snippet, a part of a more extensive script that relies on selenium for its other tasks. Thus let us limit the scope of the answers to selenium only.
Although there are three elements of the class "h4 mb-10"
on the page, I am unable to locate the element. When I call driver.find_element_by_class_name("h4 mb-10")
, I get:
Traceback (most recent call last): File "./protocols.py", line 34, in <module> scraping_fnd() File "./protocols.py", line 20, in scraping_fnd nodes = driver.find_element_by_class_name("h4 mb-10")#(f"//span[@title = "{name}"]") File "/home/jerzy/.local/lib/python3.8/site-packages/selenium/webdriver/remote/webdriver.py", line 564, in find_element_by_class_name return self.find_element(by=By.CLASS_NAME, value=name) File "/home/jerzy/.local/lib/python3.8/site-packages/selenium/webdriver/remote/webdriver.py", line 976, in find_element return self.execute(Command.FIND_ELEMENT, { File "/home/jerzy/.local/lib/python3.8/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute self.error_handler.check_response(response) File "/home/jerzy/.local/lib/python3.8/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response raise exception_class(message, screen, stacktrace) selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":".h4 mb-10"} (Session info: chrome=90.0.4430.212) XP
Applying waits, currently commented out in the snippet, was to no avail. I came across a related question, and based on it I tried calling WebDriverWait(driver, 10).until(expected_conditions.presence_of_element_located((By.CSS_SELECTOR, ".h4 mb-10")))
.
I got :
Traceback (most recent call last): File "./protocols.py", line 33, in <module> scraping_fnd() File "./protocols.py", line 23, in scraping_fnd nodes = WebDriverWait(driver, 10).until(expected_conditions.presence_of_element_located((By.CSS_SELECTOR, ".h4 mb-10")))#selenium.common.exceptions.TimeoutException: Message: File "/home/jerzy/.local/lib/python3.8/site-packages/selenium/webdriver/support/wait.py", line 80, in until raise TimeoutException(message, screen, stacktrace) selenium.common.exceptions.TimeoutException: Message:
I have no clue what I am doing wrong. Is it doable to scrape the target with selenium without using XPaths?
Advertisement
Answer
Try one of the XPath expressions below. The first locates the "Total nodes" heading directly; the second finds all three h3 elements, from which you can extract the required text by indexing.
//div[@class='nodes_chain']/div[1]/h3 # for "Total nodes" option //div[@class='nodes_chain']//h3 # for all the option.
"""Fetch the Dogecoin node headings from blockchair.com via XPath waits."""
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

driver = webdriver.Chrome(executable_path="path to chromedriver.exe")
driver.maximize_window()
driver.implicitly_wait(10)
driver.get("https://blockchair.com/dogecoin/nodes")

waiter = WebDriverWait(driver, 30)

# Single element: the first h3 under the nodes_chain container ("Total nodes").
heading = waiter.until(
    EC.presence_of_element_located(
        (By.XPATH, "//div[@class='nodes_chain']/div[1]/h3")
    )
)
print(heading.text)

# Every h3 under the nodes_chain container.
headings = waiter.until(
    EC.presence_of_all_elements_located(
        (By.XPATH, "//div[@class='nodes_chain']//h3")
    )
)
# First option
print(headings[0].text)
# All the options
for item in headings:
    print(item.text)
Total nodes: 1,562 Total nodes: 1,562 Total nodes: 1,562 Node versions Block heights