I am trying to extract the text from within a <strong> tag that is deeply nested in the HTML content of this webpage: https://www.marinetraffic.com/en/ais/details/ships/imo:9854612
For example, the <strong> tag is the only one on the webpage that will contain the string 'cubic meters'. My objective is to extract the entire text, i.e., "138124 cubic meters Liquid Gas". When I try the following, I get an error:
url = "https://www.marinetraffic.com/en/ais/details/ships/imo:9854612"

driver.get(url)
time.sleep(3)

element = driver.find_element_by_link_text("//strong[contains(text(),'cubic meters')]").text
print(element)
Error:
NoSuchElementException: Message: no such element: Unable to locate element: {"method":"link text","selector":"//strong[contains(text(),'cubic meters')]"}
What am I doing wrong here?
The following also throws an error:
element = driver.find_element_by_xpath("//strong[contains(text(),'cubic')]").text
Answer
Your code works on Firefox(), but not on Chrome().
The page uses lazy loading, so you have to scroll to the Summary section before it loads the text with the expected <strong>.

I used a slightly slower method: I search for all elements with class='lazyload-wrapper', and in a loop I scroll to each item and check whether it contains a <strong>. If it doesn't, I scroll to the next class='lazyload-wrapper'.
from selenium import webdriver
import time

#driver = webdriver.Firefox()
driver = webdriver.Chrome()

url = "https://www.marinetraffic.com/en/ais/details/ships/imo:9854612"

driver.get(url)
time.sleep(3)

from selenium.webdriver.common.action_chains import ActionChains
actions = ActionChains(driver)

elements = driver.find_elements_by_xpath("//span[@class='lazyload-wrapper']")

for number, item in enumerate(elements):
    print('--- item', number, '---')
    #print('--- before ---')
    #print(item.text)

    actions.move_to_element(item).perform()
    time.sleep(0.1)

    #print('--- after ---')
    #print(item.text)

    try:
        strong = item.find_element_by_xpath("//strong[contains(text(), 'cubic')]")
        print(strong.text)
        break
    except Exception as ex:
        #print(ex)
        pass
Result:
--- item 0 ---
--- item 1 ---
--- item 2 ---
173400 cubic meters Liquid Gas
The result shows that I could use elements[2] to skip the first two elements, but I wasn't sure whether this text would always be in the third element.
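If you are willing to rely on that assumption, the loop can be collapsed into a direct scroll to the third wrapper, reusing the driver, actions and time objects from the code above. This is only a sketch, and it breaks as soon as the page layout changes:

elements = driver.find_elements_by_xpath("//span[@class='lazyload-wrapper']")

# assume the Summary block is always the third wrapper (index 2)
summary = elements[2]
actions.move_to_element(summary).perform()
time.sleep(0.1)

# leading '.' makes the XPath relative, so only this wrapper is searched
strong = summary.find_element_by_xpath(".//strong[contains(text(), 'cubic')]")
print(strong.text)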
Before I created my version I tested other approaches, and here is the full working code:
from selenium import webdriver
import time

#driver = webdriver.Firefox()
driver = webdriver.Chrome()

url = "https://www.marinetraffic.com/en/ais/details/ships/imo:9854612"

driver.get(url)
time.sleep(3)

def test0():
    elements = driver.find_elements_by_xpath("//strong")
    for item in elements:
        print(item.text)

    print('---')

    item = driver.find_element_by_xpath("//strong[contains(text(), 'cubic')]")
    print(item.text)

def test1a():
    from selenium.webdriver.common.action_chains import ActionChains
    actions = ActionChains(driver)

    element = driver.find_element_by_xpath("//div[contains(@class,'MuiTypography-body1')][last()]//div")
    actions.move_to_element(element).build().perform()

    text = element.text
    print(text)

def test1b():
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(0.5)

    text = driver.find_element_by_xpath("//div[contains(@class,'MuiTypography-body1')][last()]//strong").text
    print(text)

def test2():
    from bs4 import BeautifulSoup
    import re

    soup = BeautifulSoup(driver.page_source, "html.parser")
    soup.find_all(string=re.compile(r"\d+ cubic meters"))

def test3():
    from selenium.webdriver.common.action_chains import ActionChains
    actions = ActionChains(driver)

    elements = driver.find_elements_by_xpath("//span[@class='lazyload-wrapper']")

    for number, item in enumerate(elements, 1):
        print('--- number', number, '---')
        #print('--- before ---')
        #print(item.text)

        actions.move_to_element(item).perform()
        time.sleep(0.1)

        #print('--- after ---')
        #print(item.text)

        try:
            strong = item.find_element_by_xpath("//strong[contains(text(), 'cubic')]")
            print(strong.text)
            break
        except Exception as ex:
            #print(ex)
            pass

#test0()
#test1a()
#test1b()
#test2()
test3()
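As written, test2() only builds the list of matches and never prints it, and the text shows up in page_source only after the lazy content has been rendered. A rough sketch that combines it with the scroll from test1b() could look like this (test2_print is a made-up name, and it assumes the value sits in a single text node):

def test2_print():
    from bs4 import BeautifulSoup
    import re

    # scroll to the bottom so the lazy-loaded Summary ends up in page_source
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(0.5)

    soup = BeautifulSoup(driver.page_source, "html.parser")
    for text in soup.find_all(string=re.compile(r"\d+ cubic meters")):
        print(text)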
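One last note: all of the code above uses the old find_element_by_* helpers, which newer Selenium 4 releases no longer provide. A sketch of the main loop in the Selenium 4 style, with By locators and a relative .//strong XPath so only the scrolled wrapper is searched, would look roughly like this:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
import time

driver = webdriver.Chrome()
driver.get("https://www.marinetraffic.com/en/ais/details/ships/imo:9854612")
time.sleep(3)

actions = ActionChains(driver)

for item in driver.find_elements(By.XPATH, "//span[@class='lazyload-wrapper']"):
    # scroll to the wrapper so its lazy-loaded content gets rendered
    actions.move_to_element(item).perform()
    time.sleep(0.1)
    try:
        # relative XPath (leading '.'), so only this wrapper is searched
        strong = item.find_element(By.XPATH, ".//strong[contains(text(), 'cubic')]")
        print(strong.text)
        break
    except Exception:
        pass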