Skip to content
Advertisement

Selenium taking a lot of time to find element by XPATH

I am trying to scrape the names and emails of agents from this site. The code first captures the link to every profile on the first page and then visits each profile to get the name and email. The problem is that it takes a lot of time to find the anchor tag containing the agent's name. Here’s the code:

import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

class MessageIndividual(webdriver.Chrome):
    """Chrome driver that prints agent names and e-mails from the BHHS agent search.

    The instance *is* the browser session (it subclasses ``webdriver.Chrome``),
    so every Selenium driver method is available on ``self``.
    """

    def __init__(self, driver_path=r";C:/SeleniumDriver", teardown=False):
        """Start Chrome with the driver directory appended to ``PATH``.

        Args:
            driver_path: Text appended verbatim to the ``PATH`` environment
                variable; the default already carries its leading ``;``
                separator.
            teardown: When true, ``__exit__`` quits the browser.
        """
        self.driver_path = driver_path
        self.teardown = teardown
        os.environ['PATH'] += self.driver_path
        super(MessageIndividual, self).__init__()
        # NOTE(review): an implicit wait is combined with the explicit
        # WebDriverWait calls in getDetails. Selenium's documentation warns
        # that mixing the two can produce unpredictable wait times, which may
        # contribute to slow lookups - consider dropping one of them.
        self.implicitly_wait(5)
        self.maximize_window()

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Quit the browser on leaving a ``with`` block, but only if ``teardown`` is set."""
        if self.teardown:
            self.quit()

    def goToSite(self):
        """Load the agent search results page in the current window."""
        url = 'https://www.bhhs.com/agent-search-results'
        self.get(url)

    def getDetails(self):
        """Visit every agent profile linked from the results page and print name and e-mail.

        Each profile is loaded in a temporary tab. The tab is always closed and
        focus is returned to the listing window, even when a wait times out, so
        a failure does not leave stray tabs behind or the driver focused on a
        closed window.

        Raises:
            selenium.common.exceptions.TimeoutException: If the profile links,
                the name, or the e-mail element do not appear in time.
        """
        link_locator = (
            By.XPATH,
            "//section[@class='cmp-agent-results-list-view']"
            "/div[@class='cmp-agent-results-list-view__content container ']"
            "/div[@class='row associate pt-3 pb-3 ']"
            "/div[@class='col-6 col-sm-4 col-lg-3 order-lg-3 associate__btn-group']"
            "/section[2]/a[@href]",
        )
        anchors = WebDriverWait(self, 1000).until(
            EC.visibility_of_all_elements_located(link_locator))
        # Collect plain URLs up front: the anchor elements belong to the
        # listing page and are not touched again after this point.
        profile_urls = [anchor.get_attribute("href") for anchor in anchors]

        # Remember the listing window explicitly; the order of
        # window_handles is not guaranteed, so indexing [0]/[1] is fragile.
        listing_handle = self.current_window_handle

        for profile_url in profile_urls:
            known_handles = set(self.window_handles)
            self.execute_script("window.open('');")
            new_handles = [handle for handle in self.window_handles
                           if handle not in known_handles]
            self.switch_to.window(new_handles[0])

            try:
                self.get(profile_url)

                name = WebDriverWait(self, 5).until(
                    EC.presence_of_element_located(
                        (By.XPATH, '//h1[@class="cmp-agent__name"]/a[1]'))
                )
                print(name.text)

                email = WebDriverWait(self, 1).until(
                    EC.presence_of_element_located(
                        (By.CLASS_NAME, 'cmp-agent-details__mail'))
                )
                print(email.text)
            finally:
                # Always release the profile tab and restore focus.
                self.close()
                self.switch_to.window(listing_handle)
        

if __name__ == '__main__':
    # Run the scrape end to end: start the browser, open the results page,
    # then print the details of every listed agent.
    scraper = MessageIndividual(teardown=False)
    scraper.goToSite()
    scraper.getDetails()
    

Is there any way I can scrape the name in less time?

Advertisement

Answer

I have changed the XPath used to identify the anchor tag and removed the new window that was opened in each iteration. I hope this will reduce the time somewhat.

def getDetails(self):
    """Print the name and e-mail of every agent linked from the results page.

    Profile links are found through the anchor wrapping the 'agent details'
    label. Each profile is then loaded in the current window, one after
    another, instead of opening and closing a separate tab per profile.
    """
    link_locator = (By.XPATH, "//a[.//span[normalize-space(.)='agent details']]")
    name_locator = (By.XPATH, '//h1[@class="cmp-agent__name"]/a[1]')
    email_locator = (By.CLASS_NAME, 'cmp-agent-details__mail')

    anchors = WebDriverWait(self, 1000).until(
        EC.visibility_of_all_elements_located(link_locator))
    # Read the hrefs before navigating away from the listing page.
    profile_urls = [anchor.get_attribute("href") for anchor in anchors]

    for profile_url in profile_urls:
        # No new tab here: navigate the existing window straight to the profile.
        self.get(profile_url)

        name_element = WebDriverWait(self, 5).until(
            EC.presence_of_element_located(name_locator))
        print(name_element.text)

        email_element = WebDriverWait(self, 1).until(
            EC.presence_of_element_located(email_locator))
        print(email_element.text)
User contributions licensed under: CC BY-SA
6 People found this is helpful
Advertisement