import configparser
from datetime import datetime

from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By

# Read the target URL and the date window from config.ini.
parser = configparser.RawConfigParser()
parser.read('config.ini')
url = parser['PROPERTIES']['URL']
# Both dates are parsed as YYYY-MM-DD; they are not used further in this snippet.
END_DATE = datetime.strptime(parser['DATE']['END'], '%Y-%m-%d')
START_DATE = datetime.strptime(parser['DATE']['START'], '%Y-%m-%d')

# Location of the chromedriver executable.
# NOTE(review): the path contains no separators -- presumably the backslashes
# were lost when this snippet was pasted; confirm against the real location.
CHROMEDRIVER_PATH = r'C:UsersHPDesktopINTERNSHIPinfluensterchromedriver.exe'

# Chrome options: the two experimental options set by the original script.
options = webdriver.ChromeOptions()
for option_name, option_value in (
    ("excludeSwitches", ["enable-automation"]),
    ("useAutomationExtension", False),
):
    options.add_experimental_option(option_name, option_value)

# Start Chrome through a Service bound to the chromedriver path, then load the page.
service = ChromeService(executable_path=CHROMEDRIVER_PATH)
driver = webdriver.Chrome(service=service, options=options)
driver.get(url)

# Every step of this absolute XPath carries an index, so it typically resolves
# to a single element; the collection below then has at most one entry.
reviews = driver.find_elements_by_xpath(
    '//*[@id="app-base"]/div[1]/div[4]/div[1]/div[1]/div[3]'
)
count = 0

# One dict per matched element, holding the text of the first descendant link
# whose class attribute contains "name".
item_list = [
    {'username': review.find_element_by_xpath(".//a[contains(@class,'name')]").text}
    for review in reviews
]
print(item_list)
The output is just one name, not all of them. I need to scrape all the reviews from https://www.influenster.com/reviews/loreal-paris-elvive-extraordinary-oil-deep-nourishing-shampoo-and-conditioner-set-126-fl-oz. Even though I am running a loop, I am getting only one username. Please help me out.
Advertisement
Answer
You are getting only one review because the XPath locator you are using, `//*[@id="app-base"]/div[1]/div[4]/div[1]/div[1]/div[3]`, returns only one element, so your `for` loop runs only once.
You can fix this by replacing that XPath with a locator that matches every review element rather than a single container.
You also need to close the cookie banner on that page.
You also need to add a wait so that elements are clickable / visible before you access them, as follows.
This should work better:
import configparser
from datetime import datetime

# These two imports are required by webdriver.ChromeOptions(), webdriver.Chrome()
# and ChromeService() below; without them the script fails with NameError.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Read the target URL and the date window from config.ini.
parser = configparser.RawConfigParser()
parser.read('config.ini')
url = parser['PROPERTIES']['URL']
# Both dates are parsed as YYYY-MM-DD; they are not used further in this snippet.
END_DATE = datetime.strptime(parser['DATE']['END'], '%Y-%m-%d')
START_DATE = datetime.strptime(parser['DATE']['START'], '%Y-%m-%d')

# Setting up driver options
options = webdriver.ChromeOptions()

# Setting up path to the chromedriver executable file.
# NOTE(review): the path contains no separators -- presumably the backslashes
# were lost when this snippet was pasted; confirm against the real location.
CHROMEDRIVER_PATH = r'C:UsersHPDesktopINTERNSHIPinfluensterchromedriver.exe'

# Adding options
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option("useAutomationExtension", False)

# Setting up chrome service
service = ChromeService(executable_path=CHROMEDRIVER_PATH)

# Establishing the Chrome web driver using the service and options set above
driver = webdriver.Chrome(service=service, options=options)
try:
    # Explicit wait shared by both lookups below (up to 20 seconds each).
    wait = WebDriverWait(driver, 20)
    driver.get(url)

    # Added relative to the question's script: accept the cookie banner once
    # its button is clickable, then wait until the review items are visible.
    wait.until(
        EC.element_to_be_clickable(
            (By.CSS_SELECTOR, "button#onetrust-accept-btn-handler")
        )
    ).click()
    reviews = wait.until(
        EC.visibility_of_all_elements_located(
            (By.CSS_SELECTOR, ".conversations-left .item")
        )
    )

    # find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
    # removed from Selenium 4; the relative XPath keeps the search scoped to
    # the current review element.
    item_list = [
        {
            'username': review.find_element(
                By.XPATH, ".//a[contains(@class,'name')]"
            ).text,
        }
        for review in reviews
    ]
    print(item_list)
finally:
    # Always shut the browser and chromedriver down, even if a wait times out.
    driver.quit()