I’m trying to scrape the reviews from this link:
To do that, I’m using the following code to load the page:
JavaScript
x
46
46
1
from selenium import webdriver
2
import datetime
3
import time
4
import argparse
5
import os
6
import time
7
8
# The target URL is hard-coded here rather than read from the command line.
url = "https://www.google.com/search?q=google+reviews+2nd+chance+treatment+40th+street&rlz=1C1JZAP_enUS697US697&oq=google+reviews+2nd+chance+treatment+40th+street&aqs=chrome..69i57j69i64.6183j0j7&sourceid=chrome&ie=UTF-8#lrd=0x872b7179b68e33d5:0x24b5517d86a95f89,1"

# Firefox profile with an overridden user-agent string. It is only consumed by
# the alternative Firefox driver, webdriver.Firefox(profile); the Chrome driver
# created below does not receive it.
profile = webdriver.FirefoxProfile()
profile.set_preference(
    "general.useragent.override",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko",
)

# Start Chrome through an explicit chromedriver path, see
# https://stackoverflow.com/questions/22476112/using-chromedriver-with-selenium-python-ubuntu
driver = webdriver.Chrome("/usr/lib/chromium-browser/chromedriver")
driver.get(url)
driver.implicitly_wait(2)

# Seconds to pause after each scroll so the page can load more content.
SCROLL_PAUSE_TIME = 0.5

# JavaScript snippets executed in the browser by the scroll loop.
PAGE_HEIGHT_JS = "return document.body.scrollHeight"
SCROLL_TO_BOTTOM_JS = "window.scrollTo(0, document.body.scrollHeight);"

# Keep scrolling the window to the bottom until the document height stops
# changing between two consecutive measurements.
last_height = driver.execute_script(PAGE_HEIGHT_JS)
at_bottom = False
while not at_bottom:
    driver.execute_script(SCROLL_TO_BOTTOM_JS)
    time.sleep(SCROLL_PAUSE_TIME)

    new_height = driver.execute_script(PAGE_HEIGHT_JS)
    at_bottom = new_height == last_height
    last_height = new_height
46
The page loads fine, but it does not scroll down. I have used the same code for other sites, such as LinkedIn, and it works there.
Advertisement
Answer
Here is logic you can use without the JavaScript scroll-down. It is simple and effective: it uses the location_once_scrolled_into_view
property, which scrolls to the element.
As part of the logic below, we scroll to the last review and then check whether the desired number of reviews has been loaded.
Imports Needed:
JavaScript
1
4
1
from selenium.webdriver.common.by import By
2
from selenium.webdriver.support import expected_conditions as EC
3
from selenium.webdriver.support.ui import WebDriverWait
4
Change the value of the desiredReviewsCount
variable in the code below to suit your requirements.
JavaScript
1
12
12
1
# Raised by WebDriverWait.until() when its condition is not met in time.
from selenium.common.exceptions import TimeoutException

# XPath matching every review card inside the reviews block, and the same
# expression narrowed to the last card currently present in the DOM.
REVIEW_XPATH = "//div[@class='gws-localreviews__general-reviews-block']//div[@class='WMbnJf gws-localreviews__google-review']"
LAST_REVIEW_XPATH = "(" + REVIEW_XPATH + ")[last()]"


def _review_count_above(previous):
    """Return a wait condition that yields the review count once it exceeds *previous*."""
    def _condition(drv):
        count = len(drv.find_elements(By.XPATH, REVIEW_XPATH))
        # WebDriverWait keeps polling while the condition returns a falsy value.
        return count if count > previous else False
    return _condition


wait = WebDriverWait(driver,10)
url = "https://www.google.com/search?q=google+reviews+2nd+chance+treatment+40th+street&rlz=1C1JZAP_enUS697US697&oq=google+reviews+2nd+chance+treatment+40th+street&aqs=chrome..69i57j69i64.6183j0j7&sourceid=chrome&ie=UTF-8#lrd=0x872b7179b68e33d5:0x24b5517d86a95f89,1"
driver.get(url)

# Number of reviews to load before stopping; adjust as required.
desiredReviewsCount=30

# Wait for the first batch of reviews; x tracks how many are loaded so far.
x = len(wait.until(EC.presence_of_all_elements_located((By.XPATH, REVIEW_XPATH))))

while x<desiredReviewsCount:
    # Reading this property scrolls the last review into view.
    driver.find_element(By.XPATH, LAST_REVIEW_XPATH).location_once_scrolled_into_view
    try:
        # Block until more reviews appear instead of busy-polling the DOM.
        x = wait.until(_review_count_above(x))
    except TimeoutException:
        # No new reviews arrived within the wait timeout: the page has fewer
        # reviews than requested (or loading stalled), so stop instead of
        # looping forever.
        break

print (x)
12