I am trying to scrape all the pages of a URL with Selenium in Python, but I can only get the values from the first page. The code does navigate to the next page, but the same scraping code then fails with an error: "Element … is not clickable at point (208, 17). Other element would receive the click: …". Here is the code:
JavaScript
x
80
80
1
import pandas as pd
2
from selenium import webdriver
3
import time
4
from selenium.webdriver.common.by import By
5
from selenium.webdriver.support.ui import WebDriverWait as W
6
from selenium.webdriver.support import expected_conditions as E
7
8
9
def page_scrape():
    """Print the address and service details of each restaurant on the current page.

    Uses the module-level ``driver`` (a Selenium WebDriver) and ``By``.
    For every accordion header found in the store list, the header link is
    clicked, the text of the address block is printed, then a formatted line
    of the service labels, then the text of each service-type entry.

    NOTE(review): the pasted code had lost its indentation; the nesting below
    is a best-effort reconstruction - confirm against the original script.
    """
    # XPath locators are loop-invariant, so they are defined once up front.
    address_teaser_xpath = '//div[contains(@class,"all-stores accordian ng-star-inserted")]'
    address_header_xpath = './/div[contains(@class,"accordian-header")]'
    services_teaser_xpath = '//div[contains(@class, "store-accordian store-accordian-flex ng-star-inserted")]'
    service_type_xpath = './/div[contains(@class, "store-dine-flx ng-star-inserted")]'
    service_section_xpath = '//div[contains(@id,"divrestaurant2")]'
    service_label_xpath = './/div[contains(@class,"services ng-star-inserted")]'

    driver.maximize_window()
    teaser = driver.find_element(By.XPATH, address_teaser_xpath)
    locations = teaser.find_elements(By.XPATH, address_header_xpath)

    for loc in locations:
        # NOTE(review): this click is presumably where the reported
        # "Element ... is not clickable" error surfaces on later pages,
        # i.e. another element covers the link - confirm.
        loc.find_element(By.XPATH, './/a[@href]').click()
        address = driver.find_element(By.XPATH, ".//address[contains(@class, 'address-block')]").text
        print(address)

        services_teaser = driver.find_element(By.XPATH, services_teaser_xpath)
        services_list = services_teaser.find_elements(By.XPATH, service_type_xpath)

        service_section = driver.find_element(By.XPATH, service_section_xpath)
        service_labels = service_section.find_elements(By.XPATH, service_label_xpath)

        # Start from an empty string (not a list) so that no "[]" leaks into
        # the output when the heading is not the first label encountered.
        text = ""
        for label in service_labels:
            label_text = label.text
            if label_text == 'Services in diesem Restaurant':
                # The heading restarts the line: 'heading':
                text = "'{}':".format(label_text)
            elif label_text == 'Betreiber':
                # The original had a bare `exit` here, which is a no-op
                # expression; skipping the label keeps that behaviour.
                continue
            else:
                text = "{0},'{1}'".format(text, label_text)
        print(text)

        for service in services_list:
            print(service.text)
43
44
45
# NOTE(review): newer Selenium releases expect a Service object rather than a
# driver path string here - confirm against the installed Selenium version.
driver = webdriver.Chrome("C:/Users/doyel/Downloads/chromedriver_win32/chromedriver.exe")

try:
    driver.get('https://www.kfc.de/find-a-kfc')

    # A comma was missing between the last two names, which silently merged
    # them into a single 'TelephoneRestaurant Services' column.
    results = pd.DataFrame(columns=['address', 'PLZ', 'Telephone', 'Restaurant Services'])

    # Accept the cookie banner before interacting with the page.
    COOKIE_PATH = '//button[contains(@id,"onetrust-accept-btn-handler")]'
    driver.find_element(By.XPATH, COOKIE_PATH).click()

    next_page = '//a[@aria-label="Next page"]'
    while True:
        page_scrape()
        try:
            driver.find_element(By.XPATH, next_page).click()
            print("next page")
            # Fixed pause to give the next page time to render.
            time.sleep(2)
        except Exception:
            # Any failure to find or click the link is treated as the end of
            # the result list; `except Exception` (instead of a bare
            # `except`) lets Ctrl-C still interrupt the run.
            print("last page reached")
            break

    #//p[@class='mb-2']//font//font[contains(text(),'Schnellerstr.')]
finally:
    # Close the browser even when scraping raises.
    driver.quit()
79
80
Advertisement
Answer
While not trivial, it’s doable, and here is one way to do it:
JavaScript
1
58
58
1
from selenium import webdriver
2
from selenium.webdriver.chrome.service import Service
3
from selenium.webdriver.chrome.options import Options
4
from selenium.webdriver.common.by import By
5
from selenium.webdriver.support.ui import WebDriverWait
6
from selenium.webdriver.support import expected_conditions as EC
7
from selenium.webdriver.common.keys import Keys
8
import pandas as pd
9
import time as t
10
from tqdm import tqdm ## if using Jupyter notebook, import as from tqdm.notebook import tqdm
11
12
# Show every column at full width when the DataFrame is printed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

# Upper bound on the number of result pages visited; the loop stops earlier
# as soon as no clickable "Next page" link is found.
MAX_PAGES = 20

chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument("window-size=1920,1080")

webdriver_service = Service("chromedriver/chromedriver")  ## path to where you saved chromedriver binary
driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
wait = WebDriverWait(driver, 5)

restaurant_list = []
try:
    driver.get('https://www.kfc.de/find-a-kfc')

    try:
        wait.until(EC.element_to_be_clickable((By.ID, "onetrust-reject-all-handler"))).click()
        print('dismissed cookies')
    except Exception:
        print('no cookie button!')

    # Remove the page header from the DOM - presumably so that it cannot
    # overlap the list entries and intercept the clicks below.
    header = wait.until(EC.element_to_be_clickable((By.TAG_NAME, "app-common-header")))
    driver.execute_script("""
    var element = arguments[0];
    element.parentNode.removeChild(element);
    """, header)

    for _ in tqdm(range(1, MAX_PAGES + 1)):
        kfc_rests = wait.until(EC.presence_of_all_elements_located((By.XPATH, '//div[@class="all-stores accordian ng-star-inserted"]//app-accordian[@class="card carryout-address ng-star-inserted"]')))
        for k in kfc_rests:
            # Accessing this property scrolls the element into view.
            k.location_once_scrolled_into_view
            k.click()
            name = k.find_element(By.TAG_NAME, 'strong').text
            # Collapse line breaks inside the address into spaces. The pasted
            # code had 'n' here, which would strip every letter "n".
            address = wait.until(EC.element_to_be_clickable((By.XPATH, '//address[@class="address-block"]/p'))).text.replace('\n', ' ').strip()
            try:
                service_items = wait.until(EC.presence_of_all_elements_located((By.XPATH, '//div[@class="services ng-star-inserted"]//li')))
                services = ', '.join([item.text.strip() for item in service_items])
            except Exception:
                # No service list appeared within the wait timeout.
                services = 'Not specified'
            restaurant_list.append((name, address, services))
        try:
            next_page = wait.until(EC.element_to_be_clickable((By.XPATH, '//li[@class="pagination-next ng-star-inserted"]//a[@aria-label="Next page"]')))
            next_page.location_once_scrolled_into_view
            next_page.click()
        except Exception:
            print('end of list')
            break
finally:
    # Always close the browser so no Chrome process is left running.
    driver.quit()

df = pd.DataFrame(restaurant_list, columns = ['Name', 'Address', 'Services'])
print(df)
58
Result in terminal:
JavaScript
1
18
18
1
dismissed cookies
2
95%
3
19/20 [01:41<00:05, 5.91s/it]
4
end of list
5
Name Address Services
6
0 KFC BERLIN Grenzallee 37 12057 Berlin Lieferung, Drive Thru, Free Refill, EC-Zahlung, Click & Collect
7
1 KFC BERLIN Gatower Straße 56 13595 Berlin Lieferung, Drive Thru, Free Refill, EC-Zahlung, Click & Collect
8
2 KFC BERLIN Mall of Berlin Leipziger Platz 12 10117 Berlin Lieferung, Free Refill, EC-Zahlung, Click & Collect
9
3 KFC BERLIN Klosterstraße 3 13581 Berlin Lieferung, EC-Zahlung, Click & Collect
10
4 KFC BERLIN Schnellerstr. 18a 12439 Berlin Drive Thru, Free Refill, EC-Zahlung, Click & Collect
11
12
191 KFC SAARBRÜCKEN Wolfseck 6 66130 Saarbrücken Drive Thru, Free Refill, EC-Zahlung
13
192 KFC SAARLOUIS Provinzialstr. 246 66740 Saarlouis Drive Thru, Free Refill, EC-Zahlung
14
193 KFC OFFENBURG Heinrich-Hertz-Str. 3 77656 Offenburg Drive Thru, Free Refill, EC-Zahlung
15
194 KFC FREIBURG Tullastraße 68 79108 Freiburg Lieferung, Drive Thru, Free Refill, EC-Zahlung
16
195 KFC FRANKFURT FLUGHAFEN Tullastraße 68 79108 Freiburg Lieferung, Drive Thru, Free Refill, EC-Zahlung
17
196 rows × 3 columns
18
Selenium documentation can be found at: https://www.selenium.dev/documentation/
Pandas documentation: https://pandas.pydata.org/docs/
And for TQDM, go to https://pypi.org/project/tqdm/