I am trying to scrape the URL given below with Python and Selenium.
Here is my code:
# Question script: open the link page in headless Chrome, click the
# "human verification" element, pause, then click the download image.
# NOTE: pprint, ActionChains, Image and requests are imported but not used
# anywhere in this snippet.
from pprint import pprint
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from PIL import Image
import requests
from time import sleep

# Chrome is started headless with two extra command-line flags.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
wd = webdriver.Chrome('chromedriver',options=chrome_options)

# NOTE: the URL literal below ends with a trailing space before the closing quote.
wd.get("https://www.rtilinks.com/?5b5483ba2d=OUhWbXlXOGY4cEE0VEtsK1pWSU5CdEJob0hiR0xFNjN2M252ZXlOWnp0RC9yaFpvN3ZNeW9SazlONWJSTWpvNGNpR0FwWUZwQWduaXdFY202bkcrUHAybkVDc0hMMk9EWFdweitsS0xHa0U9 ")

# Step 1: wait up to 20 s for the verification element to be clickable, then click it.
WebDriverWait(wd, 20).until(EC.element_to_be_clickable((By.ID, "soralink-human-verif-main"))).click()

# Step 2: fixed 10-second pause before looking for the download image.
sleep(10)

# Step 3: wait up to 20 s for the <img id="showlink"> that carries an
# x-onclick attribute, then click it.
# NOTE(review): the script never inspects wd.window_handles or switches
# windows, so if this click opens the destination in a new tab, wd would
# keep reporting the original page -- confirm against the site's behaviour.
WebDriverWait(wd, 20).until(EC.element_to_be_clickable((By.XPATH, "//img[@id='showlink' and @x-onclick]"))).click()
After running this code I should be redirected to https://rareapk.com/finance/?n1p0ei2ng5yd3gz, but it gets stuck on the same page.
The element I am clicking is given below.
<img class="spoint" id="showlink" x-onclick="changeLink()" src="https://eductin.com/wp-content/uploads/2021/06/Download.png">
What is my code doing?
- First, it goes to this URL.
- Then it clicks "I'M NOT A ROBOT".
- After that, the next page is loaded and Selenium waits for 10 seconds.
- Then an image (with the text "DOWNLOAD RTI") is clicked, which should redirect it to the REDIRECTED URL.
But in the last step it gets stuck at the same URL; it does not redirect.
I have tried the following ways
# Attempt 1: wait up to 20 s for the image to become clickable, then click it.
WebDriverWait(wd, 20).until(EC.element_to_be_clickable((By.XPATH, "//img[@id='showlink' and @x-onclick]"))).click()
# Attempt 2: look the image up by its id and click it directly, with no explicit wait.
wd.find_element(By.ID, "showlink").click()
Advertisement
Answer
I tested the code without `--headless`, and I can see the browser open the expected page, but `wd.current_url` still shows the old URL (and `wd.title` also shows the old title).
The whole problem is probably that the page opens the new URL in a new tab, so you need to use `wd.switch_to.window(...)` to access the other tab.
This code uses `wd.switch_to.window(...)`, and it shows the correct URL (and title) in the other tab.
BTW: I had to add a click on the "Consent" button because my browser sometimes shows it.
# Answer script: perform the same clicks as the question, then list the URL
# and title of the original tab and of every other tab the browser opened.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from time import sleep
from webdriver_manager.chrome import ChromeDriverManager, ChromeType
#from webdriver_manager.firefox import GeckoDriverManager

chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')

# Alternative driver set-ups are kept as comments for readers who do not use
# webdriver_manager with Chromium.
#wd = webdriver.Chrome('chromedriver', options=chrome_options)
wd = webdriver.Chrome(
    service=Service(ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install()),
    options=chrome_options,
)
#wd = webdriver.Firefox(service=Service(GeckoDriverManager().install()))

# Everything after driver creation runs inside try/finally so the browser and
# the driver process are shut down even when one of the waits raises.
try:
    wd.get("https://www.rtilinks.com/?5b5483ba2d=OUhWbXlXOGY4cEE0VEtsK1pWSU5CdEJob0hiR0xFNjN2M252ZXlOWnp0RC9yaFpvN3ZNeW9SazlONWJSTWpvNGNpR0FwWUZwQWduaXdFY202bkcrUHAybkVDc0hMMk9EWFdweitsS0xHa0U9")

    # Remember the handle of the tab the script started in; every other
    # handle seen later belongs to a tab opened by the page.
    p = wd.current_window_handle
    print('current_window_handle:', p)

    # The consent button is clicked on a best-effort basis: a failure here
    # is reported and the script carries on.
    try:
        print('Waiting for: "Consent"')
        WebDriverWait(wd, 20).until(EC.element_to_be_clickable((By.XPATH, "//button[@aria-label='Consent']"))).click()
    except Exception as ex:
        print('Exception:', ex)

    # The apostrophe must be escaped inside the single-quoted literal.
    print('Waiting for: "I\'m not a robot"')
    WebDriverWait(wd, 20).until(EC.element_to_be_clickable((By.ID, "soralink-human-verif-main"))).click()

    print('Waiting for: "Download (RTI)"')
    WebDriverWait(wd, 20).until(EC.element_to_be_clickable((By.XPATH, "//img[@id='showlink' and @x-onclick]"))).click()

    print('--- active tab ---')
    print('current_window_handle:', p)
    print('current_url:', wd.current_url)
    print('title:', wd.title)

    print('--- other tabs ---')
    for w in wd.window_handles:
        # Skip the tab the script started in; switch focus to each other tab
        # so that current_url / title report that tab's values.
        if w == p:
            continue
        wd.switch_to.window(w)
        print('current_window_handle:', w)
        print('current_url:', wd.current_url)
        print('title:', wd.title)
        print('---')
finally:
    # quit() ends the whole session (all tabs and the driver process);
    # close() would only close the tab that currently has focus.
    wd.quit()
Result:
Waiting for: "Consent" Waiting for: "I'm not a robot" Waiting for: "Download (RTI)" --- active tab --- current_window_handle: CDwindow-31FDEC2C62AA0666A8F3A1DD2133D02C current_url: https://eductin.com/how-to-fix-and-restore-deleted-mac-system-files/ title: How to fix and Restore deleted Mac system files. – Eductin --- other tabs --- current_window_handle: CDwindow-CB1EAE5B6DCD4ACF5D061ED4ECC314CD current_url: https://sakarnewz.com/ title: SakarNewz – BOOST YOUR KNOWLEDGE WITH TECH NEWS AND UPDATES ---
EDIT:
Sometimes this code has problems displaying information about the other tabs, probably because the tab is running JavaScript all the time and Selenium can't access its data.