Please find the attached screenshot.
The code below prints only the first 4-5 rows, the ones visible in the screenshot. It does not scroll down, and when I inspect the remaining row elements it prints blank strings.
The same code runs successfully when I take the code written inside the main function and run it outside the function.
import time
from datetime import datetime

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

import add_data


def close_up(driver, actions):
    # Dismiss the modal pop-up with a JavaScript click.
    time.sleep(1)
    wait = WebDriverWait(driver, 10)  # `wait` was not defined in this scope originally
    actions.move_to_element(wait.until(EC.element_to_be_clickable((By.XPATH, "//button[@data-dismiss='modal']"))))
    button = driver.find_element_by_xpath("//button[@data-dismiss='modal']")
    driver.execute_script("arguments[0].click();", button)
    time.sleep(1)


def check_model_winodows(driver, actions):
    # Close the modal window if one is present; ignore any errors.
    try:
        if len(driver.find_elements(By.XPATH, "(//button[@data-dismiss='modal'])[1]")) > 0:
            # print("Pop up is visible")
            close_up(driver, actions)
        else:
            print("")
    except Exception:
        # print("Something went wrong")
        pass
    return driver, actions


def main(hashtag):
    options = webdriver.ChromeOptions()
    options.add_argument("--disable-infobars")
    options.add_argument("--disable-notifications")
    options.add_argument("--start-maximized")
    options.add_argument("--disable-extensions")
    options.add_experimental_option("prefs", {"profile.default_content_setting_values.notifications": 2})
    options.add_argument('--window-size=1920,1080')
    options.add_experimental_option("prefs", {"profile.default_content_settings.cookies": 2})
    driver = webdriver.Chrome(executable_path='/home/tukaram/chromedriver', options=options)
    # driver = webdriver.Chrome(driver_path)
    driver.maximize_window()
    driver.implicitly_wait(50)
    driver.get("https://www.trackmyhashtag.com/")
    wait = WebDriverWait(driver, 10)
    actions = ActionChains(driver)

    # Search for the hashtag and open the tweet preview table.
    wait.until(EC.visibility_of_element_located((By.ID, "search_keyword"))).send_keys(hashtag, Keys.RETURN)
    check_model_winodows(driver, actions)
    time.sleep(3)
    button = driver.find_element_by_css_selector("a[onclick*='preview-tweets']")
    driver.execute_script("arguments[0].click();", button)
    # wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a[onclick*='preview-tweets']"))).click()
    check_model_winodows(driver, actions)

    total_number_of_tweet_row = len(driver.find_elements(By.XPATH, "//tbody/tr"))
    # print(total_number_of_tweet_row)
    rank = 1
    page_number = 2
    total_number_of_pages = 5
    myhashtag = {}
    for a, idx in enumerate(range(total_number_of_pages)):
        print("idx>>>>", idx)
        j = 0
        # Scrape every row of the table on the current page.
        for i in range(total_number_of_tweet_row):
            check_model_winodows(driver, actions)
            elems = driver.find_elements(By.XPATH, "//tbody/tr")
            time.sleep(1)
            # final_ele = elems[j].find_element_by_xpath(".//td[2]")
            # print("code worked till here")
            name = elems[j].find_element_by_xpath(".//div[@class='tweet-name']").text
            print("name>", name)
            myhashtag['user_name'] = name
            userid = elems[j].find_element_by_tag_name("td").text
            userid = userid.partition('@')[2]
            userid = '@' + userid
            print("userid>", userid)
            myhashtag['user_screen_name'] = userid
            content = elems[j].find_element_by_xpath(".//td[2]").text
            print("content", content)
            myhashtag['content'] = content
            date = elems[j].find_element_by_xpath(".//td[3]").text
            print("1>>>>", date)
            date = str(date).replace("\n", " ")
            print("2>>>", date)
            date = datetime.strptime(date, '%d %b %Y %H:%M:%S %p')
            print("3>>>", date)
            date = date.strftime('%Y-%m-%dT%H:%M:%SZ')
            print("date", date)
            myhashtag['articleDate'] = date
            engm = elems[j].find_element_by_xpath(".//td[4]").text
            print("engagement", engm)
            myhashtag['engagement'] = engm
            impressions = elems[j].find_element_by_xpath(".//td[6]").text
            print("impressions", impressions)
            myhashtag['impressions'] = impressions
            myhashtag['rank'] = rank
            rank = rank + 1
            j = j + 1
            print(myhashtag)
        check_model_winodows(driver, actions)
        # Scroll to the bottom of the page and move to the next results page.
        driver.execute_script(
            "var scrollingElement = (document.scrollingElement || document.body);"
            "scrollingElement.scrollTop = scrollingElement.scrollHeight;")
        wait.until(EC.element_to_be_clickable((By.XPATH, f"//a[text()='{page_number}']"))).click()
        page_number = page_number + 1
        print("Page numberrrr", page_number)
        if page_number == 7:
            break
    driver.quit()
    return driver, actions


if __name__ == '__main__':
    for x in add_data.words:
        main(x)
add_data.py ->
words = ['India', '@pakistan']  # words to crawl
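One way to check whether the row data is actually present in the DOM is to compare Selenium's .text (which returns only rendered text) with the element's raw textContent. A minimal diagnostic sketch, not part of the original code, assuming driver already has the preview table open:

rows = driver.find_elements(By.XPATH, "//tbody/tr")
for row in rows:
    rendered = row.text  # what Selenium sees: rendered, visible text only
    raw = driver.execute_script("return arguments[0].textContent;", row)  # raw DOM text, regardless of visibility
    print(repr(rendered), "|", repr(raw.strip()))

If textContent comes back populated while .text is blank, the rows are in the DOM but not rendered as visible, which points at a scrolling/visibility issue rather than missing data.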
Answer
Maybe you need to scroll to each row to extract the details. I added
driver.execute_script("arguments[0].scrollIntoView(true);", elems[j])
to the code, and it extracted all the details. Try this once.
for a, idx in enumerate(range(total_number_of_pages)):
    print("idx>>>>", idx)
    j = 0
    for i in range(total_number_of_tweet_row):
        check_model_winodows(driver, actions)
        elems = driver.find_elements(By.XPATH, "//tbody/tr")
        time.sleep(1)
        # final_ele = elems[j].find_element_by_xpath(".//td[2]")
        # print("code worked till here")
        driver.execute_script("arguments[0].scrollIntoView(true);", elems[j])  # line to be added
        name = elems[j].find_element_by_xpath(".//div[@class='tweet-name']").text
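Note that scrollIntoView(true) aligns the row with the top of the viewport, so on pages with a fixed or sticky header the row can still end up partially covered. A common variant, sketched here as an assumption and not tested against this particular site, is to center the element instead:

# Center the row in the viewport to keep it clear of sticky headers (hypothetical variant).
driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", elems[j])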