Please find the attached screenshot.
The code below prints only the first 4-5 rows that are visible in the screenshot. It does not scroll down, and when I inspect the remaining elements it only prints blank strings.
The same code runs successfully when the code written inside the main function is moved outside the function.
import time
from datetime import datetime

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import add_data


def close_up(driver, actions):
    time.sleep(1)
    # Create a wait local to this helper; the WebDriverWait defined in main() is not in scope here.
    wait = WebDriverWait(driver, 10)
    actions.move_to_element(wait.until(EC.element_to_be_clickable((By.XPATH, "//button[@data-dismiss='modal']"))))
    button = driver.find_element_by_xpath("//button[@data-dismiss='modal']")
    driver.execute_script("arguments[0].click();", button)
    time.sleep(1)


def check_model_winodows(driver, actions):
    try:
        if len(driver.find_elements(By.XPATH, "(//button[@data-dismiss='modal'])[1]")) > 0:
            # print("Pop up is visible")
            close_up(driver, actions)
        else:
            print("")
    except:
        # print("Something went wrong")
        pass
    return driver, actions


def main(hashtag):
    options = webdriver.ChromeOptions()
    options.add_argument("--disable-infobars")
    options.add_argument("--disable-notifications")
    options.add_argument("--start-maximized")
    options.add_argument("--disable-extensions")
    options.add_experimental_option("prefs", {"profile.default_content_setting_values.notifications": 2})
    options.add_argument('--window-size=1920,1080')
    options.add_experimental_option("prefs", {"profile.default_content_settings.cookies": 2})

    driver = webdriver.Chrome(executable_path='/home/tukaram/chromedriver', options=options)
    # driver = webdriver.Chrome(driver_path)
    driver.maximize_window()
    driver.implicitly_wait(50)
    driver.get("https://www.trackmyhashtag.com/")
    wait = WebDriverWait(driver, 10)

    actions = ActionChains(driver)
    wait.until(EC.visibility_of_element_located((By.ID, "search_keyword"))).send_keys(hashtag, Keys.RETURN)

    check_model_winodows(driver, actions)
    wait = WebDriverWait(driver, 10)
    time.sleep(3)
    button = driver.find_element_by_css_selector("a[onclick*='preview-tweets']")
    driver.execute_script("arguments[0].click();", button)
    # wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a[onclick*='preview-tweets']"))).click()
    check_model_winodows(driver, actions)
    total_number_of_tweet_row = len(driver.find_elements(By.XPATH, "//tbody/tr"))
    # print(total_number_of_tweet_row)
    rank = 1
    page_number = 2
    total_number_of_pages = 5
    myhashtag = {}
    for a, idx in enumerate(range(total_number_of_pages)):
        print("idx>>>>", idx)
        j = 0
        for i in range(total_number_of_tweet_row):
            check_model_winodows(driver, actions)
            elems = driver.find_elements(By.XPATH, "//tbody/tr")
            time.sleep(1)
            # final_ele = elems[j].find_element_by_xpath(".//td[2]")
            # print("code worked till here")
            name = elems[j].find_element_by_xpath(".//div[@class='tweet-name']").text
            print("name>", name)
            myhashtag['user_name'] = name

            userid = elems[j].find_element_by_tag_name("td").text
            userid = userid.partition('@')[2]
            userid = '@' + userid
            print("userid>", userid)
            myhashtag['user_screen_name'] = userid

            content = elems[j].find_element_by_xpath(".//td[2]").text
            print("content", content)
            myhashtag['content'] = content

            date = elems[j].find_element_by_xpath(".//td[3]").text
            print("1>>>>", date)
            date = str(date).replace("\n", " ")
            print("2>>>", date)
            date = datetime.strptime(date, '%d %b %Y %H:%M:%S %p')
            print("3>>>", date)
            date = date.strftime('%Y-%m-%dT%H:%M:%SZ')
            print("date", date)
            myhashtag['articleDate'] = date

            engm = elems[j].find_element_by_xpath(".//td[4]").text
            print("engagement", engm)
            myhashtag['engagement'] = engm

            impressions = elems[j].find_element_by_xpath(".//td[6]").text
            print("impressions", impressions)
            myhashtag['impressions'] = impressions

            myhashtag['rank'] = rank
            rank = rank + 1
            j = j + 1

            print(myhashtag)

        check_model_winodows(driver, actions)
        driver.execute_script(
            "var scrollingElement = (document.scrollingElement || document.body);scrollingElement.scrollTop = "
            "scrollingElement.scrollHeight;")
        wait.until(EC.element_to_be_clickable((By.XPATH, f"//a[text()='{page_number}']"))).click()
        page_number = page_number + 1
        print("Page numberrrr", page_number)
        if page_number == 7:
            break
    driver.quit()
    return driver, actions


if __name__ == '__main__':
    for x in add_data.words:
        main(x)
add_data.py ->
words = ['India', '@pakistan']  # words to crawl
Answer
Maybe you need to scroll to each row to extract the details. I added driver.execute_script("arguments[0].scrollIntoView(true);", elems[j])
to the code, and it extracted all the details. Try this once.
for a, idx in enumerate(range(total_number_of_pages)):
    print("idx>>>>", idx)
    j = 0
    for i in range(total_number_of_tweet_row):
        check_model_winodows(driver, actions)
        elems = driver.find_elements(By.XPATH, "//tbody/tr")
        time.sleep(1)
        # final_ele = elems[j].find_element_by_xpath(".//td[2]")
        # print("code worked till here")
        driver.execute_script("arguments[0].scrollIntoView(true);", elems[j])  # Line to be added.
        name = elems[j].find_element_by_xpath(".//div[@class='tweet-name']").text
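Selenium's .text returns only text that the browser reports as visible, which is likely why the rows that had not been scrolled into view came back as blank strings; scrolling each row into view before reading it works around that. If the per-row scrolling ever becomes a bottleneck, a possible alternative (a minimal sketch, not part of the original answer) is to read the element's raw textContent, which does not depend on the row being in the viewport:

# Hypothetical alternative: textContent is available even for rows that are
# not currently scrolled into view, so no per-row scrolling is needed.
name = elems[j].find_element_by_xpath(".//div[@class='tweet-name']").get_attribute("textContent").strip()
content = driver.execute_script("return arguments[0].textContent;", elems[j].find_element_by_xpath(".//td[2]")).strip()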