I am trying to scrape the video links from a YouTube channel using the code below; however, it seems that the elements in my element_titles list no longer have an href attribute. This worked about a year ago, and I am unsure why it doesn’t work now. Did YouTube change the way we can get the href?
JavaScript
x
11
11
1
# Scrape for videos
# WARNING: Takes very long

# Channel "Videos" tab whose entries are being collected.
HOME = "https://www.youtube.com/user/theneedledrop/videos"

# NOTE(review): the path separators were missing from this literal
# ("C:webdriverchromedriver.exe"); presumably the intended location is
# C:\webdriver\chromedriver.exe -- confirm against the local install.
# A raw string keeps the backslashes from being read as escape sequences.
driver = webdriver.Chrome(r"C:\webdriver\chromedriver.exe")
driver.get(HOME)

# scroll() is defined elsewhere; presumably it scrolls the page so that
# more videos are loaded before the lookup below -- TODO confirm.
scroll()

# Every element whose id is "video-title". These are the elements that
# turn out to have no href attribute in the question.
element_titles = driver.find_elements(By.ID, "video-title")
11
The following is the first attribute node found on one of the returned WebElement objects:
JavaScript
1
96
96
1
> element_titles[0].get_property('attributes')[0]
2
3
{'ATTRIBUTE_NODE': 2,
4
'CDATA_SECTION_NODE': 4,
5
'COMMENT_NODE': 8,
6
'DOCUMENT_FRAGMENT_NODE': 11,
7
'DOCUMENT_NODE': 9,
8
'DOCUMENT_POSITION_CONTAINED_BY': 16,
9
'DOCUMENT_POSITION_CONTAINS': 8,
10
'DOCUMENT_POSITION_DISCONNECTED': 1,
11
'DOCUMENT_POSITION_FOLLOWING': 4,
12
'DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC': 32,
13
'DOCUMENT_POSITION_PRECEDING': 2,
14
'DOCUMENT_TYPE_NODE': 10,
15
'ELEMENT_NODE': 1,
16
'ENTITY_NODE': 6,
17
'ENTITY_REFERENCE_NODE': 5,
18
'NOTATION_NODE': 12,
19
'PROCESSING_INSTRUCTION_NODE': 7,
20
'TEXT_NODE': 3,
21
'__shady_addEventListener': {},
22
'__shady_appendChild': {},
23
'__shady_childNodes': [],
24
'__shady_cloneNode': {},
25
'__shady_contains': {},
26
'__shady_dispatchEvent': {},
27
'__shady_firstChild': None,
28
'__shady_getRootNode': {},
29
'__shady_insertBefore': {},
30
'__shady_isConnected': False,
31
'__shady_lastChild': None,
32
'__shady_native_addEventListener': {},
33
'__shady_native_appendChild': {},
34
'__shady_native_childNodes': [],
35
'__shady_native_cloneNode': {},
36
'__shady_native_contains': {},
37
'__shady_native_dispatchEvent': {},
38
'__shady_native_firstChild': None,
39
'__shady_native_insertBefore': {},
40
'__shady_native_lastChild': None,
41
'__shady_native_nextSibling': None,
42
'__shady_native_parentElement': None,
43
'__shady_native_parentNode': None,
44
'__shady_native_previousSibling': None,
45
'__shady_native_removeChild': {},
46
'__shady_native_removeEventListener': {},
47
'__shady_native_replaceChild': {},
48
'__shady_native_textContent': 'video-title',
49
'__shady_nextSibling': None,
50
'__shady_parentElement': None,
51
'__shady_parentNode': None,
52
'__shady_previousSibling': None,
53
'__shady_removeChild': {},
54
'__shady_removeEventListener': {},
55
'__shady_replaceChild': {},
56
'__shady_textContent': 'video-title',
57
'addEventListener': {},
58
'appendChild': {},
59
'baseURI': 'https://www.youtube.com/user/theneedledrop/videos',
60
'childNodes': [],
61
'cloneNode': {},
62
'compareDocumentPosition': {},
63
'contains': {},
64
'dispatchEvent': {},
65
'firstChild': None,
66
'getRootNode': {},
67
'hasChildNodes': {},
68
'insertBefore': {},
69
'isConnected': False,
70
'isDefaultNamespace': {},
71
'isEqualNode': {},
72
'isSameNode': {},
73
'lastChild': None,
74
'localName': 'id',
75
'lookupNamespaceURI': {},
76
'lookupPrefix': {},
77
'name': 'id',
78
'namespaceURI': None,
79
'nextSibling': None,
80
'nodeName': 'id',
81
'nodeType': 2,
82
'nodeValue': 'video-title',
83
'normalize': {},
84
'ownerDocument': <selenium.webdriver.remote.webelement.WebElement (session="906f0b2a91a96de78811a8b48c702ce9", element="4105d26d-55b3-49a1-b657-10bbbbf43c84")>,
85
'ownerElement': <selenium.webdriver.remote.webelement.WebElement (session="906f0b2a91a96de78811a8b48c702ce9", element="c0d38452-435c-489a-8cb8-858adc4828b9")>,
86
'parentElement': None,
87
'parentNode': None,
88
'prefix': None,
89
'previousSibling': None,
90
'removeChild': {},
91
'removeEventListener': {},
92
'replaceChild': {},
93
'specified': True,
94
'textContent': 'video-title',
95
'value': 'video-title'}
96
I have tried exploring the markup of the YouTube page for the href attributes; however, I am unable to find them.
Advertisement
Answer
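The full working code below pulls the required data, which here is all of the video links. It reads the href from the `a#video-title-link` anchor inside each `div#details` container instead of from the element with the id `video-title`.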
Example:
JavaScript
1
56
56
1
"""Collect video links from a YouTube channel's "Videos" tab with Selenium.

The page is scrolled until at least ``item_count`` video links are loaded
(or the page stops growing), then every loaded link is read and printed
as a markdown table.
"""
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
    TimeoutException,
)
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

CHANNEL_VIDEOS_URL = 'https://www.youtube.com/user/theneedledrop/videos'
CONTAINER_SELECTOR = 'div#details'
LINK_SELECTOR = 'a#video-title-link'
SCROLL_PAUSE_TIME = 1  # seconds to let newly requested videos render

# Minimum number of video links to load before scrolling stops.
item_count = 100

options = webdriver.ChromeOptions()
# All are optional
options.add_argument("--disable-extensions")
options.add_argument("--disable-notifications")
# NOTE(review): this does not look like a documented Chromium switch and is
# presumably ignored by the browser -- confirm before relying on it.
options.add_argument("--disable-Advertisement")
options.add_argument("--disable-popup-blocking")
options.add_argument("start-maximized")

s = Service('./chromedriver')
driver = webdriver.Chrome(service=s, options=options)

data = []
try:
    driver.get(CHANNEL_VIDEOS_URL)
    time.sleep(3)

    last_height = driver.execute_script(
        "return document.documentElement.scrollHeight"
    )

    # Re-count the loaded links on every pass so the item_count limit is
    # actually enforced (the loop condition must change as the page grows).
    while len(driver.find_elements(By.CSS_SELECTOR, LINK_SELECTOR)) < item_count:
        driver.execute_script(
            "window.scrollTo(0,document.documentElement.scrollHeight);"
        )
        time.sleep(SCROLL_PAUSE_TIME)
        new_height = driver.execute_script(
            "return document.documentElement.scrollHeight"
        )

        # Height unchanged after scrolling: nothing more was loaded.
        if new_height == last_height:
            break
        last_height = new_height

    try:
        containers = WebDriverWait(driver, 20).until(
            EC.presence_of_all_elements_located(
                (By.CSS_SELECTOR, CONTAINER_SELECTOR)
            )
        )
    except TimeoutException:
        # No video containers appeared in time; report an empty result.
        containers = []

    for e in containers:
        try:
            vurl = e.find_element(By.CSS_SELECTOR, LINK_SELECTOR).get_attribute('href')
        except (NoSuchElementException, StaleElementReferenceException):
            # Skip only this entry instead of discarding the remaining ones.
            continue
        data.append({'video_url': vurl})
finally:
    # Always shut the browser and the chromedriver process down.
    driver.quit()

# Naming the column explicitly keeps the table shape stable when no link
# was collected.
df = pd.DataFrame(data, columns=['video_url']).drop_duplicates()
print(df.to_markdown())
56
Output:
JavaScript
1
116
116
1
| video_url |
2
|----:|:--------------------------------------------|
3
| 0 | https://www.youtube.com/watch?v=UZcSkasvj5c |
4
| 1 | https://www.youtube.com/watch?v=9c8AXKAnp_E |
5
| 2 | https://www.youtube.com/watch?v=KaLUHF7nQic |
6
| 3 | https://www.youtube.com/watch?v=rxb2L0Bgp3U |
7
| 4 | https://www.youtube.com/watch?v=z3L1wXvMN0Q |
8
| 5 | https://www.youtube.com/watch?v=q7vqR74WVYc |
9
| 6 | https://www.youtube.com/watch?v=Kb31OTOYYG8 |
10
| 7 | https://www.youtube.com/watch?v=F-CaQbxwMZ0 |
11
| 8 | https://www.youtube.com/watch?v=AWDWTyC0jls |
12
| 9 | https://www.youtube.com/watch?v=LXWbnTgxeT4 |
13
| 10 | https://www.youtube.com/watch?v=5KlHjDnefYQ |
14
| 11 | https://www.youtube.com/watch?v=yfq8rdBcAMg |
15
| 12 | https://www.youtube.com/watch?v=lATG1JBzVIU |
16
| 13 | https://www.youtube.com/watch?v=SNmZfHDOHQw |
17
| 14 | https://www.youtube.com/watch?v=IsQBbO_4EQI |
18
| 15 | https://www.youtube.com/watch?v=wcSyXUOM63g |
19
| 16 | https://www.youtube.com/watch?v=5hIaJZ9M8ZI |
20
| 17 | https://www.youtube.com/watch?v=ikryWQEHsCE |
21
| 18 | https://www.youtube.com/watch?v=5ARVgrao6E0 |
22
| 19 | https://www.youtube.com/watch?v=_1q6-POT8sY |
23
| 20 | https://www.youtube.com/watch?v=ycyxm3rgQG0 |
24
| 21 | https://www.youtube.com/watch?v=InirkRGnC2w |
25
| 22 | https://www.youtube.com/watch?v=nrvq5lY9oy0 |
26
| 23 | https://www.youtube.com/watch?v=M1yGh3D_KI8 |
27
| 24 | https://www.youtube.com/watch?v=Yn_4mtMYyXU |
28
| 25 | https://www.youtube.com/watch?v=8vmm8x_Cq4s |
29
| 26 | https://www.youtube.com/watch?v=Zfyojbr-cEQ |
30
| 27 | https://www.youtube.com/watch?v=NqrVX-WOrc0 |
31
| 28 | https://www.youtube.com/watch?v=Hx6k20LsAJ4 |
32
| 29 | https://www.youtube.com/watch?v=OB6ZI5Bicww |
33
| 30 | https://www.youtube.com/watch?v=uNMnIRKx0GE |
34
| 31 | https://www.youtube.com/watch?v=U7w_MKl5_hE |
35
| 32 | https://www.youtube.com/watch?v=KGi4Cpbh_Y0 |
36
| 33 | https://www.youtube.com/watch?v=mQqRtaoyAdw |
37
| 34 | https://www.youtube.com/watch?v=s3VzTy9oXXM |
38
| 35 | https://www.youtube.com/watch?v=eCaojgO-ZWs |
39
| 36 | https://www.youtube.com/watch?v=SeOLXwvu87E |
40
| 37 | https://www.youtube.com/watch?v=IlZ6Y21rxTU |
41
| 38 | https://www.youtube.com/watch?v=HxoRbEQFx3U |
42
| 39 | https://www.youtube.com/watch?v=NDCAImW1o6o |
43
| 40 | https://www.youtube.com/watch?v=gE778rR6-EM |
44
| 41 | https://www.youtube.com/watch?v=cQ0eY9NJACQ |
45
| 42 | https://www.youtube.com/watch?v=-x5Bx-leRWI |
46
| 43 | https://www.youtube.com/watch?v=XQ0C_Dmf0hI |
47
| 44 | https://www.youtube.com/watch?v=0eJ4JRNi4J8 |
48
| 45 | https://www.youtube.com/watch?v=YczkDCv3GiM |
49
| 46 | https://www.youtube.com/watch?v=GQmUsdUI20A |
50
| 47 | https://www.youtube.com/watch?v=4CFnoywFia4 |
51
| 48 | https://www.youtube.com/watch?v=A0Bzv8weX4s |
52
| 49 | https://www.youtube.com/watch?v=YbxcaHn_d_o |
53
| 50 | https://www.youtube.com/watch?v=GwUNT2k26mQ |
54
| 51 | https://www.youtube.com/watch?v=zktcHftIhDs |
55
| 52 | https://www.youtube.com/watch?v=_rY7Hvxe4x4 |
56
| 53 | https://www.youtube.com/watch?v=rqB9gd4fbfE |
57
| 54 | https://www.youtube.com/watch?v=oNPAhe7G3yg |
58
| 55 | https://www.youtube.com/watch?v=37_aCQW98sU |
59
| 56 | https://www.youtube.com/watch?v=GjA4fWIUv-A |
60
| 57 | https://www.youtube.com/watch?v=8THBFF024ho |
61
| 58 | https://www.youtube.com/watch?v=HLErXgsV3Nk |
62
| 59 | https://www.youtube.com/watch?v=GsvdLIxY6Fg |
63
| 60 | https://www.youtube.com/watch?v=iUU48DuTpl8 |
64
| 61 | https://www.youtube.com/watch?v=5UluxcFJVx0 |
65
| 62 | https://www.youtube.com/watch?v=5lOvAHg12uw |
66
| 63 | https://www.youtube.com/watch?v=2UADjU66-4M |
67
| 64 | https://www.youtube.com/watch?v=Qvr2labD_Es |
68
| 65 | https://www.youtube.com/watch?v=qUWRnIn5oB0 |
69
| 66 | https://www.youtube.com/watch?v=Qk7MPEyGhQ4 |
70
| 67 | https://www.youtube.com/watch?v=bN7SDJFanS4 |
71
| 68 | https://www.youtube.com/watch?v=6YoUjUGvHUk |
72
| 69 | https://www.youtube.com/watch?v=NjiLz3HoWkM |
73
| 70 | https://www.youtube.com/watch?v=rRdU7VhoWdI |
74
| 71 | https://www.youtube.com/watch?v=zOm5n0OJLfc |
75
| 72 | https://www.youtube.com/watch?v=z9jMFiSUe5Q |
76
| 73 | https://www.youtube.com/watch?v=M6VLYjFnXMU |
77
| 74 | https://www.youtube.com/watch?v=4iFEpKDQx-o |
78
| 75 | https://www.youtube.com/watch?v=Zc1SE66DEYo |
79
| 76 | https://www.youtube.com/watch?v=645qisC4slI |
80
| 77 | https://www.youtube.com/watch?v=QeIRfgsVX5k |
81
| 78 | https://www.youtube.com/watch?v=0jUr57dIMq4 |
82
| 79 | https://www.youtube.com/watch?v=EjaTJGmoT_w |
83
| 80 | https://www.youtube.com/watch?v=roXy5LA17fU |
84
| 81 | https://www.youtube.com/watch?v=UeSwqepnAX0 |
85
| 82 | https://www.youtube.com/watch?v=BDYSYypzhxE |
86
| 83 | https://www.youtube.com/watch?v=iyBNxEnP7rk |
87
| 84 | https://www.youtube.com/watch?v=YCUmI9f77qs |
88
| 85 | https://www.youtube.com/watch?v=h21LYpHEfNU |
89
| 86 | https://www.youtube.com/watch?v=LBQDuTn6T0c |
90
| 87 | https://www.youtube.com/watch?v=le_0jyqCXFU |
91
| 88 | https://www.youtube.com/watch?v=tGClvgTCrIY |
92
| 89 | https://www.youtube.com/watch?v=969qt4RUx74 |
93
| 90 | https://www.youtube.com/watch?v=XL8li__PnaA |
94
| 91 | https://www.youtube.com/watch?v=RKf3ppfFUkg |
95
| 92 | https://www.youtube.com/watch?v=xY5RyjaQJCE |
96
| 93 | https://www.youtube.com/watch?v=6bjliN6hJTs |
97
| 94 | https://www.youtube.com/watch?v=KcYBolH-j9c |
98
| 95 | https://www.youtube.com/watch?v=nlsnpbRyvtU |
99
| 96 | https://www.youtube.com/watch?v=AOWmL1eydWI |
100
| 97 | https://www.youtube.com/watch?v=I8RPsF-hdXo |
101
| 98 | https://www.youtube.com/watch?v=9NSOGd2p530 |
102
| 99 | https://www.youtube.com/watch?v=8EdqpZu9lkM |
103
| 100 | https://www.youtube.com/watch?v=a23wQEA4EAA |
104
| 101 | https://www.youtube.com/watch?v=7g6TXGY-T6k |
105
| 102 | https://www.youtube.com/watch?v=iXZNlGwOuWY |
106
| 103 | https://www.youtube.com/watch?v=miR30bsSH4E |
107
| 104 | https://www.youtube.com/watch?v=zb8-aHiTKL4 |
108
| 105 | https://www.youtube.com/watch?v=rTEZmXq9K3k |
109
| 106 | https://www.youtube.com/watch?v=OBeOJiolMug |
110
| 107 | https://www.youtube.com/watch?v=fA0nxixnS-A |
111
| 108 | https://www.youtube.com/watch?v=dMhpDlUTT_U |
112
| 109 | https://www.youtube.com/watch?v=SgjDaPWjzuU |
113
| 110 | https://www.youtube.com/watch?v=2lokqffmF2A |
114
| 111 | https://www.youtube.com/watch?v=jmHZvGMe8pQ |
115
| 112 | https://www.youtube.com/watch?v=KPYvMIMON9g |
116
… so on