I tried running my web scraping code. Sometimes it works fine, but sometimes it fails with a traceback ending in a TypeError. What is causing this error?
Here is the error message:
JavaScript
x
7
1
Traceback (most recent call last):
2
File "D:python-learninglistings.py", line 22, in <module>
3
pageLink='https://www.vancouverforsale.ca'+getData(pageLink)
4
File "D:python-learninglistings.py", line 17, in getData
5
return nextLink['href']
6
TypeError: 'NoneType' object is not subscriptable
7
JavaScript
1
27
27
1
from bs4 import BeautifulSoup
2
import lxml
3
import requests
4
5
def getData(url):
    """Print every listing on a results page and return the next page's href.

    Downloads ``url``, parses it with the lxml parser, and prints the address
    and then the price of each ``div`` whose class is
    ``row property results``.

    Returns the ``href`` attribute of the anchor whose text is 'Next »'.
    When the page has no such anchor, ``soup.find`` yields ``None`` and the
    final subscript raises ``TypeError: 'NoneType' object is not
    subscriptable``.
    """
    soup = BeautifulSoup(requests.get(url).text, 'lxml')

    for card in soup.find_all('div', class_='row property results'):
        # Look up both fields before printing either one.
        address = card.find('a', class_='address').text
        price = card.find('a', class_='price').text
        print(address)
        print(price)

    # find next page: the pagination anchor is matched by its exact text
    nextLink = soup.find('a', string='Next »')
    return nextLink['href']
18
19
# URL of the first results page. The query string must stay on one line
# (a single-quoted literal cannot span lines) and uses `&region=all`.
pageLink='https://www.vancouverforsale.ca/search/results/?city=Langley&region=all&list_price_min=50000&list_price_max=all&beds_min=all&baths_min=all&type=con'

# Follow the "next page" link for at most three pages. getData() returns a
# site-relative href, so the host is prepended before the next request.
count=0
while count<3:
    pageLink='https://www.vancouverforsale.ca'+getData(pageLink)
    count+=1
27
Advertisement
Answer
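You have to check that `nextLink` is not `None` before you try to get `['href']` from it: `soup.find()` returns `None` when the page has no 'Next »' link (for example, on the last page of results).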
JavaScript
1
4
1
next_link = soup.find('a', string='Next »')
2
if next_link:
3
return 'https://www.vancouverforsale.ca' + next_link['href']
4
When `nextLink` is `None`, the function falls through and returns `None`, so you have to check for that in the main loop and stop:
JavaScript
1
5
1
for count in range(3):
2
page_link = get_data(page_link)
3
if not page_link:
4
break
5
Full working code
JavaScript
1
36
36
1
import requests
2
from bs4 import BeautifulSoup
3
#import urllib.parse
4
5
# PEP8: `lower_case_names` for functions and variables
6
7
def get_data(url):
    """Print the listings on one results page and return the next page's URL.

    Args:
        url: Absolute URL of a search-results page.

    Returns:
        The absolute URL of the next results page, or ``None`` when the page
        has no 'Next »' link, so the caller knows to stop paginating.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    # Imported locally so the function carries its own dependency.
    from urllib.parse import urljoin

    # Without a timeout, requests can wait forever on an unresponsive server.
    response = requests.get(url, timeout=30)
    #print(response.status_code)
    soup = BeautifulSoup(response.text, 'lxml')

    for listing in soup.find_all('div', class_='row property results'):
        address_tag = listing.find('a', class_='address')
        price_tag = listing.find('a', class_='price')
        if address_tag is None or price_tag is None:
            # find() returns None for a missing element; skip incomplete
            # listings instead of crashing on `.text`.
            continue

        address = address_tag.text.strip()
        # Drop the price-change arrows that the site puts next to the amount.
        price = price_tag.text.replace('▲', '').replace('▼', '').strip()
        print('address:', address)
        print('price :', price)
        print('---')

    # find next page
    next_link = soup.find('a', string='Next »')
    if next_link is None:
        return None

    # urljoin resolves site-relative, page-relative and absolute hrefs alike,
    # unlike plain string concatenation with the host name.
    return urljoin(url, next_link['href'])
27
# --- main ---
28
29
# First results page; the query parameter is `region`, joined with `&`.
page_link = 'https://www.vancouverforsale.ca/search/results/?city=Langley&region=all&list_price_min=50000&list_price_max=all&beds_min=all&baths_min=all&type=con'

# Limited to three pages here; use `while True:` instead to follow every page.
for count in range(3):
    page_link = get_data(page_link)
    if not page_link:
        # get_data() returned None: there is no further page to fetch.
        break
36
Result:
JavaScript
1
91
91
1
address: 19681 75 Avenue, Langley
2
price : $1,695,000
3
---
4
address: 20806 52a Avenue, Langley
5
price : $1,649,900
6
---
7
address: 20804 52a Avenue, Langley
8
price : $1,649,900
9
---
10
address: 7138 210 Street Unit 43, Langley
11
price : $1,638,000
12
---
13
address: 8567 204 Street Unit 13, Langley
14
price : $1,624,999
15
---
16
address: 19842 75b Avenue, Langley
17
price : $1,599,000
18
---
19
address: 8567 204 Street Unit 1, Langley
20
price : $1,598,000
21
---
22
address: 8258 202 Street, Langley
23
price : $1,588,800
24
---
25
address: 7138 210 Street Unit 59, Langley
26
price : $1,579,000
27
---
28
address: 8567 204 Street Unit 3, Langley
29
price : $1,499,900
30
---
31
address: 7429 197 Street, Langley
32
price : $1,489,900
33
---
34
address: 22981 Billy Brown Road, Langley
35
price : $1,399,000
36
---
37
address: 23168 Billy Brown Road, Langley
38
price : $1,399,000
39
---
40
address: 26718 32 Avenue, Langley
41
price : $1,399,000
42
---
43
address: 20327 82 Avenue, Langley
44
price : $1,395,000
45
---
46
address: 8567 204 Street Unit 7, Langley
47
price : $1,390,000
48
---
49
address: 20873 71b Avenue, Langley
50
price : $1,388,000
51
---
52
address: 20321 80 Avenue Unit 27, Langley
53
price : $1,370,000
54
---
55
address: 20924 80a Avenue, Langley
56
price : $1,350,000
57
---
58
address: 20463 70 Avenue Unit 2, Langley
59
price : $1,349,900
60
---
61
address: 23189 Francis Avenue Unit 203, Langley
62
price : $1,349,000
63
---
64
address: 20576 84a Avenue, Langley
65
price : $1,349,000
66
---
67
address: 20451 84 Avenue Unit 10, Langley
68
price : $1,348,000
69
---
70
address: 7138 210 Street Unit 85, Langley
71
price : $1,348,000
72
---
73
address: 19897 75a Avenue Unit 46, Langley
74
price : $1,325,000
75
---
76
address: 9567 217a Street Unit 3, Langley
77
price : $1,299,900
78
---
79
address: 20321 80 Avenue Unit 45, Langley
80
price : $1,299,900
81
---
82
address: 9762 182a Street Unit 21, Langley
83
price : $1,298,888
84
---
85
address: 8450 204 Street Unit 29, Langley
86
price : $1,258,000
87
---
88
address: 20770 97b Avenue Unit 3, Langley
89
price : $1,250,000
90
---
91