I tried running my web scraping code. Sometimes it works fine, but sometimes it fails with a TypeError traceback. What is causing the error?
Here is the error message:
Traceback (most recent call last):
  File "D:\python-learning\listings.py", line 22, in <module>
    pageLink='https://www.vancouverforsale.ca'+getData(pageLink)
  File "D:\python-learning\listings.py", line 17, in getData
    return nextLink['href']
TypeError: 'NoneType' object is not subscriptable
from bs4 import BeautifulSoup
import lxml
import requests


def getData(url):
    """Print the address and price of each listing on the page at `url`.

    Returns the value of the ``href`` attribute of the page's "Next »" link.
    Raises TypeError when the page contains no such link, because
    ``soup.find`` then returns None and None cannot be subscripted.
    """
    # Download the page and parse the HTML with the lxml parser.
    html_text = requests.get(url).text
    soup = BeautifulSoup(html_text,'lxml')
    # Select the <div> elements whose class attribute is 'row property results'.
    listings = soup.find_all('div', class_ = 'row property results')
    for listing in listings:
        address = listing.find('a', class_ = 'address').text
        price = listing.find('a', class_ = 'price').text
        print(address)
        print(price)
    #find next page
    nextLink=soup.find('a', string='Next »')
    # This is the line the traceback points at: when no <a> with the text
    # 'Next »' exists, nextLink is None and the subscript raises TypeError.
    return nextLink['href']


# NOTE(review): this query string looks garbled by page extraction
# ("®ion" was presumably "&region", and the embedded spaces look like
# line-wrap artifacts) - confirm against the original post.
pageLink='https://www.vancouverforsale.ca/search/results/? city=Langley®ion=all&list_price_min=50000&list_price_max=a ll&beds_min=all&baths_min=all&type=con'
count=0
# Follow the "Next" link for up to three pages; getData returns a
# site-relative href, so the host is prepended before the next request.
while count<3:
    pageLink='https://www.vancouverforsale.ca'+getData(pageLink)
    count+=1
Advertisement
Answer
You have to check that `nextLink` is not `None` before you try to read `['href']` from it:
# soup.find() returns None when the page has no <a> with the text 'Next »'.
next_link = soup.find('a', string='Next »')
# Only subscript the result when a link was actually found; otherwise the
# enclosing function falls through and returns None implicitly.
if next_link:
    return 'https://www.vancouverforsale.ca' + next_link['href']
When `nextLink` is `None`, the function can simply return `None`, and you then have to check for that in the main loop:
# Visit at most three result pages.
for count in range(3):
    page_link = get_data(page_link)
    # A falsy page_link means there is no further page to fetch, so stop
    # instead of requesting an invalid URL.
    if not page_link:
        break
Full working code
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# PEP8: `lower_case_names` for functions and variables


def get_data(url):
    """Print every listing on one results page and return the next page's URL.

    Args:
        url: Absolute URL of a search-results page.

    Returns:
        The absolute URL of the next results page, or ``None`` when the page
        has no usable "Next »" link.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'lxml')

    for listing in soup.find_all('div', class_='row property results'):
        address_tag = listing.find('a', class_='address')
        price_tag = listing.find('a', class_='price')
        # find() returns None for a missing element; skip incomplete listings
        # rather than crash on `.text` of None.
        if address_tag is None or price_tag is None:
            continue

        address = address_tag.text.strip()
        # Drop the price-change arrows that appear inside the price link.
        price = price_tag.text.replace('▲', '').replace('▼', '').strip()

        print('address:', address)
        print('price :', price)
        print('---')

    # find next page
    next_link = soup.find('a', string='Next »')
    if next_link is None:
        return None

    href = next_link.get('href')
    if not href:
        return None

    # Resolve against the current page so relative and absolute hrefs both work.
    return urljoin(url, href)


# --- main ---

page_link = 'https://www.vancouverforsale.ca/search/results/?city=Langley&region=all&list_price_min=50000&list_price_max=all&beds_min=all&baths_min=all&type=con'

# Scrape at most three pages; switch to `while True:` to follow every page.
for _ in range(3):
    page_link = get_data(page_link)
    if not page_link:
        break
Result:
address: 19681 75 Avenue, Langley price : $1,695,000 --- address: 20806 52a Avenue, Langley price : $1,649,900 --- address: 20804 52a Avenue, Langley price : $1,649,900 --- address: 7138 210 Street Unit 43, Langley price : $1,638,000 --- address: 8567 204 Street Unit 13, Langley price : $1,624,999 --- address: 19842 75b Avenue, Langley price : $1,599,000 --- address: 8567 204 Street Unit 1, Langley price : $1,598,000 --- address: 8258 202 Street, Langley price : $1,588,800 --- address: 7138 210 Street Unit 59, Langley price : $1,579,000 --- address: 8567 204 Street Unit 3, Langley price : $1,499,900 --- address: 7429 197 Street, Langley price : $1,489,900 --- address: 22981 Billy Brown Road, Langley price : $1,399,000 --- address: 23168 Billy Brown Road, Langley price : $1,399,000 --- address: 26718 32 Avenue, Langley price : $1,399,000 --- address: 20327 82 Avenue, Langley price : $1,395,000 --- address: 8567 204 Street Unit 7, Langley price : $1,390,000 --- address: 20873 71b Avenue, Langley price : $1,388,000 --- address: 20321 80 Avenue Unit 27, Langley price : $1,370,000 --- address: 20924 80a Avenue, Langley price : $1,350,000 --- address: 20463 70 Avenue Unit 2, Langley price : $1,349,900 --- address: 23189 Francis Avenue Unit 203, Langley price : $1,349,000 --- address: 20576 84a Avenue, Langley price : $1,349,000 --- address: 20451 84 Avenue Unit 10, Langley price : $1,348,000 --- address: 7138 210 Street Unit 85, Langley price : $1,348,000 --- address: 19897 75a Avenue Unit 46, Langley price : $1,325,000 --- address: 9567 217a Street Unit 3, Langley price : $1,299,900 --- address: 20321 80 Avenue Unit 45, Langley price : $1,299,900 --- address: 9762 182a Street Unit 21, Langley price : $1,298,888 --- address: 8450 204 Street Unit 29, Langley price : $1,258,000 --- address: 20770 97b Avenue Unit 3, Langley price : $1,250,000 ---