I'm making a Python web scraper for a project. It's getting all the info I want, but the only problem is that it only does this for the first profile and doesn't get the others. I tried to find the problem, but I'm stuck; any kind of advice would be helpful.
import requests
import pandas
from bs4 import BeautifulSoup


base_url = "https://www.ratemds.com/best-doctors/?page=1"
for page in range(1, 2, 1):
    r = requests.get(base_url)
    c = r.content
    soup = BeautifulSoup(c, 'html.parser')
    all = soup.find_all("div", {"class": "search-item doctor-profile"})
    l = []
    for item in all:
        d = {}
        d["Name"] = item.find("a", {"class": "search-item-doctor-link"}).text
        d["Phone Number"] = item.find("div", {"class": "search-item-specialty"}).text
        n = item.find("a", {"class": "search-item-doctor-link"})
        a = n.get('href')
        new_url = ("https://www.ratemds.com" + a)
        r1 = requests.get(new_url)
        c1 = r1.content
        soup1 = BeautifulSoup(c1, 'html.parser')
        sve = soup1.find_all("div", {"class": "col-sm-3 col-md-4 search-item-extra"})
        for profil in sve:
            try:
                d["Phone Number"] = profil.find("meta", itemprop="telephone")["content"]
            except:
                d["Phone Number"] = None
            try:
                d["Adress"] = profil.find("meta", itemprop="streetAddress")["content"]
            except:
                d["Adress"] = None
            try:
                d["Website"] = profil.find("a", itemprop="sameAs")["href"]
            except:
                d["Website"] = None
            pass
    l.append(d)
df = pandas.DataFrame(l)
df.to_csv("123.csv")
print(df)
Answer
Here is your code with a couple of adjustments:
base_url = "https://www.ratemds.com/best-doctors/?page={}"  # Change base url to this
# Moved the list of dicts outside of the main loop
l = []

for page in range(1, 5):
    r = requests.get(base_url.format(page))  # substitute 'page' variable in base_url
    c = r.content
    soup = BeautifulSoup(c, 'html.parser')
    all = soup.find_all("div", {"class": "search-item doctor-profile"})
    for item in all:
        d = {}
        d["Name"] = item.find("a", {"class": "search-item-doctor-link"}).text
        d["Phone Number"] = item.find("div", {"class": "search-item-specialty"}).text
        n = item.find("a", {"class": "search-item-doctor-link"})
        a = n.get('href')
        new_url = ("https://www.ratemds.com" + a)
        r1 = requests.get(new_url)
        c1 = r1.content
        soup1 = BeautifulSoup(c1, 'html.parser')
        sve = soup1.find_all("div", {"class": "col-sm-3 col-md-4 search-item-extra"})
        for profil in sve:
            try:
                d["Phone Number"] = profil.find("meta", itemprop="telephone")["content"]
            except:
                d["Phone Number"] = None
            try:
                d["Adress"] = profil.find("meta", itemprop="streetAddress")["content"]
            except:
                d["Adress"] = None
            try:
                d["Website"] = profil.find("a", itemprop="sameAs")["href"]
            except:
                d["Website"] = None
            pass
        l.append(d)  # indented this line to append within this loop

df = pandas.DataFrame(l)
df.to_csv("123.csv")
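As a side note, requests can build the query string for you through its params argument, so you don't have to format the URL by hand. Below is a minimal sketch of the same pagination loop using that approach; the page range, CSS classes, and output file name are carried over from the code above and may stop matching if the site is redesigned:

import requests
import pandas
from bs4 import BeautifulSoup

base_url = "https://www.ratemds.com/best-doctors/"

l = []
for page in range(1, 5):
    # requests appends ?page=N to the URL for us
    r = requests.get(base_url, params={"page": page})
    soup = BeautifulSoup(r.content, "html.parser")
    for item in soup.find_all("div", {"class": "search-item doctor-profile"}):
        link = item.find("a", {"class": "search-item-doctor-link"})
        if link is None:
            continue  # skip entries without a profile link instead of crashing
        l.append({
            "Name": link.text,
            "Profile": "https://www.ratemds.com" + link.get("href"),
        })

pandas.DataFrame(l).to_csv("123.csv", index=False)

The params approach also URL-encodes values for you, which starts to matter once you pass anything more complicated than plain page numbers.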