I want to extract the Name, Position, and Email of every person on the webpage. I managed to extract the name and position, but the email cannot be extracted directly, since you have to contact each person to obtain it. So instead I want to extract each person's contact URL. Every person has a different contact URL, and I want the CSV file to contain a third column, "Contact", in each person's row, so that clicking the contact link redirects to that person's particular contact page. Here is my code:
Python
x
33
33
1
"""Scrape Name, Position, and Contact URL for every staff member listed in
the FCPS staff directory and append the rows to FCPS.csv."""
import requests
from bs4 import BeautifulSoup
from csv import writer

BASE = 'https://fairfaxhs.fcps.edu'
URL_TEMPLATE = (
    'https://fairfaxhs.fcps.edu/staff-directory'
    '?field_last_name_from=&field_last_name_to='
    '&items_per_page=10&keywords=&page={page}'
)

# Open the output file once instead of reopening it on every page, and write
# the header exactly once, up front.  (The original `if page == 0 ... else`
# wrote the header on page 0 but then skipped page 0's data rows entirely.)
with open('FCPS.csv', 'a', encoding='utf8', newline='') as outfile:
    csv_writer = writer(outfile)
    csv_writer.writerow(['Name', 'Position', 'Contact'])

    for page in range(0, 30):
        # Timeout so a hung request cannot stall the whole scrape.
        response = requests.get(URL_TEMPLATE.format(page=page), timeout=30)
        soup = BeautifulSoup(response.content, 'html.parser')

        # `row` instead of `list`: never shadow the builtin.
        for row in soup.find_all('tr'):
            name_cell = row.find(
                'td', class_='views-field views-field-field-last-name')
            name = name_cell.text.strip() if name_cell else 'N/A'

            position_cell = row.find(
                'td', class_='views-field views-field-field-staff-title')
            position = position_cell.text.strip() if position_cell else 'N/A'

            # The contact cell holds a relative link, not useful text, so
            # build an absolute URL from the anchor's href.  Clicking the
            # value in the CSV then opens that person's contact page.
            contact_cell = row.find(
                'td', class_='views-field views-field-rendered-item')
            if contact_cell and contact_cell.a:
                contact = BASE + contact_cell.a.get('href')
            else:
                contact = 'N/A'

            csv_writer.writerow([name, position, contact])
33
Advertisement
Answer
You can grab the data according to your requirement from each listing page / detail page,
as shown in the following example:
Python
1
26
26
1
"""Follow each staff member's contact link from the directory listing pages,
scrape Name and Position from the detail page, and save everything (including
the contact URL itself) to out.csv with pandas."""
import pandas as pd
from bs4 import BeautifulSoup
import requests

BASE = 'https://fairfaxhs.fcps.edu'
URL_TEMPLATE = (
    'https://fairfaxhs.fcps.edu/staff-directory'
    '?field_last_name_from=&field_last_name_to='
    '&items_per_page=10&keywords=&page={page}'
)

data = []
for page in range(0, 30):
    # Timeouts so one stalled request cannot hang the whole run.
    listing = BeautifulSoup(
        requests.get(URL_TEMPLATE.format(page=page), timeout=30).text, 'lxml')

    # Guard explicitly instead of a bare `except: pass`, which silently
    # swallowed every error (including real bugs) for an entire page.
    table = listing.table
    if table is None:
        continue

    for cell in table.select(
            'tr td[class="views-field views-field-rendered-item"]'):
        if cell.a is None:
            continue
        contact_url = BASE + cell.a.get('href')

        detail = BeautifulSoup(requests.get(contact_url, timeout=30).text, 'lxml')
        name_tag = detail.select_one('h1.node__title.fcps-color--dark11')
        title_tag = detail.select_one('h1+div')
        data.append({
            'Name': name_tag.get_text(strip=True) if name_tag else 'N/A',
            'Position': title_tag.get_text(strip=True) if title_tag else 'N/A',
            'contact_url': contact_url,
        })

# Keep the DataFrame and the CSV write separate: `to_csv` returns None, so
# the original `df = pd.DataFrame(data).to_csv(...)` bound df to None.
df = pd.DataFrame(data)
df.to_csv('out.csv', index=False)
# print(df)
26