Skip to content
Advertisement

Want to extract the contact URL link for every single person from a web page into a CSV file using Python

I want to extract the Name, Position and Email of every person from the webpage. I managed to extract the name and position, but the email cannot be extracted, since you have to contact each person to learn it, so I just want to extract their contact URL instead. Every person has a different contact URL. I want the CSV file to contain a third column, Contact, in each person's row, so that whenever I click on the contact link I am redirected to that person's particular contact page. Here is my code:

import requests
from bs4 import BeautifulSoup
from csv import writer
# Scrape every page of the staff directory and write one CSV row per <tr>.
#
# The output file is opened once, before the page loop, so the header row is
# written exactly once and the rows of every page (page 0 included) are kept.
# Mode 'w' is used because the header is emitted on every run; appending
# would leave a second header in the middle of the file on a re-run.
with open('FCPS.csv', 'w', encoding='utf8', newline='') as csv_file:
    thewriter = writer(csv_file)
    thewriter.writerow(['Name', 'Position', 'Contact'])

    for page in range(0, 30):
        url = 'https://fairfaxhs.fcps.edu/staff-directory?field_last_name_from=&field_last_name_to=&items_per_page=10&keywords=&page={page}'.format(page=page)
        response = requests.get(url)

        # Parsing has to happen inside the loop: done after the loop, only
        # the last response fetched would ever be processed.
        soup = BeautifulSoup(response.content, 'html.parser')

        for row in soup.find_all('tr'):
            name = row.find('td', class_='views-field views-field-field-last-name')
            position = row.find('td', class_='views-field views-field-field-staff-title')

            # NOTE: the 'Contact' column is not filled in yet. The contact
            # cell is the <td> with class
            # 'views-field views-field-rendered-item'; its text is not the
            # URL, so the link still has to be extracted from it.
            thewriter.writerow([
                # A row without the expected cell is recorded as 'N/A'.
                name.get_text(strip=True) if name else 'N/A',
                position.get_text(strip=True) if position else 'N/A',
            ])

Advertisement

Answer

You can grab the data you require from each listing page/detail page by following the next example:

import pandas as pd
from bs4 import BeautifulSoup
import requests

# Walk the paginated staff directory, follow each person's profile link and
# collect name, position and the profile (contact) URL into out.csv.
url = 'https://fairfaxhs.fcps.edu/staff-directory?field_last_name_from=&field_last_name_to=&items_per_page=10&keywords=&page={page}'
data = []
for page in range(0, 30):
    # A failure to fetch a listing page is not caught and stops the script.
    listing = BeautifulSoup(requests.get(url.format(page=page)).text, 'lxml')

    # A listing page without a table has nothing to scrape.
    if listing.table is None:
        continue

    for cell in listing.table.select('tr td[class="views-field views-field-rendered-item"]'):
        # Skip cells that carry no link instead of failing on them.
        if cell.a is None or not cell.a.get('href'):
            continue
        u = 'https://fairfaxhs.fcps.edu' + cell.a.get('href')

        # Handle failures per profile so that one unreachable page does not
        # discard the remaining people on the same listing page.
        try:
            profile = BeautifulSoup(requests.get(u).text, 'lxml')
        except requests.RequestException:
            continue

        name = profile.select_one('h1.node__title.fcps-color--dark11')
        position = profile.select_one('h1+div')
        # select_one returns None when nothing matches; skip such profiles.
        if name is None or position is None:
            continue

        data.append({
            'Name': name.get_text(strip=True),
            'Position': position.get_text(strip=True),
            'contact_url': u,
        })

# to_csv returns None when given a path, so its result is not kept.
pd.DataFrame(data).to_csv('out.csv', index=False)
Advertisement