URL to scrape the genres from
JavaScript
x
4
1
# Fetch the landing page and parse it.
url = requests.get('http://books.toscrape.com/index.html')
soup = BeautifulSoup(url.text, 'html.parser')

# Walk from the first '.nav-list' element into its first <li>, then that
# item's nested <ul>; each <li> in there is treated as one genre entry.
sidebar = soup.select('.nav-list')[0]
navlist = sidebar.find('li').find('ul').find_all('li')

# List for scraped novels
novel_list = []

# List of genres on the website: the link text of every navlist entry,
# stripped of surrounding whitespace and lower-cased.
genre_list = [entry.find('a').get_text().strip().lower() for entry in navlist]
4
For loop to iterate over genre_list
JavaScript
1
19
19
1
# Class names the site puts on the rating <p>; constant, so defined once
# instead of once per novel.
rate_list = ['One', 'Two', 'Three', 'Four', 'Five']
base_url = 'http://books.toscrape.com/catalogue/category/books'

# Loop through genre_list. The category id in the URL is taken to be the
# genre's position in the sidebar plus 2 (same assumption as before).
for x, y in enumerate(genre_list):
    # Category URLs use hyphens where the displayed genre name has spaces
    # (e.g. "historical fiction" -> "historical-fiction"); without this,
    # every multi-word genre 404s and is silently skipped.
    category_url = f"{base_url}/{y.replace(' ', '-')}_{x + 2}"
    count = 1

    # Try the paginated form first; only fall back to index.html (one extra
    # request) when the category has no page-1.html.
    url = requests.get(f'{category_url}/page-{count}.html')
    single_page = url.status_code == 404
    if single_page:
        url = requests.get(f'{category_url}/index.html')

    # A requests Response is truthy only for a non-error status, so this
    # loop stops at the first page that does not exist.
    while url:
        # Decode as UTF-8 explicitly: otherwise the price comes back with a
        # stray 'Â' before '£' and non-ASCII titles are garbled.
        url.encoding = 'utf-8'
        soup1 = BeautifulSoup(url.text, 'html.parser')
        count += 1

        # Loop over each novel to get its title, cost and rating.
        for novel in soup1.select('.product_pod'):
            title = novel.find('h3').find('a').get('title')
            cost = novel.find('p', class_='price_color').getText()

            # Find which rating class this novel carries; a row is added
            # only when one of the known rating classes is present.
            for rating in rate_list:
                if novel.find('p', class_=rating):
                    novel_list.append({'Title': title, 'Rating': rating, 'Price': cost, 'Genre': y})

        # index.html has no following pages.
        if single_page:
            break
        url = requests.get(f'{category_url}/page-{count}.html')
10
Writing these novels into my CSV file
JavaScript
1
8
1
# Dump every scraped novel into novel.csv: a header row followed by one row
# per dictionary in novel_list.
fieldnames = ['Title', 'Rating', 'Price', 'Genre']
with open('novel.csv', 'w', encoding="utf-8", newline='') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(novel_list)
8
Advertisement
Answer
Use a for loop and supply the page number. Then load the data
into a pandas DataFrame
and write it out with to_csv.
Code:
JavaScript
1
21
21
1
from bs4 import BeautifulSoup
2
import requests
3
import pandas as pd
4
# Template for the catalogue pages; the page number is filled in per request.
url = 'http://books.toscrape.com/catalogue/page-{}.html'

title = []
ratings = []
cost = []
for page in range(1, 51):
    finalurl = url.format(page)
    res = requests.get(finalurl)
    # Decode as UTF-8 explicitly. Without this the price text starts with a
    # stray 'Â' (which the old code hid by slicing off the first character)
    # and non-ASCII characters in titles come out garbled.
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'html.parser')
    # The three selections are walked in lockstep, one triple per book.
    for t, r, c in zip(soup.select('.image_container >a>img'),
                       soup.select('p.star-rating'),
                       soup.select('p.price_color')):
        title.append(t['alt'])
        # The last class on the rating element is the rating word.
        ratings.append(r.attrs['class'][-1])
        # Correctly decoded, the text is already of the form '£51.77'.
        cost.append(c.text)

df = pd.DataFrame({"Title": title, "Ratings": ratings, "Cost": cost})
print(df)
df.to_csv('Titlebooks.csv')
21
Output on console:
JavaScript
1
65
65
1
Cost Ratings Title
2
0 £51.77 Three A Light in the Attic
3
1 £53.74 One Tipping the Velvet
4
2 £50.10 One Soumission
5
3 £47.82 Four Sharp Objects
6
4 £54.23 Five Sapiens: A Brief History of Humankind
7
5 £22.65 One The Requiem Red
8
6 £33.34 Four The Dirty Little Secrets of Getting Your Dream
9
7 £17.93 Three The Coming Woman: A Novel Based on the Life of
10
8 £22.60 Four The Boys in the Boat: Nine Americans and Their
11
9 £52.15 One The Black Maria
12
10 £13.99 Two Starving Hearts (Triangular Trade Trilogy, #1)
13
11 £20.66 Four Shakespeare's Sonnets
14
12 £17.46 Five Set Me Free
15
13 £52.29 Five Scott Pilgrim's Precious Little Life (Scott Pi...
16
14 £35.02 Five Rip it Up and Start Again
17
15 £57.25 Three Our Band Could Be Your Life: Scenes from the A
18
16 £23.88 One Olio
19
17 £37.59 One Mesaerion: The Best Science Fiction Stories 18...
20
18 £51.33 Two Libertarianism for Beginners
21
19 £45.17 Two It's Only the Himalayas
22
20 £12.84 One In Her Wake
23
21 £37.32 Two How Music Works
24
22 £30.52 Three Foolproof Preserving: A Guide to Small Batch J
25
23 £25.27 Five Chase Me (Paris Nights #2)
26
24 £34.53 Five Black Dust
27
25 £54.64 Three Birdsong: A Story in Pictures
28
26 £22.50 Three America's Cradle of Quarterbacks: Western Penn...
29
27 £53.13 Three Aladdin and His Wonderful Lamp
30
28 £40.30 Five Worlds Elsewhere: Journeys Around Shakespeareâ
31
29 £44.18 Four Wall and Piece
32
..
33
970 £24.89 Three Lord of the Flies
34
971 £58.99 Three Listen to Me (Fusion #1)
35
972 £57.20 Five Kitchens of the Great Midwest
36
973 £38.43 Five Jane Eyre
37
974 £34.74 Four Imperfect Harmony
38
975 £40.44 Four Icing (Aces Hockey #2)
39
976 £45.24 Three Hawkeye, Vol. 1: My Life as a Weapon (Hawkeye #1)
40
977 £34.96 Four Having the Barbarian's Baby (Ice Planet Barbar...
41
978 £56.76 Four Giant Days, Vol. 1 (Giant Days #1-4)
42
979 £40.28 Five Fruits Basket, Vol. 1 (Fruits Basket #1)
43
980 £38.00 Two Frankenstein
44
981 £28.80 Three Forever Rockers (The Rocker #12)
45
982 £39.24 Three Fighting Fate (Fighting #6)
46
983 £32.93 Two Emma
47
984 £51.32 Three Eat, Pray, Love
48
985 £47.09 Five Deep Under (Walker Security #1)
49
986 £28.42 Four Choosing Our Religion: The Spiritual Lives of
50
987 £22.85 Three Charlie and the Chocolate Factory (Charlie Buc
51
988 £41.24 One Charity's Cross (Charles Towne Belles #4)
52
989 £39.07 Five Bright Lines
53
990 £29.82 One Bridget Jones's Diary (Bridget Jones #1)
54
991 £37.26 Four Bounty (Colorado Mountain #7)
55
992 £20.30 Three Blood Defense (Samantha Brinkman #1)
56
993 £34.65 Five Bleach, Vol. 1: Strawberry and the Soul Reaper
57
994 £43.38 One Beyond Good and Evil
58
995 £55.53 One Alice in Wonderland (Alice's Adventures in Won...
59
996 £57.06 Four Ajin: Demi-Human, Volume 1 (Ajin: Demi-Human #1)
60
997 £16.97 Five A Spy's Devotion (The Regency Spies of London #1)
61
998 £53.98 One 1st to Die (Women's Murder Club #1)
62
999 £26.08 Five 1,000 Places to See Before You Die
63
64
[1000 rows x 3 columns]
65
If you don’t want to hardcode the final page count, try this.
JavaScript
1
25
25
1
from bs4 import BeautifulSoup
2
import requests
3
import pandas as pd
4
5
res = requests.get("http://books.toscrape.com/index.html").text
soup = BeautifulSoup(res, 'html.parser')
# Get the total page count: the text after the last 'of' in the '.current'
# element, converted to int once here.
pagecount = int(soup.select_one('.current').text.split('of')[-1].strip())

title = []
ratings = []
cost = []
for page in range(1, pagecount + 1):
    finalurl = "http://books.toscrape.com/catalogue/page-{}.html".format(page)
    res = requests.get(finalurl)
    # Decode as UTF-8 explicitly. Without this the price text starts with a
    # stray 'Â' (which the old code hid by slicing off the first character)
    # and non-ASCII characters in titles come out garbled.
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'html.parser')
    # The three selections are walked in lockstep, one triple per book.
    for t, r, c in zip(soup.select('.image_container >a>img'),
                       soup.select('p.star-rating'),
                       soup.select('p.price_color')):
        title.append(t['alt'])
        # The last class on the rating element is the rating word.
        ratings.append(r.attrs['class'][-1])
        # Correctly decoded, the text is already of the form '£51.77'.
        cost.append(c.text)

df = pd.DataFrame({"Title": title, "Ratings": ratings, "Cost": cost})
print(df)
df.to_csv('Titlebooks.csv')
25