I want to scrape data from a certain website, but the default page layout gives a maximum of 40 results, while the simple-list layout gives 100. The layout is set to default, and changing it is difficult to achieve with Selenium. Is there any way to use the cookies saved in Chrome with Beautiful Soup?
import requests
from bs4 import BeautifulSoup
import browser_cookie3
# Read the cookies stored by the locally installed browsers so the request
# below is made with the same site settings the browser uses.
cj = browser_cookie3.load()
s = requests.Session()
# The scheme separator needs two slashes; with "https:/..." requests finds no
# host in the URL and refuses to send the request.
url = "https://something.org/titles/2"
print(cj)

# Copy only the target site's cookies into the session.  Matching on the
# cookie's domain avoids picking up unrelated cookies whose name or value
# merely contains the same text.
for c in cj:
    if 'mangadex' in c.domain:
        s.cookies.set_cookie(c)

r = s.get(url)
# Fail loudly on an HTTP error instead of silently parsing an error page.
r.raise_for_status()
soup = BeautifulSoup(r.content, 'lxml')

# Each matching <div> is one title entry; i is the 1-based running count.
entries = soup.find_all('div', {'class': 'manga-entry col-lg-6 border-bottom pl-0 my-1'})
for i, anime in enumerate(entries, start=1):
    det = anime.find('a', {"class": "ml-1 manga_title text-truncate"})
    if det is None:
        # Entry without a title link: nothing to report for it.
        continue
    anime_name = det.text
    anime_link = det['href']
    # The fourth <span> of an entry holds the value printed as "stars".
    stars = anime.select("span")[3].text
    print(anime_name, anime_link, stars, i)
Advertisement
Answer
Try:
import browser_cookie3
import requests
# Read the cookies stored by the locally installed browsers.
cj = browser_cookie3.load()
s = requests.Session()

# Keep only the cookies that belong to the wanted site; replace 'sitename'
# with (part of) that site's domain.
for c in cj:
    if 'sitename' in c.domain:
        s.cookies.set_cookie(c)

# the_site is a placeholder for the URL to fetch; the session sends the
# copied cookies along with the request.
r = s.get(the_site)
This code uses the browser's cookies in a requests Session. Simply change sitename to the site you want cookies from.
Your new code:
import requests
from bs4 import BeautifulSoup
import browser_cookie3
# Read the cookies stored by the locally installed browsers so the request
# below is made with the same site settings the browser uses.
cj = browser_cookie3.load()
s = requests.Session()
url = "https://something.org/titles/2"
print(cj)

# Copy only the target site's cookies into the session.  Matching on the
# cookie's domain avoids picking up unrelated cookies whose name or value
# merely contains the same text.
for c in cj:
    if 'mangadex' in c.domain:
        s.cookies.set_cookie(c)

r = s.get(url)
# Fail loudly on an HTTP error instead of silently parsing an error page.
r.raise_for_status()
soup = BeautifulSoup(r.content, 'lxml')

# Each matching <div> is one title entry; i is the 1-based running count.
entries = soup.find_all('div', {'class': 'manga-entry row m-0 border-bottom'})
for i, anime in enumerate(entries, start=1):
    det = anime.find('a', {"class": "ml-1 manga_title text-truncate"})
    if det is None:
        # Entry without a title link: nothing to report for it.
        continue
    anime_name = det.text
    anime_link = det['href']
    # The fourth <span> of an entry holds the value printed as "stars".
    stars = anime.select("span")[3].text
    print(anime_name, anime_link, stars, i)
prints:
-Hitogatana- /title/540/hitogatana 4 1
-PIQUANT- /title/44134/piquant 5 2
-Rain- /title/37103/rain 4 3
-SINS- /title/1098/sins 4
:radical /title/46819/radical 1 5
:REverSAL /title/3877/reversal 3 6
... /title/52206/ 7
...Curtain. ~Sensei to Kiyoraka ni Dousei~ /title/7829/curtain-sensei-to-kiyoraka-ni-dousei 8
...Junai no Seinen /title/28947/junai-no-seinen 9
...no Onna /title/10162/no-onna 2 10
...Seishunchuu! /title/19186/seishunchuu 11
...Virgin Love /title/28945/virgin-love 12
.flow - Untitled (Doujinshi) /title/27292/flow-untitled-doujinshi 2 13
.gohan /title/50410/gohan 14
.hack//4koma + Gag Senshuken /title/7750/hack-4koma-gag-senshuken 24 15
.hack//Alcor - Hagun no Jokyoku /title/24375/hack-alcor-hagun-no-jokyoku 16
.hack//G.U.+ /title/7757/hack-g-u 1 17
.hack//GnU /title/7758/hack-gnu 18
.hack//Link - Tasogare no Kishidan /title/24374/hack-link-tasogare-no-kishidan 1 19
.hack//Tasogare no Udewa Densetsu /title/5817/hack-tasogare-no-udewa-densetsu 20
.hack//XXXX /title/7759/hack-xxxx 21
.traeH /title/9789/traeh 22
(G) Edition /title/886/g-edition 1 23
(Not) a Househusband /title/22832/not-a-househusband 6 24
(R)estauraNTR /title/37551/r-estaurantr 14 25
[ rain ] 1st Story /title/25587/rain-1st-story 3 26
[another] Xak /title/24881/another-xak 27
[es] ~Eternal Sisters~ /title/4879/es-eternal-sisters 1 28
and so on to 100…