The code below fails with a “list index out of range” error.
Python
x
18
18
1
# NOTE(review): this is the question's snippet with only the line-number
# gutter removed and the URL literal rejoined onto one line. The logic is
# intentionally left as posted; the problems visible in it are flagged inline.
import bs4
import requests

# NOTE(review): the URL has no "http://" / "https://" scheme, which
# requests.get() needs -- confirm against the original post.
my_url = requests.get('play.google.com/store/apps/details?id=com.delta.mobile.android&hl=en_US&showAllReviews=true')
# NOTE(review): `uReq` is not defined or imported anywhere in this snippet,
# and `my_url` already holds the result of requests.get(), not a URL string.
uClient = uReq(my_url)
page_soup = uClient.read()
uClient.close()
#Parsing the content
# NOTE(review): only `import bs4` appears above, so the bare name
# `BeautifulSoup` is not bound here (it would be `bs4.BeautifulSoup`).
soup = BeautifulSoup(page_soup, "html.parser")
# NOTE(review): find() returns None when nothing matches, in which case
# .get_text() raises AttributeError.
txt = soup.find('div', class_='review-body').get_text()
print(soup.get_text())
# NOTE(review): `pd` (pandas) is never imported in this snippet.
temp = pd.DataFrame({'Review Text': txt}, index=[0])
print('-' * 10)
#Appending temp values into DataFrame
# NOTE(review): `reviews_df` is never created in this snippet, and the value
# returned by append() is discarded (DataFrame.append returns a new frame).
reviews_df.append(temp)
#Printing DataFrame
print(reviews_df)
18
Advertisement
Answer
Try:
Python
1
21
21
1
import json
import urllib

import requests
from bs4 import BeautifulSoup

# Print the review texts found in the inline <script> data of a Google Play
# app page. The parsing below relies on the page embedding each review as
# `,null,"<text>"` inside a script that mentions "gp:" -- presumably a
# review-id prefix; verify against the live page, since the layout may change.
URL = 'http://play.google.com/store/apps/details?id=com.delta.mobile.android&hl=en_US&showAllReviews=true'
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0"
headers = {"user-agent": USER_AGENT}

# A timeout keeps the script from hanging forever; raise_for_status() stops
# early on an HTTP error page instead of parsing it.
resp = requests.get(URL, headers=headers, timeout=30)
resp.raise_for_status()
soup = BeautifulSoup(resp.content, "html.parser")

# `string=True` keeps only <script> tags that have a single text child, so
# `tag.string` is never None in the comprehension below.
review_scripts = [
    tag.string
    for tag in soup.find_all('script', string=True)
    if "gp:" in tag.string
]
if not review_scripts:
    # Indexing an empty list here is what produces "list index out of range".
    raise SystemExit(
        'No inline <script> containing "gp:" was found; '
        'the page layout may have changed.'
    )

# The last matching script is the one that is split into review chunks.
# Everything before the first `,null,"` separator is preamble, so drop it.
chunks = review_scripts[-1].split(',null,"')[1:]
for chunk in chunks:
    # Chunks containing a URL are skipped (presumably image/profile links).
    if 'http' in chunk:
        continue
    # The text runs up to the next double quote; skip chunks without one
    # rather than failing on them.
    review, quote, _rest = chunk.partition('"')
    if not quote:
        continue
    print(review)
    print()
21
It worked for me!