The code below fails with a “list index out of range” error.
import bs4
import pandas as pd
import requests

# The URL must be one unbroken string literal and needs an explicit scheme,
# otherwise requests rejects it before any network traffic happens.
my_url = (
    'https://play.google.com/store/apps/details'
    '?id=com.delta.mobile.android&hl=en_US&showAllReviews=true'
)

# requests.get() already downloads the page, so the separate
# open / read() / close() sequence is unnecessary.
response = requests.get(my_url)
response.raise_for_status()
page_soup = response.content

#Parsing the content
soup = bs4.BeautifulSoup(page_soup, "html.parser")

# find() returns None when no element matches, so check before calling
# get_text() on the result.
review_div = soup.find('div', class_='review-body')
print(soup.get_text())

# Start from an empty frame so there is something to add rows to.
reviews_df = pd.DataFrame(columns=['Review Text'])

if review_div is None:
    print("No <div class='review-body'> element was found in the page.")
else:
    txt = review_div.get_text()
    temp = pd.DataFrame({'Review Text': txt}, index=[0])
    print('-' * 10)
    #Appending temp values into DataFrame
    # concat() returns a new frame; the result has to be assigned back.
    reviews_df = pd.concat([reviews_df, temp], ignore_index=True)

#Printing DataFrame
print(reviews_df)
Advertisement
Answer
Try:
import json
import urllib

import requests
from bs4 import BeautifulSoup

URL = 'http://play.google.com/store/apps/details?id=com.delta.mobile.android&hl=en_US&showAllReviews=true'
# Send a browser-like User-Agent header with the request.
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0"
headers = {"user-agent": USER_AGENT}

resp = requests.get(URL, headers=headers)
resp.raise_for_status()
soup = BeautifulSoup(resp.content, "html.parser")
#print(soup.prettify())

# Collect the text of every inline <script> that mentions "gp:".
# NOTE(review): "gp:" is presumably the prefix of review ids embedded in the
# page data -- confirm against the current page source.
a = [
    tag.string
    for tag in soup.find_all('script', string=True)
    if "gp:" in tag.string
]

if not a:
    # Indexing a[-1] on an empty list is exactly what raises
    # "list index out of range", so handle that case explicitly.
    print('No <script> block containing "gp:" was found.')
else:
    # Only the last matching script is inspected; split it on the
    # ',null,"' marker and drop everything before the first marker.
    chunks = a[-1].split(',null,"')
    del chunks[0]
    for j in chunks:
        if 'http' not in j:
            # Keep the text up to the next double quote; partition() does
            # not raise when the quote is missing.
            print(j.partition('"')[0])
            print()
It worked for me!