Parsing JSON web scraper output

I am practicing web scraping using the requests and BeautifulSoup modules on the following website:

https://www.imdb.com/title/tt0080684/

My code thus far correctly prints the JSON in question. I’d like help extracting only the name and description from that JSON into a response dictionary.

Code

# Send HTTP requests
import requests

import json

from bs4 import BeautifulSoup


class WebScraper:
    """Fetch a web page and print the JSON-LD metadata embedded in it."""

    @staticmethod
    def send_http_request():
        """Prompt for a URL, download the page and print its JSON-LD data.

        The URL is read from standard input. If the response has an error
        status, or the page contains no ``application/ld+json`` script tag,
        an "Invalid movie page!" message is printed instead.
        """
        # Obtain the URL via user input
        url = input('Input the URL:\n')

        # Get the webpage
        r = requests.get(url)

        # A Response object is truthy only for non-error status codes.
        if not r:
            print('\nInvalid movie page!')
            return

        soup = BeautifulSoup(r.content, 'html.parser')
        script = soup.find('script', {'type': 'application/ld+json'})

        # find() returns None when nothing matches; report that instead of
        # failing with an AttributeError on ``.contents``.
        if script is None:
            print('\nInvalid movie page!')
            return

        p = json.loads("".join(script.contents))
        print(p)


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    WebScraper.send_http_request()

Python
 
# Send HTTP requests
import requests
​
import json
​
from bs4 import BeautifulSoup
​
​
class WebScraper:
    """Fetch a web page and print the JSON-LD metadata embedded in it."""

    @staticmethod
    def send_http_request():
        """Prompt for a URL, download the page and print its JSON-LD data.

        The URL is read from standard input. If the response has an error
        status, or the page contains no ``application/ld+json`` script tag,
        an "Invalid movie page!" message is printed instead.
        """
        # Obtain the URL via user input
        url = input('Input the URL:\n')

        # Get the webpage
        r = requests.get(url)

        # A Response object is truthy only for non-error status codes.
        if not r:
            print('\nInvalid movie page!')
            return

        soup = BeautifulSoup(r.content, 'html.parser')
        script = soup.find('script', {'type': 'application/ld+json'})

        # find() returns None when nothing matches; report that instead of
        # failing with an AttributeError on ``.contents``.
        if script is None:
            print('\nInvalid movie page!')
            return

        p = json.loads("".join(script.contents))
        print(p)
​
​
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    WebScraper.send_http_request()
​

Desired Output

{"title": "Star Wars: Episode V - The Empire Strikes Back", "description": "After the Rebels are brutally overpowered by the Empire on the ice planet Hoth, Luke Skywalker begins Jedi training with Yoda, while his friends are pursued by Darth Vader and a bounty hunter named Boba Fett all over the galaxy."}

JavaScript
 
{"title": "Star Wars: Episode V - The Empire Strikes Back", "description": "After the Rebels are brutally overpowered by the Empire on the ice planet Hoth, Luke Skywalker begins Jedi training with Yoda, while his friends are pursued by Darth Vader and a bounty hunter named Boba Fett all over the galaxy."}
​

Answer

You can parse the dictionary and then print a new JSON object using the `json.dumps` method:

# Send HTTP requests
import requests

import json

from bs4 import BeautifulSoup


class WebScraper:
    """Fetch a web page and print the title and description from its JSON-LD."""

    @staticmethod
    def send_http_request():
        """Prompt for a URL and print the page's title and description as JSON.

        The URL is read from standard input. The page's
        ``application/ld+json`` script tag is parsed, and its ``name`` and
        ``description`` entries are printed as a JSON object with the keys
        ``title`` and ``description``. If the response has an error status,
        or the page contains no such script tag, an "Invalid movie page!"
        message is printed instead.
        """
        # Obtain the URL via user input
        url = input('Input the URL:\n')

        # Get the webpage
        r = requests.get(url)

        # A Response object is truthy only for non-error status codes.
        if not r:
            print('\nInvalid movie page!')
            return

        soup = BeautifulSoup(r.content, 'html.parser')
        script = soup.find('script', {'type': 'application/ld+json'})

        # find() returns None when nothing matches; report that instead of
        # failing with an AttributeError on ``.contents``.
        if script is None:
            print('\nInvalid movie page!')
            return

        p = json.loads("".join(script.contents))
        output = json.dumps({"title": p["name"], "description": p["description"]})
        print(output)


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    WebScraper.send_http_request()

JavaScript
 
# Send HTTP requests
import requests
​
import json
​
from bs4 import BeautifulSoup
​
​
class WebScraper:
    """Fetch a web page and print the title and description from its JSON-LD."""

    @staticmethod
    def send_http_request():
        """Prompt for a URL and print the page's title and description as JSON.

        The URL is read from standard input. The page's
        ``application/ld+json`` script tag is parsed, and its ``name`` and
        ``description`` entries are printed as a JSON object with the keys
        ``title`` and ``description``. If the response has an error status,
        or the page contains no such script tag, an "Invalid movie page!"
        message is printed instead.
        """
        # Obtain the URL via user input
        url = input('Input the URL:\n')

        # Get the webpage
        r = requests.get(url)

        # A Response object is truthy only for non-error status codes.
        if not r:
            print('\nInvalid movie page!')
            return

        soup = BeautifulSoup(r.content, 'html.parser')
        script = soup.find('script', {'type': 'application/ld+json'})

        # find() returns None when nothing matches; report that instead of
        # failing with an AttributeError on ``.contents``.
        if script is None:
            print('\nInvalid movie page!')
            return

        p = json.loads("".join(script.contents))
        output = json.dumps({"title": p["name"], "description": p["description"]})
        print(output)
​
​
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    WebScraper.send_http_request()
​

Output:

{"title": "Star Wars: Episode V - The Empire Strikes Back", "description": "Star Wars: Episode V - The Empire Strikes Back is a movie starring Mark Hamill, Harrison Ford, and Carrie Fisher. After the Rebels are brutally overpowered by the Empire on the ice planet Hoth, Luke Skywalker begins Jedi training..."}

JavaScript
 
{"title": "Star Wars: Episode V - The Empire Strikes Back", "description": "Star Wars: Episode V - The Empire Strikes Back is a movie starring Mark Hamill, Harrison Ford, and Carrie Fisher. After the Rebels are brutally overpowered by the Empire on the ice planet Hoth, Luke Skywalker begins Jedi training..."}
​

Advertisement

Answer