I want to scrape the price and the availability status of a product from a website. I am able to scrape the price, but I am unable to scrape the status. I couldn't find it in the JSON either.
Here is the link: https://www.zoro.com/jonard-tools-diagonal-cutting-plier-8-l-jic-2488/i/G2736212/?recommended=true
Python
x
13
13
1
# Question snippet: fetch a product page and print the text of its availability element.
from requests import get
2
from bs4 import BeautifulSoup
3
4
# NOTE(review): `url` is never assigned in this snippet; presumably it is the product link given above.
resp = get(url)
5
soup = BeautifulSoup(resp.text, 'lxml')
6
7
# print(soup.prettify())
8
price = soup.find('div', class_ = 'product-price')
9
10
# NOTE(review): find() returns None when nothing matches, in which case `status.text` below
# raises AttributeError. Presumably this element is not in the HTML that requests receives
# (e.g. it is filled in client-side) — confirm by inspecting resp.text.
status = soup.find('div', class_ = 'avl-status buy-box__shipping-item')
11
12
print(status.text)
13
Advertisement
Answer
You can use the JSON-LD structured data embedded in the page to obtain the availability (as well as the price, images, description…).
For example:
Python
1
16
16
1
import json

import requests
from bs4 import BeautifulSoup

url = "https://www.zoro.com/jonard-tools-diagonal-cutting-plier-8-l-jic-2488/i/G2736212/?recommended=true"

# A timeout keeps the script from hanging forever on an unresponsive server;
# raise_for_status() surfaces HTTP errors instead of parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# The product data is read from the first JSON-LD <script> block on the page.
# Fail with a clear message if the block is missing or empty rather than with
# an opaque AttributeError/IndexError.
ld_json_tag = soup.select_one('script[type="application/ld+json"]')
if ld_json_tag is None or not ld_json_tag.contents:
    raise SystemExit('No application/ld+json data found on the page')
data = json.loads(ld_json_tag.contents[0])

# uncomment this to print all data:
# print(json.dumps(data, indent=4))

print('Price : ', data['offers']['price'])
print('Availability: ', data['offers']['availability'])
16
Prints:
JavaScript
1
3
1
Price : 17.13
2
Availability: http://schema.org/InStock
3
EDIT: You can also inspect all of the product data that is embedded within the page:
Python
1
20
20
1
import json

import requests
from bs4 import BeautifulSoup

url = "https://www.zoro.com/baldwin-filters-filter-service-kit-thermo-king-bk6092/i/G1609513/"
# url = 'https://www.zoro.com/jonard-tools-diagonal-cutting-plier-8-l-jic-2488/i/G2736212/?recommended=true'

# A timeout keeps the script from hanging forever on an unresponsive server;
# raise_for_status() surfaces HTTP errors instead of parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# The page state is stored as JSON in the `data-state` attribute of a hidden <div>.
# Fail with a clear message if that element is missing rather than with a TypeError.
state_div = soup.select_one('div.hidden[data-state]')
if state_div is None:
    raise SystemExit('No div.hidden[data-state] element found on the page')
data = json.loads(state_div['data-state'])

# uncomment this to print all data:
# print(json.dumps(data, indent=4))

# productDetailsData is a dict; popitem() takes its last entry and the key is discarded
# (presumably there is one entry per product on the page — verify against the full dump).
_, product_data = data['product']['productDetailsData'].popitem()

print(json.dumps(product_data, indent=4))

print()
print('isExpeditable = ', product_data['isExpeditable'])
20
When the key `isExpeditable` is set to `False`, it means Drop Shipping (I think). When I tested it with a product that is in stock, it printed `True`.
The output:
JavaScript
1
75
75
1
{
2
"packageQty": 1,
3
"isMotorCompliant": false,
4
"zoroNo": "G1609513",
5
"brand": "Baldwin Filters",
6
"salesStatus": "TP",
7
"orderChannel": "Default",
8
"description": "Filter Service Kit, For Vehicle Type - Filter Kits Thermo King, Includes Lube Spin-On, Fuel, Water Separator Element, Fuel Spin-On",
9
"restrictedStates": [],
10
"title": "Filter Service Kit",
11
"categoryPaths": [
12
[
13
{
14
"name": "Automotive Filters",
15
"slug": "automotive-filters",
16
"code": "7540"
17
},
18
{
19
"name": "Filter Service Kits",
20
"slug": "filter-service-kits",
21
"code": "10660"
22
}
23
]
24
],
25
"restrictedSaleItemCode": "",
26
"slug": "baldwin-filters-filter-service-kit-thermo-king-bk6092",
27
"energyGuideLabelFileName": "",
28
"variants": null,
29
"isForcedOutOfStock": false,
30
"lightingFactLabelFileName": "",
31
"isExpeditable": false,
32
"erpId": "2770121",
33
"californiaProp65Message": null,
34
"isHazmat": false,
35
"leadTime": 8,
36
"mfrNo": "BK6092",
37
"attributes": [
38
{
39
"name": "For Vehicle Type - Filter Kits",
40
"value": "Thermo King"
41
},
42
{
43
"name": "Item",
44
"value": "Filter Service Kit"
45
},
46
{
47
"name": "For Use With",
48
"value": "Thermo King"
49
},
50
{
51
"name": "Includes",
52
"value": "Lube Spin-On, Fuel, Water Separator Element, Fuel Spin-On"
53
},
54
{
55
"name": "Country of Origin (subject to change)",
56
"value": "United States"
57
}
58
],
59
"originalPrice": null,
60
"isCircleECompliant": false,
61
"lowLeadComplianceLevel": "",
62
"priceUnit": "EA",
63
"isDropShipDirect": false,
64
"minRetailQty": 1,
65
"price": 118.29,
66
"media": [
67
{
68
"name": "Z1qr7ymcpEx_.JPG",
69
"type": "image/jpeg"
70
}
71
]
72
}
73
74
isExpeditable = False
75