I’ve tried everything for the past few hours, but I can’t extract a specific value from the HTML below. I want to grab the “sessionCartId”, but I can’t figure out how.
That’s what I have tried so far:
Python
x
4
1
sessioncartid = BeautifulSoup(response.text, "html.parser").findAll("script", {"type":"text/javascript"})[2]
2
data = json.loads(sessioncartid.text)
3
print(data)
4
^^ This gives me the correct script tag, but I can’t parse it as JSON or get the sessionCartId.
JavaScript
1
60
60
1
<script type="text/javascript">
2
/*<![CDATA[*/
3
4
var ACC = {config: {}};
5
ACC.config.contextPath = "";
6
ACC.config.encodedContextPath = "/de/web";
7
ACC.config.commonResourcePath = "/_ui/20220811221438/responsive/common";
8
ACC.config.themeResourcePath = "/_ui/20220811221438/responsive/theme-gh";
9
ACC.config.siteResourcePath = "/_ui/20220811221438/responsive/site-ghstore";
10
ACC.config.rootPath = "/_ui/20220811221438/responsive";
11
ACC.config.CSRFToken = "81b0156a-5a78-4969-b52e-e5080473fb83";
12
ACC.pwdStrengthVeryWeak = 'password.strength.veryweak';
13
ACC.pwdStrengthWeak = 'password.strength.weak';
14
ACC.pwdStrengthMedium = 'password.strength.medium';
15
ACC.pwdStrengthStrong = 'password.strength.strong';
16
ACC.pwdStrengthVeryStrong = 'password.strength.verystrong';
17
ACC.pwdStrengthUnsafePwd = 'password.strength.unsafepwd';
18
ACC.pwdStrengthTooShortPwd = 'password.strength.tooshortpwd';
19
ACC.pwdStrengthMinCharText = 'password.strength.minchartext';
20
ACC.accessibilityLoading = 'aria.pickupinstore.loading';
21
ACC.accessibilityStoresLoaded = 'aria.pickupinstore.storesloaded';
22
ACC.config.googleApiKey = "";
23
ACC.config.googleApiVersion = "3.7";
24
25
26
ACC.autocompleteUrl = '/de/web/search/autocompleteSecure';
27
28
29
ACC.config.loginUrl = '/de/web/login';
30
31
32
ACC.config.authenticationStatusUrl = '/de/web/authentication/status';
33
34
35
/*]]>*/
36
37
var OCC =
38
39
40
{
41
42
"token": "1799248c-8de0-4199-b5fe-1d610452010a",
43
44
"currentUser": "test@gmail.com",
45
"sessionCartGuid": "2323121232323",
46
"sessionCartId": "121212123435324",
47
"sessionLanguageIso": "de",
48
"sessionCountryIso": "DE",
49
"urlPosCode": "web",
50
"isASM": false,
51
"intermediaryID": "",
52
"isASMCustomerEmulated": false,
53
"siteId": "ghstore",
54
"OCCBaseUrl": "/ghcommercewebservices/v2/ghstore",
55
"availablePointsOfService": "BUD,FRA,DTM,HAM,GRZ,HAJ,SZG,VIE,WEB,BER",
56
"primaryPointOfSevice": "WEB",
57
"clientChannel": "web-eu"
58
};
59
</script>
60
Advertisement
Answer
This is how you can extract that dictionary:
Python
1
73
73
1
from bs4 import BeautifulSoup
2
import json
3
import re
4
5
# Sample page fragment: a single <script> tag whose JavaScript assigns a
# JSON-compatible object literal to ``var OCC``.
html = '''
<script type="text/javascript">
/*<![CDATA[*/

var ACC = {config: {}};
ACC.config.contextPath = "";
ACC.config.encodedContextPath = "/de/web";
ACC.config.commonResourcePath = "/_ui/20220811221438/responsive/common";
ACC.config.themeResourcePath = "/_ui/20220811221438/responsive/theme-gh";
ACC.config.siteResourcePath = "/_ui/20220811221438/responsive/site-ghstore";
ACC.config.rootPath = "/_ui/20220811221438/responsive";
ACC.config.CSRFToken = "81b0156a-5a78-4969-b52e-e5080473fb83";
ACC.pwdStrengthVeryWeak = 'password.strength.veryweak';
ACC.pwdStrengthWeak = 'password.strength.weak';
ACC.pwdStrengthMedium = 'password.strength.medium';
ACC.pwdStrengthStrong = 'password.strength.strong';
ACC.pwdStrengthVeryStrong = 'password.strength.verystrong';
ACC.pwdStrengthUnsafePwd = 'password.strength.unsafepwd';
ACC.pwdStrengthTooShortPwd = 'password.strength.tooshortpwd';
ACC.pwdStrengthMinCharText = 'password.strength.minchartext';
ACC.accessibilityLoading = 'aria.pickupinstore.loading';
ACC.accessibilityStoresLoaded = 'aria.pickupinstore.storesloaded';
ACC.config.googleApiKey = "";
ACC.config.googleApiVersion = "3.7";


ACC.autocompleteUrl = '/de/web/search/autocompleteSecure';


ACC.config.loginUrl = '/de/web/login';


ACC.config.authenticationStatusUrl = '/de/web/authentication/status';


/*]]>*/

var OCC =


{

"token": "1799248c-8de0-4199-b5fe-1d610452010a",

"currentUser": "test@gmail.com",
"sessionCartGuid": "2323121232323",
"sessionCartId": "121212123435324",
"sessionLanguageIso": "de",
"sessionCountryIso": "DE",
"urlPosCode": "web",
"isASM": false,
"intermediaryID": "",
"isASMCustomerEmulated": false,
"siteId": "ghstore",
"OCCBaseUrl": "/ghcommercewebservices/v2/ghstore",
"availablePointsOfService": "BUD,FRA,DTM,HAM,GRZ,HAJ,SZG,VIE,WEB,BER",
"primaryPointOfSevice": "WEB",
"clientChannel": "web-eu"
};
</script>
'''

# JavaScript text that immediately precedes the object literal we want.
occ_marker = 'var OCC ='

soup = BeautifulSoup(html, 'html.parser')

# Use find(), not select_one(): select_one() only understands a CSS selector,
# so a ``string=`` filter passed to it is not applied and the first <script>
# on the page would be returned regardless of its content. find() applies the
# pattern to the tag's text, so this picks the script that actually holds the
# OCC object even when the page contains several script tags.
info = soup.find('script', string=re.compile('sessionCartGuid'))
if info is None:
    raise ValueError("no <script> tag containing 'sessionCartGuid' was found")

_, marker_found, after_marker = info.string.partition(occ_marker)
if not marker_found:
    raise ValueError(f"{occ_marker!r} not found in the matched <script> tag")

# raw_decode() parses exactly one JSON value from the start of the string and
# ignores whatever follows (the trailing ';' and any later JavaScript). Unlike
# splitting on ';', this does not break when a value itself contains a
# semicolon. It does not skip leading whitespace, hence the lstrip().
json_obj, _ = json.JSONDecoder().raw_decode(after_marker.lstrip())

print(json_obj['token'])
print(json_obj['currentUser'])
print(json_obj['sessionCartId'])
73
Result:
JavaScript
1
4
1
1799248c-8de0-4199-b5fe-1d610452010a
2
test@gmail.com
3
121212123435324
4
BeautifulSoup docs: https://beautiful-soup-4.readthedocs.io/en/latest/index.html