I’ve now tried everything for the past few hours but I can’t extract a specific thing from the HTML below. I want to grab the “sessionCartId” but I can’t figure out how….
That's what I tried so far:
# Pick the third <script type="text/javascript"> element of the response by position.
sessioncartid = BeautifulSoup(response.text, "html.parser").findAll("script", {"type":"text/javascript"})[2]
# NOTE(review): judging by the HTML shown below, the tag's text is JavaScript source
# (`var ACC = ...; var OCC = {...};`), not a bare JSON document, so json.loads()
# cannot parse it as-is.
data = json.loads(sessioncartid.text)
print(data)
^^ This gives me the correct script tag, but I can't parse it as JSON or get the sessionCartId from it.
<script type="text/javascript"> /*<![CDATA[*/ var ACC = {config: {}}; ACC.config.contextPath = ""; ACC.config.encodedContextPath = "/de/web"; ACC.config.commonResourcePath = "/_ui/20220811221438/responsive/common"; ACC.config.themeResourcePath = "/_ui/20220811221438/responsive/theme-gh"; ACC.config.siteResourcePath = "/_ui/20220811221438/responsive/site-ghstore"; ACC.config.rootPath = "/_ui/20220811221438/responsive"; ACC.config.CSRFToken = "81b0156a-5a78-4969-b52e-e5080473fb83"; ACC.pwdStrengthVeryWeak = 'password.strength.veryweak'; ACC.pwdStrengthWeak = 'password.strength.weak'; ACC.pwdStrengthMedium = 'password.strength.medium'; ACC.pwdStrengthStrong = 'password.strength.strong'; ACC.pwdStrengthVeryStrong = 'password.strength.verystrong'; ACC.pwdStrengthUnsafePwd = 'password.strength.unsafepwd'; ACC.pwdStrengthTooShortPwd = 'password.strength.tooshortpwd'; ACC.pwdStrengthMinCharText = 'password.strength.minchartext'; ACC.accessibilityLoading = 'aria.pickupinstore.loading'; ACC.accessibilityStoresLoaded = 'aria.pickupinstore.storesloaded'; ACC.config.googleApiKey = ""; ACC.config.googleApiVersion = "3.7"; ACC.autocompleteUrl = '/de/web/search/autocompleteSecure'; ACC.config.loginUrl = '/de/web/login'; ACC.config.authenticationStatusUrl = '/de/web/authentication/status'; /*]]>*/ var OCC = { "token": "1799248c-8de0-4199-b5fe-1d610452010a", "currentUser": "test@gmail.com", "sessionCartGuid": "2323121232323", "sessionCartId": "121212123435324", "sessionLanguageIso": "de", "sessionCountryIso": "DE", "urlPosCode": "web", "isASM": false, "intermediaryID": "", "isASMCustomerEmulated": false, "siteId": "ghstore", "OCCBaseUrl": "/ghcommercewebservices/v2/ghstore", "availablePointsOfService": "BUD,FRA,DTM,HAM,GRZ,HAJ,SZG,VIE,WEB,BER", "primaryPointOfSevice": "WEB", "clientChannel": "web-eu" }; </script>
Advertisement
Answer
This is how you can extract that dictionary:
"""Extract the ``OCC`` settings object from an inline ``<script>`` tag.

The page embeds its session data as a JavaScript assignment
(``var OCC = {...};``) whose right-hand side happens to be valid JSON, so it
can be decoded with the standard ``json`` module once it has been located.
"""

import json
import re

# Sample page fragment; in real use this would be ``response.text``.
HTML = '''
<script type="text/javascript">
/*<![CDATA[*/
var ACC = {config: {}};
ACC.config.contextPath = "";
ACC.config.encodedContextPath = "/de/web";
ACC.config.commonResourcePath = "/_ui/20220811221438/responsive/common";
ACC.config.themeResourcePath = "/_ui/20220811221438/responsive/theme-gh";
ACC.config.siteResourcePath = "/_ui/20220811221438/responsive/site-ghstore";
ACC.config.rootPath = "/_ui/20220811221438/responsive";
ACC.config.CSRFToken = "81b0156a-5a78-4969-b52e-e5080473fb83";
ACC.pwdStrengthVeryWeak = 'password.strength.veryweak';
ACC.pwdStrengthWeak = 'password.strength.weak';
ACC.pwdStrengthMedium = 'password.strength.medium';
ACC.pwdStrengthStrong = 'password.strength.strong';
ACC.pwdStrengthVeryStrong = 'password.strength.verystrong';
ACC.pwdStrengthUnsafePwd = 'password.strength.unsafepwd';
ACC.pwdStrengthTooShortPwd = 'password.strength.tooshortpwd';
ACC.pwdStrengthMinCharText = 'password.strength.minchartext';
ACC.accessibilityLoading = 'aria.pickupinstore.loading';
ACC.accessibilityStoresLoaded = 'aria.pickupinstore.storesloaded';
ACC.config.googleApiKey = "";
ACC.config.googleApiVersion = "3.7";
ACC.autocompleteUrl = '/de/web/search/autocompleteSecure';
ACC.config.loginUrl = '/de/web/login';
ACC.config.authenticationStatusUrl = '/de/web/authentication/status';
/*]]>*/
var OCC = {
    "token": "1799248c-8de0-4199-b5fe-1d610452010a",
    "currentUser": "test@gmail.com",
    "sessionCartGuid": "2323121232323",
    "sessionCartId": "121212123435324",
    "sessionLanguageIso": "de",
    "sessionCountryIso": "DE",
    "urlPosCode": "web",
    "isASM": false,
    "intermediaryID": "",
    "isASMCustomerEmulated": false,
    "siteId": "ghstore",
    "OCCBaseUrl": "/ghcommercewebservices/v2/ghstore",
    "availablePointsOfService": "BUD,FRA,DTM,HAM,GRZ,HAJ,SZG,VIE,WEB,BER",
    "primaryPointOfSevice": "WEB",
    "clientChannel": "web-eu"
};
</script>
'''


def extract_js_object(script_text, var_name):
    """Return the JSON value assigned to ``var <var_name>`` in *script_text*.

    Args:
        script_text: JavaScript source containing ``var <var_name> = <json>``.
        var_name: Name of the JavaScript variable to read.

    Returns:
        The decoded value (a ``dict`` for an object literal).

    Raises:
        ValueError: If no such assignment exists, or if the assigned value is
            not valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
    """
    assignment = re.search(
        rf"\bvar\s+{re.escape(var_name)}\s*=\s*", script_text
    )
    if assignment is None:
        raise ValueError(f"no 'var {var_name} = ...' assignment found")
    # raw_decode() parses exactly one JSON value starting at the given offset
    # and ignores whatever follows it.  Unlike splitting on ';', this cannot
    # be fooled by a semicolon inside one of the string values.
    value, _end = json.JSONDecoder().raw_decode(script_text, assignment.end())
    return value


def main():
    """Locate the script that defines ``OCC`` and print selected fields."""
    # Imported here so extract_js_object() stays usable without bs4 installed.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(HTML, 'html.parser')
    # find() honours the ``string`` filter.  select_one() takes a CSS selector
    # and does not filter on ``string``, so it would return the first <script>
    # on the page regardless of its content.
    script = soup.find('script', string=re.compile('sessionCartGuid'))
    if script is None:
        raise SystemExit('no <script> containing "sessionCartGuid" was found')

    json_obj = extract_js_object(script.string, 'OCC')
    print(json_obj['token'])
    print(json_obj['currentUser'])
    print(json_obj['sessionCartId'])


if __name__ == '__main__':
    main()
Result:
1799248c-8de0-4199-b5fe-1d610452010a
test@gmail.com
121212123435324
BeautifulSoup docs: https://beautiful-soup-4.readthedocs.io/en/latest/index.html