I am parsing JS files stored in a directory to extract the values inside the `tags` field. After finding the relevant values in `tags` using a regex, I want to move those values into a new section called `controls`. I have been able to get the required values into the `controls` variable using the script below:
def parse_sigs(target_folder):
    """Print the control-style tags found in every ``.js`` file under a folder.

    Walks *target_folder* recursively. For each file whose extension is
    ``.js`` (case-insensitive), the first ``tags: [ ... ]`` array is located,
    parsed as a Python list literal, and the entries matching the control
    pattern are printed, followed by the running count of files in which a
    tags array was found.

    Args:
        target_folder: Root directory to scan.

    Returns:
        None. All results are written to standard output.
    """
    # The array is captured including its brackets so literal_eval can parse
    # it directly; re.S lets the lazy ``.*?`` span a multi-line array.
    tags_pattern = re.compile(r"\btags\s*:\s*(\[.*?\])", flags=re.S)
    # Compiled once here instead of once per file.
    control_pattern = re.compile(r".*-.*-.*[.|-].*")

    count = 0
    for root, _dirs, files in os.walk(target_folder):
        for file in files:
            if os.path.splitext(file)[1].lower() != ".js":
                continue
            print(file)
            filename = os.path.join(root, file)
            print(f'Processing : {filename}')
            # Per-file try: one unreadable or unparsable file is reported and
            # skipped instead of aborting the whole walk, and ``filename`` is
            # always bound when the handler runs.
            try:
                with open(filename, "r") as f_in:
                    data = f_in.read()
                found = tags_pattern.search(data)
                if found:
                    tags = literal_eval(found.group(1))
                    controls = list(filter(control_pattern.match, tags))
                    count += 1
                    print(controls)
                    print(count)
            except Exception as e:
                print(f"Error in Processing {filename} Skipping -- ")
                print(e)
I cannot think of a way to now insert the `controls` list as a new field into the JS file and save the updated file. The new field should appear right after the `tags` field.
An example JS file looks like:
var fs = require('fs'); var path = require('path') const remediation = fs.readFileSync(path.resolve(__dirname, '../sig/file.md'), 'utf-8') module.exports = { status: "ENABLED", sig: { name: "iam", tags: [ "aws", "iam", "compliance", "chg-02.1", "AWS-CIS-v1.4", "AWS-CIS-v1.4-1.12", "AWS-CIS-v1.4-1.14", "SOC-2", "SOC-2-CC6.8", "NIST-800-53rev5", "NIST-800-53rev5-CM-3(1)" ], result: "A.id, A.name", result_header: [ "account_id", "account_name", ], primary_resource: "user_arn", }, expected: { "all": [ { user_name: "2506" } ] } };
My end goal is to produce a JS file whose `tags` section and newly added `controls` section look like this:
tags: [ "aws", "iam", "compliance", "chg-02.1", "AWS-CIS-v1.4", "SOC-2", "NIST-800-53rev5" ], controls: [ "AWS-CIS-v1.4-1.12", "AWS-CIS-v1.4-1.14", "NIST-800-53rev5-CM-3(1)", "SOC-2-CC6.8", ],
Edit:
I was able to get it working with the solution provided by @Dan-Dev.
The only thing still out of place is the closing `]`. Is there a way to make the closing bracket indent correctly?
tags: [ "aws", "docdb", "kms", "configuration-check", ], controls: [ "SOC-2-CC6.1", "NIST-800-53rev5-SC-12" ]
Advertisement
Answer
You can extract the tags and convert them to JSON using the `jsonnet` package, iterate over them using your regex, and then build a string to replace the original tags, like this:
import json
import os
import re

import _jsonnet

# A tag is treated as a control when it matches this pattern.
_CONTROL_RE = re.compile(r".*-.*-.*[.|-].*")
# Captures the object literal assigned to module.exports (greedy, up to the last ';').
_EXPORTS_RE = re.compile(r'module\.exports = (.*);', flags=re.S)
# Matches a whole ``tags: [ ... ]`` field; re.S lets the lazy body span lines.
# NOTE: the lazy match stops at the first ']', so a tag containing ']' is not supported.
_TAGS_FIELD_RE = re.compile(r'\btags\s*:\s*\[.*?\]', flags=re.S)


def _render_field(name, items, indent, step="    "):
    """Render ``name: [ ... ]`` as JS source with the closing ']' aligned to *indent*."""
    if not items:
        return f"{name}: []"
    rows = ",\n".join(f"{indent}{step}{json.dumps(item)}" for item in items)
    return f"{name}: [\n{rows}\n{indent}]"


def _rewrite_tags(data, new_tags, controls):
    """Replace the first ``tags`` field in *data* with ``tags`` followed by ``controls``."""

    def _replacement(match):
        # Reuse the leading whitespace of the line holding ``tags:`` so both
        # fields and their closing brackets line up with the original key.
        line_start = data.rfind("\n", 0, match.start()) + 1
        lead = data[line_start:match.start()]
        indent = lead[:len(lead) - len(lead.lstrip())]
        tags_src = _render_field("tags", new_tags, indent)
        controls_src = _render_field("controls", controls, indent)
        return f"{tags_src},\n{indent}{controls_src}"

    # A callable replacement is inserted verbatim; a plain string would have
    # its backslashes (e.g. JSON escapes) interpreted by re.sub.
    # count=1: only the first ``tags`` field is rewritten with the sig tags.
    return _TAGS_FIELD_RE.sub(_replacement, data, count=1)


def parse_sigs(target_folder):
    """Split ``sig.tags`` into ``tags`` and ``controls`` for each ``.js`` file.

    Walks *target_folder* recursively. For every ``.js`` file, the object
    assigned to ``module.exports`` is evaluated with jsonnet, the entries of
    ``sig.tags`` matching the control pattern are moved to a new ``controls``
    list, and the rewritten source is printed together with a running count.
    Files are not modified on disk.

    Args:
        target_folder: Root directory to scan.

    Returns:
        None. A file that fails to process is reported on standard output
        and skipped.
    """
    count = 0
    for root, _, files in os.walk(target_folder):
        for file in files:
            if os.path.splitext(file)[1].lower() != ".js":
                continue
            filename = os.path.join(root, file)
            print(f'Processing : {filename}')
            try:
                with open(filename, "r") as f_in:
                    data = f_in.read()
                snippet = _EXPORTS_RE.findall(data)[0]
                json_data = json.loads(_jsonnet.evaluate_snippet('snippet', snippet))
                new_tags = []
                controls = []
                for tag in json_data['sig']['tags']:
                    if _CONTROL_RE.match(tag):
                        controls.append(tag)
                    else:
                        new_tags.append(tag)
                data = _rewrite_tags(data, new_tags, controls)
                print(data)
                count += 1
                print(count)
            except Exception as e:
                print(f"Error in Processing {filename} Skipping -- ")
                print(e)


if __name__ == "__main__":
    parse_sigs('./')
Outputs:
Processing : ./test.js var fs = require('fs'); var path = require('path') const remediation = fs.readFileSync(path.resolve(__dirname, '../sig/file.md'), 'utf-8') module.exports = { status: "ENABLED", sig: { name: "iam", tags: [ "aws", "iam", "compliance", "chg-02.1", "SOC-2", "NIST-800-53rev5" ], controls: [ "AWS-CIS-v1.4", "AWS-CIS-v1.4-1.12", "AWS-CIS-v1.4-1.14", "SOC-2-CC6.8", "NIST-800-53rev5-CM-3(1)" ], result: "A.id, A.name", result_header: [ "account_id", "account_name", ], primary_resource: "user_arn", }, expected: { "all": [ { user_name: "2506" } ] } }; 1
UPDATED WITH FORMATTING:
import json
import os
import re

import _jsonnet
import jsbeautifier

# Beautifier settings applied to the rewritten source.
opts = jsbeautifier.default_options()
opts.indent_size = 2
opts.space_in_empty_paren = True


def parse_sigs(target_folder):
    """Split ``sig.tags`` into ``tags`` and ``controls`` and print beautified JS.

    Walks *target_folder* recursively. For every ``.js`` file, the object
    assigned to ``module.exports`` is evaluated with jsonnet, the entries of
    ``sig.tags`` matching the control pattern are moved to a new ``controls``
    list placed right after ``tags``, and the whole source is passed through
    jsbeautifier before being printed with a running count. Files are not
    modified on disk.

    Args:
        target_folder: Root directory to scan.

    Returns:
        None. A file that fails to process is reported on standard output
        and skipped.
    """
    count = 0
    cregex = re.compile(r".*-.*-.*[.|-].*")
    for root, _, files in os.walk(target_folder):
        for file in files:
            if os.path.splitext(file)[1].lower() != ".js":
                continue
            new_tags = []
            controls = []
            filename = os.path.join(root, file)
            print(f'Processing : {filename}')
            try:
                with open(filename, "r") as f_in:
                    data = f_in.read()
                # Greedy capture up to the last ';' so the whole exported object is taken.
                snippet = re.findall(r'module\.exports = (.*);', data, flags=re.S)[0]
                json_data = json.loads(_jsonnet.evaluate_snippet('snippet', snippet))
                for tag in json_data['sig']['tags']:
                    if cregex.match(tag):
                        controls.append(tag)
                    else:
                        new_tags.append(tag)
                # Exact indentation is irrelevant here: the beautifier below
                # re-indents the whole file, closing brackets included.
                new_string = f"tags: {json.dumps(new_tags, indent=8)},\ncontrols: {json.dumps(controls, indent=8)}"
                # A callable replacement is inserted verbatim; a plain string
                # would have its backslashes (e.g. JSON escapes) interpreted.
                # count=1: only the first ``tags`` field is rewritten.
                data = re.sub(r'tags: \[(.*?)\]', lambda _match: new_string, data, count=1, flags=re.S)
                data = jsbeautifier.beautify(data, opts)
                if data:
                    print(data)
                count += 1
                print(count)
            except Exception as e:
                print(f"Error in Processing {filename} Skipping -- ")
                print(e)


if __name__ == "__main__":
    parse_sigs('./')