I need to process console output which looks like this and make a csv from it:
ID,FLAG,ADDRESS,MAC-ADDRESS,HOST-NAME,SERVER,STATUS,LAST-SEEN
0 10.0.0.11 00:1D:72:29:F2:4F lan waiting never
;;; test comment
1 10.0.0.19 00:13:21:15:D4:00 lan waiting never
2 10.0.0.10 00:60:6E:05:0C:E0 lan waiting never
3 D 10.0.1.199 24:E9:B3:20:FA:C7 home server1 bound 4h54m52s
4 D 100.64.1.197 E6:17:AE:21:EA:00 Suzana-s-A51 dhcp1 bound 2h16m45s
I have managed to split lines but regex is not working for tabs and spaces. Can someone point me in the right direction?
The code I am using is this:
import csv
import re

# Script from the question: tries to turn the whitespace-separated console
# output in 'output.txt' into 'output.csv'. The logic is kept as posted.

# Open the input file in read-only mode
with open('output.txt', 'r') as input_file:
    # Open the output file in write-only mode
    with open('output.csv', 'w') as output_file:
        # Create a CSV writer that will write to the output file
        writer = csv.writer(output_file)

        # Read the first line of the input file (the header)
        # and write it to the output file as a single value
        # (i.e. do not split it on commas)
        header = input_file.readline()
        writer.writerow([header.strip()])

        # Iterate over the remaining lines of the input file
        for line in input_file:
            # Ignore lines that start with ";;;" (these are comments)
            if line.startswith(';;;'):
                continue

            # Split the line on newlines
            values = line.split('\n')
            # NOTE: `values` was built from the line *before* this
            # substitution and the new `line` is never read again, so the
            # tabs/spaces are never turned into commas in what gets written.
            line = re.sub(r'[\t ]+', ',', line)

            # Iterate over the resulting values
            for i, value in enumerate(values):
                # If the value contains a comma, split it on commas
                # and assign the resulting values to the `values` list
                if ',' in value:
                    values[i:i+1] = value.split(',')

            # Write the values to the output file
            writer.writerow(values)
Advertisement
Answer
A regular expression can be handy here: build a mask (pattern) for one record, then pull each value out of every line you read. You can try the pattern in an interactive regex tester, which gives a great visual breakdown of what each part matches.
So, for each line, we apply a regex such as reg_format = r"(\d*?)(?:\s+)(.*?)(?:\s)(?:\s*?)(\w*\.\w*\.\w*\.\w*)(?:\s*)(\w*?:\w*?:\w*?:\w*?:\w*?:\w*)(?:\s*)(\w*)(?:\s*)(\w*)(?:\s*)(\w*)"
Please note that when we write to the CSV using writer.writerow, it expects a list (one item per column).
The following should work for you, and you can tweak it as needed.
I tweaked your code and added comments.
Update:
Added masking for records
import csv
import re
import sys

# Convert lease-table console output (one whitespace-separated record per
# line, preceded by a comma-separated header line) into a CSV file.

# Mask for a single record. FLAG and HOST-NAME may be absent, so both are
# optional; every other column is required. Columns are matched by what they
# contain (digits, dotted IPv4, colon-separated MAC) rather than by how many
# spaces separate them, so tabs and runs of spaces are handled alike.
LEASE_PATTERN = re.compile(
    r"^\s*(?P<id>\d+)"
    r"\s+(?P<flag>.*?)\s*"
    r"(?P<address>\d{1,3}(?:\.\d{1,3}){3})"
    r"\s+(?P<mac>[0-9A-Fa-f]{2}(?::[0-9A-Fa-f]{2}){5})"
    r"(?:\s+(?P<host>\S+))?"
    r"\s+(?P<server>\S+)"
    r"\s+(?P<status>\S+)"
    r"\s+(?P<last_seen>\S+)"
    r"\s*$"
)


def parse_lease_line(line):
    """Split one record line into its eight column values.

    Args:
        line: A single line of the console output, with or without the
            trailing newline.

    Returns:
        A list ``[id, flag, address, mac, host, server, status, last_seen]``
        of strings, with ``""`` for an absent flag or host name, or ``None``
        when the line does not look like a record (blank line, comment, ...).
        Values that themselves contain whitespace are not supported.
    """
    match = LEASE_PATTERN.match(line)
    if match is None:
        return None
    # An optional group that did not take part in the match is None.
    return [value or "" for value in match.groups()]


def iter_csv_rows(lines):
    """Yield the CSV rows for the given console-output lines.

    The first line is the header and is split on commas; every following
    line is parsed with :func:`parse_lease_line`. Blank lines and ``;;;``
    comment lines are skipped silently; any other line that cannot be parsed
    is reported on stderr and skipped instead of aborting the conversion.

    Args:
        lines: An iterable of text lines (an open file works).

    Yields:
        Lists of strings, one per CSV row, header first. Nothing is yielded
        for empty input.
    """
    line_iter = iter(lines)
    # Strip before splitting so the last column does not keep the newline.
    header = next(line_iter, "").strip()
    if not header:
        return
    yield header.split(",")

    for line_number, line in enumerate(line_iter, start=2):
        stripped = line.strip()
        if not stripped or stripped.startswith(";;;"):
            continue
        row = parse_lease_line(stripped)
        if row is None:
            print(
                f"skipping unparsable line {line_number}: {stripped!r}",
                file=sys.stderr,
            )
            continue
        yield row


def main():
    """Read 'testreg.txt' and write the converted rows to 'output.csv'."""
    # newline="" lets the csv module control line endings itself.
    with open("testreg.txt", "r") as input_file, \
            open("output.csv", "w", newline="") as output_file:
        # Pass plain values and let the writer do the quoting; wrapping the
        # values in '"' by hand makes the writer escape those quotes again.
        # Use csv.writer(output_file, quoting=csv.QUOTE_ALL) if every field
        # must be quoted.
        writer = csv.writer(output_file)
        writer.writerows(iter_csv_rows(input_file))


if __name__ == "__main__":
    main()