I need to process console output which looks like this and make a csv from it:
JavaScript
x
8
1
ID,FLAG,ADDRESS,MAC-ADDRESS,HOST-NAME,SERVER,STATUS,LAST-SEEN
2
0 10.0.0.11 00:1D:72:29:F2:4F lan waiting never
3
;;; test comment
4
1 10.0.0.19 00:13:21:15:D4:00 lan waiting never
5
2 10.0.0.10 00:60:6E:05:0C:E0 lan waiting never
6
3 D 10.0.1.199 24:E9:B3:20:FA:C7 home server1 bound 4h54m52s
7
4 D 100.64.1.197 E6:17:AE:21:EA:00 Suzana-s-A51 dhcp1 bound 2h16m45s
8
I have managed to split lines but regex is not working for tabs and spaces. Can someone point me in the right direction?
The code I am using is this:
JavaScript
1
37
37
1
import csv
import re

# Script from the question: converts whitespace-separated console output
# into a CSV file. The escape sequences ('\n', '\t') and the indentation
# are restored here; the logic itself is left as the asker wrote it,
# because it is the subject of the question.

# Open the input file in read-only mode
with open('output.txt', 'r') as input_file:
    # Open the output file in write-only mode
    with open('output.csv', 'w') as output_file:
        # Create a CSV writer that will write to the output file
        writer = csv.writer(output_file)

        # Read the first line of the input file (the header)
        # and write it to the output file as a single value
        # (i.e. do not split it on commas)
        header = input_file.readline()
        writer.writerow([header.strip()])

        # Iterate over the remaining lines of the input file
        for line in input_file:
            # Ignore lines that start with ";;;" (these are comments)
            if line.startswith(';;;'):
                continue

            # Split the line on newlines
            # NOTE(review): `values` is built from the line *before* the
            # whitespace-to-comma substitution below, so it never contains
            # the commas the later loop looks for.
            values = line.split('\n')

            # NOTE(review): the result is stored back in `line`, which is
            # not read again -- this is why the regex appears to do nothing.
            line = re.sub(r'[\t ]+', ',', line)

            # Iterate over the resulting values
            for i, value in enumerate(values):
                # If the value contains a comma, split it on commas
                # and assign the resulting values to the `values` list
                if ',' in value:
                    values[i:i+1] = value.split(',')

            # Write the values to the output file
            writer.writerow(values)
Advertisement
Answer
A regular expression is handy here: build a mask (pattern) and then extract each value from every line that is read. You can try the pattern in a regex visualiser, which shows clearly what each group matches.
So, for each line, apply a regex such as reg_format=r"(\d*?)(?:\s+)(.*?)(?:\s)(?:\s*?)(\w*.\w*.\w*.\w*)(?:\s*)(\w*?:\w*?:\w*?:\w*?:\w*?:\w*)(?:\s*)(\w*)(?:\s*)(\w*)(?:\s*)(\w*)"
Please note that writer.writerow expects a list (one element per column) when writing a row to the CSV file.
The following should work for you, and you can tweak it as needed.
I adjusted your code and added comments.
Update:
Added masking for records
JavaScript
1
59
59
1
import csv
import re
import sys

# Dotted-quad IPv4 address; used to locate the ADDRESS column in a row.
IPV4_RE = re.compile(r"\d{1,3}(?:\.\d{1,3}){3}")
# Six colon-separated hex octets; used to validate the MAC-ADDRESS column.
MAC_RE = re.compile(r"[0-9A-Fa-f]{2}(?::[0-9A-Fa-f]{2}){5}")
# Lines starting with this marker are comments and produce no CSV row.
COMMENT_PREFIX = ";;;"
# Number of columns after HOST-NAME (SERVER, STATUS, LAST-SEEN).
# Assumption: these three are present on every data row.
TRAILING_FIELDS = 3


def parse_record(line):
    """Split one console data line into the eight CSV columns.

    The line is tokenised on any run of spaces/tabs.  The optional FLAG and
    HOST-NAME columns are recovered by position: FLAG is whatever sits
    between the ID and the IPv4 address, HOST-NAME is whatever sits between
    the MAC address and the last three tokens.

    Args:
        line: One raw line of console output (a trailing newline is fine).

    Returns:
        ``[id, flag, address, mac, host_name, server, status, last_seen]``
        with ``""`` for a missing FLAG or HOST-NAME, or ``None`` when the
        line does not look like a data row.
    """
    tokens = line.split()
    if len(tokens) < 2 or not tokens[0].isdigit():
        return None

    # The address is the first IPv4-looking token after the ID.
    address_pos = next(
        (
            pos
            for pos, token in enumerate(tokens)
            if pos and IPV4_RE.fullmatch(token)
        ),
        None,
    )
    if address_pos is None:
        return None

    mac_pos = address_pos + 1
    if mac_pos >= len(tokens) or not MAC_RE.fullmatch(tokens[mac_pos]):
        return None

    tail = tokens[mac_pos + 1:]
    if len(tail) < TRAILING_FIELDS:
        return None

    flag = " ".join(tokens[1:address_pos])
    host_name = " ".join(tail[:-TRAILING_FIELDS])
    return [
        tokens[0],
        flag,
        tokens[address_pos],
        tokens[mac_pos],
        host_name,
        *tail[-TRAILING_FIELDS:],
    ]


def build_rows(lines):
    """Yield CSV rows (header first) for an iterable of console lines.

    The first line is treated as the comma-separated header.  Blank lines
    and ``;;;`` comment lines are skipped.  Any other line that cannot be
    parsed is reported on stderr and skipped instead of aborting the run.

    Args:
        lines: Iterable of text lines, e.g. an open text file.

    Yields:
        One list of column values per output row.
    """
    lines = iter(lines)
    header = next(lines, "")
    if header.strip():
        # Strip each name so the trailing newline does not leak into the
        # last header cell.
        yield [name.strip() for name in header.split(",")]

    for line in lines:
        stripped = line.strip()
        if not stripped or stripped.startswith(COMMENT_PREFIX):
            continue
        record = parse_record(stripped)
        if record is None:
            print("skipping unrecognised line: %r" % stripped, file=sys.stderr)
            continue
        yield record


def convert(input_path="testreg.txt", output_path="output.csv"):
    """Read console output from *input_path* and write it as CSV.

    Every field is quoted by the csv module itself (``QUOTE_ALL``); adding
    quote characters by hand would make the writer escape them and emit
    ``\"\"\"value\"\"\"``.

    Args:
        input_path: Text file holding the captured console output.
        output_path: CSV file to create or overwrite.

    Returns:
        The number of rows written, including the header row.
    """
    written = 0
    # newline="" lets the csv module control line endings (avoids blank
    # lines between rows on Windows).
    with open(input_path, "r") as input_file, \
            open(output_path, "w", newline="") as output_file:
        writer = csv.writer(output_file, quoting=csv.QUOTE_ALL)
        for row in build_rows(input_file):
            writer.writerow(row)
            written += 1
    return written


if __name__ == "__main__":
    convert()