I have input data of the following form:
JavaScript
x
13
13
1
[2] IN: 2.12 INOUT: 3.52 (Input)
2
[2] IN: 2.12 INOUT: 3.52 (Input)
3
OUT: 2.42 INOUT: 2.62 (Output)
4
5
[2] OUT: 2.42 INOUT: 2.62 (Output)
6
[2] IN: 2.12 INOUT: 3.52 (Input)
7
OUT: 2.42 INOUT: 2.62 (Output)
8
9
[2] IN: 2.12 INOUT: 3.52 (Input)
10
[2] OUT: 2.42 INOUT: 2.62 (Output)
11
[2] IN: 2.12 INOUT: 3.52 (Input)
12
OUT: 2.42 INOUT: 2.62 (Output)
13
I need to parse this data and extract the IN: / OUT: / INOUT: values, depending on which of the three regexes given below matches:
JavaScript
1
5
1
# Patterns for the "[2]"-tagged records (regex escapes restored: the literal
# brackets must be written as \[ \], and \s / \S are whitespace / non-whitespace).
# regex1 captures the IN and INOUT values of a "[2] IN: ... INOUT: ..." record.
regex1 = r"\[2\]\s*IN:\s*(\S+?)\s*INOUT:\s*(\S+?)\s"
# regex2 captures the OUT and INOUT values of a "[2] OUT: ... INOUT: ..." record.
regex2 = r"\[2\]\s*OUT:\s*(\S+?)\s*INOUT:\s*(\S+?)\s"
# regex3 captures four values: a "[2] IN:" record followed (after at least one
# filler character) by an untagged "OUT: ... INOUT: ..." continuation.
regex3 = r"\[2\]\s*IN:\s*(\S+?)\s*INOUT:\s*(\S+?)\s.*?.\s*OUT:\s*(\S+?)\s*INOUT:\s*(\S+?)\s"
4
5
My output should be:
JavaScript
1
10
10
1
IN_r1 2.12 INOUT_r1 3.52
2
IN_r3 2.12 INOUT1_r3 3.52 OUT_r3 2.42 INOUT2_r3 2.62
3
4
OUT_r2 2.42 INOUT_r2 2.62
5
IN_r3 2.12 INOUT1_r3 3.52 OUT_r3 2.42 INOUT2_r3 2.62
6
7
IN_r1 2.12 INOUT_r1 3.52
8
OUT_r2 2.42 INOUT_r2 2.62
9
IN_r3 2.12 INOUT1_r3 3.52 OUT_r3 2.42 INOUT2_r3 2.62
10
The code I tried:
JavaScript
1
44
44
1
# Asker's attempt: tries the three patterns against the sample text and
# appends the extracted values to "checked_ant.txt".
# NOTE(review): the bare numbers between statements look like line-number
# residue from the page's code widget, and the indentation appears lost.
import re
2
# NOTE(review): these patterns look like they lost their backslashes
# (presumably \[2\], \s and \S) -- as written "[2]" is a character class and
# "s*" matches literal "s" characters. Confirm against the original post.
regex1 = r"[2]s*IN:s*(S+?)s*INOUT:s*(S+?)s"
3
regex2 = r"[2]s*OUT:s*(S+?)s*INOUT:s*(S+?)s"
4
regex3 = r"[2]s*IN:s*(S+?)s*INOUT:s*(S+?)s.*?.s*OUT:s*(S+?)s*INOUT:s*(S+?)s"
5
6
# Sample input text.
# NOTE(review): a single-quoted string cannot span lines; presumably this
# was a triple-quoted literal -- confirm.
data = "
7
[2] IN: 2.12 INOUT: 3.52 (Input)
8
[2] IN: 2.12 INOUT: 3.52 (Input)
9
OUT: 2.42 INOUT: 2.62 (Output)
10
11
[2] OUT: 2.42 INOUT: 2.62 (Output)
12
[2] IN: 2.12 INOUT: 3.52 (Input)
13
OUT: 2.42 INOUT: 2.62 (Output)
14
15
[2] IN: 2.12 INOUT: 3.52 (Input)
16
[2] OUT: 2.42 INOUT: 2.62 (Output)
17
[2] IN: 2.12 INOUT: 3.52 (Input)
18
OUT: 2.42 INOUT: 2.62 (Output)
19
"
20
21
# Split the text into pieces on the "[2]" pattern.
lines = re.split("[2]",data)
22
23
# NOTE(review): the loop variable `line` is never used below; every
# iteration searches the whole `data` string again.
for line in lines:
24
25
26
# if/elif chain: only the first pattern found in `data` is processed.
if re.search(regex1,data):
27
tracks = re.findall(regex1,data,re.DOTALL)
28
for track in tracks:
29
# NOTE(review): `input` shadows the built-in of the same name.
input,inout = (float(z) for z in track)
30
# The output file is reopened in append mode for every match.
with open("checked_ant.txt",'a') as a:
31
print("IN_r1",input,"INOUT_r1",inout,file=a)
32
elif re.search(regex2,data):
33
tracks = re.findall(regex2,data,re.DOTALL)
34
for track in tracks:
35
output,inout = (float(z) for z in track)
36
with open("checked_ant.txt",'a') as a:
37
print("OUT_r2",output,"INOUT_r2",inout,file=a)
38
elif re.search(regex3,data):
39
tracks = re.findall(regex3,data,re.DOTALL)
40
for track in tracks:
41
input,inout1,output,inout2 = (float(z) for z in track)
42
with open("checked_ant.txt",'a') as a:
43
print("IN_r3",input,"INOUT1_r3",inout1,"OUT_r3",output,"INOUT2_r3",inout2,file=a)
44
The problem I face is that it does not parse correctly: the regexes are not matched separately against each sub-block of data beginning with [2].
Advertisement
Answer
Although I find the requirement quite strange (the regexes are provided and cannot be changed), I got the expected result with the code below. Can you try it?
JavaScript
1
34
34
1
import re

# Sample input: paragraphs separated by a blank line, each made of
# "[2]"-tagged IN/OUT records and untagged OUT continuation lines.
s = '''[2] IN: 2.12 INOUT: 3.52 (Input)
[2] IN: 2.12 INOUT: 3.52 (Input)
OUT: 2.42 INOUT: 2.62 (Output)

[2] OUT: 2.42 INOUT: 2.62 (Output)
[2] IN: 2.12 INOUT: 3.52 (Input)
OUT: 2.42 INOUT: 2.62 (Output)

[2] IN: 2.12 INOUT: 3.52 (Input)
[2] OUT: 2.42 INOUT: 2.62 (Output)
[2] IN: 2.12 INOUT: 3.52 (Input)
OUT: 2.42 INOUT: 2.62 (Output)'''

# Regex escapes restored: literal brackets are \[ \], \s is whitespace and
# \S is non-whitespace. Without the backslashes the patterns cannot match.
r1 = r"\[2\]\s*IN:\s*(\S+?)\s*INOUT:\s*(\S+?)\s"
r2 = r"\[2\]\s*OUT:\s*(\S+?)\s*INOUT:\s*(\S+?)\s"
r3 = r"\[2\]\s*IN:\s*(\S+?)\s*INOUT:\s*(\S+?)\s.*?.\s*OUT:\s*(\S+?)\s*INOUT:\s*(\S+?)\s"


def g(reg, s, n):
    """Return capture group *n* of the first match of *reg* in *s* as a float.

    Raises AttributeError when *reg* does not match *s* (``re.search``
    returns ``None``) and ValueError when the group is not a valid float.
    """
    return float(re.search(reg, s).group(n))


# Paragraphs are separated by an empty line, i.e. two consecutive newlines.
paras = s.split('\n\n')
for p in paras:
    # The three checks are independent (`if`, not `elif`), so one paragraph
    # can produce several output lines. Each pattern is searched only once
    # and its groups are reused.
    m1 = re.search(r1, p)
    if m1:
        in_val, inout = (float(v) for v in m1.groups())
        print(f'IN_r1 {in_val} INOUT_r1 {inout}')
    m2 = re.search(r2, p)
    if m2:
        out_val, inout = (float(v) for v in m2.groups())
        print(f'OUT_r2 {out_val} INOUT_r2 {inout}')
    m3 = re.search(r3, p)
    if m3:
        in_val, inout1, out_val, inout2 = (float(v) for v in m3.groups())
        print(
            f'IN_r3 {in_val} INOUT1_r3 {inout1} OUT_r3 {out_val} INOUT2_r3 {inout2}')
Update
For better performance, you can run each search only once and read the groups from the resulting match object. Taking r1 as an example:
JavaScript
1
4
1
# Search once and reuse the match object instead of re-running the regex
# for every captured group. The groups are printed as matched text here
# (no float conversion).
gs = re.search(r1, p)
if gs:
    print(f'IN_r1 {gs.group(1)} INOUT_r1 {gs.group(2)}')
4