I am trying to sort filepaths according to their respective file extensions.
I would like to have an output like this:
FileType | FilePath |
---|---|
.h | a/b/c/d/xyz.h |
.h | a/b/c/d/xyz1.h |
.class | a/b/c/d/xyz.class |
.class | a/b/c/d/xyz1.class |
.jar | a/b/c/d/xyz.jar |
.jar | a/b/c/d/xyz1.jar |
But the output I have now looks like this (a screenshot of the Excel output, showing one column per file type, was attached here).
Below is my code:
Python
# Collect file paths per extension and write them to an Excel sheet.
#
# NOTE: this produces the "wide" layout the question is about: one column per
# extension (".h", ".class", ".jar"), with the matching paths listed below it.
import glob

import pandas as pd

path = "The path goes here"

# One recursive glob per extension; "**" together with recursive=True descends
# into every sub-directory of `path`.
patterns = ["/**/*.h", "/**/*.class", "/**/*.jar"]
yes = [glob.glob(path + pattern, recursive=True) for pattern in patterns]

print(type(yes))  # `yes` is a list (of lists of paths, one per pattern)

# Each inner list becomes a row of the frame; transposing turns every
# extension into its own column.
df = pd.DataFrame(yes)
df = df.transpose()
df.columns = [".h", ".class", ".jar"]
print(df)

# The context manager saves and closes the workbook on exit
# (ExcelWriter.save() no longer exists in pandas >= 2.0).
with pd.ExcelWriter('test.xlsx', engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='filepath', index=False)
Could anyone please help me with this? Thanks in advance!
Advertisement
Answer
Please try this code:
Python
# Walk a directory tree, group every file path by its file extension and
# print one group per extension.
import os
import pathlib

import pandas as pd


def group_by_suffix(file_paths):
    """Group path strings by their file suffix.

    Args:
        file_paths: Iterable of path strings.

    Returns:
        A dict mapping each suffix (including the leading dot, e.g. ".h";
        "" for files without a suffix) to the list of paths carrying that
        suffix, in the order they were given.
    """
    grouped = {}
    for file_path in file_paths:
        # Use the real suffix of the final path component instead of a
        # substring search, so ".h" does not also capture "x.html" or a
        # directory such as "a.h.d/".
        suffix = pathlib.PurePath(file_path).suffix
        grouped.setdefault(suffix, []).append(file_path)
    return grouped


if __name__ == "__main__":
    path = 'C:/'

    full_file_paths = []
    for (root, dirs, files) in os.walk(path):
        for f in files:
            # Join with `root` (the directory currently being walked), not
            # the top-level `path`, so files in sub-directories keep their
            # real location.
            full_file_paths.append(os.path.join(root, f))

    processed_files = group_by_suffix(full_file_paths)

    # Sorted so the output order is deterministic.
    for fs in sorted(processed_files):
        print('--------------------------------')
        print(fs)
        print(processed_files[fs])