I have a Git repository with several thousand files, and would like to get the date and time of the last commit for each individual file. Can this be done using Python (e.g., by using something like os.path.getmtime(path))?
Advertisement
Answer
An interesting question. Below is a quick and dirty implementation.
I’ve used multiprocessing.Pool.imap_unordered() to run the git subprocesses in parallel because it’s convenient.
JavaScript
x
114
114
1
#!/usr/bin/env python
2
# vim:fileencoding=utf-8:ft=python
3
#
4
# Author: R.F. Smith <rsmith@xs4all.nl>
5
# Last modified: 2015-05-24 12:28:45 +0200
6
#
7
# To the extent possible under law, Roland Smith has waived all
8
# copyright and related or neighboring rights to gitdates.py. This
9
# work is published from the Netherlands. See
10
# http://creativecommons.org/publicdomain/zero/1.0/
11
12
"""For each file in a directory managed by git, get the short hash and
date of the most recent commit of that file."""
14
15
from __future__ import print_function
16
from multiprocessing import Pool
17
import os
18
import subprocess
19
import sys
20
import time
21
22
# Suppress annoying command prompts on ms-windows.
# startupinfo is passed to every subprocess call in this script; it stays
# None on other platforms, which is the subprocess default.
if os.name == 'nt':
    startupinfo = subprocess.STARTUPINFO()
    startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
else:
    startupinfo = None
27
28
29
def main():
    """
    Entry point for gitdates.

    Walks the current directory, asks git for the most recent commit of
    every file that git does not ignore, and prints one line per file for
    which git reports a commit, formatted as ``name|short hash|date`` and
    sorted with the most recently committed file first.

    Calls sys.exit when git cannot be run or when the current directory
    does not contain a '.git' entry.
    """
    checkfor(['git', '--version'])
    if '.git' not in os.listdir('.'):
        print('This directory is not managed by git.')
        sys.exit(0)
    # Ask git which untracked files are ignored, so that no git process is
    # started for them. '-z' yields NUL-separated, unquoted paths; names
    # containing whitespace therefore survive intact.
    exargs = ['git', 'ls-files', '-i', '-o', '-z', '--exclude-standard']
    raw = subprocess.check_output(exargs, startupinfo=startupinfo)
    if not isinstance(raw, str):
        # Python 3 returns bytes while os.walk('.') yields str; decode so
        # that the membership test below can actually match.
        raw = os.fsdecode(raw)
    # git reports paths relative to the current directory; normalize both
    # sides so './a/b' from os.walk compares equal to 'a/b' from git.
    excluded = set(os.path.normpath(p) for p in raw.split('\0') if p)
    # Get a list of all files that are not excluded.
    allfiles = []
    for root, dirs, files in os.walk('.'):
        # Prune in place so os.walk does not descend into these.
        dirs[:] = [d for d in dirs if d not in ('.git', '__pycache__')]
        for f in files:
            path = os.path.join(root, f)
            if os.path.normpath(path) not in excluded:
                allfiles.append(path)
    # Gather the files' data using a Pool.
    p = Pool()
    filedata = [res for res in p.imap_unordered(filecheck, allfiles)
                if res is not None]
    p.close()
    p.join()
    # Sort the data (latest modified first) and print it
    filedata.sort(key=lambda a: a[2], reverse=True)
    dfmt = '%Y-%m-%d %H:%M:%S %Z'
    for name, tag, date in filedata:
        print('{}|{}|{}'.format(name, tag, time.strftime(dfmt, date)))
60
61
62
def checkfor(args, rv=0):
    """
    Make sure that a program necessary for using this script is available.
    Calls sys.exit(1) when the program cannot be started or when it
    returns something other than the expected value.

    Arguments:
        args: String or list of strings of commands. A single string may
            not contain spaces.
        rv: Expected return value from invoking the command.

    Raises:
        ValueError: if args is a single string that contains a space.
    """
    if isinstance(args, str):
        if ' ' in args:
            raise ValueError('no spaces in single command allowed')
        args = [args]
    try:
        # Discard the program's own output; only its exit status matters.
        with open(os.devnull, 'w') as bb:
            rc = subprocess.call(args, stdout=bb, stderr=bb,
                                 startupinfo=startupinfo)
    except OSError as oops:
        # The program could not be started at all.
        outs = "Required program '{}' not found: {}."
        print(outs.format(args[0], oops.strerror))
        sys.exit(1)
    if rc != rv:
        # The program ran but misbehaved; say so, rather than reporting it
        # as "not found" with an empty reason.
        outs = "Required program '{}' returned {} instead of {}."
        print(outs.format(args[0], rc, rv))
        sys.exit(1)
86
87
88
def filecheck(fname):
    """
    Start a git process to get the latest commit info for a file.

    Arguments:
        fname: Name of the file to check. A leading './' (or '.' followed
            by os.sep) is dropped from the name that is returned.

    Returns:
        A 3-tuple containing the file name, latest short hash and latest
        commit date (the author timestamp converted with time.gmtime),
        or None when git exits with an error or prints nothing usable
        for the file.
    """
    # '--' ends option/revision parsing, so fname is always taken as a path.
    args = ['git', '--no-pager', 'log', '-1', '--format=%h|%at', '--', fname]
    try:
        b = subprocess.check_output(args, startupinfo=startupinfo)
        # Empty output (no commit touches the file) fails to unpack and is
        # reported as None through the ValueError handler below.
        h, t = b.decode().strip().split('|')
        stamp = time.gmtime(float(t))
    except (subprocess.CalledProcessError, ValueError):
        return None
    # Only strip the prefix when it is really there, instead of blindly
    # chopping the first two characters of the name.
    if fname.startswith(('./', '.' + os.sep)):
        fname = fname[2:]
    return (fname, h, stamp)
110
111
112
# Run the report only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
114
Example output:
JavaScript
1
8
1
serve-git|8d92934|2012-08-31 21:21:38 +0200
2
setres|8d92934|2012-08-31 21:21:38 +0200
3
mydec|e711e27|2008-04-09 21:26:05 +0200
4
sync-iaudio|8d92934|2012-08-31 21:21:38 +0200
5
tarenc|8d92934|2012-08-31 21:21:38 +0200
6
keypress.sh|a5c0fb5|2009-09-29 00:00:51 +0200
7
tolower|8d92934|2012-08-31 21:21:38 +0200
8
Edit: Updated to use os.devnull (which works on ms-windows as well) instead of /dev/null.
Edit2: Used startupinfo to suppress command prompts popping up on ms-windows.
Edit3: Used __future__ to make this compatible with both Python 2 and 3. Tested with 2.7.9 and 3.4.3. Now also available on GitHub.