summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Florian Bruhin <git@the-compiler.org> 2013-11-22 13:03:56 +0100
committer Florian Bruhin <git@the-compiler.org> 2013-11-22 13:03:56 +0100
commit d025871cbaf12e0173b81b697f6661307557a5a6 (patch)
tree 77d1393cd73e6aeed4669253bda0a40ae423cda5
download pyrmlint-d025871cbaf12e0173b81b697f6661307557a5a6.tar.gz
pyrmlint-d025871cbaf12e0173b81b697f6661307557a5a6.zip
Initial commit
-rw-r--r-- pyrmlint.py 62
1 files changed, 62 insertions, 0 deletions
diff --git a/pyrmlint.py b/pyrmlint.py
new file mode 100644
index 0000000..338854b
--- /dev/null
+++ b/pyrmlint.py
@@ -0,0 +1,62 @@
+#!/usr/bin/python
+
+import sys
+
class File:
    """One record parsed from a log line.

    All attributes default to ``None`` and are filled in by :meth:`fromlog`.
    """

    dupflag = None  # str: first field of the record (e.g. 'ORIG', 'DUPL')
    md5sum = None   # str: second field of the record
    path = None     # bytes: third field, kept undecoded
    size = None     # int: fourth field
    devid = None    # int: fifth field
    inode = None    # int: sixth field

    def __repr__(self):
        return '{}(dupflag={!r}, md5sum={!r}, path={!r})'.format(
            type(self).__name__, self.dupflag, self.md5sum, self.path)

    def fromlog(self, line):
        """Populate this object from one raw log line.

        The line must hold six ``//``-separated fields in the order
        ``dupflag//md5sum//path//size//devid//inode``; trailing slashes and
        line-ending characters are ignored.

        The first two fields are split off from the left and the last three
        from the right, so a path that itself contains ``//`` or ends in
        ``/`` is kept intact instead of being rejected.

        Args:
            line: The log line as ``bytes``.

        Raises:
            ValueError: If the line does not contain six fields, a numeric
                field is not an integer, or a text field is not ASCII.
                The object is left unmodified in that case.
        """
        stripped = line.rstrip(b'/\r\n')

        head = stripped.split(b'//', 2)
        if len(head) != 3:
            raise ValueError('malformed log line: {!r}'.format(line))
        tail = head[2].rsplit(b'//', 3)
        if len(tail) != 4:
            raise ValueError('malformed log line: {!r}'.format(line))
        raw_path, raw_size, raw_devid, raw_inode = tail

        # Convert everything first so a failure cannot leave the object
        # half-populated.
        dupflag = head[0].decode('ASCII')
        md5sum = head[1].decode('ASCII')
        # The log writes a single quote as the sequence '"'"' ; undo that.
        path = raw_path.replace(b"'\"'\"'", b"'")
        size = int(raw_size)
        devid = int(raw_devid)
        inode = int(raw_inode)

        self.dupflag = dupflag
        self.md5sum = md5sum
        self.path = path
        self.size = size
        self.devid = devid
        self.inode = inode
+
class Logreader:
    """Read log lines into ``File`` objects and group them.

    Attributes:
        data: dict mapping a record tag (``File.dupflag``) to the list of
            ``File`` objects carrying that tag.
        duplicates: list of lists; each inner list holds the 'ORIG' and
            'DUPL' entries that share one ``md5sum``.
    """

    # Tags that always get a (possibly empty) bucket in ``data``.
    FLAGS = ('BLNK', 'OTMP', 'BASE', 'EDIR', 'JNKD', 'JNKF', 'ZERO',
             'NBIN', 'BGID', 'BUID', 'ORIG', 'DUPL')

    def __init__(self):
        # Built per instance: class-level containers would be shared by
        # every Logreader, so creating a second reader would clobber the
        # first one's results.
        self.data = {flag: [] for flag in self.FLAGS}
        self.duplicates = []

    def _parsedupl(self):
        """Rebuild ``duplicates`` from the 'ORIG' and 'DUPL' buckets."""
        by_sum = {}
        for entry in self.data['ORIG'] + self.data['DUPL']:
            by_sum.setdefault(entry.md5sum, []).append(entry)
        self.duplicates = list(by_sum.values())

    def parse(self, f):
        """Parse every line of *f* and refresh ``duplicates``.

        Lines starting with ``#`` and blank lines are skipped. Entries with
        a tag not listed in ``FLAGS`` are stored under a new bucket rather
        than aborting the whole run.

        Args:
            f: Iterable yielding log lines as ``bytes`` (for example a file
                opened in binary mode).

        Raises:
            ValueError: Propagated from ``File.fromlog`` for a malformed
                line.
        """
        for line in f:
            if line.startswith(b'#') or not line.strip():
                continue
            fobj = File()
            fobj.fromlog(line)
            self.data.setdefault(fobj.dupflag, []).append(fobj)
        self._parsedupl()
+
def main(argv=None):
    """Print the paths of each duplicate group found in a log file.

    Each group is written to standard output as one raw path per line,
    followed by an empty line. Paths are written as bytes, undecoded.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``. Only the first argument (the log file path)
            is used.

    Returns:
        0 on success, 2 if no log file was given.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        # Previously this surfaced as a bare IndexError traceback.
        sys.stderr.write('usage: pyrmlint.py LOGFILE\n')
        return 2

    reader = Logreader()
    with open(args[0], 'rb') as logfile:
        reader.parse(logfile)

    out = sys.stdout.buffer
    for group in reader.duplicates:
        for fobj in group:
            out.write(fobj.path)
            out.write(b'\n')
        out.write(b'\n')
    return 0


if __name__ == '__main__':
    sys.exit(main())