summaryrefslogtreecommitdiffstats
path: root/pyrmlint.py
blob: 72d320b46f0016cf6e5fed4f986d99572a9d744c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/python

import sys

class File:
    """One entry read from a log line (presumably an rmlint log).

    All attributes are None until fromlog() has parsed a line successfully:

    dupflag -- first field, decoded from ASCII
    md5sum  -- second field, decoded from ASCII
    path    -- third field, kept as bytes (no encoding is assumed)
    size    -- fourth field as int
    devid   -- fifth field as int
    inode   -- sixth field as int
    """
    dupflag = None
    md5sum = None
    path = None
    size = None
    devid = None
    inode = None

    def fromlog(self, line):
        """Fill this object from one raw log line.

        line is a bytes object holding six fields separated by b'//':
        flag, md5sum, path, size, device id and inode.  Trailing slashes
        and line terminators are ignored.

        Raises ValueError if the line does not contain six fields, if a
        numeric field is not an integer, or if the flag/md5sum are not
        ASCII (UnicodeDecodeError is a ValueError subclass).  On failure
        the object is left unmodified.
        """
        record = line.rstrip(b'/\r\n')
        # The path may itself contain '//' (or end in '/'), so a plain
        # split on the separator can miscount.  The flag and checksum are
        # taken from the left, the three numeric fields from the right,
        # and whatever remains in between is the path.
        head = record.split(b'//', 2)
        tail = head[2].rsplit(b'//', 3) if len(head) == 3 else []
        if len(tail) != 4:
            raise ValueError('malformed log line: %r' % (line,))

        # Convert everything first so a bad field cannot leave the object
        # half-initialised.
        dupflag = head[0].decode('ASCII')
        md5sum = head[1].decode('ASCII')
        # The log carries single quotes in their shell-escaped form
        # ('"'"'); turn them back into a plain quote.
        path = tail[0].replace(b"'\"'\"'", b"'")
        size = int(tail[1])
        devid = int(tail[2])
        inode = int(tail[3])

        self.dupflag = dupflag
        self.md5sum = md5sum
        self.path = path
        self.size = size
        self.devid = devid
        self.inode = inode

class Logreader:
    """Collect File entries from a log, grouped by their flag.

    Instance attributes:

    data       -- dict mapping each known flag to the list of File
                  objects parsed with that flag
    duplicates -- list of lists; each inner list holds the 'ORIG' and
                  'DUPL' entries that share one md5sum (filled by parse())
    """

    # Flags that parse() accepts; each one gets its own bucket in `data`.
    _FLAGS = ('BLNK', 'OTMP', 'BASE', 'EDIR', 'JNKD', 'JNKF', 'ZERO',
              'NBIN', 'BGID', 'BUID', 'ORIG', 'DUPL')

    def __init__(self):
        # Create the containers per instance.  As class-level mutables
        # they were shared by every reader, and constructing a new reader
        # reset the buckets of all existing ones.
        self.data = dict((flag, []) for flag in self._FLAGS)
        self.duplicates = []

    def _parsedupl(self):
        """Rebuild `duplicates` by grouping ORIG and DUPL entries by md5sum."""
        groups = {}
        for entry in self.data['ORIG'] + self.data['DUPL']:
            groups.setdefault(entry.md5sum, []).append(entry)
        self.duplicates = list(groups.values())

    def parse(self, f):
        """Read every line of f into `data`, then regroup the duplicates.

        f is an iterable of bytes lines (for example a file opened in
        binary mode).  Lines starting with b'#' are skipped.

        Raises ValueError if File.fromlog() rejects a line and KeyError if
        a line carries a flag that has no bucket in `data`.
        """
        for line in f:
            if line.startswith(b'#'):
                continue
            fobj = File()
            fobj.fromlog(line)
            self.data[fobj.dupflag].append(fobj)
        self._parsedupl()

def main():
    """Script entry point; currently a placeholder that does nothing."""
    return None

# Call main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()