summaryrefslogtreecommitdiffstats
path: root/pyrmlint.py
blob: 1e9a91eadf6f2cb55064a6b5aeeda4a28f80e491 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/python

import sys

class File:
    """One log entry, populated from a single raw log line by fromlog().

    Every attribute is None until fromlog() has succeeded once.
    """

    dupflag = None  # entry tag as str (e.g. 'ORIG' or 'DUPL')
    md5sum = None   # checksum field as str
    path = None     # path field as bytes (never decoded)
    size = None     # size field as int
    devid = None    # device-id field as int
    inode = None    # inode field as int

    def fromlog(self, line):
        """Fill this object from one log line given as bytes.

        The line is expected to hold six fields separated by b'//':
        flag, md5sum, path, size, devid, inode.  Trailing '/', CR and LF
        bytes are ignored.  The shell-escaped quote sequence '"'"' inside
        the path is turned back into a single quote.

        Raises:
            ValueError: if the line does not contain the six fields, a
                numeric field is not an integer, or flag/md5sum are not
                ASCII.  The object is left unmodified in that case.
        """
        body = line.rstrip(b'/\r\n')
        # The path is the only free-form field and may itself contain b'//'
        # (or end in b'/'), so take the three numeric fields from the right
        # and the two leading fields from the left; what remains is the path.
        tail = body.rsplit(b'//', 3)
        if len(tail) != 4:
            raise ValueError("malformed log line: %r" % (line,))
        head = tail[0].split(b'//', 2)
        if len(head) != 3:
            raise ValueError("malformed log line: %r" % (line,))

        # Convert everything before assigning, so a conversion error cannot
        # leave the object half-populated.
        dupflag = head[0].decode('ASCII')
        md5sum = head[1].decode('ASCII')
        path = head[2].replace(b"'\"'\"'", b"'")  # undo shell quoting of '
        size = int(tail[1])
        devid = int(tail[2])
        inode = int(tail[3])

        self.dupflag = dupflag
        self.md5sum = md5sum
        self.path = path
        self.size = size
        self.devid = devid
        self.inode = inode

class Logreader:
    """Collect parsed log entries, both per tag and per duplicate group.

    Attributes:
        data: dict mapping each known tag to the list of entries carrying
            that tag, in input order.
        duplicates: list of groups; each group is a list whose first item
            is an 'ORIG' entry followed by the 'DUPL' entries that came
            after it with the same md5sum.
    """

    # Tags accepted by parse(); any other tag makes parse() raise KeyError.
    _TAGS = ('BLNK', 'OTMP', 'BASE', 'EDIR', 'JNKD', 'JNKF', 'ZERO',
             'NBIN', 'BGID', 'BUID', 'ORIG', 'DUPL')

    def __init__(self):
        # These containers must be created per instance: as class attributes
        # they would be shared (and reset) by every Logreader ever created.
        self.data = {tag: [] for tag in self._TAGS}
        self.duplicates = []
        self._curmd5 = None   # md5sum of the group currently being collected
        self._duplist = None  # entries of that group, or None if no group

    def _flushgroup(self):
        """Move the group being collected, if any, into self.duplicates."""
        if self._duplist is not None:
            self.duplicates.append(self._duplist)
        self._curmd5 = None
        self._duplist = None

    def _parsedupl(self, fobj):
        """Update duplicate grouping with one entry.

        An 'ORIG' entry closes the current group and opens a new one; a
        'DUPL' entry joins the current group.  Entries with any other tag
        are ignored here.

        Raises:
            IOError: if a 'DUPL' entry arrives with no open group or with
                an md5sum different from the open group's.
        """
        if fobj.dupflag == 'ORIG':
            self._flushgroup()
            self._curmd5 = fobj.md5sum
            self._duplist = [fobj]
        elif fobj.dupflag == 'DUPL':
            if self._duplist is None or self._curmd5 != fobj.md5sum:
                raise IOError("DUPL with unexpected md5 found")
            self._duplist.append(fobj)

    def parse(self, f):
        """Read every line of *f* and record the entries it describes.

        Args:
            f: iterable yielding log lines as bytes (e.g. a file opened in
                binary mode).  Lines starting with b'#' are skipped.

        Raises:
            ValueError: from File.fromlog() for a malformed line.
            KeyError: if a line carries a tag not listed in _TAGS.
            IOError: if a 'DUPL' entry does not match the preceding 'ORIG'.
        """
        for line in f:
            if line.startswith(b'#'):
                continue
            fobj = File()
            fobj.fromlog(line)
            self.data[fobj.dupflag].append(fobj)
            self._parsedupl(fobj)
        # Close the last group.  Input without any ORIG entry leaves nothing
        # to flush, and resetting the state keeps a later parse() call from
        # recording this group a second time.
        self._flushgroup()

def main():
    """Script entry point; currently a placeholder that does nothing."""
    return None

# Invoke main() only when this file is executed as a script, not on import.
if "__main__" == __name__:
    main()