summaryrefslogtreecommitdiffstats
path: root/webopac.py
blob: f630eb11adf70b8eebfbaeeae612027b1d244aab (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/python
# vim: set fileencoding=utf-8 ts=4 sts=4 sw=4 tw=80 expandtab :

# Copyright (C) 2012 Florian Bruhin <me@the-compiler.org>

# webopac.py is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# webopac.py is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with webopac.py.  If not, see <http://www.gnu.org/licenses/>.

import http.cookiejar
import urllib.request
import urllib.parse
import csv
import logging
import optparse
import io
import sys
from time import sleep

def get_webopac_data(query, baseurl):
    """Fetch the hit list for *query* from a webOPAC catalogue as CSV text.

    Two requests are made through one cookie-aware opener: first
    ``start.do`` with the quoted query (presumably this starts the search
    session on the server -- confirm against the webOPAC behaviour), then
    ``hitOutput.do`` asking for hits 1 to 200 in CSV format.

    query   -- the search term; must be a str, it is URL-quoted here.
    baseurl -- URL prefix of the webOPAC client; the endpoint names are
               appended to it without any separator.

    Returns an io.TextIOWrapper over the complete response body, decoded as
    UTF-8.  Raises AssertionError if query is not a str; errors raised by
    urllib while opening the URLs are propagated unchanged.
    """
    # Checked before any network activity takes place.
    assert isinstance(query, str)

    cookies = http.cookiejar.CookieJar()
    opener = urllib.request.build_opener(
        urllib.request.HTTPCookieProcessor(cookies))

    search_url = (baseurl + "start.do?Query=-1=%22"
                  + urllib.parse.quote_plus(query) + "%22")
    # Only the cookies stored by this request are needed, not its body;
    # the with-block makes sure the connection is released again.
    with opener.open(search_url):
        pass

    params = {
        'methodToCall': 'submit',
        'listFormat':   'csv',
        'save':         'Speichern',
        'hitPart':      'pages',
        'hitFrom':      1,
        'hitTo':        200,
    }
    export_url = baseurl + "hitOutput.do?" + urllib.parse.urlencode(params)
    # Read the whole body while the response is open so that the returned
    # stream does not depend on the (now closed) connection.
    with opener.open(export_url) as response:
        raw = response.read()
    return io.TextIOWrapper(io.BytesIO(raw), encoding='UTF-8')

def decode_csv(f):
    """Wrap the text stream *f* in a csv.DictReader and return the reader.

    The reader takes its field names from the first row of *f* and yields
    one mapping per following row.
    """
    return csv.DictReader(f)

def initlog(options):
    """Set up the logging module according to ``options.loglevel``.

    options -- object with a ``loglevel`` attribute holding the name of a
               logging level (case-insensitive); a false value selects
               'error'.

    Raises ValueError if the name does not map to an integer attribute of
    the logging module.
    """
    level_name = options.loglevel or 'error'
    level = getattr(logging, level_name.upper(), None)
    if not isinstance(level, int):
        raise ValueError('Invalid log level: {}'.format(level_name))

    logging.basicConfig(
        level=level,
        format='%(asctime)s [%(levelname)s] %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
    )
    logging.info('Initalized.')

def parseopts(argv=None):
    """Parse the command line and return ``(options, args)``.

    argv -- list of argument strings to parse; with the default None,
            optparse falls back to ``sys.argv[1:]``.

    ``options`` carries ``loglevel`` (default 0) and ``baseurl``; ``args``
    holds exactly one element, the query.  If no query or more than one
    positional argument is given, parser.error() prints the usage and
    terminates the program via SystemExit.
    """
    parser = optparse.OptionParser("usage: %prog [options] query")
    parser.add_option('-l', '--log', dest='loglevel', help='Set loglevel',
                      default=0)
    parser.add_option('-b', '--baseurl', dest='baseurl', help='Set baseurl'
                      ' of the page', default='https://katalog.winterthur.ch/'
                                              'webOPACClient.sisis/')
    (options, args) = parser.parse_args(argv)
    if not args:
        # parser.error() already prefixes the message with "error: ".
        parser.error("You need to provide a query")
    elif len(args) > 1:
        # Only a single query is used, so anything beyond it is a mistake
        # (typically a multi-word query that was not quoted).
        parser.error("Too many arguments!")
    return (options, args)

def init():
    """Parse the command line, then configure logging from the result.

    Returns the ``(options, args)`` pair obtained from parseopts().
    """
    opts, positional = parseopts()
    initlog(opts)
    return (opts, positional)

def printdata(data):
    """Print title, author, year and signature of every row in *data*.

    data -- iterable of mappings with the keys 'Titel', 'Autor',
            'Erscheinungsjahr' and 'Signatur'.

    Each row is written to stdout as a four-line record followed by an
    empty line.
    """
    template = ("Title:    {title}\nAuthor:   {author}\n"
                "Year:     {year}\nSignatur: {sig}\n")
    for entry in data:
        record = template.format(
            title=entry['Titel'],
            author=entry['Autor'],
            year=entry['Erscheinungsjahr'],
            sig=entry['Signatur'],
        )
        print(record)

def main():
    """Look up the query given on the command line and print matching hits.

    The CSV export is fetched repeatedly until it parses without malformed
    rows.  Of the parsed rows only those whose 'Signatur' field contains
    ' # CD_' are printed.  The process exits with status 1 when no row
    matches.
    """
    (options, args) = init()
    while True:
        f = get_webopac_data(args[0], baseurl=options.baseurl)
        data = list(decode_csv(f))
        # csv.DictReader files surplus fields of an over-long row under the
        # key None, so a None key means the export was not clean CSV.
        if not any(None in elem for elem in data):
            break
        # Report and wait once per failed attempt, not once per bad row.
        print('ERROR!')
        sleep(30)

    # Build a real list: a generator expression is always truthy, which
    # would make the "no results" exit below unreachable.  A row that is
    # too short has Signatur set to None, hence the "or ''".
    data = [elem for elem in data if ' # CD_' in (elem['Signatur'] or '')]
    if not data:
        sys.exit(1)
    printdata(data)

# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()