1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
#!/usr/bin/python
# vim: set fileencoding=utf-8 ts=4 sts=4 sw=4 tw=80 expandtab :
# Copyright (C) 2012 Florian Bruhin <me@the-compiler.org>
# webopac.py is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# webopac.py is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with webopac.py. If not, see <http://www.gnu.org/licenses/>.
import http.cookiejar
import urllib.request
import urllib.parse
import csv
import logging
import optparse
import io
import sys
from time import sleep
def get_webopac_data(query, baseurl):
    """Fetch the hit list for *query* from a webOPAC catalogue as CSV text.

    Two requests are sent through one cookie-aware opener: the search itself
    (``start.do``) and then the CSV export of the hits (``hitOutput.do``,
    requested with ``hitFrom=1`` and ``hitTo=200``).  Cookies set by the
    first response are sent along with the second request.

    Args:
        query: Search phrase as ``str``.  It is URL-quoted and wrapped in
            ``%22`` (double quotes) before being sent.
        baseurl: Base URL of the webOPAC client.  The endpoint names are
            appended directly, so it has to end with a slash.

    Returns:
        An ``io.TextIOWrapper`` over the complete body of the export
        response, decoded as UTF-8.

    Raises:
        TypeError: If *query* is not a ``str``.
        urllib.error.URLError: If one of the HTTP requests fails.
    """
    # Explicit check rather than ``assert`` so it still runs under
    # ``python -O``; done first so that bad input never touches the network.
    if not isinstance(query, str):
        raise TypeError(
            'query must be str, not {}'.format(type(query).__name__))

    cookie_jar = http.cookiejar.CookieJar()
    opener = urllib.request.build_opener(
        urllib.request.HTTPCookieProcessor(cookie_jar))

    search_url = (baseurl + "start.do?Query=-1=%22"
                  + urllib.parse.quote_plus(query) + "%22")
    # The body of the search response is not used; the request is made for
    # its effect on the cookie jar (presumably the server ties the hit list
    # to that session -- confirm against the webOPAC behaviour).  Close the
    # response right away instead of leaking the connection.
    with opener.open(search_url):
        pass

    # Sent as a query string of a GET request, despite looking like form data.
    export_params = {
        'methodToCall': 'submit',
        'listFormat': 'csv',
        'save': 'Speichern',
        'hitPart': 'pages',
        'hitFrom': 1,
        'hitTo': 200,
    }
    export_url = (baseurl + "hitOutput.do?"
                  + urllib.parse.urlencode(export_params))
    # Read everything into memory so the connection can be closed before
    # the caller starts consuming the text stream.
    with opener.open(export_url) as response:
        payload = response.read()
    return io.TextIOWrapper(io.BytesIO(payload), encoding='UTF-8')
def decode_csv(f):
    """Wrap the text stream *f* in a ``csv.DictReader`` and return it.

    The reader is lazy: rows are parsed from *f* only while the caller
    iterates.  The first line of *f* supplies the field names.
    """
    return csv.DictReader(f)
def initlog(options):
    """Configure the root logger according to ``options.loglevel``.

    A falsy ``options.loglevel`` selects ``'error'``.  The name is
    upper-cased and looked up as an attribute of the ``logging`` module;
    if that does not yield an integer, ``ValueError`` is raised.
    """
    level_name = options.loglevel or 'error'
    level = getattr(logging, level_name.upper(), None)
    if not isinstance(level, int):
        raise ValueError('Invalid log level: {}'.format(level_name))

    log_settings = {
        'level': level,
        'format': '%(asctime)s [%(levelname)s] %(message)s',
        'datefmt': '%m/%d/%Y %H:%M:%S',
    }
    logging.basicConfig(**log_settings)
    logging.info('Initalized.')
def parseopts(argv=None):
    """Parse the command line into options and the positional query.

    Args:
        argv: Arguments to parse, without the program name.  ``None`` (the
            default) makes ``optparse`` read ``sys.argv[1:]``.

    Returns:
        A tuple ``(options, args)``.  ``options.loglevel`` holds the value of
        ``-l/--log`` (``0`` when the option is absent), ``options.baseurl``
        the value of ``-b/--baseurl``, and ``args`` is a one-element list
        containing the query.

    Raises:
        SystemExit: With status 2, after ``parser.error`` has printed the
            usage message, when no query or more than one positional
            argument is given.
    """
    parser = optparse.OptionParser("usage: %prog [options] query")
    parser.add_option('-l', '--log', dest='loglevel', help='Set loglevel',
                      default=0)
    parser.add_option('-b', '--baseurl', dest='baseurl', help='Set baseurl'
                      ' of the page', default='https://katalog.winterthur.ch/'
                      'webOPACClient.sisis/')
    (options, args) = parser.parse_args(argv)
    if not args:
        parser.error("Error: You need to provide a query")
    elif len(args) > 1:
        # Exactly one query is supported (see the usage string); a second
        # positional argument used to slip through and be silently ignored.
        parser.error("Too many arguments!")
    return (options, args)
def init():
    """Parse the command line, then set up logging from the parsed options.

    Returns the ``(options, args)`` pair obtained from ``parseopts()``.
    """
    opts, positional = parseopts()
    initlog(opts)
    return opts, positional
def printdata(data):
    """Print title, author, year and shelf mark of every row in *data*.

    Each row must be a mapping with the keys ``'Titel'``, ``'Autor'``,
    ``'Erscheinungsjahr'`` and ``'Signatur'``.  One four-line record is
    written to stdout per row, followed by an empty line.
    """
    template = ("Title: {title}\nAuthor: {author}\n"
                "Year: {year}\nSignatur: {sig}\n")
    for record in data:
        print(template.format(
            title=record['Titel'],
            author=record['Autor'],
            year=record['Erscheinungsjahr'],
            sig=record['Signatur'],
        ))
def main():
    """Run the tool: query the catalogue, filter the hits, print them.

    The query is ``args[0]`` from the command line.  The download is
    repeated every 30 seconds (after printing ``ERROR!``) for as long as the
    CSV contains a malformed row.  Of the hits, only rows whose
    ``'Signatur'`` field contains ``' # CD_'`` are printed.

    Raises:
        SystemExit: With status 1 when no row matches the filter.
    """
    (options, args) = init()
    while True:
        rows = list(decode_csv(
            get_webopac_data(args[0], baseurl=options.baseurl)))
        # csv.DictReader files surplus fields under the key ``None``, so a
        # ``None`` key marks a row with more fields than the header line.
        if not any(None in row for row in rows):
            break
        # Report and wait once per failed download, then try again.
        print('ERROR!')
        sleep(30)

    # Build a list, not a generator: a generator object is always truthy,
    # which made the "no results" exit below unreachable.  ``or ''`` guards
    # against short rows, whose missing fields DictReader fills with None.
    matches = [row for row in rows
               if ' # CD_' in (row['Signatur'] or '')]
    if not matches:
        sys.exit(1)
    printdata(matches)
# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|