summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Florian Bruhin <me@the-compiler.org> 2012-07-04 00:06:28 +0200
committer: Florian Bruhin <me@the-compiler.org> 2012-07-04 00:06:28 +0200
commit: 5f144b722d8cc0bb00d2575c464128a25e74d750 (patch)
tree: f0eb9471bd3d8387f9b2554902e25cc959335494
parent: 8ef57566f767890a3dd3619efc7774cea38e09c3 (diff)
download: webopac-5f144b722d8cc0bb00d2575c464128a25e74d750.tar.gz
download: webopac-5f144b722d8cc0bb00d2575c464128a25e74d750.zip
Cleanup
-rwxr-xr-x [-rw-r--r--] webopac.py 160
1 files changed, 93 insertions, 67 deletions
diff --git a/webopac.py b/webopac.py
index 48b2392..f630eb1 100644..100755
--- a/webopac.py
+++ b/webopac.py
@@ -1,4 +1,20 @@
#!/usr/bin/python
+# vim: set fileencoding=utf-8 ts=4 sts=4 sw=4 tw=80 expandtab :
+
+# Copyright (C) 2012 Florian Bruhin <me@the-compiler.org>
+
+# webopac.py is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# webopac.py is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with webopac.py. If not, see <http://www.gnu.org/licenses/>.
import http.cookiejar
import urllib.request
@@ -8,87 +24,97 @@ import logging
import optparse
import io
import sys
+from time import sleep
def get_webopac_data(query, baseurl):
- cj = http.cookiejar.CookieJar()
- opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
- assert(isinstance(query, str))
- query = urllib.parse.quote_plus(query)
- f = opener.open(baseurl + "start.do?Query=-1=%22" + query + "%22")
-
- postdata = {
- 'methodToCall': 'submit',
- 'listFormat': 'csv',
- 'save': 'Speichern',
- 'hitPart': 'pages',
- 'hitFrom': 1,
- 'hitTo': 200,
- }
-
- fb = opener.open(baseurl + "hitOutput.do?" + urllib.parse.urlencode(postdata))
- f = io.TextIOWrapper(io.BytesIO(fb.read()), encoding='UTF-8')
-
- return(f)
+ cj = http.cookiejar.CookieJar()
+ cp = urllib.request.HTTPCookieProcessor(cj)
+ opener = urllib.request.build_opener(cp)
+ assert(isinstance(query, str))
+ query = urllib.parse.quote_plus(query)
+ f = opener.open(baseurl + "start.do?Query=-1=%22" + query + "%22")
+ postdata = {
+ 'methodToCall': 'submit',
+ 'listFormat': 'csv',
+ 'save': 'Speichern',
+ 'hitPart': 'pages',
+ 'hitFrom': 1,
+ 'hitTo': 200,
+ }
+ url = baseurl + "hitOutput.do?" + urllib.parse.urlencode(postdata)
+ fb = opener.open(url)
+ f = io.TextIOWrapper(io.BytesIO(fb.read()), encoding='UTF-8')
+ return(f)
def decode_csv(f):
- data = csv.DictReader(f)
- return(data)
+ data = csv.DictReader(f)
+ return(data)
def initlog(options):
- """ Initialisation of the log """
- if (options.loglevel):
- loglevel = options.loglevel
- else:
- loglevel = 'error'
- numeric_level = getattr(logging, loglevel.upper(), None)
+ """ Initialisation of the log """
+ if (options.loglevel):
+ loglevel = options.loglevel
+ else:
+ loglevel = 'error'
+ numeric_level = getattr(logging, loglevel.upper(), None)
- if not isinstance(numeric_level, int):
- raise ValueError('Invalid log level: {}'.format(loglevel))
- logging.basicConfig(level=numeric_level,
- format='%(asctime)s [%(levelname)s] %(message)s',
- datefmt='%m/%d/%Y %H:%M:%S')
- logging.info('Initalized.')
+ if not isinstance(numeric_level, int):
+ raise ValueError('Invalid log level: {}'.format(loglevel))
+ logging.basicConfig(level=numeric_level,
+ format='%(asctime)s [%(levelname)s] %(message)s',
+ datefmt='%m/%d/%Y %H:%M:%S')
+ logging.info('Initalized.')
def parseopts():
- parser = optparse.OptionParser("usage: %prog [options] query")
- parser.add_option('-l', '--log', dest='loglevel', help='Set loglevel',
- default=0)
- parser.add_option('-b', '--baseurl', dest='baseurl', help='Set baseurl'
- ' of the page', default='https://katalog.bibliotheken.'
- 'winterthur.ch/webOPACClient.sisis/')
- (options, args) = parser.parse_args()
- if len(args) == 0:
- parser.error("Error: You need to provide a query")
- elif len(args) > 2:
- parser.error("Too many arguments!")
- return (options, args)
+ parser = optparse.OptionParser("usage: %prog [options] query")
+ parser.add_option('-l', '--log', dest='loglevel', help='Set loglevel',
+ default=0)
+ parser.add_option('-b', '--baseurl', dest='baseurl', help='Set baseurl'
+ ' of the page', default='https://katalog.winterthur.ch/'
+ 'webOPACClient.sisis/')
+ (options, args) = parser.parse_args()
+ if len(args) == 0:
+ parser.error("Error: You need to provide a query")
+ elif len(args) > 2:
+ parser.error("Too many arguments!")
+ return (options, args)
def init():
- (options, args) = parseopts()
- initlog(options)
- return (options, args)
+ (options, args) = parseopts()
+ initlog(options)
+ return (options, args)
def printdata(data):
- for row in data:
- formatdict={
- 'title': row['Titel'],
- 'author': row['Autor'],
- 'year': row['Erscheinungsjahr'],
- 'sig': row['Signatur'],
- }
- print("Title: {title}\nAuthor: {author}\n"
- "Year: {year}\nSignatur: {sig}\n".format(**formatdict))
+ for row in data:
+ formatdict={
+ 'title': row['Titel'],
+ 'author': row['Autor'],
+ 'year': row['Erscheinungsjahr'],
+ 'sig': row['Signatur'],
+ }
+ print("Title: {title}\nAuthor: {author}\n"
+ "Year: {year}\nSignatur: {sig}\n".format(**formatdict))
def main():
- (options, args) = init()
- f = get_webopac_data(args[0], baseurl=options.baseurl)
- data = decode_csv(f)
- data = (elem for elem in data if ' # CD_' in elem['Signatur'])
- data = list(data)
- if data:
- printdata(data)
- else:
- sys.exit(1)
+ (options, args) = init()
+ success=0
+ while (success == 0):
+ f = get_webopac_data(args[0], baseurl=options.baseurl)
+ data = decode_csv(f)
+ data = list(data)
+ success=1
+
+ for elem in data:
+ if None in elem:
+ print('ERROR!')
+ success=0
+ sleep(30)
+
+ data = (elem for elem in data if ' # CD_' in elem['Signatur'])
+ if data:
+ printdata(data)
+ else:
+ sys.exit(1)
if __name__ == '__main__':
- main()
+ main()