diff options
author | Florian Bruhin <me@the-compiler.org> | 2012-02-12 13:31:19 +0100 |
---|---|---|
committer | Florian Bruhin <me@the-compiler.org> | 2012-02-12 13:31:19 +0100 |
commit | 5105fd1b68ffe250415c9b2a9f584927d0a808a0 (patch) | |
tree | 7adcdc7e53d9eda1d77f16521f5aa9d1651a00ef | |
parent | bf16fa29e343c753b19eb77f59e678be3551ecd1 (diff) | |
download | webopac-5105fd1b68ffe250415c9b2a9f584927d0a808a0.tar.gz webopac-5105fd1b68ffe250415c9b2a9f584927d0a808a0.zip |
Change from XML to CSV
-rw-r--r-- | webopac.py | 27 |
1 file changed, 15 insertions, 12 deletions
```diff
@@ -3,11 +3,12 @@
 import http.cookiejar
 import urllib.request
 import urllib.parse
-import lxml.etree
+import csv
 import logging
 import optparse
+import io
 
-def get_webopac_xml(query, baseurl):
+def get_webopac_data(query, baseurl):
     cj = http.cookiejar.CookieJar()
     opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
     assert(isinstance(query, str))
@@ -15,20 +16,21 @@ def get_webopac_xml(query, baseurl):
 
     postdata = {
         'methodToCall': 'submit',
-        'listFormat': 'xml',
+        'listFormat': 'csv',
         'save': 'Speichern',
         'hitPart': 'pages',
         'hitFrom': 1,
         'hitTo': 200,
     }
 
-    f = opener.open(baseurl + "hitOutput.do?" + urllib.parse.urlencode(postdata))
+    fb = opener.open(baseurl + "hitOutput.do?" + urllib.parse.urlencode(postdata))
+    f = io.TextIOWrapper(io.BytesIO(fb.read()), encoding='UTF-8')
+
     return(f)
 
-def decode_xml(f):
-    data = f.read()
-    root = lxml.etree.XML(data)
-    return(root)
+def decode_csv(f):
+    data = csv.DictReader(f)
+    return(data)
 
 def initlog(options):
     """ Initialisation of the log """
@@ -66,10 +68,11 @@ def init():
 
 def main():
     (options, args) = init()
-    f = get_webopac_xml(args[0], baseurl=options.baseurl)
-    root = decode_xml(f)
-    f.close()
-    print(root)
+    f = get_webopac_data(args[0], baseurl=options.baseurl)
+    data = decode_csv(f)
+    for row in data:
+        print(row)
+        print()
 
 if __name__ == '__main__':
     main()
```