#!/usr/bin/env python
"""web.py front end for browsing and searching LCSH concepts.

Serves three resources (see ``urls``):

* ``Search``  -- runs a Solr query and renders the hits as html, rdf/xml
  or json.
* ``Concept`` -- renders a single concept from the rdflib store as
  rdf/xml, n3, json, GraphGear xml or html.
* ``Index``   -- the homepage.

The representation is chosen from an optional file-name suffix on the URL
or, failing that, from the request's Accept header (``get_mime_type``).

Response bodies are written with ``print``; only the single-argument
``print(x)`` form is used so the statements read the same whether they are
parsed as a Python 2 print statement or a print() call.
"""

from sys import stderr
from random import randint
import atexit

import web
import simplejson
from rdflib import ConjunctiveGraph, Namespace, URIRef, Literal, BNode
from webob.acceptparse import Accept
from solr import SolrConnection

from lcsh.namespaces import *
from lcsh.config import Config
from widgets import ConceptWidget

# configure rdflib and solr
config = Config()
graph = ConjunctiveGraph('Sleepycat')
graph.open(config.store)
solr = SolrConnection(config.solr)

# set up web.py url mappings and templates
# (raw strings so the regex escapes are not read as string escapes)
urls = (
    r'/search(\.html|\.rdf|\.json)?$', 'Search',
    r'/(.+?)(\.html|\.rdf|\.json|\.n3|\.gg)?$', 'Concept',
    '/', 'Index',
)
render = web.template.render('templates', cache=False)

# URL suffix -> mime type, used to bypass content negotiation
_OVERRIDE_MIME_TYPES = {
    '.html': 'text/html',
    '.rdf': 'application/rdf+xml',
    '.json': 'application/json',
    '.n3': 'text/n3',
    '.gg': 'application/vnd.gg+xml',
}


class Concept:
    """Generates rdf/xml, n3, json and html views for a specific LCSH heading
    """

    def GET(self, lccn, accept_override):
        """Send one representation of the concept identified by ``lccn``.

        ``lccn`` is the first group captured by the Concept URL pattern and
        ``accept_override`` the optional suffix ('.rdf', '.n3', ...), or
        None when the URL had no suffix. Responds with web.notfound() when
        the store holds no triples about the concept.
        """
        concept_uri = LCSH[lccn + '#concept']

        # TODO: instantiate a Resource object
        concept = get_concept_subgraph(concept_uri)
        if not concept:
            return web.notfound()

        # set the Vary header so that caches will know
        # that we could be sending back different responses
        # for the same URI based on the Accept header
        web.header('Vary', 'Accept')

        # determine representation to send back
        mime_type = get_mime_type(accept_override)
        if mime_type == 'application/rdf+xml':
            web.header('Content-location', LCSH[lccn] + '.rdf')
            self.rdfxml(concept)
        elif mime_type == 'text/n3':
            web.header('Content-location', LCSH[lccn] + '.n3')
            self.n3(concept)
        elif mime_type == 'application/json':
            web.header('Content-location', LCSH[lccn] + '.json')
            self.json(concept_uri)
        elif mime_type == 'application/vnd.gg+xml':
            # get_mime_type only returns this type for an explicit '.gg'
            # suffix; no Content-location header is sent for it
            self.graphgear(concept_uri)
        else:
            web.header('Content-location', LCSH[lccn] + '.html')
            self.html(concept_uri)

    def rdfxml(self, concept):
        """Write the concept graph serialized as rdf/xml."""
        web.header('Content-type', 'application/rdf+xml; charset=UTF-8')
        print(concept.serialize(format='xml'))

    def n3(self, concept):
        """Write the concept graph serialized as n3."""
        web.header('Content-type', 'text/n3; charset=UTF-8')
        print(concept.serialize(format='n3'))

    def html(self, concept_uri):
        """Write the html page rendered by the ``concept`` template."""
        # TODO use same object as rdfxml and n3
        concept = ConceptWidget(concept_uri, graph)
        set_html_content_type()
        print(render.concept(concept))

    def json(self, concept_uri):
        """Write the json produced by ConceptWidget.json()."""
        # TODO use same object as rdfxml and n3
        concept = ConceptWidget(concept_uri, graph)
        web.header('Content-type', 'application/json; charset=UTF-8')
        print(concept.json())

    def graphgear(self, concept_uri):
        """Write the xml rendered by the ``graphgear`` template."""
        concept = ConceptWidget(concept_uri, graph)
        web.header('Content-type', 'application/vnd.gg+xml; charset=UTF-8')
        print(render.graphgear(concept))


class Index:
    """The homepage"""

    def GET(self):
        """Write the page rendered by the ``index`` template."""
        set_html_content_type()
        print(render.index())


class Search:
    """A search service that serves up rdf, json and html views"""

    def GET(self, accept_override):
        """Query Solr with the ``q`` request parameter and write the hits.

        When ``q`` is absent one of a few sample queries is picked at
        random. ``accept_override`` is the optional URL suffix, or None.
        """
        i = web.input()

        # create and execute the solr query
        samples = ['python', 'world wide web', 'semantic web', 'rdf',
                   'graph theory', 'cataloging']
        q = i.get('q', samples[randint(0, len(samples)-1)])
        r = solr.query(q, rows=1000, qt='dismax',
                       qf='pref_label alt_label',
                       pf='pref_label alt_label', fl='*')

        # generate the appropriate representation of results
        mime_type = get_mime_type(accept_override)
        if mime_type == 'application/rdf+xml':
            web.header('Content-type', 'application/rdf+xml; charset=UTF-8')
            print(render.search_rdf(r))
        elif mime_type == 'application/json':
            web.header('Content-type', 'application/json; charset=UTF-8')
            print(simplejson.dumps(r.results))
        else:
            set_html_content_type()
            print(render.search(r, q))


def get_concept_subgraph(s):
    """Return a new graph describing subject ``s``, or None.

    The result holds every (s, p, o) triple found in the module-level
    ``graph``, plus the triples whose subject is a blank node reachable
    from ``s`` (followed transitively). None is returned when nothing
    was found.
    """
    g = ConjunctiveGraph()
    g.bind('skos', SKOS)
    g.bind('lcsh', LCSH)
    g.bind('dcterms', DCTERMS)

    for p, o in graph.predicate_objects(s):
        g.add((s, p, o))

    # collect up any blank nodes, they really are evil aren't they?
    # The blank nodes are snapshotted before anything is added so that g
    # is never modified while one of its own iterators is being consumed.
    seen = set(o for o in g.objects() if isinstance(o, BNode))
    pending = list(seen)
    while pending:
        node = pending.pop()
        for p, o in graph.predicate_objects(node):
            g.add((node, p, o))
            # `seen` keeps cyclic blank-node structures from looping
            if isinstance(o, BNode) and o not in seen:
                seen.add(o)
                pending.append(o)

    # TODO: also collect prefLabels for related/broader/narrower concepts,
    # just to be nice and not require the user to initiate another GET to
    # figure out the labels

    if len(g) == 0:
        return None
    return g


def get_mime_type(accept_override=None):
    """Return the mime type of the representation to send back.

    A recognised ``accept_override`` suffix wins outright; otherwise the
    request's Accept header (defaulting to 'text/html') is negotiated
    against the types offered below, falling back to 'text/html' when
    none of them match.
    """
    # take into account override that allows a client to request
    # a specific representation, bypassing content negotiation
    if accept_override in _OVERRIDE_MIME_TYPES:
        return _OVERRIDE_MIME_TYPES[accept_override]

    # do content negotiation
    accept = Accept('Accept', web.ctx.environ.get('HTTP_ACCEPT', 'text/html'))
    mime_type = accept.best_match(['application/rdf+xml', 'text/n3',
                                   'application/json', 'text/xml',
                                   'application/xhtml+xml'])
    if mime_type is None:
        mime_type = "text/html"

    return mime_type


def set_html_content_type():
    """IE 6/7 doesn't understand xhtml MIME type

    Sends 'text/html' to those two browsers (detected from the User-Agent
    header) and 'application/xhtml+xml' to everything else.
    """
    user_agent = web.ctx.environ.get('HTTP_USER_AGENT', '')
    if 'MSIE 6.0' in user_agent or 'MSIE 7.0' in user_agent:
        web.header('Content-type', 'text/html; charset=UTF-8')
    else:
        web.header('Content-type', 'application/xhtml+xml; charset=UTF-8')


# WSGI entry point
application = web.wsgifunc(web.webpyfunc(urls, globals()))

# close the rdflib store when the interpreter exits
atexit.register(lambda: graph.close())

if __name__ == '__main__':
    web.run(urls, globals())
    graph.close()