#!/usr/bin/env python

"""
Populates a solr index with LCSH concepts from the triple store.

The existing index contents are deleted first. Then every subject typed as
skos:Concept in the store is added as one document carrying its URI, its
skos:prefLabel and all of its skos:altLabel values.
"""

from rdflib import ConjunctiveGraph
from rdflib import RDF

from lcsh.namespaces import SKOS
from lcsh.config import Config
from solr import SolrConnection

# number of added concepts between intermediate commits
COMMIT_INTERVAL = 10000

# predicates/classes looked up once instead of on every triple
CONCEPT = SKOS['Concept']
PREF_LABEL = SKOS['prefLabel']
ALT_LABEL = SKOS['altLabel']

config = Config()

# connect to solr
solr = SolrConnection(config.solr)

# clean out the index; the range query needs whitespace on both sides of TO
solr.delete_query('uri:[* TO *]')
solr.optimize()

# open triplestore
graph = ConjunctiveGraph('Sleepycat')
graph.open(config.store)

try:
    count = 0

    # find each concept
    for s in graph.subjects(predicate=RDF.type, object=CONCEPT):

        # gather properties for each concept; if a concept has several
        # prefLabels the last one yielded by the store wins
        pref_label = None
        alt_labels = []
        for p, o in graph.predicate_objects(s):
            if p == PREF_LABEL:
                pref_label = o
            elif p == ALT_LABEL:
                alt_labels.append(o)

        # add to index
        solr.add(uri=s, pref_label=pref_label, alt_label=alt_labels)

        # diagnostics
        count += 1
        print("%i %s" % (count, s))

        # commit in batches so a long run becomes visible incrementally
        if count % COMMIT_INTERVAL == 0:
            solr.commit()

    # commit whatever was added since the last full batch; without this the
    # trailing (count % COMMIT_INTERVAL) concepts were never committed
    solr.commit()

finally:
    graph.close()