#!/usr/bin/env python
"""
Put your key in ~/.calais and give entities.py a URL and it'll use
semanticproxy to extract names, places, etc from it.

Usage: entities.py URL

One line of the form "a <type> named <name>" is printed for every named
entity found in the RDF returned by the service.
"""

import os.path
import sys

# Default location of the file holding the Calais license key.
LICENSE_PATH = '~/.calais'

# Request URL template; filled in with (license key, URL to analyse).
SERVICE_URL = "http://service.semanticproxy.com/processurl/%s/rdf/%s"

# Predicate whose objects are printed as the entity's name.
NAME_PREDICATE = 'http://s.opencalais.com/1/pred/name'

# Only rdf:type values under this prefix are reported as entities.
ENTITY_TYPE_PREFIX = 'http://s.opencalais.com/1/type/em/e/'


def find_license(path=LICENSE_PATH):
    """Return the Calais license key stored in the file at *path*.

    A leading ``~`` in *path* is expanded to the user's home directory and
    surrounding whitespace is stripped from the file's contents.

    Raises:
        Exception: if the key file cannot be opened or read.
    """
    try:
        # The context manager closes the file even when read() fails.
        with open(os.path.expanduser(path)) as key_file:
            return key_file.read().strip()
    except (IOError, OSError):
        raise Exception("unable to find calais license key in %s" % path)


def build_service_url(key, url):
    """Return the request URL that asks the service to process *url*.

    *url* is appended verbatim (no percent-encoding), exactly as the
    original script did.
    """
    return SERVICE_URL % (key, url)


def entity_type(type_uri):
    """Return the last path segment of *type_uri* if it is an entity type.

    Returns None when *type_uri* does not start with ENTITY_TYPE_PREFIX.
    """
    if not type_uri.startswith(ENTITY_TYPE_PREFIX):
        return None
    return type_uri.split('/')[-1]


def main(argv=None):
    """Fetch the RDF for the URL given in *argv* and print its entities.

    *argv* defaults to ``sys.argv``. Returns the process exit status:
    0 on success, 2 when no URL argument was supplied.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write("usage: %s URL\n" % os.path.basename(argv[0]))
        return 2

    # rdflib is the only third-party dependency; importing it where it is
    # used keeps the helpers above importable without it installed.
    import rdflib

    request_url = build_service_url(find_license(), argv[1])

    graph = rdflib.ConjunctiveGraph()
    # Graph.load() is gone from current rdflib releases; parse() with an
    # explicit RDF/XML format is what load() did by default.
    graph.parse(request_url, format="xml")

    name = rdflib.URIRef(NAME_PREDICATE)
    for subject, type_uri in graph.subject_objects(predicate=rdflib.RDF.type):
        o_type = entity_type(type_uri)
        if o_type is None:
            continue
        for o_name in graph.objects(subject, name):
            # Single parenthesised argument: valid on Python 2 and 3 alike.
            print('a %s named %s' % (o_type, o_name))
    return 0


if __name__ == "__main__":
    sys.exit(main())