#!/usr/bin/env python

"""
Queries a populated rdflib store (created with crawl.py), generates a 
networkx graph of author/title links, and outputs as a PNG.
"""

from rdflib.graph import ConjunctiveGraph
from rdflib.namespace import Namespace

import networkx as nx
import matplotlib.pyplot as plt

# In-memory graph that will hold the author/title network to be drawn.
G = nx.Graph()

# Namespaces for the dct: and foaf: prefixes used in the SPARQL query.
foaf = Namespace('http://xmlns.com/foaf/0.1/')
dct = Namespace('http://purl.org/dc/terms/')

# Open the Sleepycat-backed rdflib store located at the path 'store'.
graph = ConjunctiveGraph('Sleepycat')
graph.open('store')

# SPARQL: one row per (resource, creator) pair, carrying the resource's
# dct:title and the creator's foaf:name.
sparql = '''
    SELECT ?title_uri ?title ?author_uri ?author
    WHERE { 
      ?title_uri dct:title ?title .
      ?title_uri dct:creator ?author_uri .
      ?author_uri foaf:name ?author .
    }
    '''
prefixes = {'foaf': foaf, 'dct': dct}
results = graph.query(sparql, initNs=prefixes)

# Build the networkx graph: one node per author and one per title, keyed by
# URI, with an edge linking each author to each title they are a creator of.
# NOTE(review): on Python 2, str() of a name/title containing non-ASCII
# characters may raise UnicodeEncodeError -- confirm against the store's data.
for title_uri, title, author_uri, author in results:
    G.add_node(author_uri, label=str(author), type='author')
    G.add_node(title_uri, label=str(title), type='title')
    G.add_edge(author_uri, title_uri)
    # Single-argument print(...) parses as a statement on Python 2 and as a
    # function call on Python 3, producing the same output on both.
    print("%s -> %s" % (author_uri, title_uri))

# Remove connected components of 4 or fewer nodes (makes the graph less
# cluttered). The node lists are collected up front because G must not be
# mutated while its components are still being enumerated: when
# connected_component_subgraphs() is a lazy generator, removing nodes
# mid-iteration fails with "dictionary changed size during iteration".
small_clusters = [c.nodes()
                  for c in nx.connected_component_subgraphs(G)
                  if len(c) <= 4]
for cluster_nodes in small_clusters:
    G.remove_nodes_from(cluster_nodes)

# Build one colour per node, in node iteration order ('r' for authors, 'g'
# for everything else, i.e. titles), and a label for each author node whose
# degree is greater than 2 (an author linked to more than 2 titles).
node_colors = []
node_labels = {}
for node_id in G.nodes_iter():
    attrs = G.node[node_id]
    # Named 'node_type' rather than 'type' so the builtin is not shadowed
    # at module scope.
    node_type = attrs['type']
    if node_type == 'author':
        node_colors.append('r')
        if G.degree(node_id) > 2:
            node_labels[node_id] = attrs['label']
    else:
        node_colors.append('g')

# Draw the graph and save it as authorship.png. The rdflib store is closed
# in a finally clause so it is released even if layout, drawing or saving
# raises.
try:
    pos = nx.graphviz_layout(G)
    nx.draw(G,
            pos=pos,
            node_size=10,
            node_color=node_colors,
            labels=node_labels,
            font_size=6,
            vmin=80.0)
    plt.savefig('authorship.png', dpi=500)
finally:
    # finish up
    graph.close()
