Python offers the library NetworkX for manipulating graphs. You can learn more here:
https://networkx.github.io/documentation/stable/tutorial.html
import networkx as nx
%matplotlib inline
# Create an empty graph object; the cells below add nodes and edges to it.
G = nx.Graph()
Add nodes to the graph
# Add a single node.
G.add_node(1)
# Add several nodes at once from a list.
G.add_nodes_from([2,3])
# Node labels can be strings as well as integers.
G.add_node('Alice')
G.add_node('Bob')
# Show the nodes currently in the graph.
print(G.nodes())
Draw the graph
# Plot G as it stands after the node additions above.
nx.draw_networkx(G)
Add edges to the graph
# Add a single edge, given by its two endpoints.
G.add_edge(1,2)
# Add several edges at once from a list of endpoint pairs.
G.add_edges_from([(1,3),('Alice','Bob')])
# An edge held in a tuple is unpacked into the two endpoint arguments.
e = (1,'Alice')
G.add_edge(*e)
nx.draw_networkx(G)
Adding an edge with a new node will create the node Charlie in the graph
# 'Charlie' has not been added as a node; adding this edge creates it.
G.add_edge('Alice','Charlie')
# Print edges and nodes to confirm that 'Charlie' is now part of the graph.
print(G.edges())
print(G.nodes())
nx.draw_networkx(G)
A graph behaves like a dictionary whose keys are the nodes.
Each node maps to a dictionary whose keys are its neighbors and whose values are the properties of the corresponding edges.
# NOTE(review): the two bare expressions below only show a value when run as
# notebook cell output; as a plain script they print nothing.
type(G)
# Dictionary-style lookup of node 1 (its neighbors and the edge properties).
G[1]
# Node and edge collections of the graph.
print(G.nodes)
print(G.edges)
# Attribute dictionary stored for node 1.
print(G.nodes[1])
Creating a graph from edges
# Build a second graph purely from an edge list; the endpoints become its nodes.
initial_edges = [(1, 2), (1, 3), ('Alice', 'Bob'), (1, 'Alice')]
G2 = nx.Graph()
G2.add_edges_from(initial_edges)
print(G2.nodes())
print(G2.edges())
nx.draw_networkx(G2)

# Remove one edge, then nodes 3 and 1 (in that order), and show what is left.
G2.remove_edge(1, 3)
G2.remove_nodes_from([3, 1])
print(G2.nodes())
print(G2.edges())
nx.draw_networkx(G2)
Reading a graph from a file with a list of edges
https://networkx.org/documentation/stable/reference/readwrite/index.html
# Read a graph from a file that contains a list of edges.
# Node labels loaded this way are strings: the cells below index G3 with
# '1', '2', '3', 'Alice', ...
G3 = nx.read_edgelist('graph_edges.txt')
print(G3.nodes())
print(G3.edges())
nx.draw_networkx(G3)
You can assign attributes and values to the nodes and edges of the graph
# Attach a 'gender' attribute to the named nodes and a 'value' attribute to
# the numbered ones.
for name, gender in {'Alice': 'female', 'Bob': 'male', 'Charlie': 'male'}.items():
    G3.nodes[name]['gender'] = gender
for label, value in {'1': 1, '2': -1, '3': 0}.items():
    G3.nodes[label]['value'] = value

# Attribute dictionary of every node, then the neighbor dictionary of every node.
for node in G3:
    print(G3.nodes[node])
for node in G3:
    print(G3[node])

# Give the named nodes a 'value' attribute as well.
for name, value in {'Alice': 1, 'Bob': -1, 'Charlie': 1}.items():
    G3.nodes[name]['value'] = value

# Print "<node>:<value>" for every node, then each node next to all of its attributes.
for node in G3:
    print(f"{node}:{G3.nodes[node]['value']}")
for node in G3:
    print(node, G3.nodes[node])

# Edge attributes are set through the two endpoints; the lookup below uses the
# endpoints in the opposite order.
G3['Alice']['Bob']['label'] = 'strong'
print(G3['Bob']['Alice'])
print(G3['Alice'])
print(G3['Bob'])
nx.draw_networkx(G3, with_labels=True)
A special attribute of an edge is the "weight". When adding weighted edges, you enter triples consisting of the two edge endpoints and the weight of the edge. By default, this weight is stored in an edge attribute named "weight".
# Undirected weighted graph built from (endpoint, endpoint, weight) triples.
weighted_edges = [(1, 2, 0.5), (2, 3, 0.1), (3, 4, 0.7)]
G4 = nx.Graph()
G4.add_weighted_edges_from(weighted_edges)

# Attribute dictionary of every edge.
for u, v in G4.edges():
    print(G4[u][v])

# data=True additionally yields each edge's attribute dictionary, from which
# the weight is read.
for u, v, attrs in G4.edges(data=True):
    print(f"{u} {v} {attrs['weight']}")

# Neighbor dictionary of every node.
for n in G4:
    print(G4[n])

# The same kind of construction for a directed graph.
DG = nx.DiGraph()
DG.add_weighted_edges_from([(1, 2, 0.5), (3, 1, 0.75), (1, 4, 0.1)])
print(DG.edges())
for n in DG:
    print(DG[n])
nx.draw_networkx(DG)
Some common graph operations and algorithms are implemented in the NetworkX library.
http://networkx.readthedocs.io/en/networkx-1.11/reference/algorithms.html
Neighbors, degrees and adjacency matrix
nx.draw_networkx(G)
# G.neighbors(1) is an iterator; wrap it in list() so the neighbors themselves
# are printed (the directed-graph cell further down does the same).
print(list(G.neighbors(1)))  # returns the neighbors of a node
# The iterator can also be consumed directly in a loop.
for x in G.neighbors(1):
    print(x)
print('degree of 1:', G.degree(1))  # returns the degree of a node

A = nx.adjacency_matrix(G)
print(A)
# the adjacency matrix is stored as a sparse matrix
print(type(A))

# Node 3 of the weighted graph G4: its neighbor dictionary, its degree
# computed with the 'weight' edge attribute, and its plain degree.
print(G4[3])
print(G4.degree(3, weight='weight'))
print(G4.degree(3))
Neighbors and degrees for directed or weighted graphs
nx.draw_networkx(DG)
# Successors, neighbors and predecessors of node 1 in the directed graph.
for neighbor_iter in (DG.successors, DG.neighbors, DG.predecessors):
    print(list(neighbor_iter(1)))
# Out-degree, in-degree and weighted out-degree of node 1.
print(DG.out_degree(1))
print(DG.in_degree(1))
print(DG.out_degree(1, weight='weight'))

# Add an edge and remove it again, then add two more edges to G3.
G3.add_edge('1', 'Alice')
G3.remove_edge('1', 'Alice')  # you can also remove_node, or a list of nodes or edges (remove_nodes_from, remove_edges_from)
G3.add_edges_from([('Alice', 'Charlie'), ('1', '4')])
nx.draw_networkx(G3)

# Number of connected components, the type of the object returned by
# connected_components, and each component in turn.
print(nx.number_connected_components(G3))
C = nx.connected_components(G3)
print(type(C))
for component in C:
    print(component)
Get the connected component subgraphs
# One subgraph of G3 per connected component.
connected_subgraphs = [G3.subgraph(nodes) for nodes in nx.connected_components(G3)]
# For each component: its nodes, its edges and its number of nodes, one per line.
for GC in connected_subgraphs:
    print(GC.nodes(), GC.edges(), len(GC), sep='\n')
Get the largest connected component
# Node set of the largest connected component (largest by number of nodes).
largest_cc = max(nx.connected_components(G3), key=len)
print(largest_cc)

# Compute the node set again, then build and draw the subgraph on those nodes.
largest_cc = max(nx.connected_components(G3), key=len)
print(largest_cc)
CC_max = G3.subgraph(largest_cc)
nx.draw_networkx(CC_max)
# Add the edge between '1' and 'Alice' again before computing paths.
G3.add_edge('1','Alice')
nx.draw_networkx(G3)
# Shortest path between '3' and 'Bob'. Its length in edges is printed twice:
# once as len(sp)-1 and once via shortest_path_length.
sp = nx.shortest_path(G3,'3','Bob')
print(sp)
print(len(sp)-1)
print(nx.shortest_path_length(G3,'3','Bob'))
# Shortest paths from the single source '1'.
SP1 = nx.single_source_shortest_path(G3,'1')
print(SP1)
#print(nx.single_source_shortest_path_length(G3,'1'))
# Shortest paths between all pairs, materialised as a dict so they can be
# looked up as SP[source][target].
SP = dict(nx.all_pairs_shortest_path(G3))
print(SP)
print(SP['1']['Bob'])
# A small directed graph for the link-analysis examples.
DG2 = nx.DiGraph()
DG2.add_edges_from([(1,2),(1,3),(3,2),(2,5),(4,1),(4,2),(4,3),(5,1),(5,4)])
nx.draw_networkx(DG2)
# PageRank on the directed graph, then on the undirected G3 (pr is overwritten).
pr = nx.pagerank(DG2)
print(pr)
pr = nx.pagerank(G3)
print(pr)
# nx.hits returns a pair, unpacked here as h and a; a[2] looks up node 2 in the second.
# NOTE(review): presumably h = hub scores and a = authority scores - confirm against the NetworkX docs.
[h,a] = nx.hits(DG2)
print(h)
print(a)
print(a[2])
The class example
# NOTE: this rebinds G4, which previously held the small weighted example graph.
G4 = nx.read_edgelist('graph-example.txt')
nx.draw_networkx(G4)
# PageRank with the default settings, with alpha = 0.5, and with alpha = 0.5
# plus a personalization dictionary naming a single node ('1', then '6').
print(nx.pagerank(G4))
print(nx.pagerank(G4, alpha = 0.5))
print(nx.pagerank(G4, alpha = 0.5, personalization = {'1':1}))
print(nx.pagerank(G4, alpha = 0.5, personalization = {'6':1}))
# Edge betweenness centrality of G3.
BC = nx.edge_betweenness_centrality(G3)
print(BC)
# The same graph drawn with different drawing functions.
nx.draw_networkx(G3)
nx.draw_circular(G3)
nx.draw_spectral(G3, with_labels=True)
nx.draw_spring(G3)
nx.draw_spring(DG2)
# Load a graph from a GML file, using the 'id' field as the node label.
karate=nx.read_gml("karate.gml",label='id')
nx.draw_networkx(karate)
nx.draw_spring(karate,with_labels=True)
Change the size of the nodes depending on their pagerank value
# PageRank score of every node in the karate graph.
pr = nx.pagerank(karate)
# Scale each score by 10000 to obtain a node size, in the order of the pr dictionary.
node_sizes = [10000 * score for score in pr.values()]
nx.draw_networkx(karate, node_size=node_sizes)
We will now do our own implementation of Pagerank. Pagerank values for node $i$ are computed by iterating the following formula:
$$p_i = 0.85\sum_{j\rightarrow i}\frac{p_j}{d_{out}(j)} + 0.15\frac{1}{n}$$
We will associate each node with two values: the old PageRank from the previous step and the new one that is currently being computed. We initialize the old PageRank to $1/n$.
# Every node starts with old_pr = 1/n and an empty accumulator pr = 0.
node_count = len(karate.nodes)
for x in karate.nodes:
    karate.nodes[x].update(old_pr=1 / node_count, pr=0)
The algorithm goes over the edges in the graph, and for each edge (x,y) transfers a fraction of the Pagerank of x to y (and vice versa since the graph is undirected).
For the convergence check, we want the maximum difference between the old and new PageRank values to be less than eps.
# Power iteration for PageRank on the undirected karate graph.
# Reads the node attributes 'old_pr' (previous iterate) and 'pr' (accumulator
# for the current iterate) set up by the initialisation cell, and stops once
# no node's value changes by eps or more between two consecutive iterations.
eps = 0.0000001
DAMPING = 0.85
TELEPORT = 0.15  # 1 - DAMPING, written out so the float is exactly 0.15

# Loop invariants: the graph is not modified while iterating, so the node
# count and the degrees are computed once instead of on every pass.
node_count = len(karate.nodes)
base_rank = TELEPORT / node_count if node_count else 0.0
degree = dict(karate.degree())

while True:
    # The graph is undirected, so each edge passes rank in both directions:
    # every endpoint hands an equal share of its old rank to the other.
    for x, y in karate.edges:
        karate.nodes[y]['pr'] += karate.nodes[x]['old_pr'] / degree[x]
        karate.nodes[x]['pr'] += karate.nodes[y]['old_pr'] / degree[y]

    # Apply damping and the uniform term; track the largest change.
    diff = 0
    for x in karate.nodes:
        karate.nodes[x]['pr'] = karate.nodes[x]['pr'] * DAMPING + base_rank
        diff = max(diff, abs(karate.nodes[x]['pr'] - karate.nodes[x]['old_pr']))

    # Leave before the reset below so that 'pr' still holds the final values.
    if diff < eps:
        break

    # The new values become the old ones; clear the accumulator for the next pass.
    for x in karate.nodes:
        karate.nodes[x]['old_pr'] = karate.nodes[x]['pr']
        karate.nodes[x]['pr'] = 0

print({x: karate.nodes[x]['pr'] for x in karate.nodes})
We get essentially the same values as the PageRank vector computed earlier with `nx.pagerank` (displayed below).
# Display the PageRank dictionary computed earlier with nx.pagerank(karate), for comparison.
pr