-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmultigraph.py
More file actions
511 lines (411 loc) · 21 KB
/
Copy pathmultigraph.py
File metadata and controls
511 lines (411 loc) · 21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
# *********************************************************************************************
# Copyright (C) 2016 Jillian Anderson, Joel Becker, Steve McColl and Dr. John McLevey
#
# This file is part of the gitnet package developed for Dr John McLevey's Networks Lab
# at the University of Waterloo. For more information, see http://networkslab.org/gitnet/.
#
# gitnet is free software: you can redistribute it and/or modify it under the terms of a
# GNU General Public License as published by the Free Software Foundation. gitnet is
# distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
# the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with gitnet.
# If not, see <http://www.gnu.org/licenses/>.
# *********************************************************************************************
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import os
import warnings
import copy
from gitnet.exceptions import MergeError
from gitnet.helpers import list_to_scd
from gitnet.helpers import datetime_git
from networkx.drawing.nx_agraph import graphviz_layout
from networkx.algorithms import bipartite
class MultiGraphPlus(nx.MultiGraph):
mode1 = ""
mode2 = ""
def collapse_edges(self, sum_weights=False):
"""
Collapses all edges which share nodes into one edge, with a new weight assigned to it. How this weight is
assigned depends on the `sum_weights` parameter.
**Parameters** :
> *sum_weights* : `bool`
>> An optional boolean parameter. Determines how weights will be assigned to the final edges.
>> If False, the weight will be the number of edges which were collapsed. If True, the weight will be the sum of
>> the weights of collapsed edges.
**Return** :
> A new MultiGraphPlus object, which has collapsed all duplicate edges, assigned a new weight, and
> stores other edge data in lists.
**Note** :
> The default weight of an edge is 1. Thus, if sum_weights is set to True, but an edge does not have a
> weight attribute, this method assumes the weight of the edge is 1.
"""
gnew = MultiGraphPlus()
for n1, n2, data in self.edges(data=True):
if gnew.has_edge(n1, n2):
gnew_data = gnew.edge[n1][n2][0]
if 'weight' not in data:
gnew_data['weight'] += 1
for k in data:
if k in gnew_data:
if k == 'weight':
if sum_weights:
gnew_data[k] += data[k]
else:
gnew_data[k] += 1
elif isinstance(data[k], list):
gnew_data[k] += data[k]
else:
if isinstance(gnew_data[k], list):
gnew_data[k] += [data[k]]
else:
gnew_data[k] = [gnew_data[k], data[k]]
else:
gnew_data[k] = data[k]
else:
edge_attr = {'weight': 1}
for k in data:
if k == 'weight':
if sum_weights:
edge_attr[k] = data[k]
elif isinstance(data[k], list):
edge_attr[k] = data[k]
else:
edge_attr[k] = data[k]
gnew.add_edge(n1, n2, attr_dict=edge_attr)
return gnew
def describe(self, extra=False):
"""
Provides a summary of graph statistics. Includes basic statistics like the number of nodes, edges,
denstiy, and the average degree for one mode. Prints a string that contains each of the items that make up the summary.
Density is calculated using one of the modes of the original bipartite network graph.
**Parameters** :
> *extra* : `bool`
>> Runs the low efficiency algorithms, which can be resource-intensive on large networks.
>> Recommended maximum network size for the low efficiency algorithms is around 100 nodes.
**Returns** : `string`
> Returns the descriptive string that contains information about the `MultiGraphPlus` object.
"""
mode1 = self.mode1
mode2 = self.mode2
density = bipartite.density(self, bipartite.sets(self)[0])
edges = self.number_of_edges()
nodes_mode1 = 0
nodes_mode2 = 0
for n in self.nodes():
if self.node[n]['type'] == mode1:
nodes_mode1 += 1
elif self.node[n]['type'] == mode2:
nodes_mode2 += 1
descriptives_nodes = "This is a bipartite network of types '{}' and '{}'.\n " \
"{} nodes are of the type '{}'.\n " \
"{} nodes are of the type '{}'.\n".format(str(mode1), str(mode2), str(nodes_mode1),
str(mode1), str(nodes_mode2), str(mode2))
descriptives_edges = "There are {} edges.\n".format(str(edges))
descriptives_density = "Density: {}.\n".format(str(density))
descriptives = descriptives_nodes + descriptives_edges + descriptives_density
if extra:
# Note: for each mode of the bipartite graph, degree and betweenness centrality are the same.
# Keeping them both makes it easy to compare them and make sure they are the same.
degree_mode1 = bipartite.degree_centrality(self, bipartite.sets(self)[0])
degree_mode2 = bipartite.degree_centrality(self, bipartite.sets(self)[1])
degree_mode1 = list(degree_mode1.values())
degree_mode2 = list(degree_mode2.values())
degree_mode1 = np.mean(degree_mode1)
degree_mode2 = np.mean(degree_mode2)
betweenness_mode1 = bipartite.betweenness_centrality(self, bipartite.sets(self)[0])
betweenness_mode1 = list(betweenness_mode1.values())
betweenness_mode1 = np.mean(betweenness_mode1)
betweenness_mode2 = bipartite.betweenness_centrality(self, bipartite.sets(self)[1])
betweenness_mode2 = list(betweenness_mode2.values())
betweenness_mode2 = np.mean(betweenness_mode2)
g = nx.Graph(self)
projection = bipartite.projected_graph(g, bipartite.sets(g)[0])
transitivity = nx.transitivity(projection)
descriptives_transitivity = "Transitivity: {}.\n".format(str(transitivity))
descriptives_degree_centrality = "Mean Degree Centrality for '{}': {}.\n" \
"Mean Degree Centrality for '{}': {}.\n".format(str(mode1),
str(degree_mode1),
str(mode2),
str(degree_mode2))
descriptives_btwn_centrality = "Mean Betweenness Centrality for '{}': {}.\n"\
"Mean Betweenness Centrality for '{}': {}.\n".format(str(mode1),
str(betweenness_mode1),
str(mode2),
str(betweenness_mode2))
descriptives = descriptives + descriptives_transitivity +\
descriptives_degree_centrality + descriptives_btwn_centrality
print(descriptives)
return descriptives
def node_merge(self, node1, node2, show_warning=True):
"""
Combines node1 and node2. After merge, node1 will remain, while node2 will be removed. node2's edges will become
node1 edges, while retaining all their edge attributes. Vector attributes of node1 and node2 whose
identifiers match will be combined, retaining all values. Atomic attributes which exist in only one of the
two nodes will be included in the merge node. Finally, if node1 and node2 contain a conflicting atomic
attribute, node1's value will overwrite node2's value.
**Parameters** :
> *node1* : `string`
>> The identifier for a node. This node's attributes will persist to the merged node.
> *node2* : `string`
>> The identifier for a second node. Any non-conflicting attributes will persist to the merged node.
> *show_warning* : `bool`
>> A boolean parameter indicating whether overwrite warnings should be displayed.
**Return** : `MultiGraphPlus`
> a new multigraphplus object which has merged nodes 1 and 2 together into node1, which will also have gained node2's edges.
"""
merged_graph = copy.deepcopy(self)
# Check: Both node1 and node2 exist in merged_graph
if node1 not in merged_graph.nodes():
raise MergeError(node1 + " is not a valid node")
if node2 not in merged_graph.nodes():
raise MergeError(node2 + " is not a valid node")
# Check: node1 and node2's types are the same
if 'type' in merged_graph.node[node1] and 'type' in merged_graph.node[node2]:
if merged_graph.node[node1]['type'] != merged_graph.node[node2]['type']:
raise MergeError("node1 and node2's types do not match")
# Moves all edges from node2 to node1
for e in merged_graph.edges(node2, data=True):
merged_graph.add_edge(node1, e[1], attr_dict=e[2])
merged_graph.remove_edge(e[0], e[1])
# Adds node2's attributes to node1. There are three cases for this:
# 1. Vector attributes are joined to create one larger vector
# 2. Non-conflicting Atomic attributes are kept and preserved in the final node
# 3. Conflicting Atomic attributes are not added from node2 (node1 values persist)
node_merge_warn = False
node_merge_warn_list = []
for na in merged_graph.node[node2]:
if na not in merged_graph.node[node1]: # Deals with case 2
merged_graph.node[node1][na] = merged_graph.node[node2][na]
elif isinstance(merged_graph.node[node2][na], list): # Deals with case 1
merged_graph.node[node1][na] = merged_graph.node[node1][na] + merged_graph.node[node2][na]
elif merged_graph.node[node1][na] != merged_graph.node[node2][na]:
node_merge_warn = True
node_merge_warn_list.append(na)
merged_graph.remove_node(node2) # Removes node2
if node_merge_warn and show_warning:
print("Note: nodes '{}' and '{}' have the following conflicting atomic attributes: {}. In these cases, "
"'{}' attribute values have been retained, while '{}' values have been ignored. If you would rather "
"retain '{}' attributes, set '{}' to node1 and '{}' to node2."
.format(node1, node2, node_merge_warn_list, node1, node2, node2, node2, node1))
return merged_graph
def node_attributes(self, name, helper):
"""
Creates a new node attribute.
**Parameters** :
> *name* : `string`
>> The name of the new attribute.
> *helper* : `None`
>> A helper function, which takes an attribute dict and produces the new attribute.
**Return** :
> A new MultiGraphPlus object, identical to self but with the desired attribute.
"""
self_copy = copy.deepcopy(self)
for n in self_copy.nodes():
self_copy.node[n][name] = helper(self_copy.node[n])
return self_copy
def quickplot(self, fname, k="4/sqrt(n)", iterations=50, layout="neato", size=20, default_colour="lightgrey"):
"""
Makes a quick visualization of the network.
**Parameters** :
> *fname* : `string`
>> A string indicating the path or file name to write to.
> *k* : `None`
>> Default function used for plotting.
> *iterations* : `int`
>> Default number of iterations to run on the plot.
> *layout* : `string`
>> The type of layout to draw, the available layouts are: ("spring", "circular", "shell", "spectral", or "random").
> *size* : `int`
>> The size of the nodes. Default is 20.
> *default_colour* : `string`
>> Only default nodes will be coloured with this colour.
**Return** : `None`
"""
if type(k) is str:
k = 4/np.sqrt(self.number_of_nodes())
# Make a copy
copy_net = copy.deepcopy(self)
# Remove isolates
copy_net.remove_nodes_from(nx.isolates(copy_net))
# Add detect colour attribute
colour_data = {}
for n in copy_net.nodes():
if "colour" in copy_net.node[n].keys():
colour_data[n] = copy_net.node[n]["colour"]
elif "color" in copy_net.node[n].keys():
colour_data[n] = copy_net.node[n]["color"]
else:
colour_data[n] = default_colour
colour_list = [colour_data[node] for node in copy_net.nodes()]
# Plot the network
print("Plotting...")
if layout in ["dot", "neato", "fdp", "circo"]:
warnings.warn("Because of outstanding issues in the source package, graphviz layouts"
"don't work on some computers.")
nx.draw(copy_net,
pos=graphviz_layout(copy_net, prog=layout),
node_size=size,
font_size=5,
node_color=colour_list,
linewidths=.5,
edge_color="DarkGray",
width=.1)
if layout == "spring":
nx.draw(copy_net,
pos=nx.spring_layout(copy_net, k=k, iterations=iterations),
node_size=size,
font_size=5,
node_color=colour_list,
linewidths=.5,
edge_color="DarkGray",
width=.1)
elif layout == "circular":
nx.draw_circular(copy_net,
node_size=size,
font_size=5,
node_color=colour_list,
linewidths=.5,
edge_color="DarkGray",
width=.1)
elif layout == "shell":
nx.draw_shell(copy_net,
node_size=size,
font_size=5,
node_color=colour_list,
linewidths=.5,
edge_color="DarkGray",
width=.1)
elif layout == "spectral":
nx.draw_spectral(copy_net,
node_size=size,
font_size=5,
node_color=colour_list,
linewidths=.5,
edge_color="DarkGray",
width=.1)
elif layout == "random":
nx.draw_random(copy_net,
node_size=size,
font_size=5,
node_color=colour_list,
linewidths=.5,
edge_color="DarkGray",
width=.1)
# Save figure if applicable
if fname is not None:
plt.savefig(fname, bbox_inches="tight")
print("Wrote file: {} to {}".format(fname, os.getcwd()))
def write_graphml(self, fname):
"""
Converts a `MultiGraphPlus` object to a graphml file.
**Parameters** :
> *fname* :
>> A string indicating the path or file name to write to. File names which end in `.gz` or `.bz2` will be compressed.
**Return** : `None`
> This method will have the side effect of creating a file, specified by fpath.
> This method cannot use vector attributes within the graphml file. Instead, vector attributes are converted into
> a semicolon-delimited string. When this occurs, a warning is raised indicating the vector attributes (node
> attributes are preceded by 'n:' while edge attributes are preceded by 'e:').
"""
graph = copy.deepcopy(self)
warning = False
warning_set = set([])
for n in graph.nodes():
for attr in graph.node[n].keys():
if isinstance(graph.node[n][attr], list):
warning = True
warning_set = {'n:' + attr} | warning_set
graph.node[n][attr] = list_to_scd(graph.node[n][attr])
for n1, n2, data in graph.edges(data=True):
for k in data:
if isinstance(data[k], list):
warning = True
warning_set = {'e:'+k} | warning_set
data[k] = list_to_scd(data[k])
if warning:
warnings.warn("The provided graph contained the vector attributes: {}. All values of vector attributes have"
" been converted to semicolon-delimited strings. To prevent this, remove vector attributes or"
" convert them to atomic attributes prior to calling .write_graphml"
.format(warning_set))
nx.write_graphml(graph, fname)
print("Success. Wrote GraphML file {} to {}".format(fname, os.getcwd()))
def write_tnet(self, fname, mode_string="type", weighted=False, time_string="date", node_index_string="tnet_id",
weight_string='weight'):
"""
A function to write an edgelist formatted for the tnet library for network analysis in R.
**Parameters** :
> *fname* : `string`
>> A string indicating the path or file name to write to.
> *mode_string* : `string`
>> The name string of the mode node attribute.
> *weighted* : `bool`
>> Do the edges have weights? True or false.
> *time_string* : `string`
>> the name string of the date/time node attribute.
> *node_index_string* : `int`
>> Creates a new integer id attribute.
> *weight_string* : `string`
>> The name of the weight node attribute.
**Return** : `None`
*Source* :
> Adapted from code written by Reid McIlroy Young for the Metaknowledge python library.
"""
modes = []
mode1set = set()
for node_index, node in enumerate(self.nodes_iter(data=True), start=1):
try:
nmode = node[1][mode_string]
except KeyError:
# too many modes so will fail
modes = [1, 2, 3]
nmode = 4
if nmode not in modes:
if len(modes) < 2:
modes.append(nmode)
else:
raise ValueError(
"Too many modes of '{}' found in the network or one of the nodes was missing its mode. "
"There must be exactly 2 modes.".format(mode_string))
if nmode == modes[0]:
mode1set.add(node[0])
node[1][node_index_string] = node_index
if len(modes) != 2:
raise ValueError(
"Too few modes of '{}' found in the network. There must be exactly 2 modes.".format(mode_string))
with open(fname, 'w', encoding='utf-8') as f:
for n1, n2, eDict in self.edges_iter(data=True):
if n1 in mode1set:
if n2 in mode1set:
raise ValueError(
"The nodes '{}' and '{}' have an edge and the same type. "
"The network must be purely 2-mode.".format(n1, n2))
elif n2 in mode1set:
n1, n2 = n2, n1
else:
raise ValueError(
"The nodes '{}' and '{}' have an edge and the same type. "
"The network must be purely 2-mode.".format(n1, n2))
if time_string is not None and time_string in eDict:
edt = eDict[time_string]
if type(edt) is str:
edt = datetime_git(eDict[time_string])
e_time_string = edt.strftime("\"%y-%m-%d %H:%M:%S\"")
else:
e_time_string = ''
# Write to file
node1 = self.node[n1][node_index_string]
node2 = self.node[n2][node_index_string]
if time_string is not None:
f.write("{} {} {}".format(e_time_string, node1, node2))
else:
f.write("{} {}".format(node1, node2))
if weighted:
weight = eDict[weight_string]
f.write(" {}\n".format(weight))
else:
f.write("\n")
print("Success. Wrote Tnet file {} to {}".format(fname, os.getcwd()))