class Igraph(ig.Graph):
    """``ig.Graph`` subclass with helpers for summary and component statistics."""

    def __init__(self, *args, **kwds):
        """Create the graph, forwarding all arguments to ``ig.Graph``.

        Args:
            *args: positional arguments passed through to ``ig.Graph.__init__``.
            **kwds: keyword arguments passed through to ``ig.Graph.__init__``.
                An optional ``X`` keyword is removed before forwarding.
        """
        X = kwds.pop("X", None)  # strip X so the base constructor never sees it
        if isinstance(X, pd.DataFrame):
            X = X.to_dict(orient="list")
        # NOTE(review): X is not used after this point, so any value passed in
        # is currently discarded. It presumably was meant to become vertex
        # attributes (see get_X) -- confirm the intended behaviour.
        super().__init__(*args, **kwds)

    def graph_stats(self):
        """Collect summary statistics of the graph into a dict.

        Returns:
            dict: with keys ``density``, ``n`` (vertex count), ``E`` (edge
            count), ``Nc`` (number of connected components), ``ncmax`` (node
            count of the largest component), ``diameter``, ``globalcc``,
            ``avglocalcc``, ``assortativity``, ``avg_path_length``,
            ``avg_degree``, ``median_degree`` and ``max_degree``.
        """
        ddict = {}
        ddict["density"] = self.density()
        ddict["n"] = self.vcount()
        ddict["E"] = self.ecount()
        # Columns of C are components sorted by size, largest first:
        # row 0 holds component ids, row 1 holds node counts.
        C = self.component_sizes(pcent=False)
        ddict["Nc"] = C.shape[1]
        ddict["ncmax"] = C[1, 0]
        ddict["diameter"] = self.diameter()
        ddict["globalcc"] = self.transitivity_undirected(mode="zero")
        ddict["avglocalcc"] = self.transitivity_avglocal_undirected(mode="zero")
        ddict["assortativity"] = self.assortativity_degree(directed=False)
        ddict["avg_path_length"] = self.average_path_length(directed=False)
        dd = np.array(self.degree())
        ddict["avg_degree"] = dd.mean()
        ddict["median_degree"] = np.median(dd)
        ddict["max_degree"] = dd.max()
        return ddict

    def component_stats(self):
        """Collect statistics for the largest and second largest components.

        Returns:
            dict: with keys ``E`` (edge count of the whole graph), ``Nc``
            (number of connected components), ``cc_max`` (``graph_stats`` of
            the largest component), ``cc_max_mem`` (boolean array, True for
            vertices in the largest component) and ``cc2`` (``graph_stats`` of
            the second largest component, or None when there is none).
        """
        ddict = {}
        ddict["E"] = self.ecount()
        cc = self.connected_components()
        ddict["Nc"] = len(cc)
        idxmax, idx2 = self.find_first_second_largest_cc(cc.membership)
        ddict["cc_max"] = self.get_comp(idx=idxmax, cc=cc).graph_stats()
        ddict["cc_max_mem"] = np.array(cc.membership) == idxmax
        if idx2 is not None:
            ddict["cc2"] = self.get_comp(idx=idx2, cc=cc).graph_stats()
        else:
            ddict["cc2"] = None
        return ddict

    def component_sizes(self, cc=None, pcent=True):
        """Return component ids and sizes, sorted with the largest first.

        Args:
            cc (optional): precalculated clustering exposing ``membership``.
                Defaults to ``self.connected_components()``.
            pcent (bool, optional): if True, sizes are fractions of the total
                number of vertices and the whole array is float64; otherwise
                sizes are node counts. Defaults to True.

        Returns:
            np.ndarray: 2 x (number of components) array; row 0 holds the
            component ids, row 1 the matching sizes.
        """
        if cc is None:
            cc = self.connected_components()
        C = np.vstack(np.unique(cc.membership, return_counts=True))
        C = C[:, np.argsort(C[1, :])[::-1]]  # sort so largest is first
        if pcent:
            C = C.astype("float64")
            C[1, :] = C[1, :] / len(cc.membership)
        return C

    def large_components(self, large_comp_cutoff=0.1, cc=None, return_idx=False):
        """Find components in network greater than large_comp_cutoff.

        Args:
            large_comp_cutoff (float/int, optional): Cutoff to be considered a
                large component. If int it is a number of nodes, if float it is
                a fraction of the total nodes (so a 33% cutoff is 0.33). NumPy
                integer and floating scalars are accepted as well.
                Defaults to 0.1.
            cc (ig.VertexClustering, optional): precalculated
                components/clustering. Typically output of
                g.connected_components. Defaults to None.
            return_idx (bool, optional): flag whether to also return the
                indexes of the components/clusters. Defaults to False.

        Returns:
            list or tuple: list of large component subgraphs, or, when
            return_idx is True, a tuple of that list and an integer array of
            the matching component indexes.

        Raises:
            ValueError: if large_comp_cutoff is neither an integer nor a float.
        """
        if cc is None:
            cc = self.connected_components()
        if isinstance(large_comp_cutoff, (float, np.floating)):
            pcent = True
        elif isinstance(large_comp_cutoff, (int, np.integer)):
            pcent = False
        else:
            raise ValueError(
                "large_comp_cutoff should be node number or decimal percentage (i.e. 33% cutoff should be 0.33)"
            )
        C = self.component_sizes(cc, pcent=pcent)
        # component_sizes returns a float64 array when pcent is True, so the
        # ids in row 0 are cast back to integers before being used as indexes.
        large_comps = C[0, C[1, :] >= large_comp_cutoff].astype(int)
        components = [cc.subgraph(int(idx)) for idx in large_comps]
        return (components, large_comps) if return_idx else components

    def find_first_second_largest_cc(self, cc=None):
        """Return the ids of the largest and second largest components.

        The second largest component must have more than one node and be
        strictly smaller than the largest, so a component tied with the
        largest is not reported as the second.

        Args:
            cc (optional): membership sequence (one component id per vertex)
                or a clustering object exposing ``membership``. Defaults to
                ``self.connected_components()``.

        Returns:
            tuple: ``(largest_id, second_id)``; ``second_id`` is None when no
            component qualifies as second largest.

        Raises:
            ValueError: raised by NumPy when the membership is empty.
        """
        if cc is None:
            cc = self.connected_components()
        # Callers in this class pass the raw membership list; accept both forms.
        membership = getattr(cc, "membership", cc)
        labels, sizes = np.unique(membership, return_counts=True)
        largest = sizes.argmax()
        # Candidates: smaller than the maximum but with more than one node.
        candidates = np.flatnonzero((sizes > 1) & (sizes < sizes[largest]))
        if candidates.size == 0:
            return labels[largest], None  # no second component
        second = candidates[sizes[candidates].argmax()]
        return labels[largest], labels[second]

    def get_comp(self, idx, cc=None):
        """Return the component with index ``idx`` as an instance of this class.

        Args:
            idx: index of the component within ``cc``.
            cc (optional): precalculated clustering. Defaults to
                ``self.connected_components()``.
        """
        if cc is None:
            cc = self.connected_components()
        return self.from_igraph(cc.subgraph(idx=idx))

    def max_component(self):
        """Return the largest connected component as an instance of this class."""
        cc = self.connected_components()
        idxmax, _ = self.find_first_second_largest_cc(cc.membership)
        return self.get_comp(idx=idxmax, cc=cc)

    def get_2ndmax_comp(self):
        """Return the second largest connected component.

        Raises:
            ValueError: if there is no second largest component.
        """
        cc = self.connected_components()
        _, idx2 = self.find_first_second_largest_cc(cc.membership)
        if idx2 is None:
            raise ValueError("No 2nd largest component")
        return self.get_comp(idx=idx2, cc=cc)

    def get_X(self):
        """Stack all vertex attributes into an array, one row per vertex.

        Returns:
            np.ndarray: the vertex attribute values, transposed so each
            attribute becomes a column (shape n x d).
        """
        X = np.array([self.vs[k] for k in self.vs.attributes()])
        return X.T  # Transpose to have shape n x d

    def plot_degree_dist(self, logbinsize=0.1, LOG_ONLY=False):
        """Plot the degree distribution via the module-level ``plot_degree_dist``.

        Args:
            logbinsize (float, optional): forwarded unchanged. Defaults to 0.1.
            LOG_ONLY (bool, optional): forwarded unchanged. Defaults to False.
        """
        degree = self.degree()
        # Inside a method body this name resolves to the module-level function,
        # not to this method.
        plot_degree_dist(degree_dist=degree, logbinsize=logbinsize, LOG_ONLY=LOG_ONLY)

    def plottable(self, mode="undirected"):
        """Return a plain ``ig.Graph`` rebuilt from the sparse adjacency matrix.

        Only the adjacency matrix is handed to the new graph.

        Args:
            mode (str, optional): forwarded to ``ig.Graph.Adjacency``.
                Defaults to "undirected".
        """
        A = self.get_adjacency_sparse()
        return ig.Graph.Adjacency(A, mode=mode)

    @classmethod
    def from_igraph(cls, gg, *args, **kwds):
        """Build an instance of this class from an existing igraph graph.

        A new instance is created from ``*args``/``**kwds`` and ``gg`` is
        added to it with the ``+`` operator.
        NOTE(review): this relies on ``+`` returning an instance of ``cls``
        -- confirm against the igraph version in use.
        """
        return cls(*args, **kwds) + gg