Skip to content

Graph

Graph

igraphf

Igraph

Bases: Graph

Source code in src/simnetpy/graph/igraphf.py
class Igraph(ig.Graph):
    """``ig.Graph`` subclass adding summary-statistic and connected-component helpers."""

    def __init__(self, *args, **kwds):
        """Create the graph, forwarding arguments to ``ig.Graph``.

        Args:
            *args: positional arguments passed unchanged to ``ig.Graph``.
            **kwds: keyword arguments passed to ``ig.Graph``. An optional ``X``
                keyword is removed first so the parent constructor never sees it.
        """
        X = kwds.pop("X", None)  # check if X passed as argument

        if isinstance(X, pd.DataFrame):
            X = X.to_dict(orient="list")
        # NOTE(review): X is not used after this point, so it is currently
        # accepted and silently discarded. Presumably it was meant to become
        # vertex attributes (see get_X) - confirm intent before wiring it up.

        super().__init__(*args, **kwds)

    def graph_stats(self):
        """Collect summary statistics of the whole graph.

        Returns:
            dict: keys ``density``, ``n`` (vertex count), ``E`` (edge count),
                ``Nc`` (number of connected components), ``ncmax`` (node count of
                the largest component), ``diameter``, ``globalcc``, ``avglocalcc``,
                ``assortativity``, ``avg_path_length``, ``avg_degree``,
                ``median_degree`` and ``max_degree``.
        """
        ddict = {}
        ddict["density"] = self.density()
        ddict["n"] = self.vcount()
        ddict["E"] = self.ecount()
        # Columns are components sorted by size (largest first); row 1 holds node counts.
        sizes = self.component_sizes(pcent=False)
        ddict["Nc"] = sizes.shape[1]
        ddict["ncmax"] = sizes[1, 0]
        ddict["diameter"] = self.diameter()
        ddict["globalcc"] = self.transitivity_undirected(mode="zero")
        ddict["avglocalcc"] = self.transitivity_avglocal_undirected(mode="zero")
        ddict["assortativity"] = self.assortativity_degree(directed=False)
        ddict["avg_path_length"] = self.average_path_length(directed=False)
        degrees = np.array(self.degree())
        ddict["avg_degree"] = degrees.mean()
        ddict["median_degree"] = np.median(degrees)
        ddict["max_degree"] = degrees.max()

        return ddict

    def component_stats(self):
        """Collect statistics for the largest and second largest components.

        Returns:
            dict: ``E`` (edge count of the whole graph), ``Nc`` (number of
                connected components), ``cc_max`` (``graph_stats`` of the largest
                component), ``cc_max_mem`` (boolean array, True for vertices in
                the largest component) and ``cc2`` (``graph_stats`` of the second
                largest component, or None when there is none).
        """
        ddict = {}
        ddict["E"] = self.ecount()

        cc = self.connected_components()
        ddict["Nc"] = len(cc)

        idxmax, idx2 = self.find_first_second_largest_cc(cc.membership)

        ddict["cc_max"] = self.get_comp(idxmax, cc=cc).graph_stats()
        ddict["cc_max_mem"] = np.array(cc.membership) == idxmax

        if idx2 is not None:
            ddict["cc2"] = self.get_comp(idx2, cc=cc).graph_stats()
        else:
            ddict["cc2"] = None
        return ddict

    def component_sizes(self, cc=None, pcent=True):
        """Return component labels and sizes, sorted so the largest is first.

        Args:
            cc (ig.VertexClustering, optional): precalculated components/clustering.
                Computed with ``connected_components`` when None. Defaults to None.
            pcent (bool, optional): if True sizes are fractions of the total number
                of nodes, otherwise node counts. Defaults to True.

        Returns:
            np.ndarray: array of shape ``(2, number of components)``; row 0 holds
                the component labels and row 1 the sizes. When ``pcent`` is True the
                whole array (labels included) is float64.
        """
        if cc is None:
            cc = self.connected_components()

        C = np.vstack(np.unique(cc.membership, return_counts=True))
        C = C[:, np.argsort(C[1, :])[::-1]]  # sort so largest is first
        if pcent:
            C = C.astype("float64")
            C[1, :] = C[1, :] / len(cc.membership)

        return C

    def large_components(self, large_comp_cutoff=0.1, cc=None, return_idx=False):
        """Find components in the network whose size is at least large_comp_cutoff.

        Args:
            large_comp_cutoff (float/int, optional): Cutoff to be considered a large component.
                        An int (Python or numpy) is a number of nodes, a float is a fraction
                        of the total nodes (so a 33% cutoff is 0.33). Components whose size
                        equals the cutoff are included. Defaults to 0.1
            cc (ig.VertexClustering, optional): precalculated components/clustering.
                        Typically output of g.connected_components. Defaults to None.
            return_idx (bool, optional): flag whether to return indexes of components/clusters.
                        Defaults to False.

        Returns:
            list/tuple: if return_idx is True, a tuple of (list of large component subgraphs,
                        integer array of their component indexes); otherwise just the list
                        of component subgraphs.

        Raises:
            ValueError: if large_comp_cutoff is neither an integer nor a float.
        """
        if cc is None:
            cc = self.connected_components()

        # numpy scalars are accepted too: np.int64 is not a subclass of int,
        # so a plain isinstance(..., int) check would wrongly reject it.
        if isinstance(large_comp_cutoff, (float, np.floating)):
            pcent = True
        elif isinstance(large_comp_cutoff, (int, np.integer)):
            pcent = False
        else:
            raise ValueError(
                "large_comp_cutoff should be node number or decimal percentage (i.e. 33% cutoff should be 0.33)"
            )

        C = self.component_sizes(cc, pcent=pcent)

        # component_sizes returns a float array when pcent is True; cast the
        # labels back to integers so they are usable as component indexes.
        large_comps = C[0, C[1, :] >= large_comp_cutoff].astype(int)
        components = [cc.subgraph(idx) for idx in large_comps]

        return (components, large_comps) if return_idx else components

    def find_first_second_largest_cc(self, cc=None):
        """Find the labels of the largest and second largest components.

        The second largest component must contain more than one node and be
        strictly smaller than the largest, so a component tied with the largest
        is never reported as the second.

        Args:
            cc (sequence of int, optional): component membership of every vertex.
                Computed with ``connected_components`` when None. Defaults to None.

        Returns:
            tuple: ``(label of largest, label of second largest)``; the second
                entry is None when no component qualifies.
        """
        if cc is None:
            cc = self.connected_components().membership

        C = np.vstack(np.unique(cc, return_counts=True))
        cmax = C[:, C[1, :].argmax()]
        # candidates: smaller than the max but with more than one node (may be empty)
        candidates = C[:, (C[1, :] > 1) & (C[1, :] < cmax[1])]
        if candidates.shape[1] == 0:
            return cmax[0], None  # if no second component return None.

        second = candidates[:, candidates[1, :].argmax()]
        return cmax[0], second[0]  # returns index for largest and 2nd largest components

    def get_comp(self, idx, cc=None):
        """Return the component with label ``idx`` as an instance of this class.

        Args:
            idx (int): label of the component in ``cc``.
            cc (ig.VertexClustering, optional): precalculated components/clustering.
                Computed with ``connected_components`` when None. Defaults to None.
        """
        if cc is None:
            cc = self.connected_components()

        gc = cc.subgraph(idx=idx)

        return self.from_igraph(gc)

    def max_component(self):
        """Return the largest connected component as an instance of this class."""
        cc = self.connected_components()
        idxmax, _ = self.find_first_second_largest_cc(cc.membership)
        return self.get_comp(idx=idxmax, cc=cc)

    def get_2ndmax_comp(self):
        """Return the second largest connected component.

        Raises:
            ValueError: if ``find_first_second_largest_cc`` finds no second component.
        """
        cc = self.connected_components()
        _, idx2 = self.find_first_second_largest_cc(cc.membership)
        if idx2 is None:
            raise ValueError("No 2nd largest component")
        return self.get_comp(idx=idx2, cc=cc)

    def get_X(self):
        """Stack all vertex attributes into one array.

        Returns:
            np.ndarray: one row per vertex and one column per vertex attribute.
        """
        columns = [self.vs[k] for k in self.vs.attributes()]
        return np.array(columns).T  # Transpose to have shape n x d

    def plot_degree_dist(self, logbinsize=0.1, LOG_ONLY=False):
        """Pass this graph's degrees to the module-level ``plot_degree_dist``."""
        degree = self.degree()
        plot_degree_dist(degree_dist=degree, logbinsize=logbinsize, LOG_ONLY=LOG_ONLY)

    def plottable(self, mode="undirected"):
        """Build a plain ``ig.Graph`` from this graph's sparse adjacency matrix.

        Args:
            mode (str, optional): passed to ``ig.Graph.Adjacency``. Defaults to "undirected".
        """
        A = self.get_adjacency_sparse()
        # Adjacency is a classmethod, so no throwaway Graph instance is needed.
        return ig.Graph.Adjacency(A, mode=mode)

    @classmethod
    def from_igraph(cls, gg, *args, **kwds):
        """Build an instance of this class from an existing igraph graph.

        A new graph is created with ``cls(*args, **kwds)`` and ``gg`` is added
        to it with the ``+`` operator.
        """
        return cls(*args, **kwds) + gg

large_components(large_comp_cutoff=0.1, cc=None, return_idx=False)

Find components in the network whose size is at least large_comp_cutoff.

Parameters:

  • large_comp_cutoff (float / int, default: 0.1 ) –

    Cutoff for a component to be considered large. An int is a number of nodes; a float is a fraction of the total number of nodes, written as a decimal (so a 33% cutoff is 0.33). Defaults to 0.1.

  • cc (VertexClustering, default: None ) –

    precalculated components/clustering. Typically output of g.connected_components. Defaults to None.

  • return_idx (bool, default: False ) –

    Flag indicating whether to return the indexes of the components/clusters. Defaults to False.

Returns:

  • list/tuple of lists: if return_idx is True, returns a tuple containing the list of large component subgraphs and the list of their indexes; otherwise returns just the list of component subgraphs.

Source code in src/simnetpy/graph/igraphf.py
def large_components(self, large_comp_cutoff=0.1, cc=None, return_idx=False):
    """Find components in the network whose size is at least large_comp_cutoff.

    Args:
        large_comp_cutoff (float/int, optional): Cutoff to be considered a large component.
                    An int (Python or numpy) is a number of nodes, a float is a fraction
                    of the total nodes (so a 33% cutoff is 0.33). Components whose size
                    equals the cutoff are included. Defaults to 0.1
        cc (ig.VertexClustering, optional): precalculated components/clustering.
                    Typically output of g.connected_components. Defaults to None.
        return_idx (bool, optional): flag whether to return indexes of components/clusters.
                    Defaults to False.

    Returns:
        list/tuple: if return_idx is True, a tuple of (list of large component subgraphs,
                    integer array of their component indexes); otherwise just the list
                    of component subgraphs.

    Raises:
        ValueError: if large_comp_cutoff is neither an integer nor a float.
    """
    if cc is None:
        cc = self.connected_components()

    # numpy scalars are accepted too: np.int64 is not a subclass of int,
    # so a plain isinstance(..., int) check would wrongly reject it.
    if isinstance(large_comp_cutoff, (float, np.floating)):
        pcent = True
    elif isinstance(large_comp_cutoff, (int, np.integer)):
        pcent = False
    else:
        raise ValueError(
            "large_comp_cutoff should be node number or decimal percentage (i.e. 33% cutoff should be 0.33)"
        )

    C = self.component_sizes(cc, pcent=pcent)

    # component_sizes returns a float array when pcent is True; cast the
    # labels back to integers so they are usable as component indexes.
    large_comps = C[0, C[1, :] >= large_comp_cutoff].astype(int)
    components = [cc.subgraph(idx) for idx in large_comps]

    return (components, large_comps) if return_idx else components