Source code for data.sbm.sbm_processing
import numpy as np
import pyemb as eb
[docs]
def get_sbm_data():
    r"""
    Simulate the three-community Dynamic Stochastic Block Model (DSBM) dataset
    from `https://arxiv.org/abs/2405.19230 <https://arxiv.org/abs/2405.19230>`_.

    At every time point the inter-community edge probability matrix is

    .. math::
        B(t) =
        \begin{bmatrix}
        s_1 & 0.02 & 0.02 \\
        0.02 & s_2 & 0.02 \\
        0.02 & 0.02 & s_3
        \end{bmatrix}

    where each within-community state :math:`s_k` is either 0.08 or 0.16.
    The network spans :math:`T = 8` time points, one for each of the
    :math:`2^3 = 8` combinations of :math:`(s_1, s_2, s_3)`, visited in a
    random order. The adjacency matrix :math:`A(t)` at each time point is
    drawn from the corresponding :math:`B(t)`.

    The task is to predict the community label of each node.

    The time ordering is shuffled with NumPy's global random state
    (``np.random.shuffle``), so seed ``np.random`` beforehand to reproduce
    the ordering.

    Returns:
        tuple: A tuple containing:
            - As (list of np.ndarray): List of adjacency matrices for each
              time point.
            - node_labels (np.ndarray): The per-node community labels
              returned by the simulator, repeated once per time point
              (``np.tile(labels, T)``).
    """
    num_communities = 3
    num_nodes = 100 * num_communities
    num_steps = 8
    within_probs = (0.08, 0.16)

    # Equal prior weight for every community.
    community_weights = np.full(num_communities, 1 / num_communities)

    # Begin with the between-community probability everywhere; the diagonals
    # are overwritten below.
    block_matrices = np.full((num_steps, num_communities, num_communities), 0.02)

    # Each integer in 0..7 encodes one (s_1, s_2, s_3) combination; shuffling
    # randomises the order in which the combinations appear over time.
    state_order = list(range(num_steps))
    np.random.shuffle(state_order)

    for step, state in enumerate(state_order):
        for community in range(num_communities):
            # Bit `community` of `state` chooses the low (0) or high (1)
            # within-community probability.
            bit = (state >> community) & 1
            block_matrices[step, community, community] = within_probs[bit]

    As, spatial_node_labels = eb.simulation.SBM(
        num_nodes, block_matrices, community_weights
    )
    return As, np.tile(spatial_node_labels, num_steps)