Source code for data.school.school_processing

import os
import numpy as np


def get_school_data(return_all_labels=False):
    """Load the Lyon primary-school contact network as hourly snapshots.

    A dynamic social network between pupils at a primary school in Lyon,
    France (Stehlé et al., 2011). Each of the 232 pupils wore a radio
    identification device such that each interaction, with its timestamp,
    could be recorded, forming a dynamic network. An interaction was defined
    by close proximity for 20 seconds.

    The task is to predict the classroom allocation of each pupil. This
    dataset has a temporal structure that particularly distinguishes:

    - **Class time**: Pupils cluster together based on their class (easier).
    - **Lunchtime**: The cluster structure breaks down (harder).

    The data covers two full school days, making it roughly repeating. Only
    interactions between 08:30 and 17:30 on each day are kept and they are
    binned into one-hour windows (9 per day, 18 in total). The edge list is
    read from ``ia-primary-school-proximity-attr.edges`` located next to this
    module; each non-blank line must be
    ``node_i,node_j,time,label_i,label_j``.

    Nodes are re-indexed so that their class labels are in ascending order;
    the relative order of nodes sharing a label is whatever ``np.argsort``
    (default settings) produces.

    Args:
        return_all_labels (bool): If True, also return the array of label
            names, ordered by their integer code.

    Returns:
        tuple: A tuple containing:
            - As (np.ndarray): Binary symmetric adjacency matrices, one per
              time window, of shape ``(T, n, n)``.
            - node_labels (np.ndarray): Integer class label for each node at
              each time window, i.e. the ``n`` sorted labels tiled ``T``
              times (length ``T * n``).
            - all_labels (np.ndarray): Label names; only returned when
              ``return_all_labels`` is True.

    Raises:
        FileNotFoundError: If the edge file is not next to this module.
        ValueError: If a non-blank line does not have exactly five
            comma-separated fields or its time field is not an integer.
        KeyError: If a kept line carries a class label that is not known.
    """
    window = 60 * 60  # one hour, in seconds

    # School hours (08:30-17:30) for each day, in seconds since the
    # start of day one.
    day_1_start = (8 * 60 + 30) * 60
    day_1_end = (17 * 60 + 30) * 60
    day_2_start = ((24 + 8) * 60 + 30) * 60
    day_2_end = ((24 + 17) * 60 + 30) * 60

    T1 = (day_1_end - day_1_start) // window
    T2 = (day_2_end - day_2_start) // window
    T = T1 + T2
    print(f"Number of time windows: {T}")

    # os.path.join keeps the path relative when dirname(__file__) is empty,
    # instead of accidentally pointing at the filesystem root.
    fname = os.path.join(
        os.path.dirname(__file__), "ia-primary-school-proximity-attr.edges"
    )

    label_dict = {
        "1A": 0,
        "1B": 1,
        "2A": 2,
        "2B": 3,
        "3A": 4,
        "3B": 5,
        "4A": 6,
        "4B": 7,
        "5A": 8,
        "5B": 9,
        "Teachers": 10,
    }

    # node id -> class index; dicts keep first-seen order and give O(1)
    # membership tests.
    node_to_label = {}
    # A set of (window, node_i, node_j) de-duplicates repeated contacts
    # within the same window.
    edges = set()

    with open(fname) as file:
        for line in file:
            line = line.strip("\n")
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing empty line).
                continue

            node_i, node_j, time_field, id_i, id_j = line.split(",")
            timestamp = int(time_field)

            if day_1_start <= timestamp < day_1_end:
                t = (timestamp - day_1_start) // window
            elif day_2_start <= timestamp < day_2_end:
                t = T1 + (timestamp - day_2_start) // window
            else:
                # Outside school hours: ignore the contact entirely.
                continue

            if node_i not in node_to_label:
                node_to_label[node_i] = label_dict[id_i]
            if node_j not in node_to_label:
                node_to_label[node_j] = label_dict[id_j]

            edges.add((t, node_i, node_j))

    nodes = list(node_to_label)
    spatial_node_labels = list(node_to_label.values())
    n = len(nodes)
    print(f"Number of nodes: {n}")

    # Re-index nodes so that their labels appear in ascending order.
    order = np.argsort(spatial_node_labels)
    node_dict = {nodes[k]: rank for rank, k in enumerate(order)}
    spatial_node_labels = np.sort(spatial_node_labels)

    As = np.zeros((T, n, n))
    for t, node_i, node_j in edges:
        i, j = node_dict[node_i], node_dict[node_j]
        # Contacts are undirected, so fill both triangles.
        As[t, i, j] = 1
        As[t, j, i] = 1

    # Labels do not change over time: repeat them once per window.
    node_labels = np.tile(spatial_node_labels, T)

    if return_all_labels:
        all_labels = np.array(list(label_dict.keys()))
        return As, node_labels, all_labels
    return As, node_labels