from torch_geometric.data import Data as PygData, Batch as PygBatch
from torch_geometric.utils import to_scipy_sparse_matrix
import torch
from typing import List, Optional, Tuple, Union
from torch import Tensor, LongTensor
from torch_geometric.utils.num_nodes import maybe_num_nodes
import scipy.sparse as ssp
from ..backend.SpTensor import coalesce, SparseTensor

def k_hop_subgraph(
    node_idx: Union[int, List[int], LongTensor],
    num_hops: int,
    edge_index: LongTensor,
    relabel_nodes: bool = False,
    num_nodes: Optional[int] = None,
    flow: str = 'source_to_target',
    directed: bool = False,
) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
"""
Compute the k-hop subgraph around a set of nodes in an edge list.
Args:
- node_idx (Union[int, List[int], LongTensor]): The root node(s) for the subgraph.
- num_hops (int): The number of hops for the subgraph.
- edge_index (LongTensor): The edge indices of the graph.
- relabel_nodes (bool, optional): Whether to relabel node indices. Defaults to False.
- num_nodes (Optional[int], optional): The total number of nodes. Defaults to None.
- flow (str, optional): The direction of traversal ('source_to_target' or 'target_to_source'). Defaults to 'source_to_target'.
- directed (bool, optional): Whether the graph is directed. Defaults to False.
Returns:
Tuple[Tensor, Tensor, Tensor, Tensor]: A tuple containing:
- subset (Tensor): The node indices in the subgraph.
- edge_index (Tensor): The edge indices of the subgraph.
- inv (Tensor): The inverse mapping of node indices in the original graph to the subgraph.
- edge_mask (Tensor): A mask indicating which edges are part of the subgraph.
- dist (Tensor): A distance of each node to the root node.
"""
    num_nodes = maybe_num_nodes(edge_index, num_nodes)

    assert flow in ['source_to_target', 'target_to_source']
    if flow == 'target_to_source':
        row, col = edge_index
    else:
        col, row = edge_index

    node_mask = row.new_empty(num_nodes, dtype=torch.bool)
    edge_mask = row.new_empty(row.size(0), dtype=torch.bool)

    if isinstance(node_idx, (int, list, tuple)):
        node_idx = torch.tensor([node_idx], device=row.device).flatten()
    else:
        node_idx = node_idx.to(row.device)

    # Breadth-first expansion: subsets[h] holds the frontier reached after h hops.
    subsets = [node_idx]
    dist = torch.empty_like(node_mask, dtype=torch.long).fill_(num_nodes + 1)
    for _ in range(num_hops):
        node_mask.fill_(False)
        node_mask[subsets[-1]] = True
        torch.index_select(node_mask, 0, row, out=edge_mask)
        subsets.append(col[edge_mask])

    # Assign hop distances from far to near so that the smallest hop count wins.
    for h in range(num_hops, -1, -1):
        dist[subsets[h]] = h

    subset, inv = torch.cat(subsets).unique(return_inverse=True)
    inv = inv[:node_idx.numel()]
    dist = dist[subset]

    node_mask.fill_(False)
    node_mask[subset] = True

    if not directed:
        edge_mask = node_mask[row] & node_mask[col]

    edge_index = edge_index[:, edge_mask]

    if relabel_nodes:
        node_idx = row.new_full((num_nodes, ), -1)
        node_idx[subset] = torch.arange(subset.size(0), device=row.device)
        edge_index = node_idx[edge_index]

    return subset, edge_index, inv, edge_mask, dist
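
# Illustrative sketch (not part of the library API): how k_hop_subgraph behaves on a
# tiny undirected path graph 0-1-2-3. The edge_index below is an assumption made for
# this example only.
#
#   edge_index = torch.tensor([[0, 1, 1, 2, 2, 3],
#                              [1, 0, 2, 1, 3, 2]])
#   subset, sub_ei, inv, edge_mask, dist = k_hop_subgraph(
#       0, 2, edge_index, relabel_nodes=True, num_nodes=4)
#   # subset -> tensor([0, 1, 2])   nodes within 2 hops of node 0
#   # dist   -> tensor([0, 1, 2])   hop distance of each node in subset to the root
#   # inv    -> tensor([0])         position of the root node inside subset
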
def KhopSampler(data: PygData, hop: int = 2) -> SparseTensor:
    """
    Sample the k-hop subgraph rooted at every node of a given PyG graph.
    Args:
    - data (PygData): The input PyG graph.
    - hop (int, optional): The number of hops for subgraph sampling. Defaults to 2.
    Returns:
    SparseTensor of shape (num_nodes, num_nodes) holding the precomputed tuple features: entry (i, j) is the hop distance from root node i to node j within i's k-hop subgraph.
    """
    subgraphs = []
    for i in range(data.num_nodes):
        subset, _, _, _, dist = k_hop_subgraph(i,
                                               hop,
                                               data.edge_index,
                                               relabel_nodes=True,
                                               num_nodes=data.num_nodes)
        assert subset.shape[0] > 1, "empty subgraph!"
        # Pair every node in the subgraph with its root i; the hop distance is the feature.
        nodeidx1 = subset.clone()
        nodeidx1.fill_(i)
        subgraphs.append(
            PygData(
                x=dist,
                subg_nodeidx=torch.stack((nodeidx1, subset), dim=-1),
                num_nodes=subset.shape[0],
            ))
    # Concatenate all subgraphs and build one sparse (num_nodes, num_nodes) tensor of tuple features.
    subgbatch = PygBatch.from_data_list(subgraphs)
    tupleid, tuplefeat = subgbatch.subg_nodeidx.t(), subgbatch.x
    return SparseTensor(tupleid,
                        tuplefeat,
                        shape=2 * [data.num_nodes] + list(tuplefeat.shape[1:]),
                        is_coalesced=False,
                        reduce="min")
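
# Usage sketch (illustrative, assuming a small 3-node path graph 0-1-2): KhopSampler
# returns a 2-D SparseTensor whose stored entry (i, j) is the hop distance from root i
# to node j inside i's k-hop subgraph.
#
#   data = PygData(edge_index=torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]]), num_nodes=3)
#   tuplefeat = KhopSampler(data, hop=2)
#   # tuplefeat has shape (3, 3); with hop=2, every (root, node) pair of this path graph is stored.
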
def I2Sampler(data: PygData, hop: int = 3) -> SparseTensor:
    """
    Perform subgraph sampling on a given graph for I2GNN: one k-hop subgraph is extracted around each edge of the graph.
    Args:
    - data (PygData): The input PyG graph.
    - hop (int, optional): The number of hops for subgraph sampling. Defaults to 3.
    Returns:
    SparseTensor of shape (num_nodes, num_nodes, num_nodes, 2) holding the precomputed tuple features: entry (i, j, k) stores the shortest-path distances from node k to the two endpoints i and j of the root edge.
    """
    subgraphs = []
    # All-pairs shortest-path distances on the undirected, unweighted graph.
    spadj = to_scipy_sparse_matrix(data.edge_index, num_nodes=data.num_nodes)
    dist_matrix = torch.from_numpy(
        ssp.csgraph.shortest_path(spadj,
                                  directed=False,
                                  unweighted=True,
                                  return_predecessors=False)).to(torch.long)
    ei = data.edge_index
    for i in range(ei.shape[1]):
        nodepair = ei[:, i]
        subset, _, _, _, _ = k_hop_subgraph(nodepair,
                                            hop,
                                            data.edge_index,
                                            relabel_nodes=True,
                                            num_nodes=data.num_nodes)
        assert subset.shape[0] > 1, "empty subgraph!"
        # Pair every node in the subgraph with the two endpoints of the root edge.
        nodeidx1 = subset.clone()
        nodeidx1.fill_(nodepair[0])
        nodeidx2 = subset.clone()
        nodeidx2.fill_(nodepair[1])
        subgraphs.append(
            PygData(
                x=torch.stack((dist_matrix[nodepair[0].item()][subset],
                               dist_matrix[nodepair[1].item()][subset]),
                              dim=-1),
                subg_nodeidx=torch.stack((nodeidx1, nodeidx2, subset), dim=-1),
                num_nodes=subset.shape[0],
            ))
    # Concatenate all subgraphs into one sparse 3-D tensor of tuple features.
    subgbatch = PygBatch.from_data_list(subgraphs)
    tupleid, tuplefeat = subgbatch.subg_nodeidx.t(), subgbatch.x
    return SparseTensor(tupleid,
                        tuplefeat,
                        shape=3 * [data.num_nodes] + list(tuplefeat.shape[1:]),
                        is_coalesced=False,
                        reduce="min")
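
# Usage sketch (illustrative, same toy 3-node path graph as above): I2Sampler builds one
# subgraph per directed edge (i, j) and returns a 3-D SparseTensor of shape
# (num_nodes, num_nodes, num_nodes, 2); entry (i, j, k) stores the shortest-path
# distances from node k to the two root endpoints i and j.
#
#   data = PygData(edge_index=torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]]), num_nodes=3)
#   tuplefeat = I2Sampler(data, hop=3)
#   # tuplefeat has shape (3, 3, 3, 2)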