Skip to content

lisa_clustering

Perform Local Indicators of Spatial Association (LISA) clustering.

Parameters:

Name Type Description Default
gdf GeoDataFrame

The GeoDataFrame containing the data.

required
spatial_weights W

The spatial weights object.

required
feat str

The feature to use for clustering.

required
seed int

Random seed for reproducibility.

42
permutations int

Number of permutations for significance testing.

100

Returns:

Type Description
ndarray

The cluster label ("ns", "HH", "LH", "LL" or "HL") for each object in the GeoDataFrame.

Examples:

>>> from histolytics.spatial_clust.lisa_clustering import lisa_clustering
>>> from histolytics.data import cervix_nuclei
>>> from histolytics.spatial_graph.graph import fit_graph
>>> from histolytics.spatial_geom.shape_metrics import shape_metric
>>>
>>> nuc = cervix_nuclei()
>>> nuc = nuc[nuc["class_name"] == "neoplastic"]
>>> # Fit distband graph to the neoplastic nuclei
>>> w, _ = fit_graph(nuc, "distband", threshold=100)
>>> # Compute the nuclei areas
>>> nuc = shape_metric(nuc, ["area"])
>>> # Perform LISA clustering on the area feature
>>> labels = lisa_clustering(nuc, w, feat="area", seed=4, permutations=999)
>>> labels
    array(['LL', 'ns', 'LL', ..., 'ns', 'ns', 'ns'], dtype='<U2')
Source code in src/histolytics/spatial_clust/lisa_clustering.py
def lisa_clustering(
    gdf: gpd.GeoDataFrame,
    spatial_weights: W,
    feat: str,
    seed: int = 42,
    permutations: int = 100,
) -> np.ndarray:
    """Label every object of a GeoDataFrame with its LISA cluster type.

    A local Moran statistic (`esda.Moran_Local`) is computed for the column
    `feat` under the given spatial weights. The result is passed to
    `moran_hot_cold_spots`, and the integer code it yields per object is
    translated to a string label by position:
    0 -> "ns", 1 -> "HH", 2 -> "LH", 3 -> "LL", 4 -> "HL".

    Parameters:
        gdf (gpd.GeoDataFrame):
            The GeoDataFrame holding the objects; must contain the column `feat`.
        spatial_weights (W):
            The spatial weights object passed on to `esda.Moran_Local`.
        feat (str):
            Name of the column whose values are clustered.
        seed (int):
            Random seed forwarded to `esda.Moran_Local`. Defaults to 42.
        permutations (int):
            Number of permutations forwarded to `esda.Moran_Local` for the
            significance testing. Defaults to 100.

    Returns:
        np.ndarray:
            One string label per object: "ns", "HH", "LH", "LL" or "HL".
            ("ns" presumably stands for "not significant" - confirm against
            `moran_hot_cold_spots`.)

    Examples:
        >>> from histolytics.spatial_clust.lisa_clustering import lisa_clustering
        >>> from histolytics.data import cervix_nuclei
        >>> from histolytics.spatial_graph.graph import fit_graph
        >>> from histolytics.spatial_geom.shape_metrics import shape_metric
        >>>
        >>> nuc = cervix_nuclei()
        >>> nuc = nuc[nuc["class_name"] == "neoplastic"]
        >>> # Fit distband graph to the neoplastic nuclei
        >>> w, _ = fit_graph(nuc, "distband", threshold=100)
        >>> # Compute the nuclei areas
        >>> nuc = shape_metric(nuc, ["area"])
        >>> # Perform LISA clustering on the area feature
        >>> labels = lisa_clustering(nuc, w, feat="area", seed=4, permutations=999)
        >>> labels
            array(['LL', 'ns', 'LL', ..., 'ns', 'ns', 'ns'], dtype='<U2')
    """
    # Position in this list is the integer code produced by
    # `moran_hot_cold_spots`; the order must not change.
    code_to_label = ["ns", "HH", "LH", "LL", "HL"]

    local_moran = esda.Moran_Local(
        gdf[feat],
        spatial_weights,
        island_weight=np.nan,
        seed=seed,
        permutations=permutations,
    )

    # One integer code per object, telling which label applies to it.
    spot_codes = moran_hot_cold_spots(local_moran)

    return np.array([code_to_label[code] for code in spot_codes])