Skip to content

cluster_feats

Compute centrography features of a cluster represented by a GeoDataFrame.

Note

Computes the following features:

  • area: The area of the cluster.
  • dispersion: The dispersion of the cluster.
  • size: The size of the cluster (number of objects).
  • orientation: The orientation angle of the cluster.

Parameters:

Name Type Description Default
gdf GeoDataFrame

The GeoDataFrame containing the cluster data.

required
hull_type str

The type of hull to compute. One of: "alpha_shape", "convex_hull", "ellipse". The hull is used to compute the area and orientation of the cluster.

'alpha_shape'
normalize_orientation bool

Whether to normalize the orientation angle to be within [0, 90].

True
**kwargs Any

Additional keyword arguments for the hull computation (e.g., step for alpha shape).

{}

Returns:

Type Description
Dict[str, float]

A dictionary mapping the feature names "area", "dispersion", "size" and "orientation" to their computed values.

Examples:

>>> import pandas as pd
>>> from histolytics.spatial_clust.density_clustering import density_clustering
>>> from histolytics.data import hgsc_cancer_nuclei
>>> from histolytics.spatial_clust.centrography import cluster_tendency
>>> from histolytics.spatial_clust.clust_metrics import cluster_feats
>>>
>>> nuc = hgsc_cancer_nuclei()
>>> nuc_neo = nuc[nuc["class_name"] == "neoplastic"]
>>> labels = density_clustering(nuc_neo, eps=250, min_samples=100, method="dbscan")
>>> nuc_neo = nuc_neo.assign(labels=labels)
>>> # Calculate cluster features for each cluster label
>>> clust_features = (
...    nuc_neo.groupby("labels")
...    .apply(
...        lambda x: pd.Series(
...            cluster_feats(x, hull_type="convex_hull", normalize_orientation=True)
...        ),
...        include_groups=False,
...    )
...    .reset_index(drop=False)
... )
>>> print(clust_features)
    labels           area  dispersion   size  orientation
0      -1  732641.332024  483.830111   83.0    34.979649
1       0  368383.654562  249.680419  205.0    81.664728
Source code in src/histolytics/spatial_clust/clust_metrics.py
def cluster_feats(
    gdf: gpd.GeoDataFrame,
    hull_type: str = "alpha_shape",
    normalize_orientation: bool = True,
    **kwargs,
) -> Dict[str, float]:
    """Summarize a single cluster with four centrography features.

    Note:
        The returned dictionary has these keys:

        - `area`: The area of the cluster.
        - `dispersion`: The dispersion of the cluster.
        - `size`: The size of the cluster (number of objects).
        - `orientation`: The orientation angle of the cluster.

        `hull_type` and `**kwargs` are forwarded to both the area and the
        orientation computation; dispersion and size are computed from `gdf`
        alone.

    Parameters:
        gdf (gpd.GeoDataFrame):
            The GeoDataFrame holding the objects of one cluster.
        hull_type (str):
            Which hull to build around the cluster. One of: "alpha_shape",
            "convex_hull", "ellipse". The area and the orientation are both
            derived from this hull.
        normalize_orientation (bool):
            If True, the orientation angle is normalized to lie within [0, 90].
        **kwargs (Any):
            Extra keyword arguments passed on to the hull computation
            (e.g., `step` for alpha shape).

    Returns:
        Dict[str, float]:
            The computed features keyed by "area", "dispersion", "size" and
            "orientation".

    Examples:
        >>> import pandas as pd
        >>> from histolytics.spatial_clust.density_clustering import density_clustering
        >>> from histolytics.data import hgsc_cancer_nuclei
        >>> from histolytics.spatial_clust.clust_metrics import cluster_feats
        >>>
        >>> nuc = hgsc_cancer_nuclei()
        >>> nuc_neo = nuc[nuc["class_name"] == "neoplastic"]
        >>> labels = density_clustering(nuc_neo, eps=250, min_samples=100, method="dbscan")
        >>> nuc_neo = nuc_neo.assign(labels=labels)
        >>> # Calculate cluster features for each cluster label
        >>> clust_features = (
        ...    nuc_neo.groupby("labels")
        ...    .apply(
        ...        lambda x: pd.Series(
        ...            cluster_feats(x, hull_type="convex_hull", normalize_orientation=True)
        ...        ),
        ...        include_groups=False,
        ...    )
        ...    .reset_index(drop=False)
        ... )
        >>> print(clust_features)
            labels           area  dispersion   size  orientation
        0      -1  732641.332024  483.830111   83.0    34.979649
        1       0  368383.654562  249.680419  205.0    81.664728
    """
    # Computed in the same order as the keys of the returned dictionary.
    area = cluster_area(gdf, hull_type=hull_type, **kwargs)
    dispersion = cluster_dispersion(gdf)
    size = cluster_size(gdf)
    orientation = cluster_orientation(
        gdf, hull_type=hull_type, normalize=normalize_orientation, **kwargs
    )

    features = {
        "area": area,
        "dispersion": dispersion,
        "size": size,
        "orientation": orientation,
    }
    return features