Skip to content

kmeans_img

Performs KMeans clustering on the input image.

Parameters:

Name Type Description Default
img ndarray

Image to cluster. Shape (H, W, 3).

required
n_clust int

Number of clusters.

3
seed int

Random seed.

42
device str

Device to use for computation. Options are 'cpu' or 'cuda'. If set to 'cuda', cuML will be used for GPU acceleration.

'cpu'

Returns:

Type Description
ndarray

Label image. Shape (H, W).

Source code in src/histolytics/utils/im.py
def kmeans_img(
    img: np.ndarray, n_clust: int = 3, seed: int = 42, device: str = "cpu"
) -> np.ndarray:
    """Performs KMeans clustering on the input image.

    Parameters:
        img (np.ndarray):
            Image to cluster. Shape (H, W, 3).
        n_clust (int):
            Number of clusters.
        seed (int):
            Random seed.
        device (str):
            Device to use for computation. Options are 'cpu' or 'cuda'. If set to 'cuda',
            the GPU implementation is used, which requires the optional GPU dependencies.

    Returns:
        np.ndarray:
            Label image. Shape (H, W). If the image contains fewer unique colors
            than `n_clust`, no clustering is run and an all-zero int32 label
            image is returned instead.

    Raises:
        ValueError:
            If `device` is neither 'cpu' nor 'cuda'.
        RuntimeError:
            If `device` is 'cuda' but the GPU dependencies are not available.
    """
    # Validate the device up front so an invalid value is always reported,
    # including for images that take the early-return path below.
    if device not in ("cpu", "cuda"):
        raise ValueError(f"Invalid device '{device}'. Use 'cpu' or 'cuda'.")

    if device == "cuda" and not _has_cp:
        raise RuntimeError(
            "CuPy and cucim are required for GPU acceleration (device='cuda'). "
            "Please install them with:\n"
            "  pip install cupy-cuda12x cucim-cu12\n"
            "or set device='cpu'."
        )

    # Check for sufficient color variation: count distinct RGB triplets.
    pixels = img.reshape(-1, 3)
    n_unique = len(np.unique(pixels, axis=0))

    # With fewer unique colors than requested clusters, skip clustering and
    # assign every pixel to a single label (0).
    if n_unique < n_clust:
        return np.zeros((img.shape[0], img.shape[1]), dtype=np.int32)

    if device == "cuda":
        return _kmeans_cp(img, n_clust=n_clust, seed=seed)
    return _kmeans_np(img, n_clust=n_clust, seed=seed)