Skip to content

WsiPanopticSegmenter

histolytics.wsi.wsi_segmenter.WsiPanopticSegmenter

Source code in src/histolytics/wsi/wsi_segmenter.py
class WsiPanopticSegmenter:
    def __init__(
        self,
        reader: SlideReader,
        model: BaseModelPanoptic,
        level: int,
        coordinates: List[Tuple[int, int, int, int]],
        batch_size: int = 8,
        transforms: A.Compose = None,
    ) -> None:
        """Class handling the panoptic segmentation of WSIs.

        Builds a `WSIDatasetInfer` over the given tile coordinates and wraps it
        in a non-shuffling `DataLoader` that `segment` iterates over.

        Parameters:
            reader (SlideReader):
                The `SlideReader` object for reading the WSIs.
            model (BaseModelPanoptic):
                The model for segmentation.
            level (int):
                The level of the WSI to segment.
            coordinates (List[Tuple[int, int, int, int]]):
                The bounding box coordinates from `reader.get_tile_coordinates()`.
            batch_size (int):
                The batch size for the DataLoader.
            transforms (A.Compose):
                The transformations for the input patches. Dropped (with a
                warning) when albumentations is not available.
        """
        # Only warn when the caller actually asked for transforms; warning on
        # transforms=None would be noise for users without albumentations.
        if transforms is not None and not has_albu:
            warnings.warn(
                "The albumentations lib is needed to apply transformations. "
                "Setting transforms=None"
            )
            transforms = None

        self.batch_size = batch_size
        self.coordinates = coordinates
        self.model = model

        self.dataset = WSIDatasetInfer(
            reader, coordinates, level=level, transforms=transforms
        )
        self.dataloader = DataLoader(
            self.dataset, batch_size=batch_size, shuffle=False, pin_memory=True
        )
        # Flipped to True at the end of `segment`; the merge methods check it.
        self._has_processed = False

    def segment(
        self,
        save_dir: str,
        use_sliding_win: bool = False,
        window_size: Tuple[int, int] = None,
        stride: int = None,
        use_async_postproc: bool = True,
        postproc_njobs: int = 4,
        postproc_start_method: str = "threading",
        class_dict_nuc: Dict[int, str] = None,
        class_dict_cyto: Dict[int, str] = None,
        class_dict_tissue: Dict[int, str] = None,
    ) -> None:
        """Segment the WSIs and save the instances as parquet files to `save_dir`.

        Each batch from the internal DataLoader is passed to `model.predict`
        and the result to `model.post_process`, together with one output path
        per tile for the nuclei, cytoplasm and tissue results.

        Parameters:
            save_dir (str):
                The directory to save the output segmentations in .parquet-format.
                The sub-directories `tissue`, `nuc` and `cyto` are created in it.
            use_sliding_win (bool):
                Forwarded to `model.predict` as `use_sliding_win`.
            window_size (Tuple[int, int]):
                Forwarded to `model.predict` as `window_size`.
            stride (int):
                Forwarded to `model.predict` as `stride`.
            use_async_postproc (bool):
                Forwarded to `model.post_process` as `use_async_postproc`.
            postproc_njobs (int):
                Forwarded to `model.post_process` as `n_jobs`.
            postproc_start_method (str):
                Forwarded to `model.post_process` as `start_method`.
            class_dict_nuc (Dict[int, str]):
                Forwarded to `model.post_process` as `class_dict_nuc`.
            class_dict_cyto (Dict[int, str]):
                Forwarded to `model.post_process` as `class_dict_cyto`.
            class_dict_tissue (Dict[int, str]):
                Forwarded to `model.post_process` as `class_dict_tissue`.
        """
        save_dir = Path(save_dir)
        out_dirs = {kind: save_dir / kind for kind in ("tissue", "nuc", "cyto")}
        for out_dir in out_dirs.values():
            out_dir.mkdir(parents=True, exist_ok=True)

        def tile_paths(kind, names, coords):
            # The ".parquet" extension is written into the file name directly.
            # `Path.with_suffix` replaces everything after the last dot, so a
            # slide name containing a dot would map every tile to one file.
            return [
                out_dirs[kind]
                / f"{n}_x{c[0]}-y{c[1]}_w{c[2]}-h{c[3]}_{kind}.parquet"
                for n, c in zip(names, coords)
            ]

        with tqdm(self.dataloader, unit="batch") as loader, torch.no_grad():
            for data in loader:
                # Axis 3 is moved to position 1 before prediction (presumably
                # channels-last -> channels-first; confirm against the dataset).
                im = data["image"].to(self.model.device).permute(0, 3, 1, 2).float()
                coords = data["coords"]
                names = data["name"]

                # File names are built from the batch coords as delivered by
                # the DataLoader, before they are converted to int tuples.
                save_paths_nuc = tile_paths("nuc", names, coords)
                save_paths_tissue = tile_paths("tissue", names, coords)
                save_paths_cyto = tile_paths("cyto", names, coords)
                coords = [tuple(map(int, coord)) for coord in coords]

                # predict
                probs = self.model.predict(
                    im,
                    use_sliding_win=use_sliding_win,
                    window_size=window_size,
                    stride=stride,
                )

                # post-process
                self.model.post_process(
                    probs,
                    use_async_postproc=use_async_postproc,
                    start_method=postproc_start_method,
                    n_jobs=postproc_njobs,
                    save_paths_nuc=save_paths_nuc,
                    save_paths_cyto=save_paths_cyto,
                    save_paths_tissue=save_paths_tissue,
                    coords=coords,
                    class_dict_nuc=class_dict_nuc,
                    class_dict_cyto=class_dict_cyto,
                    class_dict_tissue=class_dict_tissue,
                )

        self._has_processed = True

    def merge_instances(
        self,
        src: str,
        dst: str,
        clear_in_dir: bool = False,
        simplify_level: float = 0.3,
    ) -> None:
        """Merge the instances at the image boundaries.

        Parameters:
            src (str):
                The directory containing the instance segmentations (.parquet-files).
            dst (str):
                The destination path for the output file. Allowed formats are
                '.parquet', '.geojson', and '.feather'.
            clear_in_dir (bool):
                Whether to clear the source directory after merging.
            simplify_level (float):
                The level of simplification to apply to the merged instances.

        Raises:
            ValueError: If `segment` has not been run on this object yet.
        """
        if not self._has_processed:
            raise ValueError("You must segment the instances first.")

        src_dir = Path(src)
        tiles = gpd.read_parquet(src_dir)
        InstMerger(tiles, self.coordinates).merge(dst, simplify_level=simplify_level)

        if not clear_in_dir:
            return

        # Remove every entry in the source directory, then the directory itself.
        for entry in src_dir.glob("*"):
            entry.unlink()
        src_dir.rmdir()

    def merge_tissues(
        self,
        src: str,
        dst: str,
        clear_in_dir: bool = False,
        simplify_level: float = 1,
    ) -> None:
        """Merge the tissue segmentations.

        Parameters:
            src (str):
                The directory containing the tissue segmentations (.parquet-files).
            dst (str):
                The destination path for the output file. Allowed formats are
                '.parquet', '.geojson', and '.feather'.
            clear_in_dir (bool):
                Whether to clear the source directory after merging.
            simplify_level (float):
                The level of simplification to apply to the merged tissues.

        Raises:
            ValueError: If `segment` has not been run on this object yet.
        """
        if not self._has_processed:
            raise ValueError("You must segment the instances first.")

        src_dir = Path(src)
        tiles = gpd.read_parquet(src_dir)
        TissueMerger(tiles, self.coordinates).merge(dst, simplify_level=simplify_level)

        if not clear_in_dir:
            return

        # Remove every entry in the source directory, then the directory itself.
        for entry in src_dir.glob("*"):
            entry.unlink()
        src_dir.rmdir()

__init__

__init__(reader: SlideReader, model: BaseModelPanoptic, level: int, coordinates: List[Tuple[int, int, int, int]], batch_size: int = 8, transforms: Compose = None) -> None

Class handling the panoptic segmentation of WSIs.

Parameters:

Name Type Description Default
reader SlideReader

The SlideReader object for reading the WSIs.

required
model BaseModelPanoptic

The model for segmentation.

required
level int

The level of the WSI to segment.

required
coordinates List[Tuple[int, int, int, int]]

The bounding box coordinates from reader.get_tile_coordinates().

required
batch_size int

The batch size for the DataLoader.

8
transforms Compose

The transformations for the input patches.

None
Source code in src/histolytics/wsi/wsi_segmenter.py
def __init__(
    self,
    reader: SlideReader,
    model: BaseModelPanoptic,
    level: int,
    coordinates: List[Tuple[int, int, int, int]],
    batch_size: int = 8,
    transforms: A.Compose = None,
) -> None:
    """Class handling the panoptic segmentation of WSIs.

    Builds a `WSIDatasetInfer` over the given tile coordinates and wraps it
    in a non-shuffling `DataLoader`.

    Parameters:
        reader (SlideReader):
            The `SlideReader` object for reading the WSIs.
        model (BaseModelPanoptic):
            The model for segmentation.
        level (int):
            The level of the WSI to segment.
        coordinates (List[Tuple[int, int, int, int]]):
            The bounding box coordinates from `reader.get_tile_coordinates()`.
        batch_size (int):
            The batch size for the DataLoader.
        transforms (A.Compose):
            The transformations for the input patches. Dropped (with a
            warning) when albumentations is not available.
    """
    # Only warn when the caller actually asked for transforms; warning on
    # transforms=None would be noise for users without albumentations.
    if transforms is not None and not has_albu:
        warnings.warn(
            "The albumentations lib is needed to apply transformations. "
            "Setting transforms=None"
        )
        transforms = None

    self.batch_size = batch_size
    self.coordinates = coordinates
    self.model = model

    self.dataset = WSIDatasetInfer(
        reader, coordinates, level=level, transforms=transforms
    )
    self.dataloader = DataLoader(
        self.dataset, batch_size=batch_size, shuffle=False, pin_memory=True
    )
    # Starts False; records that no segmentation has been run yet.
    self._has_processed = False

segment

segment(save_dir: str, use_sliding_win: bool = False, window_size: Tuple[int, int] = None, stride: int = None, use_async_postproc: bool = True, postproc_njobs: int = 4, postproc_start_method: str = 'threading', class_dict_nuc: Dict[int, str] = None, class_dict_cyto: Dict[int, str] = None, class_dict_tissue: Dict[int, str] = None) -> None

Segment the WSIs and save the instances as parquet files to save_dir.

Parameters:

Name Type Description Default
save_dir str

The directory to save the output segmentations in .parquet-format.

required
Source code in src/histolytics/wsi/wsi_segmenter.py
def segment(
    self,
    save_dir: str,
    use_sliding_win: bool = False,
    window_size: Tuple[int, int] = None,
    stride: int = None,
    use_async_postproc: bool = True,
    postproc_njobs: int = 4,
    postproc_start_method: str = "threading",
    class_dict_nuc: Dict[int, str] = None,
    class_dict_cyto: Dict[int, str] = None,
    class_dict_tissue: Dict[int, str] = None,
) -> None:
    """Segment the WSIs and save the instances as parquet files to `save_dir`.

    Each batch from `self.dataloader` is passed to `model.predict` and the
    result to `model.post_process`, together with one output path per tile
    for the nuclei, cytoplasm and tissue results.

    Parameters:
        save_dir (str):
            The directory to save the output segmentations in .parquet-format.
            The sub-directories `tissue`, `nuc` and `cyto` are created in it.
        use_sliding_win (bool):
            Forwarded to `model.predict` as `use_sliding_win`.
        window_size (Tuple[int, int]):
            Forwarded to `model.predict` as `window_size`.
        stride (int):
            Forwarded to `model.predict` as `stride`.
        use_async_postproc (bool):
            Forwarded to `model.post_process` as `use_async_postproc`.
        postproc_njobs (int):
            Forwarded to `model.post_process` as `n_jobs`.
        postproc_start_method (str):
            Forwarded to `model.post_process` as `start_method`.
        class_dict_nuc (Dict[int, str]):
            Forwarded to `model.post_process` as `class_dict_nuc`.
        class_dict_cyto (Dict[int, str]):
            Forwarded to `model.post_process` as `class_dict_cyto`.
        class_dict_tissue (Dict[int, str]):
            Forwarded to `model.post_process` as `class_dict_tissue`.
    """
    save_dir = Path(save_dir)
    out_dirs = {kind: save_dir / kind for kind in ("tissue", "nuc", "cyto")}
    for out_dir in out_dirs.values():
        out_dir.mkdir(parents=True, exist_ok=True)

    def tile_paths(kind, names, coords):
        # The ".parquet" extension is written into the file name directly.
        # `Path.with_suffix` replaces everything after the last dot, so a
        # slide name containing a dot would map every tile to one file.
        return [
            out_dirs[kind] / f"{n}_x{c[0]}-y{c[1]}_w{c[2]}-h{c[3]}_{kind}.parquet"
            for n, c in zip(names, coords)
        ]

    with tqdm(self.dataloader, unit="batch") as loader, torch.no_grad():
        for data in loader:
            # Axis 3 is moved to position 1 before prediction (presumably
            # channels-last -> channels-first; confirm against the dataset).
            im = data["image"].to(self.model.device).permute(0, 3, 1, 2).float()
            coords = data["coords"]
            names = data["name"]

            # File names are built from the batch coords as delivered by the
            # DataLoader, before they are converted to int tuples.
            save_paths_nuc = tile_paths("nuc", names, coords)
            save_paths_tissue = tile_paths("tissue", names, coords)
            save_paths_cyto = tile_paths("cyto", names, coords)
            coords = [tuple(map(int, coord)) for coord in coords]

            # predict
            probs = self.model.predict(
                im,
                use_sliding_win=use_sliding_win,
                window_size=window_size,
                stride=stride,
            )

            # post-process
            self.model.post_process(
                probs,
                use_async_postproc=use_async_postproc,
                start_method=postproc_start_method,
                n_jobs=postproc_njobs,
                save_paths_nuc=save_paths_nuc,
                save_paths_cyto=save_paths_cyto,
                save_paths_tissue=save_paths_tissue,
                coords=coords,
                class_dict_nuc=class_dict_nuc,
                class_dict_cyto=class_dict_cyto,
                class_dict_tissue=class_dict_tissue,
            )

    self._has_processed = True

merge_instances

merge_instances(src: str, dst: str, clear_in_dir: bool = False, simplify_level: float = 0.3) -> None

Merge the instances at the image boundaries.

Parameters:

Name Type Description Default
src str

The directory containing the instance segmentations (.parquet-files).

required
dst str

The destination path for the output file. Allowed formats are '.parquet', '.geojson', and '.feather'.

required
clear_in_dir bool

Whether to clear the source directory after merging.

False
simplify_level float

The level of simplification to apply to the merged instances.

0.3
Source code in src/histolytics/wsi/wsi_segmenter.py
def merge_instances(
    self,
    src: str,
    dst: str,
    clear_in_dir: bool = False,
    simplify_level: float = 0.3,
) -> None:
    """Merge the instances at the image boundaries.

    Reads the parquet data under `src`, hands it to an `InstMerger` together
    with `self.coordinates`, and lets the merger write the result to `dst`.

    Parameters:
        src (str):
            The directory containing the instance segmentations (.parquet-files).
        dst (str):
            The destination path for the output file. Allowed formats are
            '.parquet', '.geojson', and '.feather'.
        clear_in_dir (bool):
            Whether to clear the source directory after merging.
        simplify_level (float):
            The level of simplification to apply to the merged instances.

    Raises:
        ValueError: If `self._has_processed` is not set, i.e. no segmentation
            has been run on this object yet.
    """
    if not self._has_processed:
        raise ValueError("You must segment the instances first.")

    src_dir = Path(src)
    tiles = gpd.read_parquet(src_dir)
    InstMerger(tiles, self.coordinates).merge(dst, simplify_level=simplify_level)

    if not clear_in_dir:
        return

    # Remove every entry in the source directory, then the directory itself.
    for entry in src_dir.glob("*"):
        entry.unlink()
    src_dir.rmdir()

merge_tissues

merge_tissues(src: str, dst: str, clear_in_dir: bool = False, simplify_level: float = 1) -> None

Merge the tissue segmentations.

Parameters:

Name Type Description Default
src str

The directory containing the tissue segmentations (.parquet-files).

required
dst str

The destination path for the output file. Allowed formats are '.parquet', '.geojson', and '.feather'.

required
clear_in_dir bool

Whether to clear the source directory after merging.

False
simplify_level float

The level of simplification to apply to the merged tissues.

1
Source code in src/histolytics/wsi/wsi_segmenter.py
def merge_tissues(
    self,
    src: str,
    dst: str,
    clear_in_dir: bool = False,
    simplify_level: float = 1,
) -> None:
    """Merge the tissue segmentations.

    Reads the parquet data under `src`, hands it to a `TissueMerger` together
    with `self.coordinates`, and lets the merger write the result to `dst`.

    Parameters:
        src (str):
            The directory containing the tissue segmentations (.parquet-files).
        dst (str):
            The destination path for the output file. Allowed formats are
            '.parquet', '.geojson', and '.feather'.
        clear_in_dir (bool):
            Whether to clear the source directory after merging.
        simplify_level (float):
            The level of simplification to apply to the merged tissues.

    Raises:
        ValueError: If `self._has_processed` is not set, i.e. no segmentation
            has been run on this object yet.
    """
    if not self._has_processed:
        raise ValueError("You must segment the instances first.")

    src_dir = Path(src)
    tiles = gpd.read_parquet(src_dir)
    TissueMerger(tiles, self.coordinates).merge(dst, simplify_level=simplify_level)

    if not clear_in_dir:
        return

    # Remove every entry in the source directory, then the directory itself.
    for entry in src_dir.glob("*"):
        entry.unlink()
    src_dir.rmdir()