Skip to content

gdf2sem

Converts a GeoDataFrame to a semantic segmentation raster mask.

Parameters:

Name Type Description Default
gdf GeoDataFrame

GeoDataFrame with a "class_name" column.

required
xoff int

X offset. This is used to translate the geometries in the GeoDataFrame to burn the geometries in correctly to the raster mask.

0
yoff int

Y offset. This is used to translate the geometries in the GeoDataFrame to burn the geometries in correctly to the raster mask.

0
class_dict Dict[str, int], default=None

Dictionary mapping class names to integers, e.g. {"neoplastic":1, "immune":2}. If None, the classes are mapped to the integers 1, 2, ... in the sorted order of their class names.

None
width int

Width of the output. This should match with the underlying image width. If None, the width will be calculated from the input gdf.

None
height int

Height of the output. This should match with the underlying image height. If None, the height will be calculated from the input gdf.

None

Returns:

Type Description
ndarray

np.ndarray: Semantic segmentation mask of the input gdf.

Examples:

>>> from histolytics.data import hgsc_cancer_nuclei
>>> from histolytics.utils.raster import gdf2sem
>>> import matplotlib.pyplot as plt
>>> from skimage.measure import label
>>> from skimage.color import label2rgb
>>>
>>> nuc = hgsc_cancer_nuclei()
>>> # Convert the GeoDataFrame to a semantic segmentation raster
>>> nuc_raster = gdf2sem(nuc, xoff=0, yoff=0, width=1500, height=1500)
>>> # Visualize the semantic segmentation raster and the GeoDataFrame
>>> fig, ax = plt.subplots(1, 2, figsize=(8, 4))
>>> ax[0].imshow(label2rgb(nuc_raster, bg_label=0))
>>> ax[0].set_axis_off()
>>> nuc.plot(column="class_name", ax=ax[1])
>>> ax[1].set_axis_off()
>>> fig.tight_layout()

out

Source code in src/histolytics/utils/raster.py
def gdf2sem(
    gdf: gpd.GeoDataFrame,
    xoff: int = 0,
    yoff: int = 0,
    class_dict: Dict[str, int] = None,
    width: int = None,
    height: int = None,
) -> np.ndarray:
    """Converts a GeoDataFrame to a semantic segmentation raster mask.

    Every geometry is shifted by `(-xoff, -yoff)` and burned into an int32 raster
    with its class label. Pixels not covered by any geometry stay 0. If geometries
    of different classes overlap, the class processed last (classes are processed
    in sorted order of their names) overwrites the earlier ones.

    Parameters:
        gdf (gpd.GeoDataFrame):
            GeoDataFrame with a "class_name" column.
        xoff (int):
            X offset. The geometries are translated by `-xoff` before they are
            burned into the raster mask.
        yoff (int):
            Y offset. The geometries are translated by `-yoff` before they are
            burned into the raster mask.
        class_dict (Dict[str, int], default=None):
            Dictionary mapping class names to integers. e.g. {"neoplastic":1, "immune":2}
            If None, the classes are mapped to the integers 1, 2, ... in the sorted
            order of their class names.
        width (int):
            Width of the output. This should match with the underlying image width.
            If None, the width is set to the x-extent (xmax - xmin) of the input gdf.
            Note that the geometries are still only shifted by `-xoff`; the minimum
            x-bound of the gdf is not subtracted automatically.
        height (int):
            Height of the output. This should match with the underlying image height.
            If None, the height is set to the y-extent (ymax - ymin) of the input gdf.
            Note that the geometries are still only shifted by `-yoff`; the minimum
            y-bound of the gdf is not subtracted automatically.

    Returns:
        np.ndarray:
            Semantic segmentation mask of the input gdf. Shape (height, width),
            dtype int32.

    Raises:
        ValueError:
            If `width` or `height` is None and the bounds of `gdf` are not finite
            (e.g. the gdf is empty), so the output size cannot be inferred.
        KeyError:
            If `class_dict` is given but lacks a class name present in `gdf`.

    Examples:
        >>> from histolytics.data import hgsc_cancer_nuclei
        >>> from histolytics.utils.raster import gdf2sem
        >>> import matplotlib.pyplot as plt
        >>> from skimage.color import label2rgb
        >>>
        >>> nuc = hgsc_cancer_nuclei()
        >>> # Convert the GeoDataFrame to a semantic segmentation raster
        >>> nuc_raster = gdf2sem(nuc, xoff=0, yoff=0, width=1500, height=1500)
        >>> # Visualize the semantic segmentation raster and the GeoDataFrame
        >>> fig, ax = plt.subplots(1, 2, figsize=(8, 4))
        >>> ax[0].imshow(label2rgb(nuc_raster, bg_label=0))
        >>> ax[0].set_axis_off()
        >>> nuc.plot(column="class_name", ax=ax[1])
        >>> ax[1].set_axis_off()
        >>> fig.tight_layout()
    ![out](../../img/gdf2sem.png)
    """
    # The bounds are only needed to infer a missing output dimension.
    if width is None or height is None:
        xmin, ymin, xmax, ymax = gdf.total_bounds
        if not np.all(np.isfinite([xmin, ymin, xmax, ymax])):
            raise ValueError(
                "Cannot infer the output size: the bounds of `gdf` are not finite "
                "(is the GeoDataFrame empty?). Pass `width` and `height` explicitly."
            )
        if width is None:
            width = int(xmax - xmin)
        if height is None:
            height = int(ymax - ymin)

    image_shape = (int(height), int(width))
    out_mask = np.zeros(image_shape, dtype=np.int32)

    # Explode multi-part geometries so each part is burned individually.
    # groupby sorts the class names, which fixes the default label order.
    per_class = gdf.explode(index_parts=True).groupby("class_name")
    for label_id, (cl, class_gdf) in enumerate(per_class, start=1):
        mask = geometry_mask(
            # Shift from the gdf coordinate frame to the raster frame.
            class_gdf.geometry.translate(-xoff, -yoff),
            out_shape=image_shape,
            # Identity transform: one raster pixel per coordinate unit.
            transform=rasterio.Affine(1, 0, 0, 0, 1, 0),
            # invert=True -> True where a geometry covers the pixel.
            invert=True,
            all_touched=True,
        )
        out_mask[mask] = label_id if class_dict is None else class_dict[cl]

    return out_mask