Skip to content

gdf_to_polars

Convert a GeoDataFrame to a polars DataFrame while preserving Shapely geometries.

Parameters:

Name Type Description Default
gdf GeoDataFrame The input geopandas.GeoDataFrame. required

Raises:

Type Description
ImportError

If polars is not installed.

Returns:

Type Description

pl.DataFrame A polars DataFrame with Shapely objects preserved as Python objects.

Examples:

>>> from histolytics.utils.gdf import gdf_to_polars
>>> from histolytics.data import hgsc_cancer_nuclei
>>> gdf = hgsc_cancer_nuclei()
>>> gdf_pl = gdf_to_polars(gdf)
>>> print(gdf_pl.head(3))
    shape: (3, 2)
    ┌────────────┬─────────────────────────────────┐
    │ class_name ┆ geometry                        │
    │ ---        ┆ ---                             │
    │ str        ┆ object                          │
    ╞════════════╪═════════════════════════════════╡
    │ connective ┆ POLYGON ((1394.01 0, 1395.01 1… │
    │ connective ┆ POLYGON ((1391 2.01, 1387 2.01… │
    │ connective ┆ POLYGON ((1382.99 156.01, 1380… │
    └────────────┴─────────────────────────────────┘
Source code in src/histolytics/utils/gdf.py
# Modules in which Shapely defines its concrete geometry classes. A value whose
# class lives in one of these modules is treated as a Shapely geometry.
_SHAPELY_MODULES = frozenset(
    {
        "shapely.geometry.point",
        "shapely.geometry.polygon",
        "shapely.geometry.linestring",
        "shapely.geometry.multipoint",
        "shapely.geometry.multipolygon",
        "shapely.geometry.multilinestring",
        "shapely.geometry.collection",
    }
)


def _is_shapely_column(column) -> bool:
    """Return True if `column` must be stored as a polars Object column."""
    # A duplicated column label makes `pdf[col]` a DataFrame instead of a
    # Series; such columns were never classified as geometry, so skip them.
    if not isinstance(column, pd.Series):
        return False

    # geopandas registers its extension dtype under the name "geometry". Checking
    # it first also covers empty frames and columns that hold only missing values.
    if getattr(column.dtype, "name", None) == "geometry":
        return True

    # Object-dtype column: sniff the first non-missing value rather than row 0,
    # so a leading None/NaN does not hide a column of Shapely objects.
    present = column.notna().to_numpy()
    if not present.any():
        return False
    sample = column.iloc[int(present.argmax())]
    return type(sample).__module__ in _SHAPELY_MODULES


def gdf_to_polars(gdf: gpd.GeoDataFrame):
    """Convert a GeoDataFrame to a polars DataFrame while preserving Shapely geometries.

    Columns that hold Shapely objects are detected either by the geopandas
    ``geometry`` dtype or by the class of their first non-missing value. Those
    columns are stored with the polars ``Object`` dtype; all other columns are
    converted with `pl.from_pandas`.

    Note:
        Geometry columns are appended after the non-geometry columns, so the
        column order of the result can differ from the input when a geometry
        column is not already last.

    Parameters:
        gdf: geopandas.GeoDataFrame
            The input GeoDataFrame

    Raises:
        ImportError: If polars is not installed.

    Returns:
        pl.DataFrame: with Shapely objects preserved as Python objects

    Examples:
        >>> from histolytics.utils.gdf import gdf_to_polars
        >>> from histolytics.data import hgsc_cancer_nuclei
        >>> gdf = hgsc_cancer_nuclei()
        >>> gdf_pl = gdf_to_polars(gdf)
        >>> print(gdf_pl.head(3))
            shape: (3, 2)
            ┌────────────┬─────────────────────────────────┐
            │ class_name ┆ geometry                        │
            │ ---        ┆ ---                             │
            │ str        ┆ object                          │
            ╞════════════╪═════════════════════════════════╡
            │ connective ┆ POLYGON ((1394.01 0, 1395.01 1… │
            │ connective ┆ POLYGON ((1391 2.01, 1387 2.01… │
            │ connective ┆ POLYGON ((1382.99 156.01, 1380… │
            └────────────┴─────────────────────────────────┘
    """
    # polars is an optional dependency, so import it lazily and keep the
    # original failure attached as the cause of the friendlier error.
    try:
        import polars as pl
    except ImportError as err:
        raise ImportError(
            "polars is not installed. Please install it with `pip install polars`."
        ) from err

    # Work on a plain pandas view of the data.
    pdf = pd.DataFrame(gdf)

    # Split the columns: Shapely-bearing ones cannot go through `from_pandas`.
    geometry_cols = [col for col in pdf.columns if _is_shapely_column(pdf[col])]
    regular_cols = [col for col in pdf.columns if col not in geometry_cols]

    # Convert everything polars can represent natively.
    pl_df = pl.from_pandas(pdf[regular_cols])

    # Attach each geometry column as opaque Python objects.
    for col in geometry_cols:
        pl_df = pl_df.with_columns(pl.Series(col, pdf[col].tolist(), dtype=pl.Object))
    return pl_df