Convert a GeoDataFrame to a polars DataFrame while preserving Shapely geometries.
Parameters:
| Name | Type | Description | Default |
| --- | --- | --- | --- |
| gdf | GeoDataFrame | geopandas.GeoDataFrame: The input GeoDataFrame | required |
Raises:
| Type | Description |
| --- | --- |
| ImportError | If polars is not installed. |
Returns:
| Type | Description |
| --- | --- |
| pl.DataFrame | With Shapely objects preserved as Python objects |
Examples:
>>> from histolytics.utils.gdf import gdf_to_polars
>>> from histolytics.data import hgsc_cancer_nuclei
>>> gdf = hgsc_cancer_nuclei()
>>> gdf_pl = gdf_to_polars(gdf)
>>> print(gdf_pl.head(3))
shape: (3, 2)
┌────────────┬─────────────────────────────────┐
│ class_name ┆ geometry │
│ --- ┆ --- │
│ str ┆ object │
╞════════════╪═════════════════════════════════╡
│ connective ┆ POLYGON ((1394.01 0, 1395.01 1… │
│ connective ┆ POLYGON ((1391 2.01, 1387 2.01… │
│ connective ┆ POLYGON ((1382.99 156.01, 1380… │
└────────────┴─────────────────────────────────┘
Source code in src/histolytics/utils/gdf.py
def gdf_to_polars(gdf: gpd.GeoDataFrame):
    """Convert a GeoDataFrame to a polars DataFrame while preserving Shapely geometries.

    Columns holding Shapely objects are stored in polars with the ``pl.Object``
    dtype; every other column is converted with ``pl.from_pandas``. Geometry
    columns are appended after the non-geometry columns in the result.

    Parameters:
        gdf: geopandas.GeoDataFrame
            The input GeoDataFrame

    Raises:
        ImportError: If polars is not installed.

    Returns:
        pl.DataFrame: with Shapely objects preserved as Python objects

    Examples:
        >>> from histolytics.utils.gdf import gdf_to_polars
        >>> from histolytics.data import hgsc_cancer_nuclei
        >>> gdf = hgsc_cancer_nuclei()
        >>> gdf_pl = gdf_to_polars(gdf)
        >>> print(gdf_pl.head(3))
        shape: (3, 2)
        ┌────────────┬─────────────────────────────────┐
        │ class_name ┆ geometry │
        │ --- ┆ --- │
        │ str ┆ object │
        ╞════════════╪═════════════════════════════════╡
        │ connective ┆ POLYGON ((1394.01 0, 1395.01 1… │
        │ connective ┆ POLYGON ((1391 2.01, 1387 2.01… │
        │ connective ┆ POLYGON ((1382.99 156.01, 1380… │
        └────────────┴─────────────────────────────────┘
    """
    try:
        import polars as pl
    except ImportError as err:
        raise ImportError(
            "polars is not installed. Please install it with `pip install polars`."
        ) from err

    # Modules in which the Shapely geometry classes are defined. Built once
    # instead of once per column.
    shapely_modules = frozenset(
        {
            "shapely.geometry.point",
            "shapely.geometry.polygon",
            "shapely.geometry.linestring",
            "shapely.geometry.multipoint",
            "shapely.geometry.multipolygon",
            "shapely.geometry.multilinestring",
            "shapely.geometry.collection",
        }
    )

    # First convert to a plain pandas DataFrame
    pdf = pd.DataFrame(gdf)

    # Identify columns containing Shapely objects
    geometry_cols = []
    for col in pdf.columns:
        column = pdf[col]

        # Columns carrying geopandas' "geometry" extension dtype are geometry
        # columns even when they are empty or entirely null, so the dtype is
        # checked before any value is inspected.
        # NOTE(review): relies on pd.DataFrame(gdf) keeping the extension
        # dtype - confirm against the geopandas version in use.
        if str(column.dtype) == "geometry":
            geometry_cols.append(col)
            continue

        # Object-dtype columns: inspect the first NON-NULL value. Looking only
        # at row 0 misses geometry columns whose first entry is missing.
        non_null = column.dropna()
        if non_null.empty:
            continue
        first_value = non_null.iloc[0]
        if getattr(type(first_value), "__module__", None) in shapely_modules:
            geometry_cols.append(col)

    # Convert the non-geometry columns through the regular pandas -> polars path
    pl_df = pl.from_pandas(
        pdf[[col for col in pdf.columns if col not in geometry_cols]]
    )

    # Add geometry columns as opaque Python objects so the Shapely instances
    # are kept as-is in the polars DataFrame
    for col in geometry_cols:
        pl_df = pl_df.with_columns(pl.Series(col, pdf[col].tolist(), dtype=pl.Object))

    return pl_df
|