Collagen Orientation Disorder Quantification
Introduction¶
This tutorial outlines a workflow to quantitatively measure collagen orientation disorder (see example) at WSI-level, a key feature of fibrosis in many solid tumors. The spatial arrangement of collagen fibers within the tumor microenvironment holds significant prognostic value: a highly aligned, parallel architecture is often associated with a less aggressive tumor phenotype, while a disorganized, chaotic pattern have been associated with more aggressive forms of cancer.
This guide will demonstrate how to use Histolytics to extract collagen orientation disorder by measuring the fiber angle orientational entropy. We will use a segmented HGSC omental slide as an example data in this workflow.
import geopandas as gpd
nuc = gpd.read_parquet("/path/to/nuclei_seg.parquet") # <- modify path
tis = gpd.read_parquet("/path/to/tissue_seg.parquet") # <- modify path
ax = tis.plot(figsize=(10, 10), column="class_name", legend=True)
ax.set_axis_off()
Fit a Rectangular Grid to the Tissue Segmentation Map¶
First, we'll create a rectangular grid over the tissue segmentation map from which we will filter only relevant parts to be analyzed. Firstly, the segmentation map contains two identical sections, we will only analyze one of them to reduce computational cost. Secondly, we are extracting a feature of the stroma so we will constrain our analysis to the stromal regions.
from histolytics.spatial_ops.rect_grid import rect_grid
# fit the grid. We'll use 256, 256 sized patches, focus only on stroma
patch_size = (256, 256)
gr = rect_grid(tis, patch_size, 0, "intersects")
ax = tis.plot(column="class_name", figsize=(10, 10), alpha=0.5, legend=True)
ax = gr.plot(ax=ax, edgecolor="black", facecolor="none", lw=0.5)
ax.set_axis_off()
Let's now filter out the other tissue section and constrain the bboxes to the stromal regions.
from histolytics.wsi.utils import get_sub_grids
from histolytics.spatial_ops.ops import get_objs
sub_grids = get_sub_grids(gr, min_size=1000, return_gdf=True)
gr = get_objs(tis.loc[tis["class_name"] == "stroma"], sub_grids[1], "contains")
gr = gr.reset_index(drop=True)
ax = tis.plot(column="class_name", aspect=1, figsize=(10, 10), legend=True)
ax = gr.boundary.plot(ax=ax, lw=0.5, color="red")
ax.set_axis_off()
# Visualize the grid on top of the slide
# thumbnail = reader.read_level(-2)
# reader.get_annotated_thumbnail(thumbnail, gr, linewidth=1)
Initialize the Collagen Disorder Pipeline¶
To quantify the collagen orientation disorder, we will compute the mean Shannon entropy of the neighborhood major axis angles of collagen fibers within each patch. This will provide a measure of the dispersion of collagen fiber orientations, with higher values indicating greater disorder. To compute the disorder we will be using the graph fitting and neighborhood feature extraction utilities introduced previously.
import numpy as np
import pandas as pd
import scipy.ndimage as ndimage
from histolytics.wsi.wsi_processor import WSIGridProcessor
from histolytics.wsi.slide_reader import SlideReader
from histolytics.stroma_feats.collagen import _fiber_midpoints, _major_axis_angle
from histolytics.spatial_agg.local_diversity import local_diversity
from histolytics.spatial_graph.spatial_weights import fit_distband
from histolytics.utils.gdf import set_uid
from histolytics.stroma_feats.collagen import extract_collagen_fibers
from skimage.measure import label as label_sk
def pipeline(img: np.ndarray, label: np.ndarray, mask: np.ndarray) -> pd.DataFrame:
"""A pipeline for extracting collagen orientation disorder from WSI stromal patches."""
fibers = extract_collagen_fibers(img, label, rm_fg=True, rm_bg=False)
labeled_fibers = label_sk(fibers)
# return empty gdf if no fibers detected
if len(np.unique(labeled_fibers)) <= 1: # Only background (0) present
return gpd.GeoDataFrame(
columns=[
"geometry",
"uid",
"major_axis_angle",
"major_axis_angle_shannon_index",
]
)
# get fiber indices (x, y coords for each extracted and labelled fiber)
fiber_indices = ndimage.value_indices(labeled_fibers, ignore_value=0)
# get fiber midpoints to fit the spatial graph
midpoints = _fiber_midpoints(fiber_indices)
point_gdf = set_uid(
gpd.GeoDataFrame(geometry=gpd.points_from_xy(midpoints[:, 0], midpoints[:, 1]))
)
# compute major axis angle
maa = _major_axis_angle(fiber_indices)
point_gdf["major_axis_angle"] = maa
# fit distband graph to the midpoints of the collagen fibers with 32 micron nhood radius
# note the threshold value is 32*2 since it is in pixels
w = fit_distband(point_gdf, id_col="uid", threshold=64)
# compute the local shannon index of the collagen neighborhoods
point_gdf = local_diversity(
point_gdf,
w,
val_cols=["major_axis_angle"],
normalize=False,
scheme="quantiles",
metrics=["shannon_index"],
k=4,
)
return point_gdf
sl_p = "/path/to/slide.mrxs" # <- modify path
reader = SlideReader(sl_p, backend="OPENSLIDE")
crop_loader = WSIGridProcessor(
slide_reader=reader,
grid=gr,
nuclei=nuc,
pipeline_func=pipeline,
batch_size=8,
num_workers=8,
pin_memory=False,
shuffle=False,
drop_last=False,
)
import pandas as pd
from tqdm import tqdm
crop_feats = []
with crop_loader as loader:
with tqdm(loader, unit="batch", total=len(loader)) as pbar:
for batch_idx, batch in enumerate(pbar):
crop_feats.extend(batch) # collect the patch level dfs
100%|██████████| 718/718 [05:29<00:00, 2.18batch/s]
Let's take the mean of the major axis angle Shannon index across all patches and create a patch-level DataFrame out of these.
patch_indices = []
mean_shannon_values = []
for idx, gdf in crop_feats:
patch_indices.append(idx)
if gdf.empty or "major_axis_angle_shannon_index" not in gdf.columns:
mean_shannon_values.append(0.0)
else:
mean_shannon_values.append(gdf["major_axis_angle_shannon_index"].mean())
patch_feats = pd.DataFrame(
{"mean_major_axis_angle_shannon_index": mean_shannon_values}, index=patch_indices
)
patch_feats.index.name = "uid"
patch_feats.head()
mean_major_axis_angle_shannon_index | |
---|---|
uid | |
0 | 0.621227 |
1 | 0.843264 |
2 | 1.069790 |
3 | 1.199338 |
4 | 1.043276 |
Visualize the Collagen Disorder Spatial Distribution¶
Next, we'll visualize the spatial distribution of the averaged collagen disorder values overlayed on the segmentation map.
import matplotlib.pyplot as plt
from histolytics.utils.plot import legendgram
# Merge with the grid for spatial visualization
grid_feats = gr.merge(patch_feats, left_index=True, right_index=True, how="left")
# Fill any missing values with 0 (for patches not in crop_feats)
grid_feats["mean_major_axis_angle_shannon_index"] = grid_feats[
"mean_major_axis_angle_shannon_index"
].fillna(0)
# Visualize
fig, ax = plt.subplots(figsize=(10, 10))
tis.plot(ax=ax, column="class_name", alpha=0.3, legend=True)
grid_feats.plot(
ax=ax,
column="mean_major_axis_angle_shannon_index",
cmap="viridis",
legend=False,
lw=0.5,
)
ax.set_axis_off()
# add legendgram
ax = legendgram(
grid_feats,
"mean_major_axis_angle_shannon_index",
cmap="viridis",
ax=ax,
loc="lower left",
)
Visualize H&E patches from Low and High Cases¶
Next, we'll visualize a few H&E patches from low and high collagen disorder cases.
import matplotlib.pyplot as plt
import numpy as np
# Let's get sample some low and high collagen disorder cases..
# Sort by Shannon index
grid_feats_sorted = grid_feats.sort_values("mean_major_axis_angle_shannon_index")
# Define thresholds for low and high cases
n_total = len(grid_feats_sorted)
low_threshold = int(n_total * 0.25) # Bottom 25%
high_threshold = int(n_total * 0.75) # Top 25%
# Get low and high value patches
low_candidates = grid_feats_sorted.iloc[:low_threshold]
high_candidates = grid_feats_sorted.iloc[high_threshold:]
# Randomly sample 6 from each group
np.random.seed(46)
low_patches = low_candidates.sample(n=min(6, len(low_candidates)))
high_patches = high_candidates.sample(n=min(6, len(high_candidates)))
def polygon_to_xywh(polygon):
"""Convert polygon to xywh coordinates for slide reader."""
minx, miny, maxx, maxy = polygon.bounds
return (int(minx), int(miny), int(maxx - minx), int(maxy - miny))
def extract_patch_images(patches, reader):
"""Extract images from WSI for given patches."""
images = []
patch_info = []
for idx, _ in patches.iterrows():
original_geom = gr.loc[idx, "geometry"]
x, y, w, h = polygon_to_xywh(original_geom)
img = reader.read_region((int(x), int(y), int(w), int(h)), 0)
images.append(img)
return images, patch_info
# Extract images for low and high chromatin patches
low_images, low_info = extract_patch_images(low_patches, reader)
high_images, high_info = extract_patch_images(high_patches, reader)
from histolytics.utils.plot import draw_thing_contours
fig, axes = plt.subplots(4, 3, figsize=(12, 12))
# Plot low collagen disorder patches (top 2 rows)
for i, img in enumerate(low_images):
row = i // 3
col = i % 3
fibers = label_sk(extract_collagen_fibers(img, rm_fg=True, rm_bg=True))
ax = axes[row, col]
ax.imshow(draw_thing_contours(img, fibers, fibers > 0))
ax.axis("off")
# Plot high collagen disorder patches (bottom 2 rows)
for i, img in enumerate(high_images):
row = 2 + i // 3
col = i % 3
fibers = label_sk(extract_collagen_fibers(img, rm_fg=True, rm_bg=True))
ax = axes[row, col]
ax.imshow(draw_thing_contours(img, fibers, fibers > 0))
ax.axis("off")