#!/usr/bin/env python3
"""
slide.py: Contains the Slide and Annotations classes.
Slide class: Wrapper around openslide.OpenSlide with annotation overlay and mask generation.
Annotations class: Parses annotation files from QuPath, ImageJ, and ASAP.
"""
import os
import glob
import json
import itertools
import operator as op
import xml.etree.ElementTree as ET
from typing import Any, Dict, List, Optional, Tuple, Union, Callable, Sequence
from itertools import chain
import numpy as np
import cv2
import openslide
from openslide import OpenSlide
import pandas as pd
from matplotlib.path import Path
import seaborn as sns
from pyslyde.util.utilities import mask2rgb
__author__ = 'Gregory Verghese'
__email__ = 'gregory.verghese@gmail.com'
[docs]
class Slide(OpenSlide):
"""
Whole Slide Image (WSI) object that enables annotation overlay as a wrapper around
openslide.OpenSlide. Generates annotation mask.
Attributes:
mag (int): Magnification level.
dims (tuple): Dimensions of the WSI.
name (str): Name of the slide file.
draw_border (bool): Whether to generate border based on annotations.
_border (list): List of border coordinates [(x1, y1), (x2, y2)].
"""
MAG_FACTORS: Dict[int, int] = {0: 1, 1: 2, 2: 4, 3: 8, 4: 16, 5: 32}
MASK_SIZE: Tuple[int, int] = (2000, 2000)
[docs]
def __init__(self,
filename: str,
mag: int = 0,
annotations: Optional['Annotations'] = None,
annotations_path: Optional[Union[str, List[str]]] = None,
labels: Optional[List[str]] = None,
source: Optional[str] = None) -> None:
super().__init__(filename)
self.mag: int = mag
self.dims: Tuple[int, int] = self.dimensions
self.name: str = os.path.basename(filename)
self._border: Optional[List[Tuple[int, int]]] = None
self.annotations: Optional['Annotations'] = None
if annotations is not None:
self.annotations = annotations
elif annotations_path is not None and source is not None:
self.annotations = Annotations(
annotations_path,
source=source,
labels=labels,
encode=True
)
@property
def slide_mask(self) -> np.ndarray:
"""Get the slide mask as an RGB array."""
mask = self.generate_mask((Slide.MASK_SIZE))
mask = mask2rgb(mask)
return mask
[docs]
def generate_mask(self, size: Optional[Tuple[int, int]] = None,
labels: Optional[List[Union[int, str]]] = None) -> np.ndarray:
"""
Generate a mask representation of annotations.
Args:
size (tuple, optional): Dimensions of the mask.
labels (list, optional): List of labels to include in the mask.
Returns:
np.ndarray: Single-channel mask with integer for each class.
"""
x, y = self.dims[0], self.dims[1]
slide_mask = np.zeros((y, x), dtype=np.uint8)
if self.annotations is None:
return slide_mask
self.annotations.encode = True
coordinates = self.annotations.annotations
if coordinates is None:
return slide_mask
keys = sorted(list(coordinates.keys()))
if labels:
# Convert string labels to integer keys if needed
label_keys = []
for l in labels:
if isinstance(l, str) and l in self.annotations.class_key:
label_keys.append(self.annotations.class_key[l])
elif isinstance(l, int):
label_keys.append(l)
labels = label_keys
else:
labels = keys
for k in keys:
if k in labels:
v = coordinates[k]
v = [np.array(a) for a in v]
cv2.fillPoly(slide_mask, v, color=(int(k),))
if size is not None:
slide_mask = cv2.resize(slide_mask, size)
return slide_mask
[docs]
@staticmethod
def resize_border(dim: int, factor: int = 1, threshold: Optional[int] = None,
operator: str = '=>') -> int:
"""
Resize and redraw annotation border. Useful to trim WSI and mask to a specific size.
Args:
dim (int): Dimension to resize.
factor (int): Border increments.
threshold (int, optional): Minimum/maximum size.
operator (str): Threshold limit operator.
Returns:
int: New border dimension.
"""
if threshold is None:
threshold = dim
operator_dict: Dict[str, Callable] = {'>': op.gt, '=>': op.ge, '<': op.lt, '=<': op.lt}
op_func = operator_dict[operator]
multiples = [factor * i for i in range(100000)]
multiples = [m for m in multiples if op_func(m, threshold)]
diff = list(map(lambda x: abs(dim - x), multiples))
new_dim = multiples[diff.index(min(diff))]
return new_dim
[docs]
def get_border(self, space: int = 100) -> List[Tuple[int, int]]:
"""
Generate border around max/min annotation points.
Args:
space (int): Gap between max/min annotation point and border.
Returns:
list: Border dimensions [(x1, y1), (x2, y2)].
"""
if self.annotations is None:
self._border = [(0, self.dims[0]), (0, self.dims[1])]
else:
coordinates = self.annotations.annotations
if coordinates is None:
self._border = [(0, self.dims[0]), (0, self.dims[1])]
else:
coordinates = list(chain(*list(coordinates.values())))
coordinates = list(chain(*coordinates))
f = lambda x: (min(x) - space, max(x) + space)
self._border = list(map(f, list(zip(*coordinates))))
mag_factor = Slide.MAG_FACTORS[self.mag]
f = lambda x: (int(x[0] / mag_factor), int(x[1] / mag_factor))
self._border = list(map(f, self._border))
return self._border
[docs]
def detect_components(self, level_dims: int = 6, num_component: Optional[int] = None,
min_size: Optional[int] = None) -> Tuple[List[np.ndarray], List[List[Tuple[int, int]]]]:
"""
Find the largest section on the slide.
Args:
level_dims (int): Level of downsampling.
num_component (int, optional): Number of components to keep.
min_size (int, optional): Minimum size of component.
Returns:
tuple: (List of images with contours, list of border coordinates)
"""
new_dims = self.level_dimensions[6]
image = np.array(self.get_thumbnail(self.level_dimensions[6]))
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.bilateralFilter(np.bitwise_not(gray), 9, 100, 100)
_, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
if num_component is not None:
idx = sorted([(cv2.contourArea(c), i) for i, c in enumerate(contours)])
contours = [contours[i] for c, i in idx]
contours = contours[-num_component:]
if min_size is not None:
contours = list(map(lambda x: cv2.contourArea(x), contours))
contours = [c for c in contours if c > min_size]
borders: List[List[Tuple[int, int]]] = []
components: List[np.ndarray] = []
image_new = image.copy()
for c in contours:
if isinstance(c, (list, np.ndarray)):
x, y, w, h = cv2.boundingRect(np.array(c))
else:
continue
x_scale = self.dims[0] / new_dims[0]
y_scale = self.dims[1] / new_dims[1]
x1 = round(x_scale * x)
x2 = round(x_scale * (x + w))
y1 = round(y_scale * y)
y2 = round(y_scale * (y - h))
self._border = [(x1, x2), (y1, y2)]
image_new = cv2.rectangle(image_new, (x, y), (x + w, y + h), (0, 255, 0), 2)
components.append(image_new)
borders.append([(x1, x2), (y1, y2)])
return components, borders
[docs]
def generate_region(self,
mag: int = 0,
x: Optional[Union[int, Tuple[int, int]]] = None,
y: Optional[Union[int, Tuple[int, int]]] = None,
x_size: Optional[int] = None,
y_size: Optional[int] = None,
scale_border: bool = False,
factor: int = 1,
threshold: Optional[int] = None,
operator: str = '=>') -> Tuple[np.ndarray, np.ndarray]:
"""
Extract a specific region of the slide.
Args:
mag (int): Magnification level.
x (int or tuple, optional): Minimum x coordinate or (x_min, x_max).
y (int or tuple, optional): Minimum y coordinate or (y_min, y_max).
x_size (int, optional): Width of the region.
y_size (int, optional): Height of the region.
scale_border (bool): Whether to resize the border.
factor (int): Factor for resizing.
threshold (int, optional): Threshold for resizing.
operator (str): Operator for threshold.
Returns:
tuple: (Extracted region as RGB ndarray, mask)
"""
x_min: int = 0
y_min: int = 0
x_max: int = 0
y_max: int = 0
if x is None:
border = self.get_border()
if border and len(border) >= 2:
x, y = border[0], border[1]
else:
x, y = (0, self.dims[0]), (0, self.dims[1])
if x is not None:
if isinstance(x, tuple):
if x_size is None:
x_min, x_max = x
x_size = x_max - x_min
elif x_size is not None:
x_min = x[0]
x_max = x_min + x_size
elif isinstance(x, int):
x_min = x
x_max = x + (x_size or 0)
if y is not None:
if isinstance(y, tuple):
if y_size is None:
y_min, y_max = y
y_size = y_max - y_min
elif y_size is not None:
y_min = y[0]
y_max = y_min + y_size
elif isinstance(y, int):
y_min = y
y_max = y + (y_size or 0)
if scale_border and x_size is not None and y_size is not None:
x_size = Slide.resize_border(x_size, factor, threshold, operator)
y_size = Slide.resize_border(y_size, factor, threshold, operator)
if x_size is not None and (x_min + x_size) > self.dimensions[0]:
x_size = self.dimensions[0] - x_min
if y_size is not None and (y_min + y_size) > self.dimensions[1]:
y_size = self.dimensions[1] - y_min
if x_size is None or y_size is None:
raise ValueError("x_size and y_size must be specified")
x_size_adj = int(x_size / Slide.MAG_FACTORS[mag])
y_size_adj = int(y_size / Slide.MAG_FACTORS[mag])
region = self.read_region((x_min, y_min), mag, (x_size_adj, y_size_adj))
mask = self.generate_mask()[y_min:y_min + y_size, x_min:x_min + x_size]
return np.array(region.convert('RGB')), mask
[docs]
def save(self, path: str, size: Tuple[int, int] = (2000, 2000), mask: bool = False) -> None:
"""
Save a thumbnail of the slide as an image file.
Args:
path (str): Path to save the image.
size (tuple): Size of the thumbnail.
mask (bool): Whether to save the mask instead of the image.
"""
if mask:
cv2.imwrite(path, self.slide_mask)
else:
image = self.get_thumbnail(size)
image = image.convert('RGB')
image = np.array(image)
cv2.imwrite(path, image)
[docs]
class Annotations:
"""
Parses annotation files in XML or JSON format and returns a dictionary
containing x, y coordinates for each region of interest (ROI).
Args:
path (str or list): Path(s) to annotation file(s).
source (str): Annotation source type (e.g., 'qupath', 'imagej', 'asap').
labels (list, optional): List of ROI names.
encode (bool): Whether to encode labels as integers.
"""
[docs]
def __init__(self, path: Union[str, List[str]], source: str,
labels: Optional[List[str]] = None, encode: bool = False) -> None:
self.paths: List[str] = path if isinstance(path, list) else [path]
self.source: str = source
self.labels: Optional[List[str]] = labels
self.encode: bool = encode
self._annotations: Optional[Dict[Union[str, int], List[List[List[int]]]]] = None
self._generate_annotations()
def __repr__(self) -> str:
if self._annotations is None:
return "Annotations(empty)"
numbers = [len(v) for k, v in self._annotations.items()]
df = pd.DataFrame({"classes": self.labels or [], "number": numbers})
return str(df)
@property
def keys(self) -> List[Union[str, int]]:
if self._annotations is None:
return []
return list(self._annotations.keys())
@property
def values(self) -> List[List[List[List[int]]]]:
if self._annotations is None:
return []
return list(self._annotations.values())
@property
def annotations(self) -> Optional[Dict[Union[str, int], List[List[List[int]]]]]:
if self.encode:
annotations = self.encode_keys()
self.encode = False
else:
annotations = self._annotations
return annotations
@property
def class_key(self) -> Dict[str, int]:
if self.labels is None:
self.labels = list(self._annotations.keys()) if self._annotations else []
class_key = {l: i + 1 for i, l in enumerate(self.labels)}
return class_key
@property
def numbers(self) -> Dict[str, int]:
if self._annotations is None:
return {}
numbers = [len(v) for k, v in self._annotations.items()]
return dict(zip(self.labels or [], numbers))
def _generate_annotations(self) -> None:
"""
Call the appropriate method for the file type and generate annotations.
"""
self._annotations = {}
if not isinstance(self.paths, list):
self._paths = [self.paths]
if self.source is not None:
for p in self.paths:
annotations = getattr(self, '_' + self.source)(p)
for k, v in annotations.items():
if k in self._annotations:
self._annotations[k].append(v)
else:
self._annotations[k] = v
if len(self.labels or []) > 0:
self._annotations = self.filter_labels(self.labels or [])
else:
self.labels = list(self._annotations.keys())
[docs]
def filter_labels(self, labels: List[str]) -> Dict[Union[str, int], List[List[List[int]]]]:
"""
Remove labels from annotations.
Args:
labels (list): Label list to keep.
Returns:
dict: Filtered annotation dictionary.
"""
self.labels = labels
if self._annotations is None:
return {}
keys = list(self._annotations.keys())
for k in keys:
if k not in labels:
del self._annotations[k]
return self._annotations
[docs]
def rename_labels(self, names: Dict[str, str]) -> None:
"""
Rename annotation labels.
Args:
names (dict): Mapping from current labels to new labels.
"""
if self._annotations is None:
return
for k, v in names.items():
self._annotations[v] = self._annotations.pop(k)
self.labels = list(self._annotations.keys())
[docs]
def encode_keys(self) -> Dict[int, List[List[List[int]]]]:
"""
Encode labels as integer values.
Returns:
dict: Annotations with integer keys.
"""
if self._annotations is None:
return {}
annotations = {self.class_key[k]: v for k, v in self._annotations.items()}
return annotations
def _imagej(self, path: str) -> Dict[str, List[List[List[int]]]]:
"""
Parse ImageJ XML annotation files.
Args:
path (str): Path to the XML file.
Returns:
dict: Annotations dictionary.
"""
tree = ET.parse(path)
root = tree.getroot()
anns = root.findall('Annotation')
labels = list(root.iter('Annotation'))
labels = list(set([i.attrib['Name'] for i in labels]))
annotations = {l: [] for l in labels}
for i in anns:
label = i.attrib['Name']
instances = list(i.iter('Vertices'))
for j in instances:
coordinates = list(j.iter('Vertex'))
coordinates = [[c.attrib['X'], c.attrib['Y']] for c in coordinates]
coordinates = [[round(float(c[0])), round(float(c[1]))] for c in coordinates]
annotations[label] = annotations[label] + [coordinates]
return annotations
def _asap(self, path: str) -> Dict[str, List[List[List[int]]]]:
"""
Parse ASAP XML annotation files.
Args:
path (str): Path to the XML file.
Returns:
dict: Annotations dictionary.
"""
tree = ET.parse(path)
root = tree.getroot()
ns = root[0].findall('Annotation')
labels = list(root.iter('Annotation'))
labels = list(set([i.attrib['PartOfGroup'] for i in labels]))
annotations = {l: [] for l in labels}
for i in ns:
coordinates = list(i.iter('Coordinate'))
coordinates = [[float(c.attrib['X']), float(c.attrib['Y'])] for c in coordinates]
coordinates = [[round(c[0]), round(c[1])] for c in coordinates]
label = i.attrib['PartOfGroup']
annotations[label] = annotations[label] + [coordinates]
return annotations
def _qupath(self, path: str) -> Dict[str, List[List[List[int]]]]:
"""
Parse QuPath annotation JSON files.
Args:
path (str): Path to the JSON file.
Returns:
dict: Annotations dictionary.
"""
annotations: Dict[str, List[List[List[int]]]] = {}
with open(path) as json_file:
j = json.load(json_file)
for a in j:
c = a['properties']['classification']['name']
geometry = a['geometry']['type']
coordinates = a['geometry']['coordinates']
if c not in annotations:
annotations[c] = []
if geometry == "LineString":
points = [[int(i[0]), int(i[1])] for i in coordinates]
annotations[c].append(points)
elif geometry == "Polygon":
for a2 in coordinates:
points = [[int(i[0]), int(i[1])] for i in a2]
annotations[c].append(points)
elif geometry == "MultiPolygon":
for a2 in coordinates:
for a3 in a2:
points = [[int(i[0]), int(i[1])] for i in a3]
annotations[c].append(points)
return annotations
def _json(self, path: str) -> Dict[str, List[List[List[int]]]]:
"""
Parse JSON annotation files with a specific structure.
Args:
path (str): Path to the JSON file.
Returns:
dict: Annotations dictionary.
"""
with open(path) as json_file:
json_annotations = json.load(json_file)
labels = list(json_annotations.keys())
if self.labels is None:
self.labels = []
self.labels.extend(labels)
annotations = {k: [[[int(i['x']), int(i['y'])] for i in v2]
for v2 in v.values()] for k, v in json_annotations.items()}
return annotations
def _dataframe(self, path: str) -> None:
"""
Parse a DataFrame with a specific structure.
"""
anns_df = pd.read_csv(path)
anns_df.fillna('undefined', inplace=True)
anns_df.set_index('labels', drop=True, inplace=True)
self.labels = list(set(anns_df.index))
annotations: Dict[str, List[List[List[int]]]] = {}
for l in self.labels:
coords = list(zip(anns_df.loc[l].x, anns_df.loc[l].y))
annotations[l] = [[[int(x), int(y)] for x, y in coords]]
self._annotations = annotations
def _csv(self, path: str) -> Dict[str, List[List[List[int]]]]:
"""
Parse CSV annotation files with a specific structure.
Args:
path (str): Path to the CSV file.
Returns:
dict: Annotations dictionary.
"""
anns_df = pd.read_csv(path)
anns_df.fillna('undefined', inplace=True)
anns_df.set_index('labels', drop=True, inplace=True)
labels = list(set(anns_df.index))
annotations: Dict[str, List[List[List[int]]]] = {}
for l in labels:
coords = list(zip(anns_df.loc[l].x, anns_df.loc[l].y))
annotations[l] = [[[int(x), int(y)] for x, y in coords]]
self._annotations = annotations
return annotations
[docs]
def df(self) -> pd.DataFrame:
"""
Return a DataFrame of annotations.
Returns:
pd.DataFrame: DataFrame of annotations.
"""
if self._annotations is None:
return pd.DataFrame()
labels = [[l] * len(self._annotations[l][0]) for l in self._annotations.keys()]
labels = list(chain(*labels))
x_values = [xi[0] for x in list(self._annotations.values()) for xi in x[0]]
y_values = [yi[1] for y in list(self._annotations.values()) for yi in y[0]]
df = pd.DataFrame({'labels': list(labels), 'x': x_values, 'y': y_values})
return df
[docs]
def save(self, save_path: str) -> None:
"""
Save annotations as a CSV file.
Args:
save_path (str): Path to save the annotations.
"""
self.df().to_csv(save_path)