Source code for cubats.slide_collection.slide_collection

# Standard Library
import concurrent.futures
import logging.config
import os
import pickle
import re
from itertools import combinations
from time import time

# Third Party
import pandas as pd
from PIL import Image
from tqdm import tqdm

# CuBATS
import cubats.cutils as cutils
import cubats.logging_config as log_config
from cubats.config import get_backend_info, xp
from cubats.slide_collection.registration import (
    register_slides, register_slides_high_resolution,
    register_slides_with_reference)
from cubats.slide_collection.segmentation import run_tumor_segmentation
from cubats.slide_collection.slide import Slide
from cubats.slide_collection.tile_colocalization import (
    evaluate_antigen_pair_tile, evaluate_antigen_triplet_tile)

# Constants
# Destination directories
RESULT_DATA_DIR = "data"
REGISTRATION_DIR = "registration"
TILES_DIR = "tiles"
COLOCALIZATION = "colocalization"
ORIGINAL_TILES_DIR = "original"
DAB_TILE_DIR = "dab"
E_TILE_DIR = "eosin"
H_TILE_DIR = "hematoxylin"
PICKLE_DIR = "pickle"
RECONSTRUCT_DIR = "reconstructed_slides"
SLIDE_COLLECTION_COLUMN_NAMES = [
    "Name",
    "Reference",
    "Mask",
    "Openslide Object",
    "Tiles",
    "Level Count",
    "Level Dimensions",
    "Tile Count",
]
SUPPORTED_IMAGE_FORMATS = [".tif", ".tiff", ".mrxs", ".svs"]
# Define column names and data types
QUANTIFICATION_RESULTS_COLUMN_NAMES = {
    "Name": str,
    "Coverage (%)": float,
    "High Positive (%)": float,
    "Medium Positive (%)": float,
    "Low Positive (%)": float,
    "Negative (%)": float,
    "Total Tissue (%)": float,
    "Background / No Tissue (%)": float,
    "Mask Area (%)": float,
    "Non-mask Area (%)": float,
    "H-Score": float,
    "Score": str,
    "Total Processed Tiles (%)": float,
    "Error (%)": float,
    "Thresholds": str,
}
DUAL_ANTIGEN_EXPRESSIONS_COLUMN_NAMES = {
    "Slide 1": str,
    "Slide 2": str,
    "Total Coverage (%)": float,
    "Total Overlap (%)": float,
    "Total Complement (%)": float,
    "High Positive Overlap (%)": float,
    "High Positive Complement (%)": float,
    "Medium Positive Overlap (%)": float,
    "Medium Positive Complement (%)": float,
    "Low Positive Overlap (%)": float,
    "Low Positive Complement (%)": float,
    "Negative Tissue (%)": float,
    "Total Tissue (%)": float,
    "Background / No Tissue (%)": float,
    "Mask Area (%)": float,
    "Non-mask Area (%)": float,
    "Total Processed Tiles (%)": float,
    "Total Error (%)": float,
    "Error1 (%)": float,
    "Error2 (%)": float,
    "Thresholds1": str,
    "Thresholds2": str,
}
TRIPLET_ANTIGEN_EXPRESSIONS_COLUMN_NAMES = {
    "Slide 1": str,
    "Slide 2": str,
    "Slide 3": str,
    "Total Coverage (%)": float,
    "Total Overlap (%)": float,
    "Total Complement (%)": float,
    "High Positive Overlap (%)": float,
    "High Positive Complement (%)": float,
    "Medium Positive Overlap (%)": float,
    "Medium Positive Complement (%)": float,
    "Low Positive Overlap (%)": float,
    "Low Positive Complement (%)": float,
    "Negative Tissue (%)": float,
    "Total Tissue (%)": float,
    "Background / No Tissue (%)": float,
    "Mask Area (%)": float,
    "Non-mask Area (%)": float,
    "Total Error (%)": float,
    "Error1 (%)": float,
    "Error2 (%)": float,
    "Thresholds1": str,
    "Thresholds2": str,
    "Thresholds3": str,
}
DEFAULT_TILE_SIZE = 1024


[docs] class SlideCollection(object): """Initializes a slide collection, stores slide info and performs slide processing. `SlideCollection` is a class that initializes a collection of slides, stores relevant processing information, and allows the execution of separate processing steps. Previously processed data can be reloaded. Attributes: name (str): Name of parent directory (i.e. name of tumorset). src_dir (str): Path to source directory containing the WSIs. dest_dir (str): Path to destination directory for results. data_dir (str): Path to data directory, a subdirectory of `dest_dir`. The directory will be initiaded upon class creation inside the `dest_dir`. Inside the data directory summaries of quantification results, dual antigen expression results and triplet antigen expression results are stored as .CSV file. The data_dir also contains the `pickle_dir`. pickle_dir (str): Path to pickle directory, a subdirectory of `data_dir`. Inside the pickle directory pickled copies of the slide_collection, quantification results and antigen analysis are stored which will be automatically reloaded if a slide collection is (re-)initialized with the same output_dir. tiles_dir (str): Path to tiles directory, a subdirectory of `dest_dir`. Inside the tiles directory the tile directories for each slide are stored. colocalization_dir (str): Path to colocalization directory, a subdirectory of `dest_dir`. Inside the colocalization directory results of dual and triplet overlap analyses are stored. reconstruct_dir (str): Path to reconstruct directory, a subdirectory of `dest_dir`. Inside the reconstruct directory reconstructed slides are stored. collection_list (list of Slide): list containing all slide objects. collection_info_df (Dataframe): Dataframe containing relevant information on all the slides. The colums are: - Name (str): Name of slide. - Reference (bool): True if slide is reference slide. - Mask (bool): True if slide is mask slide. - OpenSlide Object (OpenSlide): OpenSlide object of the slide. - Tiles (DeepZoomGenerator): DeepZoom tiles of the slide. - Level Count (int): Number of Deep Zoom levels in the image. - Level Dimensions (list): List of tuples (pixels_x, pixels_y) for each Deep Zoom level. - Tile Count (int): Number of total tiles in the image. mask (Slide): Mask slide of the collection. Is set during initialization. mask_coordinates (list): List containing the tile coordinates for tiles that are covered by the mask Coordinates are tuples (column, row). quantification_results (Dataframe): Dataframe containing the quantification results for all processed slides. The columns are: - Name (str): Name of the slide. - Coverage (%) (float): Positively stained pixels in the slide, characterizing the tumor coverage. - High Positive (%) (float): Percentage of highly positive stained pixels in the slide. - Medium Positive (%) (float): Percentage of medium positive stained pixels in the slide. - Low Positive (%) (float): Percentage of low positive stained pixels in the slide. - Negative (%) (float): Percentage negatively stained pixels in the slide. - Total Tissue (%) (float): Total amount of tissue in the slide. - Background / No Tissue (%) (float): Percentage of background and non-tissue regions in the slide. - Mask Area (%) (float): Area of the slide covered by the tumor mask. - Non-mask Area (%) (float): Area of the slide not covered by the tumor mask. - H-Score (float): Established pathological score calculated based on the distribution of staining intensities in the slide. - Score (str): Additional overall score of the slide calculated from the average of scores for all tiles. However, this score may be misleading, as it is an average over the entire slide. - Total Processed Tiles (%) (float): Percentage of processed tiles. - Error (%) (float): Percentage of tiles that were not processed due to insufficient tissue coverage. Only the case for tile-level masking. - Thresholds (list): List containing the thresholds applied during quantification. dual_antigen_expressions (Dataframe): Dataframe containing a summary of the dual antigen expression results for all processed analyses: - Slide 1 (str): Name of the first slide. - Slide 2 (str): Name of the second slide. - Total Coverage (%) (float): Percentage of combined coverage in two slides. - Total Overlap (%) (float: Percentage of overlapping antigen expressions in two slides. - Total Complement (%) (float): Percentage of complementary antigen expressions in two slides. - High Positive Overlap (%) (float): Percentage of highly positive overlapping antigen expressions in two slides. - High Positive Complement (%) (float): Percentage of highly positive complementary antigen expressions in two slides. - Medium Positive Overlap (%) (float): Percentage of medium positive overlapping antigen expressions in two slides. - Medium Positive Complement (%) (float): Percentage of medium positive complementary antigen expressions in two slides. - Low Positive Overlap (%) (float): Percentage of low positive overlapping antigen expressions in two slides. - Low Positive Complement (%) (float): Percentage of low positive complementary antigen expressions in two slides. - Negative Tissue (%) (float): Percentage of negative tissue in the two slides. - Total Tissue (%) (float): Percentage of total tissue in the two slides. - Background / No Tissue (%) (float): Percentage of background and non-tissue regions in the two slides. - Total Processed Tiles (%) (float): Percentage of processed tiles. - Total Error (%) (float): Percentage of tiles that were not processed due to insufficient tissue coverage. - Error1 (%) (float): Percentage of tiles where neither tile contained tissue. - Error2 (%) (float): Percentage of tiles where tiles could not be analyzed due to incorrect tile shapes. - Thresholds1 (str): Antigen thresholds for slide 1. - Thresholds2 (str): Antigen thresholds for slide 2 triplet_antigen_results (Dataframe): Dataframe containing a summary of the triplet antigen expression results for all processed analyses: - Slide 1 (str): Name of the first slide. - Slide 2 (str): Name of the second slide. - Slide 3 (str): Name of the third slide. - Total Coverage (%) (float): Percentage of combined coverage in three slides. - Total Overlap (%) (float: Percentage of overlapping antigen expressions in three slides. - Total Complement (%) (float): Percentage of complementary antigen expressions in three slides. - High Positive Overlap (%) (float): Percentage of highly positive overlapping antigen expressions in three slides. - High Positive Complement (%) (float): Percentage of highly positive complementary antigen expressions in three slides. - Medium Positive Overlap (%) (float): Percentage of medium positive overlapping antigen expressions in three slides. - Medium Positive Complement (%) (float): Percentage of medium positive complementary antigen expressions in three slides. - Low Positive Overlap (%) (float): Percentage of low positive overlapping antigen expressions in three slides. - Low Positive Complement (%) (float): Percentage of low positive complementary antigen expressions in three slides. - Negative Tissue (%) (float): Percentage of negative tissue in the three slides. - Total Tissue (%) (float): Percentage of total tissue in the three slides. - Background / No Tissue (%) (float): Percentage of background and non-tissue regions in the three slides. - Total Processed Tiles (%) (float): Percentage of processed tiles. - Total Error (%) (float): Percentage of tiles that were not processed due to insufficient tissue coverage. - Error1 (%) (float): Percentage of tiles where neither tile contained tissue. - Error2 (%) (float): Percentage of tiles where tiles could not be analyzed due to incorrect tile shapes. - Thresholds1 (str): Antigen thresholds for slide 1. - Thresholds2 (str): Antigen thresholds for slide 2 - Thresholds3 (str): Antigen thresholds for slide 3 """
[docs] def __init__( self, collection_name, src_dir, dest_dir, ref_slide=None, path_antigen_profiles=None, ): """Initializes the class. The class contains information on the slide collection. Args: collection_name (str): Name of the collection (i.e. Name of tumor set or patient ID) src_dir (str): Path to src directory containing the WSIs. dest_dir (str): Path to destination directory for results. ref_slide (str, optional): Path to reference slide. If 'ref_slide' is None it will be automatically set to the HE slide based on the filename of input files. Defaults to None. path_antigen_profiles (str, optional): Path to antigen profiles. Definitions as .json or .csv are accepted. If no default thresholds will be applied during processing. """ # Logging logging.config.dictConfig(log_config.LOGGING) self.logger = logging.getLogger(__name__) # Name of the tumorset self.collection_name = collection_name # Validate directories if not os.path.isdir(src_dir): raise ValueError( f"Source directory {src_dir} does not exist or is not accessible." ) if not os.path.isdir(dest_dir): raise ValueError( f"Destination directory {dest_dir} does not exist or is not accessible." ) # Directories self.src_dir = src_dir self.dest_dir = dest_dir self.registration_dir = None self.data_dir = None self.pickle_dir = None self.tiles_dir = None self.colocalization_dir = None self.reconstruct_dir = None # List containing all slide objects self.slides = [] # Slide informations self.collection_info = pd.DataFrame( columns=SLIDE_COLLECTION_COLUMN_NAMES) # Mask Variables self.mask = None self.mask_coordinates = [] # Reference Slide self.reference_slide = ref_slide # Quantification Variables self.quantification_results = pd.DataFrame( columns=QUANTIFICATION_RESULTS_COLUMN_NAMES ) # Antigen Expression Variables self.dual_antigen_expressions = pd.DataFrame( columns=DUAL_ANTIGEN_EXPRESSIONS_COLUMN_NAMES ) self.triplet_antigen_results = pd.DataFrame( columns=TRIPLET_ANTIGEN_EXPRESSIONS_COLUMN_NAMES ) self.status = { "initialized": False, "registered": False, "segmented": False, "quantified": False, "dual_antigen_expression": False, "triplet_antigen_expression": False, } # Set destination directories self.set_dst_dir() # Initialize the slide collection self.init_slide_collection() # Load previous results if exist self.load_previous_results() # if not self.mask_coordinates: # self.extract_mask_tile_coordinates() if path_antigen_profiles is not None: self.add_antigen_profiles(path_antigen_profiles) # Log initialization details self.logger.debug( f"Initialized SlideCollection with collection_name: {self.collection_name}, " f"src_dir: {self.src_dir}, dest_dir: {self.dest_dir}, " f"ref_slide: {self.reference_slide}, " f"backend: {get_backend_info()}" )
def set_dst_dir(self): """Assign and initiate needed directories if they do not exist yet.""" # Data dir self.data_dir = os.path.join(self.dest_dir, RESULT_DATA_DIR) os.makedirs(self.data_dir, exist_ok=True) # Pickle dir self.pickle_dir = os.path.join(self.data_dir, PICKLE_DIR) os.makedirs(self.pickle_dir, exist_ok=True) self.logger.debug("Data and Pickle directories created") def init_slide_collection(self): """ Initializes the slide collection by iterating over the files in the source directory. Only files with the extensions '.tiff' or '.tif' are considered. For each valid file, a 'Slide' object is created. Each Slide's information is added to the 'collection_info_df'. Returns: None TODO: Check indexing of collection_info_df """ self.logger.info( f"Initializing SlideCollection: {self.collection_name}") init_start_time = time() for file in os.listdir(self.src_dir): if os.path.isfile(os.path.join(self.src_dir, file)): file_ext = os.path.splitext(file)[-1].lower() if not file.startswith(".") and file_ext in SUPPORTED_IMAGE_FORMATS: filename = cutils.get_name(file) mask = False ref = False # Look for mask and reference slide. If no reference selected HE slide will be selected # if re.search("_mask", filename): # mask = True if re.search("HE", filename) or filename == self.reference_slide: ref = True slide = Slide( filename, os.path.join(self.src_dir, file), is_mask=mask, is_reference=ref, ) self.slides.append(slide) self.collection_info.loc[len(self.collection_info)] = ( slide.properties.values() ) if ref and not self.reference_slide: self.reference_slide = slide # elif mask: # self.mask = slide self.collection_info.to_csv( os.path.join(self.data_dir, "collection_info.csv"), sep=",", index=False, header=True, encoding="utf-8", ) self.status["initialized"] = True init_end_time = time() self.logger.debug( f"Slide collection initialized in {round((init_end_time - init_start_time),2)} seconds" ) def load_previous_results(self, path=None): """Loads results from previous processing if they exist. Tries to load results from previous processing. If no path is passed, the collections `pickle_dir` is used. Slide objects are based on `OpenSlid` which are C-type objects and cannot be stored as pickle. Therefore, each Slide is re-initialized in the init_slide_collection function. The function will try to load the following files: - mask_coordinates.pickle: Load mask coordinates from previous mask processing. - quantification_results.pickle: Load quantification results from previous processing. - dual_antigen_expressions.pickle: Load dual antigen overlap results from previous processing. - triplet_antigen_expressions.pickle: Load triplet antigen overlap results from previous processing. Args: path (str, optional): Path to directory containing pickle files. Defaults to `pickle_dir` of the slide collection. """ prev_res_start_time = time() self.logger.debug("Searching for previous results") reg_dir = os.path.join(self.dest_dir, REGISTRATION_DIR) if os.path.isdir(reg_dir) and os.listdir(reg_dir): self.registration_dir = reg_dir try: self.logger.info("Searching for previous registration results") # update Slide objects to point to registered images self._update_slide_paths_after_registration() self.status["registered"] = True self.logger.info( f"Successfully loaded registered slides into {self.collection_name}." ) except Exception as e: self.logger.warning( f"Failed to update slide paths from existing registration directory: {e}" ) self.logger.info("Searching for previous segmentation results") self.add_mask_to_collection(reg_dir) self.logger.info("Searching for previous quantification results") if self.quantification_results.__len__() == 0: if path is None: path = self.pickle_dir path_mask_coord = os.path.join(path, "mask_coordinates.pickle") path_quant_res = os.path.join( path, "quantification_results.pickle") path_dual_overlap_res = os.path.join( path, "dual_antigen_expressions.pickle" ) path_triplet_overlap_res = os.path.join( path, "triplet_antigen_expressions.pickle" ) # load mask coordinates if os.path.exists(path_mask_coord): self.mask_coordinates = pickle.load( open(path_mask_coord, "rb")) self.status["segmented"] = True self.logger.info( f"Successfully loaded mask coordinates for {self.collection_name}" ) else: self.logger.debug( f"No mask coordinates found for {self.collection_name}" ) # load quantification results if os.path.exists(path_quant_res): self.quantification_results = pickle.load( open(path_quant_res, "rb")) self.status["quantified"] = True self.logger.info( f"Sucessfully loaded quantification results for {self.collection_name}" ) else: self.logger.debug( f"No previous quantification results found for {self.collection_name}" ) # load dual overlap results self.logger.info( "Searching for previous dual antigen expression results") if os.path.exists(path_dual_overlap_res): self.dual_antigen_expressions = pickle.load( open(path_dual_overlap_res, "rb") ) self.status["dual_antigen_expression"] = True self.logger.info( f"Successfully loaded dual_antigen_expression results for {self.collection_name}" ) else: self.logger.debug( f"No previous dual_antigen_expression results found for {self.collection_name}" ) # load triplet overlap results self.logger.info( "Searching for previous triplet antigen expression results" ) if os.path.exists(path_triplet_overlap_res): self.triplet_antigen_results = pickle.load( open(path_triplet_overlap_res, "rb") ) self.status["triplet_antigen_expression"] = True self.logger.info( f"Successfully loaded triplet_antigen_expression results for {self.collection_name}" ) else: self.logger.debug( f"No previous triplet_antigen_expression results found for {self.collection_name}" ) # Load processing info for each slide for slide in self.slides: path_slide = os.path.join( path, f"{slide.name}_processing_info.pickle") if os.path.exists(path_slide): slide.detailed_quantification_results = pickle.load( open(path_slide, "rb") ) self.logger.debug( f"Successfully loaded processing info for slide {slide.name}" ) # If quantification results for loaded slide exist, load them into the slide object if not self.quantification_results[ self.quantification_results["Name"] == slide.name ].empty: slide.quantification_summary = self.quantification_results.loc[ self.quantification_results["Name"] == slide.name ] self.logger.debug( f"Successfully loaded detailed quantification results for slide {slide.name}" ) else: self.logger.debug( f"No quantification results found for slide {slide.name}" ) else: self.logger.debug( f"No previous processing info found for slide {slide.name}" ) pass prev_res_end_time = time() self.logger.info( f"Finished loading previous results for {self.collection_name} in \ {round((prev_res_end_time - prev_res_start_time), 2 )} seconds" ) def add_mask_to_collection(self, dir): """Adds a mask slide to the slide collection. Args: dir (str): Path to directory the mask is in. """ if not dir or not os.path.isdir(dir): self.logger.debug( f"Registration directory {dir} does not exist yet. Please run registration first." ) try: for fname in os.listdir(dir): if re.search(r"_mask", fname, re.IGNORECASE): ext = os.path.splitext(fname)[1].lower() if ext not in SUPPORTED_IMAGE_FORMATS: continue mask_name = cutils.get_name(fname) mask_path = os.path.join(dir, fname) existing = [s for s in self.slides if s.name == mask_name] if existing: existing[0].is_mask = True try: existing[0].update_slide(mask_path) except Exception: self.logger.debug( f"Could not update existing mask slide path for {mask_name}" ) self.mask = existing[0] else: try: new_mask = Slide( mask_name, mask_path, is_mask=True) self.slides.append(new_mask) try: self.collection_info.loc[len(self.collection_info)] = ( new_mask.properties.values() ) except Exception: self.logger.debug( "Could not append new mask slide to collection_info" ) self.mask = new_mask except Exception as e: self.logger.warning( f"Found mask Slide but failed to load it: {mask_path}: {e}" ) self.logger.info( f"Loaded mask slide from directory: {mask_path}") except Exception as e: self.logger.debug(f"Error while scanning {dir} for mask: {e}")
[docs] def extract_mask_tile_coordinates(self, save_img=False): """Extracts mask coordinates from the mask slide. Generates a list containing of tiles coordinates that are part of the mask. This allows to only process tiles that are part of the mask and thus contain tumor tissue. Tiles with less than 10% tumor tissue are dropped due to save runtime. Previous mask coordinates will be overwritten and the results will be stored as pickle in `pickle_dir`. Args: save_img (bool): Boolean to determine if mask tiles shall be saved as image. Necessary if mask shall be reconstructed later on. Note: Storing tiles will require addition storage. Defaults to False. """ # Create tiles directory if it does not exist if save_img: self.tiles_dir = os.path.join(self.dest_dir, TILES_DIR) os.makedirs(self.tiles_dir, exist_ok=True) # If no mask slide is provided, mask coordinates will contain all tiles of the slide. if self.mask is None: mask_start_time = time() slide_tiles = self.slides[0].tiles self.mask_coordinates.clear() cols, rows = slide_tiles.level_tiles[slide_tiles.level_count - 1] for col in tqdm(range(cols), desc="Extracting mask tiles"): for row in range(rows): self.mask_coordinates.append((col, row)) else: mask_start_time = time() self.logger.debug("Extracting Mask Tile Coordinates") mask_tiles = self.mask.tiles self.mask_coordinates.clear() cols, rows = mask_tiles.level_tiles[mask_tiles.level_count - 1] for col in tqdm(range(cols), desc="Extracting mask tiles"): for row in range(rows): temp = mask_tiles.get_tile( mask_tiles.level_count - 1, (col, row)) if temp.mode != "RGB": temp = temp.convert("RBG") temp = xp.array(temp) mean = xp.mean(temp) # If tile is mostly white, drop tile coordinate; cutoff: 230 (10%) if mean < 230: self.mask_coordinates.append((col, row)) if save_img: tile_name = str(col) + "_" + str(row) # Convert to NumPy array if necessary if hasattr(temp, "get"): temp = temp.get() elif hasattr(temp, "asnumpy"): temp = temp.asnumpy() # Convert to PIL Image img = Image.fromarray(temp) dir = os.path.join(self.tiles_dir, "mask") os.makedirs(dir, exist_ok=True) out_path = os.path.join(dir, tile_name + ".tif") img.save(out_path) else: pass mask_end_time = time() self.logger.debug( f"Mask coordinates generated in {round((mask_end_time - mask_start_time)/60,2)} minutes" ) # Save mask coordinates as pickle out = os.path.join(self.pickle_dir, "mask_coordinates.pickle") with open(out, "wb") as file: pickle.dump(self.mask_coordinates, file, protocol=pickle.HIGHEST_PROTOCOL) # pickle.dump(self.mask_coordinates, open(out, "wb")) self.logger.debug(f"Successfully saved mask coordinates to {out}") self.logger.info("Finished Mask Tile Extraction")
[docs] def register_slides( self, reference_slide=None, microregistration=True, max_non_rigid_registration_dim_px=2000, crop=None, high_res_alignement=False, high_res_fraction=None, ): """Registers all WSIs in the collection using Valis. Registers all WSIs in the collection and aligns them. Registration can be performed towards a selected referenceWSI or automatically towards a WSI chosen by VALIS. Registration includes rigid and non-rigid registration steps. Optional `microregistration` can be applied to further improve registration quality. The registered slides are saved in the `registration` directory. Intermediate results are stored in the `intermediate_registration_results` directory. Further configuration options such as `max_non_rigid_registration_dim_px` and cropping methods are available. Lastly, an additional, customizable high-resolution alignment can be performed if specified, which allows tailoring the alignment resolution via the `high_res_fraction` parameter. Args: reference_slide (str): Path to reference slide. If None, the first slide in the collection will be used. Defaults to None. microregistration (bool): Boolean determining if microregistration shall be used. Defaults to True. max_non_rigid_registration_dim_px (int): Maximum size of non-rigid registration dimension in pixels. Defaults to 2000. crop (str): Crop method to be used. Defaults to None which results cropping to reference slide if one is provided. If no reference slide is provided, the default crop method is 'overlap'. high_res_alignment (bool): Boolean determining if customizable high-resolution alignment shall be performed. Defaults to False. high_res_fraction (float): Fraction of the image to be used for high resolution alignment. Defaults to None. """ registration_begin_time = time() self.registration_dir = os.path.join(self.dest_dir, REGISTRATION_DIR) os.makedirs(self.registration_dir, exist_ok=True) self.intermediate_registration_dir = os.path.join( self.data_dir, "intermediate_registration_results" ) os.makedirs(self.intermediate_registration_dir, exist_ok=True) if reference_slide: self.logger.info( f"Registering slides with reference slide {reference_slide}" ) register_slides_with_reference( slide_src_dir=self.src_dir, results_dst_dir=self.intermediate_registration_dir, referenceSlide=reference_slide, microregistration=microregistration, max_non_rigid_registration_dim_px=max_non_rigid_registration_dim_px, crop=crop, ) elif high_res_alignement: self.logger.info( f"Performing high resolution alignment with {high_res_fraction} fraction" ) register_slides_high_resolution( slide_src_dir=self.src_dir, results_dst_dir=self.intermediate_registration_dir, registered_slides_dst=self.registration_dir, micro_reg_fraction=high_res_fraction, ) else: self.logger.info("Registering slides without reference slide") register_slides( slide_src_dir=self.src_dir, results_dst_dir=self.intermediate_registration_dir, registered_slides_dst=self.registration_dir, microregistration=microregistration, max_non_rigid_registration_dim_px=max_non_rigid_registration_dim_px, crop=crop, ) registration_end_time = time() self.status["registered"] = True self.logger.info( f"Finished image registration of {self.collection_name} in \ {round((registration_end_time - registration_begin_time)/60,2)} minutes." ) self.logger.info(f"Updating slide paths for {self.collection_name}") self._update_slide_paths_after_registration()
def _update_slide_paths_after_registration(self): """Update Slide objects to use registered slide files saved in self.registration_dir. Uses original basename to look for the registered file (registration preserves filenames). """ if not self.registration_dir or not os.path.isdir(self.registration_dir): self.logger.info( "Registration directory not set or missing; skipping _update_slide_paths_after_registration." ) return # First, scan all files in registration_dir recursively and build a mapping from stem -> path reg_files = {} for root, _, files in os.walk(self.registration_dir): for f in files: stem = cutils.get_name(f) path = os.path.join(root, f) reg_files[stem] = path for slide in self.slides: base_stem = cutils.get_name(slide.orig_path) if base_stem in reg_files: reg_path = reg_files[base_stem] try: slide.update_slide(reg_path) except Exception as e: self.logger.error( f"Could not update slide {slide.name} to {reg_path}: {e}" ) else: self.logger.debug( f"No registered file found for {slide.name} (stem={base_stem})" )
[docs] def tumor_segmentation( self, model_path, reference_slide=None, tile_size=[1024, 1024], output_path=None, normalization=False, inversion=False, plot_results=False, ): """Performs tumor segmentation on the HE WSI in the SlideCollection. Performs tumor segmentation on either a specified reference WSI or the reference WSI selected by the SlideCollection. The segmentation model needs to be passed. By default, segmentation results are saved into the `registration_dir` of the SlideCollection, an optional ouput path can be provided. Model-specific parameters such as `normalization`, or `inversion` can also be passed. Lastly, segmentation_results can be plotted onto the tissue if `plot_results` is set to True. Args: model_path (str): Path to segmentation model. reference_slide (str, optional): Path to reference slide. If None, the reference slide of the SlideCollection is used. Defaults to None. tile_size (list, optional): Tile size to be used for segmentation. Defaults to [1024, 1024]. output_path (str, optional): Output path for segmentation results. If None, the `registration_dir` of the SlideCollection is used. Defaults to None. normalization (bool, optional): Boolean determining if stain normalization shall be applied. Defaults to False. inversion (bool, optional): Boolean determining if color inversion shall be applied. Defaults to False. plot_results (bool, optional): Boolean determining if segmentation results shall be plotted onto the tissue. Defaults to False. Returns: None """ if reference_slide is None: reference_slide = self.reference_slide.registered_path if output_path is None: output_path = self.registration_dir run_tumor_segmentation( reference_slide, model_path, tile_size, output_path, normalization, inversion, plot_results, ) self.add_mask_to_collection(output_path) self.status["segmented"] = True
def add_antigen_profiles(self, profile_path): """Adds antigen profiles to slides in the collection based on matching names. Args: profile_path (str): Path to antigen profile file. Supported formats are CSV and JSON. """ if profile_path.endswith(".csv"): profiles_df = pd.read_csv(profile_path) elif profile_path.endswith(".json"): profiles_df = pd.read_json(profile_path) else: raise ValueError( "Unsupported file format for antigen profile. Must be CSV or JSON" ) if "Name" not in profiles_df.columns: raise ValueError("Antigen profile must contain a Name") for slide in self.slides: if slide.is_mask or slide.is_reference: continue for _, profile in profiles_df.iterrows(): antigen_name = str(profile["Name"]).lower() if antigen_name in slide.name.lower(): # print(profile.to_dict()) slide.antigen_profile.update(profile.to_dict()) self.logger.debug( f"Antigen profile'{antigen_name}' added to slide {slide.name}" ) break else: self.logger.debug( f"No antigen profile found matching slide {slide.name}" )
[docs] def quantify_all_slides(self, save_imgs=False, masking_mode="tile-level"): """Quantifies all registered slides sequentially and stores results. Quantifies all slides that were instantiated sequentially with the exception of the reference_slide and the mask_slide. Results are stored as .CSV into the `data_dir`. All previous quantification results in the `quant_res_df` will be reset and the previous .CSV file overwritten. Args: save_imgs (bool): Boolean determining if tiles shall be saved as image during processing. This is necessary if slides shall be reconstructed after processing. Note: storing tiles may require substantial additional storage. Defaults to False. masking_mode (str): Defines how the tumor mask is applied to tiles during quantification. - `tile-level` (default): Applies the mask coarsly - tiles overlapping the mask are fully included. Recommended when registration quality is lower (e.g. high median rTRE). - `pixel-level`: Applies the mask precisely at pixel level - only masked pixels are included. Offers finer quantification, but is more sensitive to registration errors. """ if masking_mode not in ["tile-level", "pixel-level"]: raise ValueError( f"masking_mode must either be 'tile-level' or 'pixel-level'. {masking_mode} is not supported." ) start_quant_time = time() self.logger.info( f"Starting quantification of all slides, save_imgs: {save_imgs}, masking_mode: {masking_mode}" ) if self.quantification_results.__len__() != 0: self.quantification_results = self.quantification_results.iloc[0:0] # Counter variable for progress tracking c = 1 for slide in self.slides: if not slide.is_mask and not slide.is_reference: self.logger.info( f"Analyzing Slide: {slide.name}({c}/{len(self.slides) - 2})" ) self.quantify_single_slide(slide.name, save_imgs, masking_mode) c += 1 end_quant_time = time() self.logger.info( f"Finished quantification for {self.collection_name} in \ {round((end_quant_time - start_quant_time)/60,2)} minutes." ) self.status["quantified"] = True
[docs] def quantify_single_slide( self, slide_name, save_img=False, masking_mode="tile-level" ): """Quantifies a single slide and appends the results to `quant_res_df`. This function quantifies staining intensities for all tiles of the given slide using multiprocessing. The slide matching the passed slide_name is retrieved from the collection_list and quantified using the quantify_slide function of the Slide class. Results are appended to `quant_res_df`, which is then stored as .CSV in self. `data_dir` and as .PICKLE in `pickle_dir`. Existing .CSV/.PICKLE files are overwritten. For more information on quantification checkout Slide.quantify_slide() function in the slide.py. Args: slide_name (str): Name of the slide to be processed. save_img (bool): Boolean determining if tiles shall be saved during processing. Necessary if slide shall be reconstructed later on. However, storing images will require addition storage. Defaults to False. masking_mode (str): Defines how the tumor mask is applied to tiles during quantification. - `tile-level` (default): Applies the mask coarsly - tiles overlapping the mask are fully included. Recommended when registration quality is lower (e.g. high median rTRE). - `pixel-level`: Applies the mask precisely at pixel level - only masked pixels are included. Offers finer quantification, but is more sensitive to registration errors. """ if masking_mode not in ["tile-level", "pixel-level"]: raise ValueError( f"masking_mode must either be 'tile-level' or 'pixel-level'. {masking_mode} is not supported." ) if self.status["registered"] is False: raise RuntimeError( "Slides must be registered before quantification. Please call register_slides() before quantification." ) if self.status["segmented"] is False: raise RuntimeError( "Tumor segmentation must be performed before quantification.\ Please call tumor_segmentation() before quantification." ) if not self.mask_coordinates: self.logger.info( "Extracting mask coordinates before quantification") self.extract_mask_tile_coordinates() slide = [slide for slide in self.slides if slide.name == slide_name][0] # Create directories for images if they are to be saved. if save_img: dab_tile_dir = os.path.join( self.tiles_dir, slide_name, DAB_TILE_DIR) if masking_mode == "pixel-level": slide.quantify_slide( self.mask_coordinates, self.pickle_dir, save_img, dab_tile_dir, mask=self.mask.tiles, ) elif masking_mode == "tile-level": slide.quantify_slide( self.mask_coordinates, self.pickle_dir, save_img, dab_tile_dir, ) else: if masking_mode == "pixel-level": slide.quantify_slide( self.mask_coordinates, self.pickle_dir, mask=self.mask.tiles, ) elif masking_mode == "tile-level": slide.quantify_slide( self.mask_coordinates, self.pickle_dir, ) # slide_summary_series = pd.Series(slide.quantification_summary) slide_name_summary = slide.quantification_summary["Name"] # Check if a row with the same 'Name' exists existing_row_index = self.quantification_results[ self.quantification_results["Name"] == slide_name_summary ].index if not existing_row_index.empty: # Overwrite the existing row self.quantification_results.loc[existing_row_index[0]] = pd.Series( slide.quantification_summary ) else: # Append the new row self.quantification_results = pd.concat( [ self.quantification_results, pd.DataFrame([slide.quantification_summary]), ], ignore_index=True, ) # Sort the DataFrame by the 'Name' column self.quantification_results = self.quantification_results.sort_values( by="Coverage (%)", ascending=False ).reset_index(drop=True) self.save_quantification_results(masking_mode=masking_mode)
def save_quantification_results(self, masking_mode): """ Stores `quant_res_df` as .CSV for and .PICKLE in `data_dir` and `pickle_dir`, respectively. """ if self.quantification_results.__len__() != 0: save_start_time = time() filename = f"{self.data_dir}/{masking_mode}_quantification_results.csv" self.quantification_results.to_csv( filename, sep=",", index=False, encoding="utf-8", ) out = os.path.join( self.pickle_dir, "quantification_results.pickle") with open(out, "wb") as file: pickle.dump( self.quantification_results, file, protocol=pickle.HIGHEST_PROTOCOL ) save_end_time = time() self.logger.debug( f"Successfully saved quantification results to {out} in \ {round((save_end_time - save_start_time),2)} seconds" ) else: self.logger.warning( "No quantification results were found. Please call quantify_all_slides() to quantify all slides \ in this slide collection or call quantify_single_slide() to quantify a single slide." )
[docs] def generate_antigen_pair_combinations(self, masking_mode="tile-level"): """Creates all possible antigen pairs and analyzes antigen co-expression for all pairs. Results are stored in `dual_antigen_expressions`. Args: masking_mode (str): Determines the mode for mask application that was used for quantification. - `tile-level` (default): Applies the mask coarsly - tiles overlapping the mask are fully included. Recommended when registration quality is lower (e.g. high median rTRE). - `pixel-level`: Applies the mask precisely at pixel level - only masked pixels are included. Offers finer co-expression evaluation, but is more sensitive to registration errors. """ dual_expression_time_start = time() self.dual_antigen_expressions = pd.DataFrame( columns=DUAL_ANTIGEN_EXPRESSIONS_COLUMN_NAMES ) # Filter out mask and reference slides filtered_slides = [ slide for slide in self.slides if not slide.is_mask and not slide.is_reference ] # Generate all possible pairs of the filtered slides slide_combinations = list(combinations(filtered_slides, 2)) # Pass each combination to the compute_dual_antigen_combination method for combo in slide_combinations: self.evaluate_antigen_pair( combo[0], combo[1], masking_mode=masking_mode) dual_expression_time_end = time() self.logger.info( f"Finished dual antigen expression analysis in \ {round((dual_expression_time_end - dual_expression_time_start)/60,2)} minutes." ) self.status["dual_antigen_expression"] = True
[docs] def generate_antigen_triplet_combinations(self, masking_mode="tile-level"): """Creates all possible antigen triplets and analyzes antigen co-expression for all triplets. Results are stored in `triplet_antigen_expressions`. Args: masking_mode (str): Determines the mode for mask application that was used for quantification. - `tile-level` (default): Applies the mask coarsly - tiles overlapping the mask are fully included. Recommended when registration quality is lower (e.g. high median rTRE). - `pixel-level`: Applies the mask precisely at pixel level - only masked pixels are included. Offers finer co-expression evaluation, but is more sensitive to registration errors. """ triplet_expression_time_start = time() self.triplet_antigen_results = pd.DataFrame( columns=TRIPLET_ANTIGEN_EXPRESSIONS_COLUMN_NAMES ) # Filter out mask and reference slides filtered_slides = [ slide for slide in self.slides if not slide.is_mask and not slide.is_reference ] # Generate all possible triplets of the filtered slides slide_combinations = list(combinations(filtered_slides, 3)) # Pass each combination to the compute_triplet_antigen_combinations method for combo in slide_combinations: self.evaluate_antigen_triplet( combo[0], combo[1], combo[2], masking_mode=masking_mode ) triplet_expression_time_end = time() self.logger.info( f"Finished triplet antigen expression analysis in \ {round((triplet_expression_time_end - triplet_expression_time_start)/60,2)} minutes." ) self.status["triplet_antigen_expression"] = True
[docs] def evaluate_antigen_pair( self, slide1, slide2, save_img=False, masking_mode="tile-level" ): """Analyzes the antigen co-expression for a pair of two slides. Analyzes antigen co-expressions for each tile for the given pair of slides using multiprocesing based on the antigen-specific thresholds of each slide. Results from each of the tiles are summarized, stored in `dual_antigen_expressions` and saved as CSV in `data_dir` as well as PICKLE in `pickle_dir`. For more detailed explanation of the co-expression results see `SlideCollection.triplet_antigen_expressions`. Args: slide1 (Slide): Slide Object for slide 1. slide2 (Slide): Slide Object for slide 2. save_img (bool): Boolean determining if tiles shall be saved during processing. Necessary if slide shall be reconstructed later on. However, storing images will require additional storage. Defaults to False. masking_mode (str): Determines the mode for mask application that was used for quantification. - `tile-level` (default): Applies the mask coarsly - tiles overlapping the mask are fully included. Recommended when registration quality is lower (e.g. high median rTRE). - `pixel-level`: Applies the mask precisely at pixel level - only masked pixels are included. Offers finer co-expression evaluation, but is more sensitive to registration errors. """ # Create directory for pair of slides if save_img: # Create Colocalization directory if it does not exist self.colocalization_dir = os.path.join( self.dest_dir, COLOCALIZATION) os.makedirs(self.colocalization_dir, exist_ok=True) # Create sub-directory for slide pair dir = os.path.join( self.colocalization_dir, (slide1.name + "_and_" + slide2.name) ) os.makedirs(dir, exist_ok=True) else: dir = None # Create iterable for multiprocessing iterable = [] for i in slide1.detailed_quantification_results: if ( slide1.detailed_quantification_results[i]["Tilename"] == slide2.detailed_quantification_results[i]["Tilename"] ): _dict1 = slide1.detailed_quantification_results[i] _dict2 = slide2.detailed_quantification_results[i] iterable.append( ( _dict1, _dict2, [slide1.antigen_profile, slide2.antigen_profile], dir, save_img, masking_mode, ) ) start_time = time() self.logger.debug( f"Starting antigen analysis for: {slide1.name} & {slide2.name}" ) # Init dict for results of each tile comparison_dict = {} max_workers = os.cpu_count() - 1 with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as exe: results = tqdm( exe.map( evaluate_antigen_pair_tile, iterable, ), total=len(iterable), desc="Calculating Coverage of Slide " + slide1.name + " & " + slide2.name, ) for idx, result in enumerate(results): comparison_dict[idx] = result end_time = time() if end_time - start_time >= 60: self.logger.debug( f"Finished antigen analysis for: {slide1.name} & {slide2.name} in \ {round((end_time - start_time)/60,2)} minutes." ) else: self.logger.debug( f"Finished antigen analysis for: {slide1.name} & {slide2.name} in \ {round((end_time - start_time), 2)} seconds." ) self.summarize_antigen_combinations( comparison_dict, [slide1.name, slide2.name], antigen_profiles=[slide1.antigen_profile, slide2.antigen_profile], ) self.save_antigen_combinations( result_type="dual", masking_mode=masking_mode)
[docs] def evaluate_antigen_triplet( self, slide1, slide2, slide3, save_img=False, masking_mode="tile-level" ): """Analyzes the antigen co-expression for a triplet of three slides. Analyzes antigen co-expressions for each of tiles of the given triplet of slides using Multiprocessing based on the antigen-specific thresholds of each slide. Results from each of the tiles are summarized, stored in `triplet_antigen_expressions` and saved as CSV in `data_dir` as well as PICKLE in `pickle_dir`. For more detailed explanation of the co-expression results see `SlideCollection.triplet_antigen_expressions`. Args: slide1 (Slide): Slide Object for slide 1. slide2 (Slide): Slide Object for slide 2. slide3 (Slide): Slide Object for slide 3. save_img (bool): Boolean determining if tiles shall be saved during processing. Necessary if slide shall be reconstructed later on. However, storing images will require additional storage. Defaults to False. masking_mode (str): Determines the mode for mask application that was used for quantification. - `tile-level` (default): Applies the mask coarsly - tiles overlapping the mask are fully included. Recommended when registration quality is lower (e.g. high median rTRE). - `pixel-level`: Applies the mask precisely at pixel level - only masked pixels are included. Offers finer co-expression evaluation, but is more sensitive to registration errors. """ # Create directory for triplet of slides if save_img: # Create Colocalization directory if it does not exist self.colocalization_dir = os.path.join( self.dest_dir, COLOCALIZATION) os.makedirs(self.colocalization_dir, exist_ok=True) # Create sub-directory for slide triplet dirname = os.path.join( self.colocalization_dir, (slide1.name + "_and_" + slide2.name + "_and_" + slide3.name), ) os.makedirs(dirname, exist_ok=True) else: dirname = None # Create iterable for multiprocessing iterable = [] for i in slide1.detailed_quantification_results: if ( slide1.detailed_quantification_results[i]["Tilename"] == slide2.detailed_quantification_results[i]["Tilename"] == slide3.detailed_quantification_results[i]["Tilename"] ): _dict1 = slide1.detailed_quantification_results[i] _dict2 = slide2.detailed_quantification_results[i] _dict3 = slide3.detailed_quantification_results[i] iterable.append( ( _dict1, _dict2, _dict3, [ slide1.antigen_profile, slide2.antigen_profile, slide3.antigen_profile, ], dirname, save_img, masking_mode, ) ) # Init dict for results of each tile start_time = time() self.logger.debug( f"Starting antigen analysis for: {slide1.name} & {slide2.name} & {slide3.name}" ) comparison_dict = {} max_workers = os.cpu_count() - 1 # Process tiles using multiprocessing with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as exe: results = tqdm( exe.map(evaluate_antigen_triplet_tile, iterable), total=len(iterable), desc="Calculating Coverage of Slides " + slide1.name + " & " + slide2.name + " & " + slide3.name, ) for idx, result in enumerate(results): comparison_dict[idx] = result end_time = time() if end_time - start_time >= 60: self.logger.debug( f"Finished antigen analysis for: {slide1.name} & {slide2.name} & {slide3.name} in \ {round((end_time - start_time)/60, 2)} minutes." ) else: self.logger.debug( f"Finished antigen analysis for: {slide1.name} & {slide2.name} & {slide3.name} in \ {round((end_time - start_time), 2)} seconds." ) self.summarize_antigen_combinations( comparison_dict, [slide1.name, slide2.name, slide3.name], antigen_profiles=[ slide1.antigen_profile, slide2.antigen_profile, slide3.antigen_profile, ], ) self.save_antigen_combinations( result_type="triplet", masking_mode=masking_mode)
def summarize_antigen_combinations( self, comparison_dict, slide_names, antigen_profiles ): """Summarizes co-expression results of an antigen pair or triplet. Args: - comparison_dict (dict): Dictionary containing the co-expression results for each tile of the combination. - slide_names (str): Names of all slides of the combination. - antigen_profiles (list): List of all antigen profiles of the slides in the combination. """ processed_tiles = 0 sum_total_coverage = 0.00 sum_total_overlap = 0.00 sum_total_complement = 0.00 sum_high_overlap = 0.00 sum_high_complement = 0.00 sum_pos_overlap = 0.00 sum_pos_complement = 0.00 sum_low_overlap = 0.00 sum_low_complement = 0.00 sum_negative = 0.00 sum_tissue = 0.00 sum_background = 0.00 sum_mask = 0.00 sum_non_mask = 0.00 error1 = 0 error2 = 0 for i in comparison_dict: if comparison_dict[i]["Flag"] == 1: processed_tiles += 1 sum_total_coverage += comparison_dict[i]["Total Coverage"] sum_total_overlap += comparison_dict[i]["Total Overlap"] sum_total_complement += comparison_dict[i]["Total Complement"] sum_high_overlap += comparison_dict[i]["High Positive Overlap"] sum_high_complement += comparison_dict[i]["High Positive Complement"] sum_pos_overlap += comparison_dict[i]["Medium Positive Overlap"] sum_pos_complement += comparison_dict[i]["Medium Positive Complement"] sum_low_overlap += comparison_dict[i]["Low Positive Overlap"] sum_low_complement += comparison_dict[i]["Low Positive Complement"] sum_negative += comparison_dict[i]["Negative"] sum_tissue += comparison_dict[i]["Tissue"] sum_background += comparison_dict[i]["Background / No Tissue"] sum_mask += comparison_dict[i]["Mask Area"] sum_non_mask += comparison_dict[i]["Non-mask Area"] elif comparison_dict[i].get("Flag") == -1: error1 += 1 elif comparison_dict[i].get("Flag") == -2: error2 += 1 if processed_tiles > 0: sum_total_coverage /= processed_tiles sum_total_overlap /= processed_tiles sum_total_complement /= processed_tiles sum_high_overlap /= processed_tiles sum_high_complement /= processed_tiles sum_pos_overlap /= processed_tiles sum_pos_complement /= processed_tiles sum_low_overlap /= processed_tiles sum_low_complement /= processed_tiles sum_negative /= processed_tiles sum_tissue /= processed_tiles sum_background /= processed_tiles sum_mask /= processed_tiles sum_non_mask /= processed_tiles else: sum_total_coverage = 0 sum_total_overlap = 0 sum_total_complement = 0 sum_high_overlap = 0 sum_high_complement = 0 sum_pos_overlap = 0 sum_pos_complement = 0 sum_low_overlap = 0 sum_low_complement = 0 sum_negative = 0 sum_tissue = 0 sum_background = 0 sum_mask = 0 sum_non_mask = 100 total_error = error1 + error2 total_processed_tiles = ( (processed_tiles / len(comparison_dict)) * 100 if len(comparison_dict) > 0 else 0 ) overlap_dict = {"Slide 1": slide_names[0], "Slide 2": slide_names[1]} if len(slide_names) == 3: overlap_dict["Slide 3"] = slide_names[2] overlap_dict.update( { "Total Coverage (%)": round(float(sum_total_coverage), 2), "Total Overlap (%)": round(float(sum_total_overlap), 2), "Total Complement (%)": round(float(sum_total_complement), 2), "High Positive Overlap (%)": round(float(sum_high_overlap), 2), "High Positive Complement (%)": round(float(sum_high_complement), 2), "Medium Positive Overlap (%)": round(float(sum_pos_overlap), 2), "Medium Positive Complement (%)": round(float(sum_pos_complement), 2), "Low Positive Overlap (%)": round(float(sum_low_overlap), 2), "Low Positive Complement (%)": round(float(sum_low_complement), 2), "Negative Tissue (%)": round(float(sum_negative), 2), "Total Tissue (%)": round(float(sum_tissue), 2), "Background / No Tissue (%)": round(float(sum_background), 2), "Mask Area (%)": round(float(sum_mask), 2), "Non-mask Area (%)": round(float(sum_non_mask), 2), "Total Processed Tiles (%)": round(float(total_processed_tiles), 2), "Total Error (%)": round( float((total_error / len(comparison_dict)) * 100), 2 ), "Error1 (%)": round(float((error1 / len(comparison_dict)) * 100), 2), "Error2 (%)": round(float((error2 / len(comparison_dict)) * 100), 2), "Thresholds1": [ antigen_profiles[0]["high_positive_threshold"], antigen_profiles[0]["medium_positive_threshold"], antigen_profiles[0]["low_positive_threshold"], 235, ], "Thresholds2": [ antigen_profiles[1]["high_positive_threshold"], antigen_profiles[1]["medium_positive_threshold"], antigen_profiles[1]["low_positive_threshold"], 235, ], } ) if len(antigen_profiles) == 3: overlap_dict["Thresholds3"] = [ antigen_profiles[1]["high_positive_threshold"], antigen_profiles[1]["medium_positive_threshold"], antigen_profiles[1]["low_positive_threshold"], 235, ] if len(slide_names) == 2: self.dual_antigen_expressions = pd.concat( [self.dual_antigen_expressions, pd.DataFrame([overlap_dict])], ignore_index=True, ) self.dual_antigen_expressions = self.dual_antigen_expressions.sort_values( by="Total Coverage (%)", ascending=False ) else: self.triplet_antigen_results = pd.concat( [self.triplet_antigen_results, pd.DataFrame([overlap_dict])], ignore_index=True, ) self.triplet_antigen_results = self.triplet_antigen_results.sort_values( by="Total Coverage (%)", ascending=False ) def save_antigen_combinations(self, result_type="dual", masking_mode="tile-level"): """ Save antigen combination results as CSV and PICKLE. Args: result_type (str): Type of antigen combination results to save. Can be "dual" or "triplet". masking_mode (str): Mode of mask application for consistent file naming with the applied mode. """ if result_type == "dual": summary_df = self.dual_antigen_expressions csv_filename = f"{masking_mode}_dual_antigen_expressions.csv" pickle_filename = "dual_antigen_expressions.pickle" elif result_type == "triplet": summary_df = self.triplet_antigen_results csv_filename = f"{masking_mode}_triplet_antigen_expressions.csv" pickle_filename = "triplet_antigen_expressions.pickle" else: raise ValueError( "Invalid result_type. Must be 'dual' or 'triplet'.") # Save results as CSV summary_df.to_csv( os.path.join(self.data_dir, csv_filename), sep=",", index=False, header=True, encoding="utf-8", ) # Save results as PICKLE out = os.path.join(self.pickle_dir, pickle_filename) with open(out, "wb") as f: pickle.dump(summary_df, f, protocol=pickle.HIGHEST_PROTOCOL)