Source code for arctic_ai.workflow

"""Serial Workflow 
==========
Contains functions for serial processing of tissue sections. 
Contains functions for defining and running workflows."""
import glob, os, time, pickle
from .preprocessing import preprocess
from .cnn_prediction import generate_embeddings
from .generate_graph import create_graph_data
from .gnn_prediction import predict
# from .quality_scores import generate_quality_scores
from .ink_detection import detect_inks
#from .compile_results import dump_results
#from .nuclei_detection import predict_nuclei
from .image_stitch import npy2dzi, stitch_slides
#from .case_prototype import Case
import warnings
from functools import partial
import pandas as pd

def files_exist_overwrite(overwrite, files):
    return (not overwrite) and all([os.path.exists(file) for file in files])

def generate_output_file_names(basename):
    out_files=dict()
    out_files['preprocess']=[f"masks/{basename}_{k}_map.npy" for k in ['tumor','macro']]+[f"patches/{basename}_{k}_map.npy" for k in ['tumor','macro']]
    for k in ['macro','tumor']:
        out_files[f'cnn_{k}']=[f"cnn_embeddings/{basename}_{k}_map.pkl"]
        out_files[f'graph_data_{k}']=[f"graph_datasets/{basename}_{k}_map.pkl"]
        out_files[f'gnn_{k}']=[f"gnn_results/{basename}_{k}_map.pkl"]
    out_files['quality']=[f"quality_scores/{basename}.pkl"]
    out_files['ink']=[f"detected_inks/{basename}_thumbnail.npy"]
    out_files['nuclei']=[f"nuclei_results/{basename}.npy"]
    return out_files

[docs]def run_workflow_series(basename, compression, overwrite, ext, dirname, df_section_pieces_file, run_stitch_slide): """ Runs image processing workflow in series on an input image. Parameters ---------- basename : str The base name of the slide to process. compression : float The level of compression to apply to the slide. overwrite : bool Whether to overwrite existing files if they exist. ext : str The file extension of the slide. dirname : str The directory containing the slide and other relevant files. df_section_pieces_file : str The file containing information about the patches. run_stitch_slide : bool Whether to run the stitch_slides function after all other processing is complete. Returns ------- times : dict A dictionary containing the times at which each step of the workflow was completed. """ print(f"{basename} preprocessing") out_files=generate_output_file_names(basename) times=dict() times['start']=time.time() if not files_exist_overwrite(overwrite,out_files['preprocess']): preprocess(basename=basename, threshold=0.05, patch_size=256, ext=ext, no_break=False, df_section_pieces_file=df_section_pieces_file, image_mask_compression=8., dirname=dirname) times['preprocess']=time.time() new_basenames=pd.read_pickle(os.path.join(dirname,"metadata",f"{basename}.pkl")) for k in ['tumor','macro']: times[f'cnn_{k}'],times[f'graph_{k}'],times[f'gnn_{k}']=[],[],[] for bn in new_basenames: for k in ['tumor','macro']: print(f"{bn} {k} embedding") if not files_exist_overwrite(overwrite,out_files[f'cnn_{k}']): generate_embeddings(basename=bn, analysis_type=k, gpu_id=-1, dirname=dirname) times[f'cnn_{k}'].append(time.time()) print(f"{bn} {k} build graph") if not files_exist_overwrite(overwrite,out_files[f'graph_data_{k}']): create_graph_data(basename=bn, analysis_type=k, radius=256, min_component_size=600, no_component_break=True, dirname=dirname) times[f'graph_{k}'].append(time.time()) print(f"{bn} {k} gnn predict") if not files_exist_overwrite(overwrite,out_files[f'gnn_{k}']): predict(basename=bn, analysis_type=k, gpu_id=-1, dirname=dirname) times[f'gnn_{k}'].append(time.time()) print(f"{basename} ink detection") if not files_exist_overwrite(overwrite,out_files['ink']): detect_inks(basename=basename, compression=8., ext=ext, dirname=dirname) times["ink"]=time.time() print(f"{basename} stitch") if not files_exist_overwrite(overwrite,out_files['ink']): if run_stitch_slide: stitch_slides(basename=basename, compression=4, ext=ext, dirname=dirname) times["stitch"]=time.time() return times
def run_series(patient="163_A1", input_dir="inputs", compression=1., overwrite=True, record_time=False, ext=".npy", dirname=".", df_section_pieces_file="df_section_pieces.pkl", run_stitch_slide=True ): for f in glob.glob(os.path.join(input_dir,f"{patient}*{ext}")): basename=os.path.basename(f).replace(ext,"")#.replace(".tiff","").replace(".tif","").replace(".svs","") times=run_workflow_series(basename, compression, overwrite, ext, dirname, df_section_pieces_file, run_stitch_slide) if record_time: os.makedirs(os.path.join(dirname,"times"),exist_ok=True) pickle.dump(times,open(os.path.join(dirname,"times",f"{basename}.pkl"),'wb'))