Source code for biom3d.utils.data_handler.data_handler_factory

"""Class to instantiate a DataHandler depending on the input and output type."""

from os.path import isdir,splitext,exists,dirname
from os import lstat,getcwd,access,W_OK
from typing import Optional
from urllib.parse import urlparse

from .data_handler_abstract import DataHandler
from .file_handler import FileHandler
from .hdf5_handler import HDF5Handler

class DataHandlerFactory:
    """Class to instantiate a DataHandler depending on the input and output type."""

    # Maps a lowercase file extension (or the pseudo-key "folder") to the
    # handler class used for that kind of data.
    EXTENSION_MAP: dict[str, type['DataHandler']] = {
        ".h5": HDF5Handler,
        ".hdf5": HDF5Handler,
        "folder": FileHandler
    }

    @staticmethod
    def _is_url(path: str) -> bool:
        """
        Check if the given path is a URL.

        Only the schemes ``http``, ``https``, ``ftp`` and ``s3`` are recognised.

        Parameters
        ----------
        path: str
            The path to test.

        Returns
        -------
        bool
            Whether path is a URL or not.
        """
        return urlparse(path).scheme in ("http", "https", "ftp", "s3")

    @staticmethod
    def _is_nonexistent_folder(path: str) -> bool:
        """
        Check if the given path can be treated as a folder that may not exist yet.

        The path is accepted when it is not a URL, has no file extension, is
        a usable path string, and either already exists or has a writable
        parent directory (the current working directory when the path has no
        parent component).

        Parameters
        ----------
        path: str
            The path to test.

        Returns
        -------
        bool
            Whether the path refers to a (possibly not yet existing) folder.
        """
        # Local import: only needed here to normalise trailing separators.
        from os import altsep, sep

        def is_path_valid(pathname: str) -> bool:
            # Empty or whitespace-only strings are never usable paths.
            if not pathname or pathname.isspace():
                return False
            try:
                lstat(pathname)
            except OSError:
                # The path simply does not exist (yet): that is allowed.
                return True
            except (ValueError, TypeError):
                # e.g. embedded null byte: such a path can never be created.
                return False
            return True

        def is_path_creatable(pathname: str) -> bool:
            # Strip trailing separators first: dirname("out/") is "out"
            # itself, which does not exist yet, so the real parent would
            # otherwise never be checked.
            separators = sep + (altsep or "")
            trimmed = pathname.rstrip(separators) or pathname
            parent = dirname(trimmed) or getcwd()
            # Creating the folder requires write permission on its parent.
            return access(parent, W_OK)

        if DataHandlerFactory._is_url(path):
            return False

        _, ext = splitext(path)
        if ext:
            # An extension means this is probably a file, not a folder.
            return False

        try:
            return is_path_valid(path) and (
                exists(path) or is_path_creatable(path)
            )
        except OSError:
            return False

    @staticmethod
    def _detect_handler_type(path: str) -> type['DataHandler']:
        """
        Return the `DataHandler` subclass able to treat the data at `path`.

        Existing directories and paths accepted by `_is_nonexistent_folder`
        map to the "folder" handler; otherwise the lowercase file extension
        is looked up in `EXTENSION_MAP`.

        Parameters
        ----------
        path: str
            The path to test.

        Raises
        ------
        NotImplementedError:
            If no handler is registered for the extension of `path`.

        Returns
        -------
        type[DataHandler]
            A DataHandler subclass that can treat the data format given by path.
        """
        if isdir(path) or DataHandlerFactory._is_nonexistent_folder(path):
            return DataHandlerFactory.EXTENSION_MAP["folder"]

        _, ext = splitext(path)
        ext = ext.lower()
        if ext in DataHandlerFactory.EXTENSION_MAP:
            return DataHandlerFactory.EXTENSION_MAP[ext]

        raise NotImplementedError(f"No handler found for extension: '{ext}'")

    @staticmethod
    def get(input:str,read_only:bool=False,preprocess:bool=False,output:Optional[str]=None,**kwargs)->DataHandler:
        """
        Create a handler whose type depends on the input extension.

        Parameters
        ----------
        input: str
            Path to input (Folder path, archive path, url,...). This path will be used as the image path.
        read_only: bool, default = False
            (Optional) Whether handler is in read only.
        output: str, default = None
            (Optional) Path to output, is used if the output type is different from input.
        preprocess: bool, default = False
            (Optional) If it is a preprocessing handler (will create more output).
        **kwargs:
            All existing parameters to existing handlers, currently

            msk_path:str, default=None,
                Generic : mask output path
            fg_path:str, default = None
                Generic : foreground output path
            eval: "label" | "pred" | None, default=None
                HDF5Handler (and all others that use keys) : Tell your handler that it is to eval and that it should search for the label or prediction key in your dataset key.
            img_inner_paths_list, default=None
                Generic : A list of path coming from a specific root (eg: The paths inside a .h5 file), used in data/batch loaders.
            msk_inner_paths_list, default=None
                Generic : A list of path coming from a specific root (eg: The paths inside a .h5 file), used in data/batch loaders
            fg_inner_paths_list, default=None
                Generic : A list of path coming from a specific root (eg: The paths inside a .h5 file), used in data/batch loaders
            img_outpath:str, default = None,
                Generic : images output path
            msk_outpath:str, default = None
                Generic : mask output path
            fg_outpath:str, default = None
                Generic : foreground output path
            model_name:str, default = None
                Generic : Used for prediction, if different than `None`, it will be added at the end of path (eg: predictions/MyModelName, predictions.h5["MyModelName"])
            use_tif:bool, default = False
                FileHandler : If should be saved as tif instead of npy.

        Raises
        ------
        ValueError:
            If parameters `read_only` and `preprocess` are both `True`.
        NotImplementedError:
            If no handler matches the `input` (or `output`) path.

        Returns
        -------
        DataHandler
            A DataHandler specific to input and output type
        """
        if read_only and preprocess:
            raise ValueError("A preprocess handler need to write and can't be in read_only")

        input_cls = DataHandlerFactory._detect_handler_type(input)
        # The input path is forwarded to the handlers as the image path.
        kwargs["img_path"] = input

        handler = input_cls()
        handler._input_parse(**kwargs)
        handler._preprocess = preprocess

        if read_only:
            # Nothing will be written: no saver at all.
            saver = None
        elif output is None:
            # No explicit output: the reader also acts as the writer.
            saver = handler
        else:
            output_cls = DataHandlerFactory._detect_handler_type(output)
            # Reuse the reader when input and output share the same handler type.
            saver = handler if output_cls == input_cls else output_cls()

        if not read_only:
            saver._preprocess = preprocess
            if preprocess:
                saver._output_parse_preprocess(**kwargs)
            else:
                saver._output_parse(**kwargs)

        handler._saver = saver
        return handler