Source code for biom3d.omero_downloader

"""
Use to download an OMERO dataset to a folder.

Adapted from https://gist.github.com/will-moore/a9f90c97b5b6f1a0da277a5179d62c5a
Documentation: https://downloads.openmicroscopy.org/omero/5.3.1/api/python/omero/omero.plugins.html 
"""

import argparse
import sys
import os

from omero.gateway import BlitzGateway
from omero.cli import cli_login
from omero.clients import BaseClient

from omero.plugins.download import DownloadControl

# Message shown when an object identifier is not a 'Project:ID' or 'Dataset:ID' string.
OBJ_INFO = "obj should be 'Project:ID' or 'Dataset:ID'"

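"""
Usage:
python omero_downloader.py --obj Project:123 --target my_project_directory --hostname HOST --username USER --password PASSWORD

A session can be reused by passing --session_id instead of --username/--password.
"""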

from typing import Optional
from omero.cli import CLI


[docs]
def download_datasets_cli(datasets: list, target_dir:str)->None:
    """
    Fetch every image of the given datasets through the OMERO CLI.

    A CLI login is opened, the ``download`` plugin is registered on it, and
    each image that has a fileset is downloaded into a sub-folder of
    ``target_dir`` named after its dataset.

    Parameters
    ----------
    datasets : list
        OMERO Dataset objects whose images should be downloaded.
    target_dir : str
        Root directory; one sub-directory per dataset is created inside it.

    Returns
    -------
    None
    """
    with cli_login() as session:
        session.register("download", DownloadControl, "omero_downloader.py")

        for ds in datasets:
            print("Downloading Dataset", ds.id, ds.name)
            out_dir = os.path.join(target_dir, ds.name)
            os.makedirs(out_dir, exist_ok=True)

            # All images of a dataset land directly in the dataset folder;
            # no per-image directory is created.
            for img in ds.listChildren():
                if image_has_files := (img.getFileset() is not None):
                    session.invoke(["download", f'Image:{img.id}', out_dir])
                else:
                    print("No files to download for Image", img.id)




[docs]
def download_object_cli(cli:CLI, obj:str, target_dir:str):
    """
    Download a dataset or project using OMERO CLI and its ID.

    The identifier is validated first, then a ``BlitzGateway`` is built on
    top of the CLI client (with the group set to ``-1``) and the datasets are
    handed to ``download_datasets``.

    Parameters
    ----------
    cli : omero.cli.CLI
        Authenticated CLI session.
    obj : str
        OMERO object string, e.g. "Dataset:123" or "Project:456".
    target_dir : str
        Directory where data will be downloaded.

    Returns
    -------
    datasets: list
        Represent a list of OMERO datasets
    target_dir: str
        Final target folder path. For a project this is ``target_dir`` joined
        with the project name.

    Raises
    ------
    ValueError
        If ``obj`` is not of the form ``Type:ID`` with an integer ID, or if no
        object with that type and ID exists on the server.

    Notes
    -----
    An object type other than "Dataset" or "Project" is reported on stdout
    and yields an empty dataset list; nothing is downloaded.

    Examples
    --------
    .. code-block:: python

        with cli_login() as cli:
            download_object_cli(cli, args.obj, args.target)
    """
    # Parse "Type:ID"; fail with a clear message instead of continuing with
    # unbound variables.
    try:
        parts = obj.split(":")
        obj_type = parts[0]
        obj_id = int(parts[1])
    except (AttributeError, IndexError, ValueError) as err:
        raise ValueError(OBJ_INFO) from err

    if obj_type not in ("Dataset", "Project"):
        print(OBJ_INFO)
        return [], target_dir

    conn = BlitzGateway(client_obj=cli._client)
    conn.SERVICE_OPTS.setOmeroGroup(-1)

    parent = conn.getObject(obj_type, obj_id)
    if parent is None:
        raise ValueError(f"Not Found: {obj}")

    if obj_type == "Dataset":
        datasets = [parent]
    else:
        # A project is expanded into its datasets, stored under the project name.
        datasets = list(parent.listChildren())
        target_dir = os.path.join(target_dir, parent.getName())

    print("Downloading to ", target_dir)

    # download_datasets needs the gateway connection as its first argument.
    download_datasets(conn, datasets, target_dir)

    return datasets, target_dir



[docs]
def download_datasets(conn: BlitzGateway, datasets: list, target_dir: str) -> None:
    """
    Download datasets using OMERO BlitzGateway connection.

    For each dataset a sub-directory named after the dataset is created in
    ``target_dir`` and the fileset of every image is downloaded into it.
    Images without a fileset are reported and skipped.

    Parameters
    ----------
    conn : BlitzGateway
        Active OMERO connection.
    datasets : list
        List of OMERO dataset objects to download.
    target_dir : str
        Path to the directory where the datasets will be downloaded.

    Returns
    -------
    None
    """
    for dataset in datasets:
        print("Downloading Dataset", dataset.id, dataset.name)
        dc = DownloadControl()
        dataset_dir = os.path.join(target_dir, dataset.name)
        os.makedirs(dataset_dir, exist_ok=True)

        for image in dataset.listChildren():
            # Fetch the fileset once and reuse it for both the check and the download.
            fileset = image.getFileset()
            if fileset is None:
                print("No files to download for Image", image.id)
                continue

            # All images of a dataset are stored directly in dataset_dir;
            # no per-image directory is created.
            dc.download_fileset(conn, fileset, dataset_dir)



[docs]
def download_object(username: str,
                    password: str,
                    hostname: str,
                    obj: str,
                    target_dir: str,
                    session_id: Optional[str] = None,
                    ) -> tuple[list, str]:
    """
    Connect to OMERO and download a dataset or project via BlitzGateway.

    The identifier is validated before any connection is opened. When
    ``session_id`` is given the existing session is joined, otherwise a new
    connection is made with ``username``/``password``. Port 4064 is used in
    both cases.

    Parameters
    ----------
    username : str
        OMERO username.
    password : str
        OMERO password.
    hostname : str
        OMERO server hostname.
    obj : str
        Object identifier, e.g. "Dataset:123" or "Project:456".
    target_dir : str
        Target directory for download.
    session_id : str, optional
        Existing OMERO session ID to reuse.

    Returns
    -------
    datasets: list
        Represent a list of OMERO datasets
    target_dir: str
        Final target folder path. For a project this is ``target_dir`` joined
        with the project name.

    Raises
    ------
    ValueError
        If ``obj`` is not of the form ``Type:ID`` with an integer ID, or if no
        object with that type and ID exists on the server.
    ConnectionError
        If the username/password connection could not be established.

    Notes
    -----
    An object type other than "Dataset" or "Project" is reported on stdout
    and yields an empty dataset list; nothing is downloaded.
    """
    # Parse "Type:ID" up front so that bad input fails with a clear message
    # rather than with unbound variables further down.
    try:
        parts = obj.split(":")
        obj_type = parts[0]
        obj_id = int(parts[1])
    except (AttributeError, IndexError, ValueError) as err:
        raise ValueError(OBJ_INFO) from err

    if obj_type not in ("Dataset", "Project"):
        print(OBJ_INFO)
        return [], target_dir

    if session_id is not None:
        client = BaseClient(host=hostname, port=4064)
        client.joinSession(session_id)
        conn = BlitzGateway(client_obj=client)
    else:
        conn = BlitzGateway(username=username, passwd=password, host=hostname, port=4064)
        # connect() reports failure through its return value.
        if not conn.connect():
            raise ConnectionError(f"Could not connect to OMERO server {hostname}")

    parent = conn.getObject(obj_type, obj_id)
    if parent is None:
        raise ValueError(f"Not Found: {obj}")

    if obj_type == "Dataset":
        datasets = [parent]
    else:
        # A project is expanded into its datasets, stored under the project name.
        datasets = list(parent.listChildren())
        target_dir = os.path.join(target_dir, parent.getName())

    print("Downloading to ", target_dir)

    download_datasets(conn, datasets, target_dir)

    # NOTE(review): the connection is deliberately left open, as before (the
    # close() call was already disabled) - confirm whether a joined session
    # must stay alive for the caller.

    return datasets, target_dir



[docs]
def download_attachment(hostname: str,
                        username: str,
                        password: str,
                        session_id: Optional[str],
                        attachment_id: int,
                        config: bool = True,
                        ) -> Optional[str]:
    """
    Download an attachment (OMERO FileAnnotation) to the local file system.

    The file is written to ``configs/<name>`` or ``logs/<name>`` relative to
    the current working directory; the directory is created when missing.

    Parameters
    ----------
    hostname : str
        Hostname of the OMERO server.
    username : str
        OMERO username.
    password : str
        OMERO password.
    session_id : str, optional
        Optional OMERO session ID to reuse an existing session.
    attachment_id : int
        ID of the FileAnnotation to download.
    config : bool, default=True
        Whether to save to "configs/" directory. If False, saves to "logs/".

    Returns
    -------
    str or None
        Local path of the downloaded file, or None if not found.

    Raises
    ------
    ConnectionError
        If the username/password connection could not be established.
    """
    # Connect to the OMERO server using session ID or username/password
    if session_id is not None:
        client = BaseClient(host=hostname, port=4064)
        client.joinSession(session_id)
        conn = BlitzGateway(client_obj=client)
    else:
        conn = BlitzGateway(username=username, passwd=password, host=hostname, port=4064)
        # connect() reports failure through its return value.
        if not conn.connect():
            raise ConnectionError(f"Could not connect to OMERO server {hostname}")

    # Get the FileAnnotation object by ID
    annotation = conn.getObject("FileAnnotation", attachment_id)
    if not annotation:
        print(f"FileAnnotation with ID {attachment_id} not found.")
        return None

    # Get the linked OriginalFile object
    original_file = annotation.getFile()
    if original_file is None:
        print(f'No OriginalFile linked to annotation ID {attachment_id}')
        return None

    file_id = original_file.id
    file_name = original_file.name
    file_size = original_file.size

    print(f"File ID: {file_id}, Name: {file_name}, Size: {file_size}")

    out_dir = "configs" if config else "logs"
    # The destination folder may not exist yet on a fresh checkout.
    os.makedirs(out_dir, exist_ok=True)
    # Keep only the final path component so a server-supplied name cannot
    # place the file outside out_dir.
    file_path = os.path.join(out_dir, os.path.basename(file_name))

    # Download the file data in chunks
    print(f"\nDownloading file to {file_path}...")
    with open(file_path, 'wb') as f:
        for chunk in annotation.getFileInChunks():
            f.write(chunk)

    # Only announce success once the file has actually been written.
    print("Downloaded!")
    # NOTE(review): the connection is not closed here, matching the previous
    # behaviour - confirm whether a joined session must stay alive.
    return file_path


# Why not directly in __main__ ?

[docs]
def main(argv: list[str]) -> None:
    """
    Entry point for downloading OMERO datasets or projects from command-line arguments.

    Parses command-line arguments for object identifier, destination directory, and connection credentials.
    Then triggers the download using `download_object`.

    ``--obj`` and ``--target`` are mandatory: the parser exits with a usage
    message when either is missing, instead of passing ``None`` downstream.

    Parameters
    ----------
    argv : list of str
        List of command-line arguments (excluding the script name). Typically `sys.argv[1:]`.

    Returns
    -------
    None
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--obj', required=True,
        help="Download object: 'Project:ID' or 'Dataset:ID'")
    parser.add_argument('--target', required=True,
        help="Directory name to download into")
    parser.add_argument('--username', default=None,
        help="User name")
    parser.add_argument('--password', default=None,
        help="Password")
    parser.add_argument('--hostname', default=None,
        help="Host name")
    parser.add_argument('--session_id', default=None,
        help="Session ID")
    args = parser.parse_args(argv)

    # TODO: Add safeguards for the credential options as well.

    download_object(args.username, args.password, args.hostname, args.obj, args.target, args.session_id)


# When run as a script, forward the arguments that follow the script name to main().
if __name__ == '__main__':
    main(sys.argv[1:])