Loading lib/galaxy/datatypes/isa.py +22 −105 Original line number Diff line number Diff line Loading @@ -49,9 +49,6 @@ from galaxy.util.sanitize_html import sanitize_html if TYPE_CHECKING: from isatools.model import Investigation # CONSTANTS {{{1 ################################################################ # Main files regex JSON_FILE_REGEX = re.compile(r"^.*\.json$", flags=re.IGNORECASE) INVESTIGATION_FILE_REGEX = re.compile(r"^i_\w+\.txt$", flags=re.IGNORECASE) Loading @@ -62,17 +59,6 @@ ISA_ARCHIVE_NAME = "archive" # Set max number of lines of the history peek _MAX_LINES_HISTORY_PEEK = 11 # Configure logger {{{1 ################################################################ # Function for opening correctly a CSV file for csv.reader() for both Python 2 and 3 {{{1 ################################################################ # ISA class {{{1 ################################################################ class _Isa(Data): """Base class for implementing ISA datatypes""" Loading @@ -80,15 +66,9 @@ class _Isa(Data): composite_type = "auto_primary_file" is_binary = True # Make investigation instance {{{2 ################################################################ def _make_investigation_instance(self, filename: str) -> "Investigation": raise NotImplementedError() # Constructor {{{2 ################################################################ def __init__(self, main_file_regex: re.Pattern, **kwd) -> None: super().__init__(**kwd) self._main_file_regex = main_file_regex Loading @@ -96,57 +76,34 @@ class _Isa(Data): # Add the archive file as the only composite file self.add_composite_file(ISA_ARCHIVE_NAME, is_binary=True, optional=True) # Get ISA folder path {{{2 ################################################################ def _get_isa_folder_path(self, dataset: HasExtraFilesPath) -> str: isa_folder = dataset.extra_files_path if not isa_folder: raise Exception("Unvalid dataset object, or no extra files path found for this dataset.") return isa_folder # Get main file {{{2 ################################################################ def _get_main_file(self, dataset: HasExtraFilesPath) -> Optional[str]: def _get_main_file(self, dataset: HasExtraFilesPath) -> str: """Get the main file of the ISA archive. Either the investigation file i_*.txt for ISA-Tab, or the JSON file for ISA-JSON.""" main_file = None isa_folder = self._get_isa_folder_path(dataset) assert os.path.exists(isa_folder) if os.path.exists(isa_folder): # Get ISA archive older isa_files = os.listdir(isa_folder) # Try to find main file main_file = self._find_main_file_in_archive(isa_files) if main_file is None: raise Exception("Invalid ISA archive. No main file found.") # Make full path assert main_file main_file = os.path.join(isa_folder, main_file) return main_file # Get investigation {{{2 ################################################################ return os.path.join(isa_folder, main_file) def _get_investigation(self, dataset: HasExtraFilesPath) -> Optional["Investigation"]: def _get_investigation(self, dataset: HasExtraFilesPath) -> "Investigation": """Create a contained instance specific to the exact ISA type (Tab or Json). We will use it to parse and access information from the archive.""" investigation = None if (main_file := self._get_main_file(dataset)) is not None: investigation = self._make_investigation_instance(main_file) return investigation # Find main file in archive {{{2 ################################################################ main_file = self._get_main_file(dataset) return self._make_investigation_instance(main_file) def _find_main_file_in_archive(self, files_list: List) -> Optional[str]: def _find_main_file_in_archive(self, files_list: List) -> str: """Find the main file inside the ISA archive.""" found_file = None Loading @@ -159,24 +116,17 @@ class _Isa(Data): found_file = matched if isinstance(matched, str) else matched[0] else: raise Exception( 'More than one file match the pattern "', str(self._main_file_regex), '" to identify the investigation file', f"More than one file match the pattern '{self._main_file_regex}' to identify the investigation file" ) if found_file is None: raise Exception("Invalid ISA archive. No main file found.") return found_file # Set peek {{{2 ################################################################ def set_peek(self, dataset: DatasetProtocol, **kwd) -> None: """Set the peek and blurb text. Get first lines of the main file and set it as the peek.""" main_file = self._get_main_file(dataset) if main_file is None: raise RuntimeError("Unable to find the main file within the 'files_path' folder") # Read first lines of main file with open(main_file, encoding="utf-8") as f: data: List = [] Loading @@ -192,9 +142,6 @@ class _Isa(Data): dataset.peek = "file does not exist" dataset.blurb = "file purged from disk" # Display peek {{{2 ################################################################ def display_peek(self, dataset: DatasetProtocol) -> str: """Create the HTML table used for displaying peek, from the peek text found by set_peek() method.""" Loading @@ -213,9 +160,6 @@ class _Isa(Data): except Exception as exc: return f"Can't create peek: {util.unicodify(exc)}" # Generate primary file {{{2 ################################################################ def generate_primary_file(self, dataset: HasExtraFilesAndMetadata) -> str: """Generate the primary file. It is an HTML file containing description of the composite dataset as well as a list of the composite files that it contains.""" Loading @@ -232,16 +176,10 @@ class _Isa(Data): return "\n".join(rval) return "<div>No dataset available</div>" # Dataset content needs grooming {{{2 ################################################################ def dataset_content_needs_grooming(self, file_name: str) -> bool: """This function is called on an output dataset file after the content is initially generated.""" return os.path.basename(file_name) == ISA_ARCHIVE_NAME # Groom dataset content {{{2 ################################################################ def groom_dataset_content(self, file_name: str) -> None: """This method is called by Galaxy to extract files contained in a composite data type.""" # XXX Is the right place to extract files? Should this step not be a cleaning step instead? Loading Loading @@ -269,9 +207,6 @@ class _Isa(Data): else: shutil.move(temp_folder, output_path) # Display data {{{2 ################################################################ def display_data( self, trans, Loading @@ -293,8 +228,10 @@ class _Isa(Data): return super().display_data(trans, dataset, preview, filename, to_ext, **kwd) # prepare the preview of the ISA dataset try: investigation = self._get_investigation(dataset) if investigation is None: except Exception: logger.exception(f"Failed to display dataset {dataset.id}") html = """<html><header><title>Error while reading ISA archive.</title></header> <body> <h1>An error occurred while reading content of ISA archive.</h1> Loading Loading @@ -338,25 +275,15 @@ class _Isa(Data): return sanitize_html(html).encode("utf-8"), headers # ISA-Tab class {{{1 ################################################################ class IsaTab(_Isa): file_ext = "isa-tab" # Constructor {{{2 ################################################################ def __init__(self, **kwd): super().__init__(main_file_regex=INVESTIGATION_FILE_REGEX, **kwd) # Make investigation instance {{{2 ################################################################ def _make_investigation_instance(self, filename: str) -> "Investigation": def _make_investigation_instance(self, filename: str): if not isatab_meta: return raise Exception(ISA_MISSING_MODULE_MESSAGE) # Parse ISA-Tab investigation file parser = isatab_meta.InvestigationParser() isa_dir = os.path.dirname(filename) Loading @@ -373,25 +300,15 @@ class IsaTab(_Isa): return isa # ISA-JSON class {{{1 ################################################################ class IsaJson(_Isa): file_ext = "isa-json" # Constructor {{{2 ################################################################ def __init__(self, **kwd): super().__init__(main_file_regex=JSON_FILE_REGEX, **kwd) # Make investigation instance {{{2 ################################################################ def _make_investigation_instance(self, filename: str) -> "Investigation": def _make_investigation_instance(self, filename: str): if not isajson: return raise Exception(ISA_MISSING_MODULE_MESSAGE) # Parse JSON file with open(filename, newline="", encoding="utf8") as fp: isa = isajson.load(fp) Loading Loading
lib/galaxy/datatypes/isa.py +22 −105 Original line number Diff line number Diff line Loading @@ -49,9 +49,6 @@ from galaxy.util.sanitize_html import sanitize_html if TYPE_CHECKING: from isatools.model import Investigation # CONSTANTS {{{1 ################################################################ # Main files regex JSON_FILE_REGEX = re.compile(r"^.*\.json$", flags=re.IGNORECASE) INVESTIGATION_FILE_REGEX = re.compile(r"^i_\w+\.txt$", flags=re.IGNORECASE) Loading @@ -62,17 +59,6 @@ ISA_ARCHIVE_NAME = "archive" # Set max number of lines of the history peek _MAX_LINES_HISTORY_PEEK = 11 # Configure logger {{{1 ################################################################ # Function for opening correctly a CSV file for csv.reader() for both Python 2 and 3 {{{1 ################################################################ # ISA class {{{1 ################################################################ class _Isa(Data): """Base class for implementing ISA datatypes""" Loading @@ -80,15 +66,9 @@ class _Isa(Data): composite_type = "auto_primary_file" is_binary = True # Make investigation instance {{{2 ################################################################ def _make_investigation_instance(self, filename: str) -> "Investigation": raise NotImplementedError() # Constructor {{{2 ################################################################ def __init__(self, main_file_regex: re.Pattern, **kwd) -> None: super().__init__(**kwd) self._main_file_regex = main_file_regex Loading @@ -96,57 +76,34 @@ class _Isa(Data): # Add the archive file as the only composite file self.add_composite_file(ISA_ARCHIVE_NAME, is_binary=True, optional=True) # Get ISA folder path {{{2 ################################################################ def _get_isa_folder_path(self, dataset: HasExtraFilesPath) -> str: isa_folder = dataset.extra_files_path if not isa_folder: raise Exception("Unvalid dataset object, or no extra files path found for this dataset.") return isa_folder # Get main file {{{2 ################################################################ def _get_main_file(self, dataset: HasExtraFilesPath) -> Optional[str]: def _get_main_file(self, dataset: HasExtraFilesPath) -> str: """Get the main file of the ISA archive. Either the investigation file i_*.txt for ISA-Tab, or the JSON file for ISA-JSON.""" main_file = None isa_folder = self._get_isa_folder_path(dataset) assert os.path.exists(isa_folder) if os.path.exists(isa_folder): # Get ISA archive older isa_files = os.listdir(isa_folder) # Try to find main file main_file = self._find_main_file_in_archive(isa_files) if main_file is None: raise Exception("Invalid ISA archive. No main file found.") # Make full path assert main_file main_file = os.path.join(isa_folder, main_file) return main_file # Get investigation {{{2 ################################################################ return os.path.join(isa_folder, main_file) def _get_investigation(self, dataset: HasExtraFilesPath) -> Optional["Investigation"]: def _get_investigation(self, dataset: HasExtraFilesPath) -> "Investigation": """Create a contained instance specific to the exact ISA type (Tab or Json). We will use it to parse and access information from the archive.""" investigation = None if (main_file := self._get_main_file(dataset)) is not None: investigation = self._make_investigation_instance(main_file) return investigation # Find main file in archive {{{2 ################################################################ main_file = self._get_main_file(dataset) return self._make_investigation_instance(main_file) def _find_main_file_in_archive(self, files_list: List) -> Optional[str]: def _find_main_file_in_archive(self, files_list: List) -> str: """Find the main file inside the ISA archive.""" found_file = None Loading @@ -159,24 +116,17 @@ class _Isa(Data): found_file = matched if isinstance(matched, str) else matched[0] else: raise Exception( 'More than one file match the pattern "', str(self._main_file_regex), '" to identify the investigation file', f"More than one file match the pattern '{self._main_file_regex}' to identify the investigation file" ) if found_file is None: raise Exception("Invalid ISA archive. No main file found.") return found_file # Set peek {{{2 ################################################################ def set_peek(self, dataset: DatasetProtocol, **kwd) -> None: """Set the peek and blurb text. Get first lines of the main file and set it as the peek.""" main_file = self._get_main_file(dataset) if main_file is None: raise RuntimeError("Unable to find the main file within the 'files_path' folder") # Read first lines of main file with open(main_file, encoding="utf-8") as f: data: List = [] Loading @@ -192,9 +142,6 @@ class _Isa(Data): dataset.peek = "file does not exist" dataset.blurb = "file purged from disk" # Display peek {{{2 ################################################################ def display_peek(self, dataset: DatasetProtocol) -> str: """Create the HTML table used for displaying peek, from the peek text found by set_peek() method.""" Loading @@ -213,9 +160,6 @@ class _Isa(Data): except Exception as exc: return f"Can't create peek: {util.unicodify(exc)}" # Generate primary file {{{2 ################################################################ def generate_primary_file(self, dataset: HasExtraFilesAndMetadata) -> str: """Generate the primary file. It is an HTML file containing description of the composite dataset as well as a list of the composite files that it contains.""" Loading @@ -232,16 +176,10 @@ class _Isa(Data): return "\n".join(rval) return "<div>No dataset available</div>" # Dataset content needs grooming {{{2 ################################################################ def dataset_content_needs_grooming(self, file_name: str) -> bool: """This function is called on an output dataset file after the content is initially generated.""" return os.path.basename(file_name) == ISA_ARCHIVE_NAME # Groom dataset content {{{2 ################################################################ def groom_dataset_content(self, file_name: str) -> None: """This method is called by Galaxy to extract files contained in a composite data type.""" # XXX Is the right place to extract files? Should this step not be a cleaning step instead? Loading Loading @@ -269,9 +207,6 @@ class _Isa(Data): else: shutil.move(temp_folder, output_path) # Display data {{{2 ################################################################ def display_data( self, trans, Loading @@ -293,8 +228,10 @@ class _Isa(Data): return super().display_data(trans, dataset, preview, filename, to_ext, **kwd) # prepare the preview of the ISA dataset try: investigation = self._get_investigation(dataset) if investigation is None: except Exception: logger.exception(f"Failed to display dataset {dataset.id}") html = """<html><header><title>Error while reading ISA archive.</title></header> <body> <h1>An error occurred while reading content of ISA archive.</h1> Loading Loading @@ -338,25 +275,15 @@ class _Isa(Data): return sanitize_html(html).encode("utf-8"), headers # ISA-Tab class {{{1 ################################################################ class IsaTab(_Isa): file_ext = "isa-tab" # Constructor {{{2 ################################################################ def __init__(self, **kwd): super().__init__(main_file_regex=INVESTIGATION_FILE_REGEX, **kwd) # Make investigation instance {{{2 ################################################################ def _make_investigation_instance(self, filename: str) -> "Investigation": def _make_investigation_instance(self, filename: str): if not isatab_meta: return raise Exception(ISA_MISSING_MODULE_MESSAGE) # Parse ISA-Tab investigation file parser = isatab_meta.InvestigationParser() isa_dir = os.path.dirname(filename) Loading @@ -373,25 +300,15 @@ class IsaTab(_Isa): return isa # ISA-JSON class {{{1 ################################################################ class IsaJson(_Isa): file_ext = "isa-json" # Constructor {{{2 ################################################################ def __init__(self, **kwd): super().__init__(main_file_regex=JSON_FILE_REGEX, **kwd) # Make investigation instance {{{2 ################################################################ def _make_investigation_instance(self, filename: str) -> "Investigation": def _make_investigation_instance(self, filename: str): if not isajson: return raise Exception(ISA_MISSING_MODULE_MESSAGE) # Parse JSON file with open(filename, newline="", encoding="utf8") as fp: isa = isajson.load(fp) Loading