Commit 1e225b62 (unverified), authored by Jose Borreguero, committed by GitHub
Browse files

allow for list of open beam and dark field directories (#274)

* allow for list of open beam and dark field directories
* add unit test with two directories
* test corner cases
* cast to Path early when processing ob and dc dirs
* delete duplicate cast to Path
parent a62bab5d
Loading
Loading
Loading
Loading
+50 −26
Original line number Diff line number Diff line
@@ -14,10 +14,11 @@ from tqdm.contrib.concurrent import process_map
# standard imports
from functools import partial
from fnmatch import fnmatchcase
import itertools
import logging
from pathlib import Path
import re
from typing import Optional, Tuple, List, Callable
from typing import Callable, List, Optional, Tuple, Union

# ignore warnings generated by importing dxchange
import warnings
@@ -26,6 +27,9 @@ with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    import dxchange

# Custom types
FlexPath = Union[str, Path]

# setup module level logger
logger = logging.getLogger(__name__)
# METADATA_DICT = {
@@ -322,9 +326,9 @@ def _load_by_file_list(


def _get_filelist_by_dir(
    ct_dir: str,
    ob_dir: str,
    dc_dir: Optional[str] = None,
    ct_dir: FlexPath,  # either a string or a pathlib.Path
    ob_dir: Union[FlexPath, List[FlexPath]],
    dc_dir: Optional[Union[FlexPath, List[FlexPath]]] = None,
    ct_fnmatch: Optional[str] = "*",
    ob_fnmatch: Optional[str] = "*",
    dc_fnmatch: Optional[str] = "*",
@@ -357,26 +361,44 @@ def _get_filelist_by_dir(
        embedded in the ct file to find obs with similar metadata.
    """
    # sanity check
    # -- radiograph
    ##########
    # -- Process input argument ct_dir for radiographs
    if not Path(ct_dir).exists():
        logger.error(f"ct_dir {ct_dir} does not exist.")
        raise ValueError("ct_dir does not exist.")
    else:
        ct_dir = Path(ct_dir)
    # -- open beam
    if not Path(ob_dir).exists():
        logger.error(f"ob_dir {ob_dir} does not exist.")
        raise ValueError("ob_dir does not exist.")
    ##########
    # -- Process input argument ob_dir for open beam directories
    if isinstance(ob_dir, (str, Path)):  # single directory
        open_beam_dirs = [Path(ob_dir)]  # cast the single directory to a list of input directories
    elif isinstance(ob_dir, (list, tuple)):  # multiple input directories, assumed items are of FlexPath type
        open_beam_dirs = [Path(data_dir) for data_dir in ob_dir]
    else:
        ob_dir = Path(ob_dir)
    # -- dark current
        raise ValueError("ob_dir must be either a string or a list of strings")
    # validate for existence
    for data_dir in open_beam_dirs:
        if not data_dir.exists():
            logger.error(f"open beam directory {str(data_dir)} does not exist.")
            raise ValueError(f"open beam directory {str(data_dir)} does not exist.")
    ##########
    # -- Process input argument dc_dir for dark current directories
    if dc_dir is None:
        logger.warning("dc_dir is None.")
    elif not Path(dc_dir).exists():
        logger.warning(f"dc_dir {dc_dir} does not exist, treating as None.")
        dc_dir = None
        logger.warning("dc_dir is None, ignoring.")
        dark_field_dirs = []
    else:
        if isinstance(dc_dir, (str, Path)):  # single directory
            dark_field_dirs = [Path(dc_dir)]  # cast the single directory to a list of input directories
        elif isinstance(dc_dir, (list, tuple)):  # multiple directories, assumed items are of FlexPath type
            dark_field_dirs = [Path(data_dir) for data_dir in dc_dir]
        else:
        dc_dir = Path(dc_dir)
            raise ValueError("dc_dir must be either a string or a list of strings")
    # check for existence
    for i, data_dir in enumerate(dark_field_dirs):
        if not data_dir.exists():
            logger.warning(f"dark field directory {str(data_dir)} does not exist, ignoring.")
            dark_field_dirs[i] = None
    dark_field_dirs = [data_dir for data_dir in dark_field_dirs if data_dir is not None]  # extricate None entries

    # gather the ct_files
    ct_files = ct_dir.glob(ct_fnmatch)
@@ -397,11 +419,13 @@ def _get_filelist_by_dir(
            logger.warning("ob_files is [].")
            ob_files = []
        else:
            ob_files = ob_dir.glob(f"*{ext_ref}")
            ob_files = list()
            for open_beam_dir in open_beam_dirs:
                obfs = open_beam_dir.glob(f"*{ext_ref}")
                # remove files that do not match the metadata of ct_ref
            ob_files = [obf for obf in ob_files if metadata_ref.match(other_filename=str(obf), other_datatype="ob")]
                ob_files += [f for f in obfs if metadata_ref.match(other_filename=str(f), other_datatype="ob")]
    else:
        ob_files = ob_dir.glob(ob_fnmatch)
        ob_files = list(itertools.chain(*[list(obd.glob(ob_fnmatch)) for obd in open_beam_dirs]))

    # gather the dc_files
    if dc_dir is None:
@@ -412,13 +436,13 @@ def _get_filelist_by_dir(
                logger.warning("dc_files is [].")
                dc_files = []
            else:
                dc_files = dc_dir.glob(f"*{ext_ref}")
                dc_files = list()
                for dark_field_dir in dark_field_dirs:
                    dcfs = dark_field_dir.glob(f"*{ext_ref}")
                    # remove files that do not match the metadata of ct_ref
                dc_files = [
                    dcf for dcf in dc_files if metadata_ref.match(other_filename=str(dcf), other_datatype="dc")
                ]
                    dc_files += [f for f in dcfs if metadata_ref.match(other_filename=str(f), other_datatype="dc")]
        else:
            dc_files = dc_dir.glob(dc_fnmatch)
            dc_files = list(itertools.chain(*[list(dcf.glob(dc_fnmatch)) for dcf in dark_field_dirs]))

    # since generator returns an unordered list, we need to force it to be sorted
    # so that angles can be properly retrieved if needed
+108 −26
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@ import pytest
import tifffile

# standard imports
from copy import deepcopy
from functools import partial
from pathlib import Path
from unittest import mock
@@ -150,44 +151,46 @@ def test_extract_rotation_angles(data_fixture):
    np.testing.assert_array_almost_equal(rst, ref)


@pytest.fixture(scope="function")
def tiff_with_metadata(tmpdir):
    # create testing tiff images
    data = np.ones((3, 3))
    #
    ext_tags_ct = [
@pytest.fixture(scope="module")
def ext_tags():
    return {
        "ct": [
            (65026, "s", 0, "ManufacturerStr:Test", True),
            (65027, "s", 0, "ExposureTime:70.000000", True),
            (65068, "s", 0, "MotSlitHR.RBV:10.000000", True),
            (65070, "s", 0, "MotSlitHL.RBV:20.000000", True),
            (65066, "s", 0, "MotSlitVT.RBV:10.000000", True),
            (65068, "s", 0, "MotSlitHR.RBV:10.000000", True),
    ]
    ext_tags_dc = [
        (65026, "s", 0, "ManufacturerStr:Test", True),
        (65027, "s", 0, "ExposureTime:70.000000", True),
    ]
    ext_tags_ct_alt = [
        ],
        "dc": [(65026, "s", 0, "ManufacturerStr:Test", True), (65027, "s", 0, "ExposureTime:70.000000", True)],
        "ct_alt": [
            (65026, "s", 0, "ManufacturerStr:Test", True),
            (65027, "s", 0, "ExposureTime:71.000000", True),
            (65068, "s", 0, "MotSlitHR.RBV:11.000000", True),
            (65070, "s", 0, "MotSlitHL.RBV:21.000000", True),
            (65066, "s", 0, "MotSlitVT.RBV:11.000000", True),
            (65068, "s", 0, "MotSlitHR.RBV:11.000000", True),
    ]
        ],
    }


@pytest.fixture(scope="function")
def tiff_with_metadata(tmpdir, ext_tags):
    # create testing tiff images
    data = np.ones((3, 3))
    # write testing data
    ct = tmpdir / "ct_dir" / "test_ct.tiff"
    ct.parent.mkdir()
    tifffile.imwrite(str(ct), data, extratags=ext_tags_ct)
    tifffile.imwrite(str(ct), data, extratags=ext_tags["ct"])
    ob = tmpdir / "ob_dir" / "test_ob.tiff"
    ob.parent.mkdir()
    tifffile.imwrite(str(ob), data, extratags=ext_tags_ct)
    tifffile.imwrite(str(ob), data, extratags=ext_tags["ct"])
    dc = tmpdir / "dc_dir" / "test_dc.tiff"
    dc.parent.mkdir()
    tifffile.imwrite(str(dc), data, extratags=ext_tags_dc)
    tifffile.imwrite(str(dc), data, extratags=ext_tags["dc"])
    ct_alt = tmpdir / "ct_alt_dir" / "test_ct_alt.tiff"
    ct_alt.parent.mkdir()
    tifffile.imwrite(str(ct_alt), data, extratags=ext_tags_ct_alt)
    tifffile.imwrite(str(ct_alt), data, extratags=ext_tags["ct_alt"])
    return ct, ob, dc, ct_alt


@@ -258,6 +261,85 @@ def test_get_filelist_by_dir(tiff_with_metadata):
    assert rst == ([], [], [])


def test_get_filelist_by_dirs(tmpdir, caplog, ext_tags, tiff_with_metadata):
    """Check ``_get_filelist_by_dir`` when ``ob_dir`` and ``dc_dir`` are lists of directories.

    A second open-beam directory and a second dark-field directory are created
    next to the ones provided by the ``tiff_with_metadata`` fixture, and the
    function is then exercised with:

    - an ``ob_dir`` / ``dc_dir`` entry that is neither a path nor a list (``ValueError``),
    - a dark-field directory that does not exist (warning logged, entry ignored),
    - explicit fnmatch patterns for all three kinds of files,
    - no ``dc_dir`` at all,
    - metadata-based detection of open-beam / dark-field files (``*_fnmatch=None``),
    - a radiograph whose metadata matches no open-beam file,
    - a radiograph pattern that matches no file.
    """
    ct, ob_1, dc_1, ct_alt = tiff_with_metadata
    ct_dir = ct.parent
    ct_alt_dir = ct_alt.parent
    # additional open-beam and dark-field files
    data = np.ones((3, 3))
    ob_2 = tmpdir / "ob_dir_2" / "test_ob.tiff"
    ob_2.parent.mkdir()
    tifffile.imwrite(str(ob_2), data, extratags=ext_tags["ct"])
    ob_dir = [ob_1.parent, ob_2.parent]
    dc_2 = tmpdir / "dc_dir_2" / "test_dc.tiff"
    dc_2.parent.mkdir()
    tifffile.imwrite(str(dc_2), data, extratags=ext_tags["dc"])
    dc_dir = [dc_1.parent, dc_2.parent]
    # convert the golden data to string for ease of comparison
    ct, ct_alt, ob_1, ob_2, dc_1, dc_2 = [str(x) for x in (ct, ct_alt, ob_1, ob_2, dc_1, dc_2)]
    common = dict(
        ct_dir=ct_dir, ob_dir=ob_dir, dc_dir=dc_dir, ct_fnmatch="*.tiff", ob_fnmatch="*.tiff", dc_fnmatch="*.tiff"
    )
    # corner case, open-beam directory is not a valid entry
    # (a file object is neither str, Path, list nor tuple; the context manager
    # guarantees the handle is closed once the check is done)
    kwargs = deepcopy(common)
    with open(ct, "r") as not_a_directory:
        kwargs["ob_dir"] = not_a_directory
        with pytest.raises(ValueError) as e:
            _get_filelist_by_dir(**kwargs)
    assert "ob_dir must be either a string or a list of strings" == str(e.value)
    # corner case, dark-field directory is not a valid entry
    kwargs = deepcopy(common)
    with open(ct, "r") as not_a_directory:
        kwargs["dc_dir"] = not_a_directory
        with pytest.raises(ValueError) as e:
            _get_filelist_by_dir(**kwargs)
    assert "dc_dir must be either a string or a list of strings" == str(e.value)
    # corner case, dark-field directory doesn't exist
    kwargs = deepcopy(common)  # deep copy so that appending does not alter common["dc_dir"]
    kwargs["dc_dir"].append(Path("/tmp/tHIs_dOEs_nOt_EXIsT"))
    caplog.clear()
    rst = _get_filelist_by_dir(**kwargs)
    assert "/tmp/tHIs_dOEs_nOt_EXIsT does not exist, ignoring" in caplog.text
    assert rst == ([ct], [ob_1, ob_2], [dc_1, dc_2])
    # case_0: load all three
    rst = _get_filelist_by_dir(**common)
    assert rst == ([ct], [ob_1, ob_2], [dc_1, dc_2])
    # case_1: load ct and ob, skipping dc
    kwargs = deepcopy(common)
    del kwargs["dc_dir"]
    rst = _get_filelist_by_dir(**kwargs)
    assert rst == ([ct], [ob_1, ob_2], [])
    # case_2: load ct, and detect ob and dc from metadata
    kwargs = deepcopy(common)
    kwargs.update(dict(ob_fnmatch=None, dc_fnmatch=None))
    rst = _get_filelist_by_dir(**kwargs)
    assert rst == ([ct], [ob_1, ob_2], [dc_1, dc_2])
    # case_3: load ct, and detect ob from metadata
    caplog.clear()
    rst = _get_filelist_by_dir(
        ct_dir=ct_dir,
        ob_dir=ob_dir,
        ct_fnmatch="*.tiff",
        ob_fnmatch=None,
    )
    assert "dc_dir is None, ignoring" in caplog.text
    assert rst == ([ct], [ob_1, ob_2], [])
    # case_4: load ct_alt, and find no match ob
    rst = _get_filelist_by_dir(
        ct_dir=ct_alt_dir,
        ob_dir=ob_dir,
        ct_fnmatch="*.tiff",
        ob_fnmatch=None,
    )
    assert rst == ([ct_alt], [], [])
    # case_5: did not find any match for ct
    rst = _get_filelist_by_dir(
        ct_dir=ct_dir,
        ob_dir=ob_dir,
        ct_fnmatch="*.not_exist",
        ob_fnmatch=None,
    )
    assert rst == ([], [], [])


def test_save_data_fail():
    with pytest.raises(ValueError):
        save_data()