Merge pull request #17224 from mantidproject/17106_SDOS_refactor

Indirect SimulatedDensityOfStates - Refactor file load and general tidy

Merge pull request #17224 from mantidproject/17106_SDOS_refactor
Indirect SimulatedDensityOfStates - Refactor file load and general tidy
efd80ef5 · Federico Montesino Pouzols · GitHub · dde04d2a · 42370577 · efd80ef5
Commit efd80ef5 authored 8 years ago by Federico Montesino Pouzols Committed by GitHub 8 years ago
--- a/Framework/PythonInterface/plugins/algorithms/WorkflowAlgorithms/SimulatedDensityOfStates.py
+++ b/Framework/PythonInterface/plugins/algorithms/WorkflowAlgorithms/SimulatedDensityOfStates.py
--- a/scripts/Inelastic/dos/__init__.py
+++ b/scripts/Inelastic/dos/__init__.py
+"""Supports the loading of phonon and castep files
+load_castep -- Parses a castep file into a file data object
+load_phonon -- Parses a phonon file into a file data object
+"""
+from __future__ import absolute_import
+__all__=['load_castep','load_phonon', 'load_helper']
--- a/scripts/Inelastic/dos/load_castep.py
+++ b/scripts/Inelastic/dos/load_castep.py
+import re
+import numpy as np
+import dos.load_helper as load_helper
+def parse_castep_file(file_name, ir_or_raman):
+    """
+    Read frequencies from a <>.castep file
+    @param file_name - file path of the file to read
+    @return the frequencies, infra red and raman intensities and weights of frequency blocks
+    """
+    file_data = {}
+    # Get Regex strings from load_helper
+    header_regex = re.compile(load_helper.CASTEP_HEADER_REGEX)
+    data_regex = re.compile(load_helper.CASTEP_DATA_REGEX)
+    bond_regex = re.compile(load_helper.CASTEP_BOND_REGEX)
+    block_count = 0
+    frequencies, ir_intensities, raman_intensities, weights, q_vectors, bonds = [], [], [], [], [], []
+    data_lists = (frequencies, ir_intensities, raman_intensities)
+    with open(file_name, 'rU') as f_handle:
+        file_data.update(_parse_castep_file_header(f_handle))
+        while True:
+            line = f_handle.readline()
+            # Check we've reached the end of file
+            if not line:
+                break
+            # Check if we've found a block of frequencies
+            header_match = header_regex.match(line)
+            if header_match:
+                block_count += 1
+                weight, q_vector = load_helper._parse_block_header(header_match, block_count)
+                weights.append(weight)
+                q_vectors.append(q_vector)
+                # Move file pointer forward to start of intensity data
+                _find_castep_freq_block(f_handle, data_regex)
+                # Parse block of frequencies
+                for line_data in _parse_castep_freq_block(f_handle, file_data['num_branches'], ir_or_raman):
+                    for data_list, item in zip(data_lists, line_data):
+                        data_list.append(item)
+            # Check if we've found a bond
+            bond_match = bond_regex.match(line)
+            if bond_match:
+                bonds.append(_parse_castep_bond(bond_match))
+    frequencies = np.asarray(frequencies)
+    ir_intensities = np.asarray(ir_intensities)
+    raman_intensities = np.asarray(raman_intensities)
+    warray = np.repeat(weights, file_data['num_branches'])
+    file_data.update({
+        'frequencies': frequencies,
+        'ir_intensities': ir_intensities,
+        'raman_intensities': raman_intensities,
+        'weights': warray,
+        'q_vectors':q_vectors
+        })
+    if len(bonds) > 0:
+        file_data['bonds'] = bonds
+    return file_data
+#----------------------------------------------------------------------------------------
+def _parse_castep_file_header(f_handle):
+    """
+    Read information from the header of a <>.castep file
+    @param f_handle - handle to the file.
+    @return tuple of the number of ions and branches in the file
+    """
+    num_species, num_ions = 0, 0
+    file_data = {}
+    while True:
+        line = f_handle.readline()
+        if not line:
+            raise IOError("Could not find any header information.")
+        if 'Total number of ions in cell =' in line:
+            file_data['num_ions'] = int(line.strip().split()[-1])
+        elif 'Total number of species in cell = ' in line:
+            num_species = int(line.strip().split()[-1])
+        if num_species > 0 and file_data['num_ions'] > 0:
+            file_data['num_branches'] = num_species * file_data['num_ions']
+            return file_data
+#----------------------------------------------------------------------------------------
+def _parse_castep_freq_block(f_handle, num_branches, ir_or_raman):
+    """
+    Iterator to parse a block of frequencies from a .castep file.
+    @param f_handle - handle to the file.
+    """
+    for _ in xrange(num_branches):
+        line = f_handle.readline()
+        line_data = line.strip().split()[1:-1]
+        freq = line_data[1]
+        intensity_data = line_data[3:]
+        # Remove non-active intensities from data
+        intensities = []
+        for value, active in zip(intensity_data[::2], intensity_data[1::2]):
+            if ir_or_raman:
+                if active == 'N' and value != 0:
+                    value = 0.0
+            intensities.append(value)
+        line_data = [freq] + intensities
+        line_data = [float(x) for x in line_data]
+        yield line_data
+#----------------------------------------------------------------------------------------
+def _find_castep_freq_block(f_handle, data_regex):
+    """
+    Find the start of the frequency block in a .castep file.
+    This will set the file pointer to the line before the start
+    of the block.
+    @param f_handle - handle to the file.
+    """
+    while True:
+        pos = f_handle.tell()
+        line = f_handle.readline()
+        if not line:
+            raise IOError("Could not parse frequency block. Invalid file format.")
+        if data_regex.match(line):
+            f_handle.seek(pos)
+            return
+#----------------------------------------------------------------------------------------
+def _parse_castep_bond(bond_match):
+    """
+    Parses a regex match to obtain bond information.
+    @param bond_match Regex match to bond data line
+    @return A dictionary defining the bond
+    """
+    bond = dict()
+    bond['atom_a'] = (bond_match.group(1), int(bond_match.group(2)))
+    bond['atom_b'] = (bond_match.group(3), int(bond_match.group(4)))
+    bond['population'] = float(bond_match.group(5))
+    bond['length'] = float(bond_match.group(6))
+    return bond
+#----------------------------------------------------------------------------------------
\ No newline at end of file
--- a/scripts/Inelastic/dos/load_helper.py
+++ b/scripts/Inelastic/dos/load_helper.py
+###=============General Regex strings===============###
+FLOAT_REGEX = r'\-?(?:\d+\.?\d*|\d*\.?\d+)'
+###=============Phonon Regex strings===============###
+# Header regex. Looks for lines in the following format:
+#     q-pt=    1    0.000000  0.000000  0.000000      1.0000000000    0.000000  0.000000  1.000000
+PHONON_HEADER_REGEX = r"^ +q-pt=\s+\d+ +(%(s)s) +(%(s)s) +(%(s)s) (?: *(%(s)s)){0,4}" % {'s': FLOAT_REGEX}
+PHONON_EIGENVEC_REGEX = r"\s*Mode\s+Ion\s+X\s+Y\s+Z\s*"
+###=============Castep Regex strings===============###
+# Header regex. Looks for lines in the following format:
+# +  q-pt=    1 (  0.000000  0.000000  0.000000)     1.0000000000              +
+CASTEP_HEADER_REGEX = r" +\+ +q-pt= +\d+ \( *(?: *(%(s)s)) *(%(s)s) *(%(s)s)\) +(%(s)s) +\+" % {'s' : FLOAT_REGEX}
+# Data regex. Looks for lines in the following format:
+#     +     1      -0.051481   a          0.0000000  N            0.0000000  N     +
+CASTEP_DATA_REGEX = r" +\+ +\d+ +(%(s)s)(?: +\w)? *(%(s)s)? *([YN])? *(%(s)s)? *([YN])? *\+" % {'s': FLOAT_REGEX}
+# Atom bond regex. Looks for lines in the following format:
+#   H 006 --    O 012               0.46        1.04206
+CASTEP_BOND_REGEX = r" +([A-z])+ +([0-9]+) +-- +([A-z]+) +([0-9]+) +(%(s)s) +(%(s)s)" % {'s': FLOAT_REGEX}
+###===============================================###
+def _parse_block_header(header_match, block_count):
+    """
+    Parse the header of a block of frequencies and intensities
+    @param header_match - the regex match to the header
+    @param block_count - the count of blocks found so far
+    @return weight for this block of values
+    """
+    # Found header block at start of frequencies
+    q1, q2, q3, weight = [float(x) for x in header_match.groups()]
+    q_vector = [q1, q2, q3]
+    if block_count > 1 and sum(q_vector) == 0:
+        weight = 0.0
+    return weight, q_vector
--- a/scripts/Inelastic/dos/load_phonon.py
+++ b/scripts/Inelastic/dos/load_phonon.py
+import re
+import numpy as np
+import dos.load_helper as load_helper
+element_isotope = dict()
+def parse_phonon_file(file_name, record_eigenvectors):
+    """
+    Read frequencies from a <>.phonon file
+    @param file_name - file path of the file to read
+    @return the frequencies, infra red and raman intensities and weights of frequency blocks
+    """
+    file_data = {}
+    # Get regex strings from load_helper
+    header_regex = re.compile(load_helper.PHONON_HEADER_REGEX)
+    eigenvectors_regex = re.compile(load_helper.PHONON_EIGENVEC_REGEX)
+    block_count = 0
+    frequencies, ir_intensities, raman_intensities, weights, q_vectors, eigenvectors = [], [], [], [], [], []
+    data_lists = (frequencies, ir_intensities, raman_intensities)
+    with open(file_name, 'rU') as f_handle:
+        file_data.update(_parse_phonon_file_header(f_handle))
+        while True:
+            line = f_handle.readline()
+            # Check we've reached the end of file
+            if not line:
+                break
+            # Check if we've found a block of frequencies
+            header_match = header_regex.match(line)
+            if header_match:
+                block_count += 1
+                weight, q_vector = load_helper._parse_block_header(header_match, block_count)
+                weights.append(weight)
+                q_vectors.append(q_vector)
+                # Parse block of frequencies
+                for line_data in _parse_phonon_freq_block(f_handle,
+                                                          file_data['num_branches']):
+                    for data_list, item in zip(data_lists, line_data):
+                        data_list.append(item)
+            vector_match = eigenvectors_regex.match(line)
+            if vector_match:
+                if record_eigenvectors:
+                    # Parse eigenvectors for partial dos
+                    vectors = _parse_phonon_eigenvectors(f_handle, 
+                                                         file_data['num_ions'],
+                                                         file_data['num_branches'])
+                    eigenvectors.append(vectors)
+                else:
+                    # Skip over eigenvectors
+                    for _ in xrange(file_data['num_ions'] * file_data['num_branches']):
+                        line = f_handle.readline()
+                        if not line:
+                            raise IOError("Bad file format. Uexpectedly reached end of file.")
+    frequencies = np.asarray(frequencies)
+    ir_intensities = np.asarray(ir_intensities)
+    raman_intensities = np.asarray(raman_intensities)
+    warray = np.repeat(weights, file_data['num_branches'])
+    eigenvectors = np.asarray(eigenvectors)
+    file_data.update({
+        'frequencies': frequencies,
+        'ir_intensities': ir_intensities,
+        'raman_intensities': raman_intensities,
+        'weights': warray,
+        'q_vectors':q_vectors,
+        'eigenvectors': eigenvectors
+        })
+    return file_data, element_isotope
+#----------------------------------------------------------------------------------------
+def _parse_phonon_file_header(f_handle):
+    """
+    Read information from the header of a <>.phonon file
+    @param f_handle - handle to the file.
+    @return List of ions in file as list of tuple of (ion, mode number)
+    """
+    file_data = {'ions': []}
+    while True:
+        line = f_handle.readline()
+        if not line:
+            raise IOError("Could not find any header information.")
+        if 'Number of ions' in line:
+            file_data['num_ions'] = int(line.strip().split()[-1])
+        elif 'Number of branches' in line:
+            file_data['num_branches'] = int(line.strip().split()[-1])
+        elif 'Unit cell vectors' in line:
+            file_data['unit_cell'] = _parse_phonon_unit_cell_vectors(f_handle)
+        elif 'Fractional Co-ordinates' in line:
+            if file_data['num_ions'] is None:
+                raise IOError("Failed to parse file. Invalid file header.")
+            # Extract the mode number for each of the ion in the data file
+            for _ in xrange(file_data['num_ions']):
+                line = f_handle.readline()
+                line_data = line.strip().split()
+                species = line_data[4]
+                ion = {'species': species}
+                ion['fract_coord'] = np.array([float(line_data[1]), float(line_data[2]), float(line_data[3])])
+                ion['isotope_number'] = float(line_data[5])
+                element_isotope[species] = float(line_data[5])
+                # -1 to convert to zero based indexing
+                ion['index'] = int(line_data[0]) - 1
+                ion['bond_number'] = len([i for i in file_data['ions'] if i['species'] == species]) + 1
+                file_data['ions'].append(ion)
+        if 'END header' in line:
+            if file_data['num_ions'] is None or file_data['num_branches'] is None:
+                raise IOError("Failed to parse file. Invalid file header.")
+            return file_data
+#----------------------------------------------------------------------------------------
+def _parse_phonon_freq_block(f_handle, num_branches):
+    """
+    Iterator to parse a block of frequencies from a .phonon file.
+    @param f_handle - handle to the file.
+    """
+    for _ in xrange(num_branches):
+        line = f_handle.readline()
+        line_data = line.strip().split()[1:]
+        line_data = [float(x) for x in line_data]
+        yield line_data
+#----------------------------------------------------------------------------------------
+def _parse_phonon_unit_cell_vectors(f_handle):
+    """
+    Parses the unit cell vectors in a .phonon file.
+    @param f_handle Handle to the file
+    @return Numpy array of unit vectors
+    """
+    data = []
+    for _ in range(3):
+        line = f_handle.readline()
+        line_data = line.strip().split()
+        line_data = [float(x) for x in line_data]
+        data.append(line_data)
+    return np.array(data)
+#----------------------------------------------------------------------------------------
+def _parse_phonon_eigenvectors(f_handle, num_ions, num_branches):
+    vectors = []
+    for _ in xrange(num_ions * num_branches):
+        line = f_handle.readline()
+        if not line:
+            raise IOError("Could not parse file. Invalid file format.")
+        line_data = line.strip().split()
+        vector_componets = line_data[2::2]
+        vector_componets = [float(x) for x in vector_componets]
+        vectors.append(vector_componets)
+    return np.asarray(vectors)
+#----------------------------------------------------------------------------------------