Skip to content
Snippets Groups Projects
Commit efd80ef5 authored by Federico Montesino Pouzols's avatar Federico Montesino Pouzols Committed by GitHub
Browse files

Merge pull request #17224 from mantidproject/17106_SDOS_refactor

Indirect SimulatedDensityOfStates - Refactor file load and general tidy
parents dde04d2a 42370577
No related branches found
No related tags found
No related merge requests found
"""Supports the loading of phonon and castep files
load_castep -- Parses a castep file into a file data object
load_phonon -- Parses a phonon file into a file data object
"""
from __future__ import absolute_import
__all__=['load_castep','load_phonon', 'load_helper']
import re
import numpy as np
import dos.load_helper as load_helper
def parse_castep_file(file_name, ir_or_raman):
"""
Read frequencies from a <>.castep file
@param file_name - file path of the file to read
@return the frequencies, infra red and raman intensities and weights of frequency blocks
"""
file_data = {}
# Get Regex strings from load_helper
header_regex = re.compile(load_helper.CASTEP_HEADER_REGEX)
data_regex = re.compile(load_helper.CASTEP_DATA_REGEX)
bond_regex = re.compile(load_helper.CASTEP_BOND_REGEX)
block_count = 0
frequencies, ir_intensities, raman_intensities, weights, q_vectors, bonds = [], [], [], [], [], []
data_lists = (frequencies, ir_intensities, raman_intensities)
with open(file_name, 'rU') as f_handle:
file_data.update(_parse_castep_file_header(f_handle))
while True:
line = f_handle.readline()
# Check we've reached the end of file
if not line:
break
# Check if we've found a block of frequencies
header_match = header_regex.match(line)
if header_match:
block_count += 1
weight, q_vector = load_helper._parse_block_header(header_match, block_count)
weights.append(weight)
q_vectors.append(q_vector)
# Move file pointer forward to start of intensity data
_find_castep_freq_block(f_handle, data_regex)
# Parse block of frequencies
for line_data in _parse_castep_freq_block(f_handle, file_data['num_branches'], ir_or_raman):
for data_list, item in zip(data_lists, line_data):
data_list.append(item)
# Check if we've found a bond
bond_match = bond_regex.match(line)
if bond_match:
bonds.append(_parse_castep_bond(bond_match))
frequencies = np.asarray(frequencies)
ir_intensities = np.asarray(ir_intensities)
raman_intensities = np.asarray(raman_intensities)
warray = np.repeat(weights, file_data['num_branches'])
file_data.update({
'frequencies': frequencies,
'ir_intensities': ir_intensities,
'raman_intensities': raman_intensities,
'weights': warray,
'q_vectors':q_vectors
})
if len(bonds) > 0:
file_data['bonds'] = bonds
return file_data
#----------------------------------------------------------------------------------------
def _parse_castep_file_header(f_handle):
"""
Read information from the header of a <>.castep file
@param f_handle - handle to the file.
@return tuple of the number of ions and branches in the file
"""
num_species, num_ions = 0, 0
file_data = {}
while True:
line = f_handle.readline()
if not line:
raise IOError("Could not find any header information.")
if 'Total number of ions in cell =' in line:
file_data['num_ions'] = int(line.strip().split()[-1])
elif 'Total number of species in cell = ' in line:
num_species = int(line.strip().split()[-1])
if num_species > 0 and file_data['num_ions'] > 0:
file_data['num_branches'] = num_species * file_data['num_ions']
return file_data
#----------------------------------------------------------------------------------------
def _parse_castep_freq_block(f_handle, num_branches, ir_or_raman):
"""
Iterator to parse a block of frequencies from a .castep file.
@param f_handle - handle to the file.
"""
for _ in xrange(num_branches):
line = f_handle.readline()
line_data = line.strip().split()[1:-1]
freq = line_data[1]
intensity_data = line_data[3:]
# Remove non-active intensities from data
intensities = []
for value, active in zip(intensity_data[::2], intensity_data[1::2]):
if ir_or_raman:
if active == 'N' and value != 0:
value = 0.0
intensities.append(value)
line_data = [freq] + intensities
line_data = [float(x) for x in line_data]
yield line_data
#----------------------------------------------------------------------------------------
def _find_castep_freq_block(f_handle, data_regex):
"""
Find the start of the frequency block in a .castep file.
This will set the file pointer to the line before the start
of the block.
@param f_handle - handle to the file.
"""
while True:
pos = f_handle.tell()
line = f_handle.readline()
if not line:
raise IOError("Could not parse frequency block. Invalid file format.")
if data_regex.match(line):
f_handle.seek(pos)
return
#----------------------------------------------------------------------------------------
def _parse_castep_bond(bond_match):
"""
Parses a regex match to obtain bond information.
@param bond_match Regex match to bond data line
@return A dictionary defining the bond
"""
bond = dict()
bond['atom_a'] = (bond_match.group(1), int(bond_match.group(2)))
bond['atom_b'] = (bond_match.group(3), int(bond_match.group(4)))
bond['population'] = float(bond_match.group(5))
bond['length'] = float(bond_match.group(6))
return bond
#----------------------------------------------------------------------------------------
\ No newline at end of file
###=============General Regex strings===============###
FLOAT_REGEX = r'\-?(?:\d+\.?\d*|\d*\.?\d+)'
###=============Phonon Regex strings===============###
# Header regex. Looks for lines in the following format:
# q-pt= 1 0.000000 0.000000 0.000000 1.0000000000 0.000000 0.000000 1.000000
PHONON_HEADER_REGEX = r"^ +q-pt=\s+\d+ +(%(s)s) +(%(s)s) +(%(s)s) (?: *(%(s)s)){0,4}" % {'s': FLOAT_REGEX}
PHONON_EIGENVEC_REGEX = r"\s*Mode\s+Ion\s+X\s+Y\s+Z\s*"
###=============Castep Regex strings===============###
# Header regex. Looks for lines in the following format:
# + q-pt= 1 ( 0.000000 0.000000 0.000000) 1.0000000000 +
CASTEP_HEADER_REGEX = r" +\+ +q-pt= +\d+ \( *(?: *(%(s)s)) *(%(s)s) *(%(s)s)\) +(%(s)s) +\+" % {'s' : FLOAT_REGEX}
# Data regex. Looks for lines in the following format:
# + 1 -0.051481 a 0.0000000 N 0.0000000 N +
CASTEP_DATA_REGEX = r" +\+ +\d+ +(%(s)s)(?: +\w)? *(%(s)s)? *([YN])? *(%(s)s)? *([YN])? *\+" % {'s': FLOAT_REGEX}
# Atom bond regex. Looks for lines in the following format:
# H 006 -- O 012 0.46 1.04206
CASTEP_BOND_REGEX = r" +([A-z])+ +([0-9]+) +-- +([A-z]+) +([0-9]+) +(%(s)s) +(%(s)s)" % {'s': FLOAT_REGEX}
###===============================================###
def _parse_block_header(header_match, block_count):
"""
Parse the header of a block of frequencies and intensities
@param header_match - the regex match to the header
@param block_count - the count of blocks found so far
@return weight for this block of values
"""
# Found header block at start of frequencies
q1, q2, q3, weight = [float(x) for x in header_match.groups()]
q_vector = [q1, q2, q3]
if block_count > 1 and sum(q_vector) == 0:
weight = 0.0
return weight, q_vector
import re
import numpy as np
import dos.load_helper as load_helper
element_isotope = dict()
def parse_phonon_file(file_name, record_eigenvectors):
"""
Read frequencies from a <>.phonon file
@param file_name - file path of the file to read
@return the frequencies, infra red and raman intensities and weights of frequency blocks
"""
file_data = {}
# Get regex strings from load_helper
header_regex = re.compile(load_helper.PHONON_HEADER_REGEX)
eigenvectors_regex = re.compile(load_helper.PHONON_EIGENVEC_REGEX)
block_count = 0
frequencies, ir_intensities, raman_intensities, weights, q_vectors, eigenvectors = [], [], [], [], [], []
data_lists = (frequencies, ir_intensities, raman_intensities)
with open(file_name, 'rU') as f_handle:
file_data.update(_parse_phonon_file_header(f_handle))
while True:
line = f_handle.readline()
# Check we've reached the end of file
if not line:
break
# Check if we've found a block of frequencies
header_match = header_regex.match(line)
if header_match:
block_count += 1
weight, q_vector = load_helper._parse_block_header(header_match, block_count)
weights.append(weight)
q_vectors.append(q_vector)
# Parse block of frequencies
for line_data in _parse_phonon_freq_block(f_handle,
file_data['num_branches']):
for data_list, item in zip(data_lists, line_data):
data_list.append(item)
vector_match = eigenvectors_regex.match(line)
if vector_match:
if record_eigenvectors:
# Parse eigenvectors for partial dos
vectors = _parse_phonon_eigenvectors(f_handle,
file_data['num_ions'],
file_data['num_branches'])
eigenvectors.append(vectors)
else:
# Skip over eigenvectors
for _ in xrange(file_data['num_ions'] * file_data['num_branches']):
line = f_handle.readline()
if not line:
raise IOError("Bad file format. Uexpectedly reached end of file.")
frequencies = np.asarray(frequencies)
ir_intensities = np.asarray(ir_intensities)
raman_intensities = np.asarray(raman_intensities)
warray = np.repeat(weights, file_data['num_branches'])
eigenvectors = np.asarray(eigenvectors)
file_data.update({
'frequencies': frequencies,
'ir_intensities': ir_intensities,
'raman_intensities': raman_intensities,
'weights': warray,
'q_vectors':q_vectors,
'eigenvectors': eigenvectors
})
return file_data, element_isotope
#----------------------------------------------------------------------------------------
def _parse_phonon_file_header(f_handle):
"""
Read information from the header of a <>.phonon file
@param f_handle - handle to the file.
@return List of ions in file as list of tuple of (ion, mode number)
"""
file_data = {'ions': []}
while True:
line = f_handle.readline()
if not line:
raise IOError("Could not find any header information.")
if 'Number of ions' in line:
file_data['num_ions'] = int(line.strip().split()[-1])
elif 'Number of branches' in line:
file_data['num_branches'] = int(line.strip().split()[-1])
elif 'Unit cell vectors' in line:
file_data['unit_cell'] = _parse_phonon_unit_cell_vectors(f_handle)
elif 'Fractional Co-ordinates' in line:
if file_data['num_ions'] is None:
raise IOError("Failed to parse file. Invalid file header.")
# Extract the mode number for each of the ion in the data file
for _ in xrange(file_data['num_ions']):
line = f_handle.readline()
line_data = line.strip().split()
species = line_data[4]
ion = {'species': species}
ion['fract_coord'] = np.array([float(line_data[1]), float(line_data[2]), float(line_data[3])])
ion['isotope_number'] = float(line_data[5])
element_isotope[species] = float(line_data[5])
# -1 to convert to zero based indexing
ion['index'] = int(line_data[0]) - 1
ion['bond_number'] = len([i for i in file_data['ions'] if i['species'] == species]) + 1
file_data['ions'].append(ion)
if 'END header' in line:
if file_data['num_ions'] is None or file_data['num_branches'] is None:
raise IOError("Failed to parse file. Invalid file header.")
return file_data
#----------------------------------------------------------------------------------------
def _parse_phonon_freq_block(f_handle, num_branches):
"""
Iterator to parse a block of frequencies from a .phonon file.
@param f_handle - handle to the file.
"""
for _ in xrange(num_branches):
line = f_handle.readline()
line_data = line.strip().split()[1:]
line_data = [float(x) for x in line_data]
yield line_data
#----------------------------------------------------------------------------------------
def _parse_phonon_unit_cell_vectors(f_handle):
"""
Parses the unit cell vectors in a .phonon file.
@param f_handle Handle to the file
@return Numpy array of unit vectors
"""
data = []
for _ in range(3):
line = f_handle.readline()
line_data = line.strip().split()
line_data = [float(x) for x in line_data]
data.append(line_data)
return np.array(data)
#----------------------------------------------------------------------------------------
def _parse_phonon_eigenvectors(f_handle, num_ions, num_branches):
vectors = []
for _ in xrange(num_ions * num_branches):
line = f_handle.readline()
if not line:
raise IOError("Could not parse file. Invalid file format.")
line_data = line.strip().split()
vector_componets = line_data[2::2]
vector_componets = [float(x) for x in vector_componets]
vectors.append(vector_componets)
return np.asarray(vectors)
#----------------------------------------------------------------------------------------
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment