Newer
Older
Gigg, Martyn Anthony
committed
//---------------------------------------------------
// Includes
//---------------------------------------------------
#include "MantidDataHandling/LoadRKH.h"
#include "MantidDataHandling/SaveRKH.h"
Gigg, Martyn Anthony
committed
#include "MantidAPI/FileProperty.h"
#include "MantidAPI/NumericAxis.h"
#include "MantidAPI/RegisterFileLoader.h"
Gigg, Martyn Anthony
committed
#include "MantidKernel/UnitFactory.h"
#include "MantidDataObjects/Workspace2D.h"
#include "MantidKernel/VectorHelper.h"
#include "MantidKernel/ListValidator.h"
#include <boost/date_time/gregorian/gregorian.hpp>
#include <boost/date_time/date_parsing.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/algorithm/string.hpp>
#include <Poco/StringTokenizer.h>
Gigg, Martyn Anthony
committed
Gigg, Martyn Anthony
committed
namespace Mantid {
namespace DataHandling {
using namespace Mantid::API;
Sofia Antony
committed
using namespace Mantid::Kernel;
Gigg, Martyn Anthony
committed
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
namespace {
void readLinesForRKH1D(std::istream& stream, int readStart, int readEnd, std::vector<double>& columnOne,
std::vector<double>& ydata, std::vector<double>& errdata, Progress& prog) {
std::string fileline = "";
for (int index = 1; index <= readEnd; ++index) {
getline(stream, fileline);
if (index < readStart)
continue;
double x(0.), y(0.), yerr(0.);
std::istringstream datastr(fileline);
datastr >> x >> y >> yerr;
columnOne.push_back(x);
ydata.push_back(y);
errdata.push_back(yerr);
prog.report();
}
}
void readLinesWithXErrorForRKH1D(std::istream& stream, int readStart, int readEnd, std::vector<double>& columnOne,
std::vector<double>& ydata, std::vector<double>& errdata, std::vector<double>& xError,
Progress& prog) {
std::string fileline = "";
for (int index = 1; index <= readEnd; ++index) {
getline(stream, fileline);
if (index < readStart)
continue;
double x(0.), y(0.), yerr(0.), xerr(0.);
std::istringstream datastr(fileline);
datastr >> x >> y >> yerr>>xerr;
columnOne.push_back(x);
ydata.push_back(y);
errdata.push_back(yerr);
xError.push_back(xerr);
prog.report();
}
}
}
/**
* Return the confidence with with this algorithm can load the file
* @param descriptor A descriptor for the file
* @returns An integer specifying the confidence level. 0 indicates it will not
* be used
int LoadRKH::confidence(Kernel::FileDescriptor &descriptor) const {
if (!descriptor.isAscii())
return 0;
auto &file = descriptor.data();
std::string fileline("");
// Header looks something like this where the text inside [] could be anything
// LOQ Thu 28-OCT-2004 12:23 [W 26 INST_DIRECT_BEAM]
// -- First line --
std::getline(file, fileline);
// LOQ or SANS2D (case insensitive)
if (boost::ifind_first(fileline, "loq").empty() &&
boost::ifind_first(fileline, "sans2d").empty())
return 0;
// Next should be date time string
static const char *MONTHS[12] = {"-JAN-", "-FEB-", "-MAR-", "-APR-",
"-MAY-", "-JUN-", "-JUL-", "-AUG-",
"-SEP-", "-OCT-", "-NOV-", "-DEC-"};
bool foundMonth(false);
for (size_t i = 0; i < 12; ++i) {
if (!boost::ifind_first(fileline, MONTHS[i]).empty()) {
foundMonth = true;
break;
}
}
// there are no constraints on the second line
std::getline(file, fileline);
// read 3rd line - should contain sequence "0 0 0 1"
std::getline(file, fileline);
if (fileline.find("0 0 0 1") == std::string::npos)
return 0;
// read 4th line - should contain sequence ""0 0 0 0"
std::getline(file, fileline);
if (fileline.find("0 0 0 0") == std::string::npos)
return 0;
// read 5th line - should contain sequence "3 (F12.5,2E16.6)"
std::getline(file, fileline);
if (fileline.find("3 (F12.5,2E16.6)") == std::string::npos)
return 0;
return 20; // Better than LoadAscii
}
Gigg, Martyn Anthony
committed
/**
* Initialise the algorithm
*/
Steve Williams
committed
std::vector<std::string> exts;
exts.push_back(".txt");
exts.push_back(".q");
exts.push_back(".dat");
Steve Williams
committed
declareProperty(
new API::FileProperty("Filename", "", API::FileProperty::Load, exts),
"Name of the RKH file to load");
declareProperty(new API::WorkspaceProperty<>("OutputWorkspace", "",
Kernel::Direction::Output),
"The name to use for the output workspace");
// Get the units registered with the UnitFactory
std::vector<std::string> propOptions =
Kernel::UnitFactory::Instance().getKeys();
Gigg, Martyn Anthony
committed
m_unitKeys.insert(propOptions.begin(), propOptions.end());
// m_RKHKeys will be taken as axis(1) units, the first axis will have only one
// value
// and so selection of one of these units will result in a workspace
// orientated differently
// from selection of the above
m_RKHKeys.insert("SpectrumNumber");
Gigg, Martyn Anthony
committed
propOptions.insert(propOptions.end(), m_RKHKeys.begin(), m_RKHKeys.end());
declareProperty(
"FirstColumnValue", "Wavelength",
boost::make_shared<Kernel::StringListValidator>(propOptions),
"Only used for 1D files, the units of the first column in the RKH "
"file (default Wavelength)");
Gigg, Martyn Anthony
committed
}
/**
* Execute the algorithm
*/
Gigg, Martyn Anthony
committed
using namespace Mantid::Kernel;
using namespace Mantid::API;
// Retrieve filename and try to open the file
Gigg, Martyn Anthony
committed
std::string filename = getPropertyValue("Filename");
m_fileIn.open(filename.c_str());
Gigg, Martyn Anthony
committed
g_log.error("Unable to open file " + filename);
throw Exception::FileError("Unable to open File: ", filename);
Gigg, Martyn Anthony
committed
}
g_log.information() << "Opened file \"" << filename << "\" for reading\n";
Gigg, Martyn Anthony
committed
// The first line contains human readable information about the original
// workspace that we don't need
getline(m_fileIn, line);
getline(m_fileIn, line);
// Use one line of the file to diagnose if it is 1D or 2D, this line contains
// some data required by the 2D data reader
MatrixWorkspace_sptr result = is2D(line) ? read2D(line) : read1D();
Steve Williams
committed
// all RKH files contain distribution data
result->isDistribution(true);
setProperty("OutputWorkspace", result);
}
/** Determines if the file is 1D or 2D based on the first after the workspace's
* title
Janik Zikovsky
committed
* @param testLine :: the first line in the file after the title
* @return true if the file must contain 1D data
*/
bool LoadRKH::is2D(const std::string &testLine) {
// check the line part of a valid for 2D data else assume the file is 1D
return readUnit(testLine) != "C++ no unit found";
}
/** Read a data file that contains only one spectrum into a workspace
* @return the new workspace
*/
const API::MatrixWorkspace_sptr LoadRKH::read1D() {
g_log.information()
<< "file appears to contain 1D information, reading in 1D data mode\n";
Gigg, Martyn Anthony
committed
// The 3rd line contains information regarding the number of points in the
// file and
Gigg, Martyn Anthony
committed
// start and end reading points
int totalPoints(0), readStart(0), readEnd(0), buried(0);
std::string fileline;
getline(m_fileIn, fileline);
Gigg, Martyn Anthony
committed
std::istringstream is(fileline);
// Get data information
for (int counter = 1; counter < 8; ++counter) {
switch (counter) {
case 1:
is >> totalPoints;
Gigg, Martyn Anthony
committed
break;
case 5:
is >> readStart;
Gigg, Martyn Anthony
committed
break;
case 6:
is >> readEnd;
Gigg, Martyn Anthony
committed
break;
default:
is >> buried;
break;
}
}
g_log.information()
<< "Total number of data points declared to be in the data file: "
<< totalPoints << "\n";
Gigg, Martyn Anthony
committed
Gigg, Martyn Anthony
committed
std::string firstColVal = getProperty("FirstColumnValue");
bool colIsUnit(true);
if (m_RKHKeys.find(firstColVal) != m_RKHKeys.end()) {
Gigg, Martyn Anthony
committed
colIsUnit = false;
readStart = 1;
readEnd = totalPoints;
}
if (readStart < 1 || readEnd < 1 || readEnd < readStart ||
readStart > totalPoints || readEnd > totalPoints) {
g_log.error("Invalid data range specfied.");
throw std::invalid_argument("Invalid data range specfied.");
}
Gigg, Martyn Anthony
committed
g_log.information() << "Reading started on data line: " << readStart << "\n";
g_log.information() << "Reading finished on data line: " << readEnd << "\n";
// The 4th and 5th line do not contain useful information either
Gigg, Martyn Anthony
committed
int pointsToRead = readEnd - readStart + 1;
// Now stream sits at the first line of data
std::vector<double> columnOne, ydata, errdata, xError;
columnOne.reserve(readEnd);
ydata.reserve(readEnd);
errdata.reserve(readEnd);
auto hasXError = hasXerror(m_fileIn);
Progress prog(this, 0.0, 1.0, readEnd);
if (hasXError) {
xError.reserve(readEnd);
readLinesWithXErrorForRKH1D(m_fileIn, readStart, readEnd,
columnOne, ydata, errdata, xError, prog);
} else {
readLinesForRKH1D(m_fileIn, readStart, readEnd,
columnOne, ydata, errdata, prog);
Gigg, Martyn Anthony
committed
}
Gigg, Martyn Anthony
committed
assert(pointsToRead == static_cast<int>(columnOne.size()));
assert(pointsToRead == static_cast<int>(ydata.size()));
assert(pointsToRead == static_cast<int>(errdata.size()));
if (hasXError) {
assert(pointsToRead == static_cast<int>(xError.size()));
}
if (colIsUnit) {
MatrixWorkspace_sptr localworkspace = WorkspaceFactory::Instance().create(
"Workspace2D", 1, pointsToRead, pointsToRead);
localworkspace->getAxis(0)->unit() =
UnitFactory::Instance().create(firstColVal);
Russell Taylor
committed
localworkspace->dataX(0) = columnOne;
Gigg, Martyn Anthony
committed
localworkspace->dataY(0) = ydata;
localworkspace->dataE(0) = errdata;
if (hasXError) {
localworkspace->dataDx(0) = xError;
}
} else {
MatrixWorkspace_sptr localworkspace =
WorkspaceFactory::Instance().create("Workspace2D", pointsToRead, 1, 1);
// Set the appropriate values
for (int index = 0; index < pointsToRead; ++index) {
localworkspace->getSpectrum(index)
->setSpectrumNo(static_cast<int>(columnOne[index]));
Gigg, Martyn Anthony
committed
localworkspace->dataY(index)[0] = ydata[index];
localworkspace->dataE(index)[0] = errdata[index];
Gigg, Martyn Anthony
committed
}
if (hasXError) {
for (int index = 0; index < pointsToRead; ++index) {
localworkspace->dataDx(index)[0] = xError[index];
}
}
Gigg, Martyn Anthony
committed
}
Gigg, Martyn Anthony
committed
}
/** Reads from the third line of the input file to the end assuming it contains
* 2D data
Janik Zikovsky
committed
* @param firstLine :: the second line in the file
* @return a workspace containing the loaded data
Janik Zikovsky
committed
* @throw NotFoundError if there is compulsulary data is missing from the file
* @throw invalid_argument if there is an inconsistency in the header
* information
const MatrixWorkspace_sptr LoadRKH::read2D(const std::string &firstLine) {
g_log.information()
<< "file appears to contain 2D information, reading in 2D data mode\n";
MatrixWorkspace_sptr outWrksp;
MantidVec axis0Data;
Progress prog(read2DHeader(firstLine, outWrksp, axis0Data));
const size_t nAxis1Values = outWrksp->getNumberHistograms();
for (size_t i = 0; i < nAxis1Values; ++i) {
// set the X-values to the common bin values we read above
MantidVecPtr toPass;
toPass.access() = axis0Data;
outWrksp->setX(i, toPass);
// now read in the Y values
MantidVec &YOut = outWrksp->dataY(i);
for (MantidVec::iterator it = YOut.begin(), end = YOut.end(); it != end;
++it) {
m_fileIn >> *it;
}
prog.report("Loading Y data");
} // loop on to the next spectrum
// the error values form one big block after the Y-values
for (size_t i = 0; i < nAxis1Values; ++i) {
MantidVec &EOut = outWrksp->dataE(i);
for (MantidVec::iterator it = EOut.begin(), end = EOut.end(); it != end;
++it) {
prog.report("Loading error estimates");
} // loop on to the next spectrum
return outWrksp;
}
/** Reads the header information from a file containing 2D data
* @param[in] initalLine the second line in the file
* @param[out] outWrksp the workspace that the data will be writen to
* @param[out] axis0Data x-values for the workspace
* @return a progress bar object
Janik Zikovsky
committed
* @throw NotFoundError if there is compulsulary data is missing from the file
* @throw invalid_argument if there is an inconsistency in the header
* information
Progress LoadRKH::read2DHeader(const std::string &initalLine,
MatrixWorkspace_sptr &outWrksp,
MantidVec &axis0Data) {
const std::string XUnit(readUnit(initalLine));
std::string fileLine;
std::getline(m_fileIn, fileLine);
const std::string YUnit(readUnit(fileLine));
std::getline(m_fileIn, fileLine);
const std::string intensityUnit(readUnit(fileLine));
// the next line should contain just "1", but I'm not enforcing that
std::getline(m_fileIn, fileLine);
std::string title;
std::getline(m_fileIn, title);
std::getline(m_fileIn, fileLine);
boost::trim(fileLine);
const int nAxis0Boundaries = boost::lexical_cast<int>(fileLine);
axis0Data.resize(nAxis0Boundaries);
readNumEntrys(nAxis0Boundaries, axis0Data);
Gigg, Martyn Anthony
committed
std::getline(m_fileIn, fileLine);
boost::trim(fileLine);
int nAxis1Boundaries;
nAxis1Boundaries = boost::lexical_cast<int>(fileLine);
} catch (boost::bad_lexical_cast &) {
// using readNumEntrys() above broke the sequence of getline()s and so try
// again in case we just read the end of a line
std::getline(m_fileIn, fileLine);
boost::trim(fileLine);
nAxis1Boundaries = boost::lexical_cast<int>(fileLine);
}
Steve Williams
committed
MantidVec axis1Data(nAxis1Boundaries);
readNumEntrys(nAxis1Boundaries, axis1Data);
std::getline(m_fileIn, fileLine);
// check for the file pointer being left at the end of a line
std::getline(m_fileIn, fileLine);
}
Poco::StringTokenizer wsDimensions(fileLine, " ",
Poco::StringTokenizer::TOK_TRIM);
if (wsDimensions.count() < 2) {
throw Exception::NotFoundError("Input file", "dimensions");
}
const int nAxis0Values = boost::lexical_cast<int>(wsDimensions[0]);
const int nAxis1Values = boost::lexical_cast<int>(wsDimensions[1]);
Progress prog(this, 0.05, 1.0, 2 * nAxis1Values);
// we now have all the data we need to create the output workspace
outWrksp = WorkspaceFactory::Instance().create(
"Workspace2D", nAxis1Values, nAxis0Boundaries, nAxis0Values);
outWrksp->getAxis(0)->unit() = UnitFactory::Instance().create(XUnit);
outWrksp->setYUnitLabel(intensityUnit);
NumericAxis *const axis1 = new Mantid::API::NumericAxis(nAxis1Boundaries);
axis1->unit() = Mantid::Kernel::UnitFactory::Instance().create(YUnit);
outWrksp->replaceAxis(1, axis1);
for (int i = 0; i < nAxis1Boundaries; ++i) {
Steve Williams
committed
axis1->setValue(i, axis1Data[i]);
}
outWrksp->setTitle(title);
// move over the next line which is there to help with loading from Fortran
// routines
std::getline(m_fileIn, fileLine);
return prog;
}
/** Read the specified number of entries from input file into the
* the array that is passed
* @param[in] nEntries the number of numbers to read
* @param[out] output the contents of this will be replaced by the data read
* from the file
void LoadRKH::readNumEntrys(const int nEntries, MantidVec &output) {
for (int i = 0; i < nEntries; ++i) {
m_fileIn >> output[i];
}
}
/** Convert the units specification line from the RKH file into a
* Mantid unit name
Janik Zikovsky
committed
* @param line :: units specification line
* @return Mantid unit name
*/
const std::string LoadRKH::readUnit(const std::string &line) {
Steve Williams
committed
// split the line into words
const Poco::StringTokenizer codes(line, " ", Poco::StringTokenizer::TOK_TRIM);
if (codes.count() < 1) {
return "C++ no unit found";
Steve Williams
committed
}
// the symbol for the quantity q = MomentumTransfer, etc.
Steve Williams
committed
const std::string symbol(codes[0]);
// this is units used to measure the quantity e.g. angstroms, counts, ...
const std::string unit(*(codes.end() - 1));
Steve Williams
committed
// theQuantity will contain the name of the unit, which can be many words long
std::string theQuantity;
Poco::StringTokenizer::Iterator current = codes.begin() + 1,
end = codes.end();
for (; current != end; ++current) {
if (current != end - 1) {
Steve Williams
committed
theQuantity += *current;
}
}
// this is a syntax check the line before returning its data
if (codes.count() >= 3) {
// For the next line it is possible to use str.compare instead of str.find,
// this would be more efficient if the line was very long
// however to use is safely other checks would be required that would impair
// readability, therefore in this case the unlikely performance hit is
// accepted.
// cppcheck-suppress stlIfStrFind
if (unit.find('(') != 0 || unit.find(')') != unit.size()) {
std::string qCode = boost::lexical_cast<std::string>(SaveRKH::Q_CODE);
if (symbol == qCode && theQuantity == "q" &&
unit == "(1/Angstrom)") { // 6 q (1/Angstrom) is the synatx for
// MomentumTransfer
return "MomentumTransfer";
}
if (symbol == "0" && theQuantity != "q") { // zero means the unit is not q
// but something else, which
// I'm assuming is legal
return theQuantity + " " + unit;
}
}
}
Steve Williams
committed
// the line doesn't contain a valid 2D data file unit line
return "C++ no unit found";
}
Gigg, Martyn Anthony
committed
/**
* Remove lines from an input stream
Janik Zikovsky
committed
* @param strm :: The input stream to adjust
* @param nlines :: The number of lines to remove
Gigg, Martyn Anthony
committed
*/
void LoadRKH::skipLines(std::istream &strm, int nlines) {
Gigg, Martyn Anthony
committed
std::string buried("");
for (int i = 0; i < nlines; ++i) {
Gigg, Martyn Anthony
committed
getline(strm, buried);
}
}
/** PAss a vector of bin boundaries and get a vector of bin centers
* @param[in] oldBoundaries array of bin boundaries
* @param[out] toCenter an array that is one shorter than oldBoundaries, the
* values of the means of pairs of values from the input
void LoadRKH::binCenter(const MantidVec oldBoundaries,
MantidVec &toCenter) const {
VectorHelper::convertToBinCentre(oldBoundaries, toCenter);
}
/**
* Checks if there is an x error present in the data set
*/
bool LoadRKH::hasXerror(std::ifstream &stream) {
auto containsXerror = false;
auto currentPutLocation = stream.tellg();
std::string line;
getline(stream, line);
std::string x,y, yerr, xerr;
std::istringstream datastr(line);
datastr >> x >> y >> yerr >> xerr;
if (!xerr.empty()) {
containsXerror = true;
}
// Reset the original location of the stream
stream.seekg(currentPutLocation, stream.beg);
return containsXerror;
}