Newer
Older
Janik Zikovsky
committed
/*WIKI*
Loads the given file in the RKH text format, which can be a file with three columns of numbers. If the FirstColumnValue is a recognised [[Unit_Factory|Mantid unit]] the workspace is created with just one spectrum. Alteratively if FirstColumnValue is set to 'SpectrumNumber' then the workspace can have many spectra with the spectrum ID's equal to the first column in the file.
*WIKI*/
Gigg, Martyn Anthony
committed
//---------------------------------------------------
// Includes
//---------------------------------------------------
#include "MantidDataHandling/LoadRKH.h"
#include "MantidDataHandling/SaveRKH.h"
Gigg, Martyn Anthony
committed
#include "MantidAPI/FileProperty.h"
Gigg, Martyn Anthony
committed
#include "MantidKernel/UnitFactory.h"
#include "MantidDataObjects/Workspace2D.h"
Sofia Antony
committed
#include "MantidAPI/LoadAlgorithmFactory.h"
#include "MantidAPI/NumericAxis.h"
#include "MantidKernel/VectorHelper.h"
#include "MantidKernel/ListValidator.h"
#include <boost/date_time/gregorian/gregorian.hpp>
#include <boost/date_time/date_parsing.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/algorithm/string.hpp>
#include <Poco/StringTokenizer.h>
Gigg, Martyn Anthony
committed
Gigg, Martyn Anthony
committed
namespace Mantid
{
namespace DataHandling
{
using namespace Mantid::API;
Sofia Antony
committed
using namespace Mantid::Kernel;
Gigg, Martyn Anthony
committed
// Register the algorithm into the AlgorithmFactory
DECLARE_ALGORITHM(LoadRKH)
Sofia Antony
committed
//register the algorithm into loadalgorithm factory
DECLARE_LOADALGORITHM(LoadRKH)
Janik Zikovsky
committed
/// Sets documentation strings for this algorithm
void LoadRKH::initDocs()
{
this->setWikiSummary("Load a file written in the RKH format");
Janik Zikovsky
committed
this->setOptionalMessage("Load a file written in the RKH format");
}
Gigg, Martyn Anthony
committed
/**
* Initialise the algorithm
*/
void LoadRKH::init()
Steve Williams
committed
std::vector<std::string> exts;
exts.push_back(".txt");
exts.push_back(".q");
exts.push_back(".dat");
Steve Williams
committed
declareProperty(new API::FileProperty("Filename","", API::FileProperty::Load, exts),
"Name of the RKH file to load");
Steve Williams
committed
declareProperty(
new API::WorkspaceProperty<>("OutputWorkspace", "", Kernel::Direction::Output),
"The name to use for the output workspace" );
Gigg, Martyn Anthony
committed
//Get the units registered with the UnitFactory
std::vector<std::string> propOptions = Kernel::UnitFactory::Instance().getKeys();
m_unitKeys.insert(propOptions.begin(), propOptions.end());
// m_RKHKeys will be taken as axis(1) units, the first axis will have only one value
// and so selection of one of these units will result in a workspace orientated differently
// from selection of the above
m_RKHKeys.insert("SpectrumNumber");
Gigg, Martyn Anthony
committed
propOptions.insert(propOptions.end(), m_RKHKeys.begin(), m_RKHKeys.end());
Steve Williams
committed
declareProperty("FirstColumnValue", "Wavelength",
boost::make_shared<Kernel::StringListValidator>(propOptions),
"Only used for 1D files, the units of the first column in the RKH "
Steve Williams
committed
"file (default Wavelength)" );
Gigg, Martyn Anthony
committed
}
/**
* Execute the algorithm
*/
void LoadRKH::exec()
{
using namespace Mantid::Kernel;
using namespace Mantid::API;
//Retrieve filename and try to open the file
std::string filename = getPropertyValue("Filename");
m_fileIn.open(filename.c_str());
if( ! m_fileIn )
Gigg, Martyn Anthony
committed
{
g_log.error("Unable to open file " + filename);
throw Exception::FileError("Unable to open File: ", filename);
}
g_log.information() << "Opened file \""<<filename<<"\" for reading\n";
Gigg, Martyn Anthony
committed
std::string line;
//The first line contains human readable information about the original workspace that we don't need
getline(m_fileIn, line);
getline(m_fileIn, line);
//Use one line of the file to diagnose if it is 1D or 2D, this line contains some data required by the 2D data reader
MatrixWorkspace_sptr result = is2D(line) ? read2D(line) : read1D();
Steve Williams
committed
// all RKH files contain distribution data
result->isDistribution(true);
//Set the output workspace
setProperty("OutputWorkspace", result);
}
/** Determines if the file is 1D or 2D based on the first after the workspace's
* title
Janik Zikovsky
committed
* @param testLine :: the first line in the file after the title
* @return true if the file must contain 1D data
*/
bool LoadRKH::is2D(const std::string & testLine)
{
// check the line part of a valid for 2D data else assume the file is 1D
return readUnit(testLine) != "C++ no unit found";
}
/** Read a data file that contains only one spectrum into a workspace
* @return the new workspace
*/
const API::MatrixWorkspace_sptr LoadRKH::read1D()
{
g_log.information() << "file appears to contain 1D information, reading in 1D data mode\n";
Gigg, Martyn Anthony
committed
//The 3rd line contains information regarding the number of points in the file and
// start and end reading points
int totalPoints(0), readStart(0), readEnd(0), buried(0);
std::string fileline;
getline(m_fileIn, fileline);
Gigg, Martyn Anthony
committed
std::istringstream is(fileline);
//Get data information
for( int counter = 1; counter < 8; ++counter )
{
switch( counter )
{
case 1:
is >> totalPoints;
Gigg, Martyn Anthony
committed
break;
case 5:
is >> readStart;
Gigg, Martyn Anthony
committed
break;
case 6:
is >> readEnd;
Gigg, Martyn Anthony
committed
break;
default:
is >> buried;
break;
}
}
g_log.information() << "Total number of data points declared to be in the data file: "
<< totalPoints << "\n";
Gigg, Martyn Anthony
committed
Gigg, Martyn Anthony
committed
//What are we reading?
std::string firstColVal = getProperty("FirstColumnValue");
bool colIsUnit(true);
if( m_RKHKeys.find( firstColVal ) != m_RKHKeys.end() )
{
colIsUnit = false;
readStart = 1;
readEnd = totalPoints;
}
if( readStart < 1 || readEnd < 1 || readEnd < readStart ||
readStart > totalPoints || readEnd > totalPoints )
{
g_log.error("Invalid data range specfied.");
throw std::invalid_argument("Invalid data range specfied.");
}
Gigg, Martyn Anthony
committed
g_log.information() << "Reading started on data line: " << readStart << "\n";
g_log.information() << "Reading finished on data line: " << readEnd << "\n";
Gigg, Martyn Anthony
committed
//The 4th and 5th line do not contain useful information either
Gigg, Martyn Anthony
committed
int pointsToRead = readEnd - readStart + 1;
Gigg, Martyn Anthony
committed
//Now stream sits at the first line of data
fileline = "";
Gigg, Martyn Anthony
committed
std::vector<double> columnOne, ydata, errdata;
columnOne.reserve(readEnd);
ydata.reserve(readEnd);
errdata.reserve(readEnd);
Sofia Antony
committed
Progress prog(this,0.0,1.0,readEnd);
for( int index = 1; index <= readEnd; ++index )
Gigg, Martyn Anthony
committed
{
getline(m_fileIn, fileline);
if( index < readStart ) continue;
Gigg, Martyn Anthony
committed
double x(0.), y(0.), yerr(0.);
std::istringstream datastr(fileline);
datastr >> x >> y >> yerr;
Gigg, Martyn Anthony
committed
columnOne.push_back(x);
Gigg, Martyn Anthony
committed
ydata.push_back(y);
errdata.push_back(yerr);
prog.report();
Gigg, Martyn Anthony
committed
}
Gigg, Martyn Anthony
committed
Gigg, Martyn Anthony
committed
assert( pointsToRead == static_cast<int>(columnOne.size()) );
assert( pointsToRead == static_cast<int>(ydata.size()) );
assert( pointsToRead == static_cast<int>(errdata.size()) );
if( colIsUnit )
Gigg, Martyn Anthony
committed
{
MatrixWorkspace_sptr localworkspace =
WorkspaceFactory::Instance().create("Workspace2D", 1, pointsToRead, pointsToRead);
Gigg, Martyn Anthony
committed
localworkspace->getAxis(0)->unit() = UnitFactory::Instance().create(firstColVal);
Russell Taylor
committed
localworkspace->dataX(0) = columnOne;
Gigg, Martyn Anthony
committed
localworkspace->dataY(0) = ydata;
localworkspace->dataE(0) = errdata;
return localworkspace;
Gigg, Martyn Anthony
committed
}
else
{
MatrixWorkspace_sptr localworkspace =
Russell Taylor
committed
WorkspaceFactory::Instance().create("Workspace2D", pointsToRead, 1, 1);
Gigg, Martyn Anthony
committed
//Set the appropriate values
for( int index = 0; index < pointsToRead; ++index )
Gigg, Martyn Anthony
committed
{
localworkspace->getAxis(1)->setValue(index, static_cast<int>(columnOne[index]));
Gigg, Martyn Anthony
committed
localworkspace->dataY(index)[0] = ydata[index];
localworkspace->dataE(index)[0] = errdata[index];
Gigg, Martyn Anthony
committed
}
Gigg, Martyn Anthony
committed
}
Gigg, Martyn Anthony
committed
}
/** Reads from the third line of the input file to the end assuming it contains
* 2D data
Janik Zikovsky
committed
* @param firstLine :: the second line in the file
* @return a workspace containing the loaded data
Janik Zikovsky
committed
* @throw NotFoundError if there is compulsulary data is missing from the file
* @throw invalid_argument if there is an inconsistency in the header information
*/
const MatrixWorkspace_sptr LoadRKH::read2D(const std::string & firstLine)
{
g_log.information() << "file appears to contain 2D information, reading in 2D data mode\n";
MatrixWorkspace_sptr outWrksp;
MantidVec axis0Data;
Steve Williams
committed
Progress prog( read2DHeader(firstLine, outWrksp, axis0Data) );
const size_t nAxis1Values = outWrksp->getNumberHistograms();
for ( size_t i = 0; i < nAxis1Values; ++i )
{
//set the X-values to the common bin values we read above
MantidVecPtr toPass;
toPass.access() = axis0Data;
outWrksp->setX(i, toPass);
//now read in the Y values
MantidVec & YOut = outWrksp->dataY(i);
for (MantidVec::iterator it = YOut.begin(), end=YOut.end();it != end;++it)
{
m_fileIn >> *it;
}
prog.report("Loading Y data");
}// loop on to the next spectrum
// the error values form one big block after the Y-values
for ( size_t i = 0; i < nAxis1Values; ++i )
{
MantidVec & EOut = outWrksp->dataE(i);
for (MantidVec::iterator it = EOut.begin(), end=EOut.end();it != end;++it)
{
m_fileIn >> *it;
}
prog.report("Loading error estimates");
}// loop on to the next spectrum
return outWrksp;
}
/** Reads the header information from a file containing 2D data
* @param[in] initalLine the second line in the file
* @param[out] outWrksp the workspace that the data will be writen to
* @param[out] axis0Data x-values for the workspace
* @return a progress bar object
Janik Zikovsky
committed
* @throw NotFoundError if there is compulsulary data is missing from the file
* @throw invalid_argument if there is an inconsistency in the header information
Steve Williams
committed
Progress LoadRKH::read2DHeader(const std::string & initalLine, MatrixWorkspace_sptr & outWrksp, MantidVec & axis0Data)
{
const std::string XUnit(readUnit(initalLine));
std::string fileLine;
std::getline(m_fileIn, fileLine);
const std::string YUnit(readUnit(fileLine));
std::getline(m_fileIn, fileLine);
const std::string intensityUnit(readUnit(fileLine));
// the next line should contain just "1", but I'm not enforcing that
std::getline(m_fileIn, fileLine);
std::string title;
std::getline(m_fileIn, title);
std::getline(m_fileIn, fileLine);
boost::trim(fileLine);
const int nAxis0Boundaries = boost::lexical_cast<int>(fileLine);
axis0Data.resize(nAxis0Boundaries);
readNumEntrys(nAxis0Boundaries, axis0Data);
Gigg, Martyn Anthony
committed
std::getline(m_fileIn, fileLine);
boost::trim(fileLine);
int nAxis1Boundaries;
try
{
nAxis1Boundaries = boost::lexical_cast<int>(fileLine);
}
catch(boost::bad_lexical_cast &)
{
// using readNumEntrys() above broke the sequence of getline()s and so try again in case we just read the end of a line
std::getline(m_fileIn, fileLine);
boost::trim(fileLine);
nAxis1Boundaries = boost::lexical_cast<int>(fileLine);
}
Steve Williams
committed
MantidVec axis1Data(nAxis1Boundaries);
readNumEntrys(nAxis1Boundaries, axis1Data);
std::getline(m_fileIn, fileLine);
// check for the file pointer being left at the end of a line
if ( fileLine.size() < 5 )
{
std::getline(m_fileIn, fileLine);
}
Poco::StringTokenizer wsDimensions(fileLine, " ",
Poco::StringTokenizer::TOK_TRIM);
if ( wsDimensions.count() < 2 )
{
throw Exception::NotFoundError("Input file", "dimensions");
}
const int nAxis0Values = boost::lexical_cast<int>(wsDimensions[0]);
const int nAxis1Values = boost::lexical_cast<int>(wsDimensions[1]);
Janik Zikovsky
committed
Progress prog(this, 0.05, 1.0, 2*nAxis1Values);
// we now have all the data we need to create the output workspace
outWrksp = WorkspaceFactory::Instance().create(
"Workspace2D", nAxis1Values, nAxis0Boundaries, nAxis0Values);
outWrksp->getAxis(0)->unit() = UnitFactory::Instance().create(XUnit);
outWrksp->setYUnitLabel(intensityUnit);
NumericAxis* const axis1 = new Mantid::API::NumericAxis(nAxis1Boundaries);
axis1->unit() = Mantid::Kernel::UnitFactory::Instance().create(YUnit);
outWrksp->replaceAxis(1, axis1);
Steve Williams
committed
for ( int i = 0; i < nAxis1Boundaries; ++i )
{
axis1->setValue(i, axis1Data[i]);
}
Steve Williams
committed
outWrksp->setTitle(title);
// move over the next line which is there to help with loading from Fortran routines
std::getline(m_fileIn, fileLine);
return prog;
}
/** Read the specified number of entries from input file into the
* the array that is passed
* @param[in] nEntries the number of numbers to read
* @param[out] output the contents of this will be replaced by the data read from the file
*/
void LoadRKH::readNumEntrys(const int nEntries, MantidVec & output)
{
output.resize(nEntries);
for( int i = 0; i < nEntries; ++i )
{
m_fileIn >> output[i];
}
}
/** Convert the units specification line from the RKH file into a
* Mantid unit name
Janik Zikovsky
committed
* @param line :: units specification line
* @return Mantid unit name
*/
const std::string LoadRKH::readUnit(const std::string & line)
{
Steve Williams
committed
// split the line into words
const Poco::StringTokenizer codes(line, " ", Poco::StringTokenizer::TOK_TRIM);
if ( codes.count() < 1 )
{
return "C++ no unit found";
}
// the symbol for the quantity q = MomentumTransfer, etc.
Steve Williams
committed
const std::string symbol(codes[0]);
// this is units used to measure the quantity e.g. angstroms, counts, ...
Steve Williams
committed
const std::string unit( *(codes.end()-1) );
// theQuantity will contain the name of the unit, which can be many words long
std::string theQuantity;
Poco::StringTokenizer::Iterator current = codes.begin()+1, end = codes.end();
for ( ; current != end; ++current)
{
if ( current != end - 1 )
{
theQuantity += *current;
}
}
Steve Williams
committed
//this is a syntax check the line before returning its data
if ( codes.count() >= 3 )
// For the next line it is possible to use str.compare instead of str.find, this would be more efficient if the line was very long
// however to use is safely other checks would be required that would impair readability, therefore in this case the unlikely performance hit is accepted.
// cppcheck-suppress stlIfStrFind
if ( unit.find('(') != 0 || unit.find(')') != unit.size() )
{
std::string qCode = boost::lexical_cast<std::string>(SaveRKH::Q_CODE);
Steve Williams
committed
if ( symbol == qCode && theQuantity == "q" && unit == "(1/Angstrom)" )
{// 6 q (1/Angstrom) is the synatx for MomentumTransfer
return "MomentumTransfer";
}
Steve Williams
committed
if ( symbol == "0" && theQuantity != "q" )
{// zero means the unit is not q but something else, which I'm assuming is legal
return theQuantity + " " + unit;
}
}
}
Steve Williams
committed
// the line doesn't contain a valid 2D data file unit line
return "C++ no unit found";
}
Gigg, Martyn Anthony
committed
/**
* Remove lines from an input stream
Janik Zikovsky
committed
* @param strm :: The input stream to adjust
* @param nlines :: The number of lines to remove
Gigg, Martyn Anthony
committed
*/
void LoadRKH::skipLines(std::istream & strm, int nlines)
{
std::string buried("");
for( int i = 0 ; i < nlines; ++i )
{
getline(strm, buried);
}
}
/** PAss a vector of bin boundaries and get a vector of bin centers
* @param[in] oldBoundaries array of bin boundaries
* @param[out] toCenter an array that is one shorter than oldBoundaries, the values of the means of pairs of values from the input
*/
void LoadRKH::binCenter(const MantidVec oldBoundaries, MantidVec & toCenter) const
{
VectorHelper::convertToBinCentre(oldBoundaries, toCenter);
}
Sofia Antony
committed
/**This method does a quick file check by checking the no.of bytes read nread params and header buffer
Janik Zikovsky
committed
* @param nread :: no.of bytes read
Gigg, Martyn Anthony
committed
* @param header :: The first 100 bytes of the file as a union
Sofia Antony
committed
* @return true if the given file is of type which can be loaded by this algorithm
*/
bool LoadRKH::quickFileCheck(const std::string &,size_t nread,const file_header& header)
Sofia Antony
committed
{
for(size_t i=0; i<nread; i++)
Sofia Antony
committed
{
Gigg, Martyn Anthony
committed
if (!isascii(header.full_hdr[i]))
return false;
Sofia Antony
committed
}
// RKH files are always ASCII, they can have extension
return true;
Sofia Antony
committed
}
/**checks the file by opening it and reading few lines
Janik Zikovsky
committed
* @param filePath :: name of the file inluding its path
Sofia Antony
committed
* @return an integer value how much this algorithm can load the file
*/
int LoadRKH::fileCheck(const std::string& filePath)
{
int bret=0;
std::ifstream file(filePath.c_str());
if (!file)
{
g_log.error("Unable to open file: " + filePath);
throw Exception::FileError("Unable to open file: " , filePath);
}
typedef boost::tokenizer<boost::char_separator<char> > tokenizer;
boost::char_separator<char> sep(" ");
std::string fileline("");
Sofia Antony
committed
//get first line
getline(file, fileline);
if ( fileline.find("Workspace:") == std::string::npos )
{//seeing Workspace: is enough to carry on
if ( fileline.find(":") == std::string::npos )
{//there must be a : in the line
return 0;
Sofia Antony
committed
}
if ( fileline.find("LOQ") == std::string::npos )
Sofia Antony
committed
{
if ( fileline.find("SANS2D") == std::string::npos )
{
static const std::string MONTHS[12] = {"-JAN-", "-FEB-", "-MAR-", "-APR-", "-MAY-", "-JUN-", "-JUL-", "-AUG-", "-SEP-", "-OCT-", "-NOV-", "-DEC-"};
size_t i = 0;
for ( ; i < 12; ++i )
{
if ( fileline.find(MONTHS[i]) != std::string::npos )
{
//the line is acceptable
break;
}
}
if ( i == 12 )
{
//the line contains none of the strings we were looking for except one : which is not enough, reject the file
return 0;
Sofia Antony
committed
}
}
//there are no constraints on the second line
Sofia Antony
committed
getline(file, fileline);
Sofia Antony
committed
//read 3rd line
getline(file, fileline);
Sofia Antony
committed
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
if(fileline.find("(")!=std::string::npos && fileline.find(")")!=std::string::npos)
{
bret+=10;
if(fileline.find(" 0 0 0 1")!=std::string::npos)
{
bret+=10;
}
}
else
{
tokenizer tok(fileline,sep);
for (tokenizer::iterator beg=tok.begin(); beg!=tok.end(); ++beg)
{
++ncols;
}
if(ncols==7)
{
bret+=10;
}
}
//4th line
getline(file, fileline);
if(fileline.find(" 0 0 0 0")!=std::string::npos)
{
bret+=10;
}
else if(fileline.find(" 0 ")!=std::string::npos)
{
bret+=10;
}
//5th line
getline(file, fileline);
if(fileline.find("3 (F12.5,2E16.6)")!=std::string::npos)
{
bret+=10;
}
else if (fileline.find(" 1\n")!=std::string::npos)
{
bret+=10;
}
ncols=0;
//6th line data line
getline(file, fileline);
tokenizer tok1(fileline, sep);
for (tokenizer::iterator beg=tok1.begin(); beg!=tok1.end(); ++beg)
{
++ncols;
}
if(ncols==3)
{
bret+=20;
}
Robert Whitley
committed
if( bret == 10 )
{
// Assume we are better than LoadAscii
bret += 10;
}
Sofia Antony
committed
return bret;
}