#include "MantidDataHandling/LoadPDFgetNFile.h" #include "MantidAPI/FileProperty.h" #include "MantidAPI/RegisterFileLoader.h" #include "MantidAPI/WorkspaceProperty.h" #include "MantidAPI/WorkspaceFactory.h" #include "MantidKernel/Unit.h" #include "MantidKernel/UnitFactory.h" #include <fstream> #include <boost/algorithm/string.hpp> #include <boost/algorithm/string/split.hpp> #include <boost/algorithm/string/trim.hpp> using namespace Mantid; using namespace Mantid::Kernel; using namespace Mantid::API; using namespace Mantid::DataObjects; using namespace std; using namespace boost; // FIXME Add label and unit to output workspace // FIXME Consider to output multiple workspaces if there are multiple column data (X, Y1, E1, Y2, E2) namespace Mantid { namespace DataHandling { DECLARE_FILELOADER_ALGORITHM(LoadPDFgetNFile); //---------------------------------------------------------------------------------------------- /** Constructor */ LoadPDFgetNFile::LoadPDFgetNFile() { } //---------------------------------------------------------------------------------------------- /** Destructor */ LoadPDFgetNFile::~LoadPDFgetNFile() { } //---------------------------------------------------------------------------------------------- /** * Return the confidence with with this algorithm can load the file * @param descriptor A descriptor for the file * @returns An integer specifying the confidence level. 0 indicates it will not be used */ int LoadPDFgetNFile::confidence(Kernel::FileDescriptor & descriptor) const { // check the file extension const std::string & extn = descriptor.extension(); // Only allow known file extensions if(extn.compare("sq") != 0 && extn.compare("sqa") != 0 && extn.compare("sqb") != 0 && extn.compare("gr") != 0 && extn.compare("ain") != 0 && extn.compare("braw") != 0 && extn.compare("bsmo")!= 0) { return 0; } if(!descriptor.isAscii()) return 0; auto & file = descriptor.data(); std::string str; std::getline(file, str); //workspace title first line while (!file.eof()) { std::getline(file, str); if (startsWith(str, "#L")) { return 80; } } return 0; } //---------------------------------------------------------------------------------------------- /** Define input */ void LoadPDFgetNFile::init() { std::vector<std::string> exts; exts.push_back(".sq"); exts.push_back(".sqa"); exts.push_back(".sqb"); exts.push_back(".gr"); exts.push_back(".ain"); exts.push_back(".braw"); exts.push_back(".bsmo"); auto fileproperty = new FileProperty("Filename", "", FileProperty::Load, exts, Kernel::Direction::Input); this->declareProperty(fileproperty, "The input filename of the stored data"); // auto wsproperty = new WorkspaceProperty<Workspace2D>("OutputWorkspace", "Anonymous", Kernel::Direction::Output); // this->declareProperty(wsproperty, "Name of output workspace. "); declareProperty(new API::WorkspaceProperty<>("OutputWorkspace", "", Kernel::Direction::Output), "Workspace name to load into."); } //---------------------------------------------------------------------------------------------- /** Main executor */ void LoadPDFgetNFile::exec() { // 1. Parse input file std::string inpfilename = getProperty("Filename"); parseDataFile(inpfilename); // 2. Generate output workspace generateDataWorkspace(); setProperty("OutputWorkspace", outWS); return; } //---------------------------------------------------------------------------------------------- /** Parse PDFgetN data file to * 1. a 2D vector for column data * 2. a 1D string vector for column name */ void LoadPDFgetNFile::parseDataFile(std::string filename) { // 1. Open file std::ifstream ifile; ifile.open((filename.c_str())); if (!ifile.is_open()) { stringstream errmsg; errmsg << "Unable to open file " << filename << ". Quit!"; g_log.error() << errmsg.str() << std::endl; throw std::runtime_error(errmsg.str()); } else { g_log.notice() << "Open PDFgetN File " << filename << std::endl; } // 2. Parse bool readdata = false; char line[256]; while(ifile.getline(line, 256)) { string sline(line); if (!readdata && startsWith(sline, "#L")) { // a) Find header line for the data segment in the file parseColumnNameLine(sline); readdata = true; // Set up the data structure size_t numcols = mColumnNames.size(); for (size_t i = 0; i < numcols; ++i) { std::vector<double> tempvec; mData.push_back(tempvec); } } else if (readdata) { // b) Parse data parseDataLine(sline); } else { // c) Do nothing ; } } // ENDWHILE if (!readdata) { stringstream errmsg; errmsg << "Unable to find a line staring with #L as the indicator of data segment. "; g_log.error() << errmsg.str() << std::endl; throw std::runtime_error(errmsg.str()); } return; } //---------------------------------------------------------------------------------------------- /** Check whether the line starts with some specific character */ bool LoadPDFgetNFile::startsWith(const std::string & s, const std::string& header) const { bool answer = true; if (s.size() < header.size()) { answer = false; } else { size_t numchars = header.size(); for (size_t i = 0; i < numchars; ++i) { char c0 = s[i]; char c1 = header[i]; if (c0 != c1) { answer = false; break; } } } return answer; } /** Parse column name line staring with \#L */ void LoadPDFgetNFile::parseColumnNameLine(std::string line) { // 1. Split vector<string> terms; boost::split(terms, line, is_any_of(" \t\n"), token_compress_on); // 2. Validate if (terms.empty()) { throw std::runtime_error("There is nothing in the input line!"); } string header = terms[0]; if (header.compare("#L") != 0) { stringstream errmsg; errmsg << "Expecting header as #L. Input line has header as " << header << ". Unable to proceed. "; g_log.error() << errmsg.str() << endl; throw std::runtime_error(errmsg.str()); } // 3. Parse size_t numcols = terms.size()-1; stringstream msgss; msgss << "Column Names: "; for (size_t i = 0; i < numcols; ++i) { this->mColumnNames.push_back(terms[i+1]); msgss << setw(-3) << i << ": " << setw(-10) << mColumnNames[i]; } g_log.information() << msgss.str() << endl; return; } /** Parse data line */ void LoadPDFgetNFile::parseDataLine(string line) { // 1. Trim (stripg) and Split boost::trim(line); vector<string> terms; boost::split(terms, line, is_any_of(" \t\n"), token_compress_on); // 2. Validate size_t numcols = mData.size(); if (line[0] == '#') { // Comment/information line to indicate the start of another section of data return; } else if (terms.size() != numcols) { // Data line with incorrect number of columns stringstream warnss; warnss << "Line (" << line << ") has incorrect number of columns other than " << numcols << " as expected. "; g_log.warning(warnss.str()); return; } // 3. Parse for (size_t i = 0; i < numcols; ++i) { string temps = terms[i]; double tempvalue; if (temps.compare("NaN") == 0) { // FIXME: Need to discuss with Peter about how to treat NaN value //tempvalue = DBL_MAX-1.0; tempvalue = 0.0; } else if (temps.compare("-NaN") == 0) { //tempvalue = -DBL_MAX+1.0; // FIXME: Need to discuss with Peter about how to treat NaN value tempvalue = 0.0; } else { tempvalue = atof(temps.c_str()); } mData[i].push_back(tempvalue); } return; } //---------------------------------------------------------------------------------------------- void LoadPDFgetNFile::setUnit(Workspace2D_sptr ws) { // 1. Set X string xcolname = mColumnNames[0]; if (xcolname.compare("Q") == 0) { string unit = "MomentumTransfer"; ws->getAxis(0)->setUnit(unit); } else if (xcolname.compare("r") == 0) { ws->getAxis(0)->unit() = UnitFactory::Instance().create("Label"); Unit_sptr unit = ws->getAxis(0)->unit(); boost::shared_ptr<Units::Label> label = boost::dynamic_pointer_cast<Units::Label>(unit); label->setLabel("AtomicDistance", "Angstrom"); } else { stringstream errss; errss << "X axis " << xcolname << " is not supported for unit. " << endl; g_log.warning() << errss.str() << endl; } // 2. Set Y string ycolname = mColumnNames[1]; string ylabel(""); if (ycolname.compare("G(r)") == 0) { ylabel = "PDF"; } else if (ycolname.compare("S") == 0) { ylabel = "S"; } else { ylabel = "Intensity"; } ws->setYUnitLabel(ylabel); return; } /** Generate output data workspace * Assumption. One data set must contain more than 1 element. */ void LoadPDFgetNFile::generateDataWorkspace() { // 0. Check if (mData.size() == 0) { throw runtime_error("Data set has not been initialized. Quit!"); } // 1. Figure out direction of X and number of data set bool xascend = true; if (mData.size() >= 2 && mData[0][1] < mData[0][0]) { xascend = false; } size_t numsets = 0; vector<size_t> numptsvec; size_t arraysize = mData[0].size(); if (arraysize <= 1) { throw runtime_error("Number of columns in data is less and equal to 1. It is unphysically too small."); } double prex = mData[0][0]; size_t vecsize = 1; for (size_t i = 1; i < arraysize; ++i) { double curx = mData[0][i]; if ( ((xascend) && (curx < prex)) || ((!xascend) && (curx > prex)) ) { // X in ascending order and hit the end of one set of data // X in descending order and hit the end of one set of data // Record the current data set information and start the next data set numsets += 1; numptsvec.push_back(vecsize); vecsize = 1; } else { // In the middle of a set of data ++ vecsize; } // Loop variable udpate prex = curx; } // ENDFOR // Record the last data set information ++ numsets; numptsvec.push_back(vecsize); bool samesize = true; for (size_t i = 0; i < numsets; ++i) { if (i > 0) { if (numptsvec[i] != numptsvec[i-1]) { samesize = false; } } g_log.information() << "Set " << i << ": Number of Points = " << numptsvec[i] << std::endl; } if (!samesize) { stringstream errmsg; errmsg << "Multiple bank (number of banks = " << numsets << ") have different size of data array. Unable to handle this situation."; g_log.error() << errmsg.str() << std::endl; throw std::runtime_error(errmsg.str()); } size_t size = numptsvec[0]; // 2. Generate workspace2D object and set the unit outWS = boost::dynamic_pointer_cast<Workspace2D>( API::WorkspaceFactory::Instance().create("Workspace2D", numsets, size, size)); setUnit(outWS); // 3. Set number size_t numspec = outWS->getNumberHistograms(); for (size_t i = 0; i < numspec; ++i) { MantidVec& X = outWS->dataX(i); MantidVec& Y = outWS->dataY(i); MantidVec& E = outWS->dataE(i); size_t baseindex = i*size; for (size_t j = 0; j < size; ++j) { size_t index; if (xascend) index = j; else index = (size-1)-j; X[index] = mData[0][baseindex+j]; Y[index] = mData[1][baseindex+j]; E[index] = mData[2][baseindex+j]; } } return; } } // namespace DataHandling } // namespace Mantid