LoadPDFgetNFile.cpp

#include "MantidDataHandling/LoadPDFgetNFile.h"
#include "MantidAPI/Axis.h"
#include "MantidAPI/FileProperty.h"
#include "MantidAPI/RegisterFileLoader.h"
#include "MantidAPI/WorkspaceProperty.h"
#include "MantidAPI/WorkspaceFactory.h"
#include "MantidKernel/Unit.h"
#include "MantidKernel/UnitFactory.h"

#include <fstream>
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/trim.hpp>

using namespace Mantid;
using namespace Mantid::Kernel;
using namespace Mantid::API;
using namespace Mantid::DataObjects;
using namespace std;
using namespace boost;

// FIXME  Add label and unit to output workspace
// FIXME  Consider to output multiple workspaces if there are multiple column
// data (X, Y1, E1, Y2, E2)

namespace Mantid {
namespace DataHandling {

DECLARE_FILELOADER_ALGORITHM(LoadPDFgetNFile)

//----------------------------------------------------------------------------------------------
/**
 * Return the confidence with with this algorithm can load the file
 * @param descriptor A descriptor for the file
 * @returns An integer specifying the confidence level. 0 indicates it will not
 * be used
 */
int LoadPDFgetNFile::confidence(Kernel::FileDescriptor &descriptor) const {
  // check the file extension
  const std::string &extn = descriptor.extension();
  // Only allow known file extensions
  if (extn.compare("sq") != 0 && extn.compare("sqa") != 0 &&
      extn.compare("sqb") != 0 && extn.compare("gr") != 0 &&
      extn.compare("ain") != 0 && extn.compare("braw") != 0 &&
      extn.compare("bsmo") != 0) {
    return 0;
  }

  if (!descriptor.isAscii())
    return 0;

  auto &file = descriptor.data();
  std::string str;
  std::getline(file, str); // workspace title first line
  while (!file.eof()) {
    std::getline(file, str);
    if (startsWith(str, "#L")) {
      return 80;
    }
  }

  return 0;
}

//----------------------------------------------------------------------------------------------
/** Define input
  */
void LoadPDFgetNFile::init() {
  const std::vector<std::string> exts{".sq",  ".sqa",  ".sqb", ".gr",
                                      ".ain", ".braw", ".bsmo"};
  auto fileproperty = Kernel::make_unique<FileProperty>(
      "Filename", "", FileProperty::Load, exts, Kernel::Direction::Input);
  this->declareProperty(std::move(fileproperty),
                        "The input filename of the stored data");

  // auto wsproperty = new WorkspaceProperty<Workspace2D>("OutputWorkspace",
  // "Anonymous", Kernel::Direction::Output);
  // this->declareProperty(wsproperty, "Name of output workspace. ");
  declareProperty(Kernel::make_unique<API::WorkspaceProperty<>>(
                      "OutputWorkspace", "", Kernel::Direction::Output),
                  "Workspace name to load into.");
}

//----------------------------------------------------------------------------------------------
/** Main executor
  */
void LoadPDFgetNFile::exec() {
  // 1. Parse input file
  std::string inpfilename = getProperty("Filename");

  parseDataFile(inpfilename);

  // 2. Generate output workspace
  generateDataWorkspace();

  setProperty("OutputWorkspace", outWS);

  return;
}

//----------------------------------------------------------------------------------------------
/** Parse PDFgetN data file to
  * 1. a 2D vector for column data
  * 2. a 1D string vector for column name
  */
void LoadPDFgetNFile::parseDataFile(std::string filename) {
  // 1. Open file
  std::ifstream ifile;
  ifile.open((filename.c_str()));

  if (!ifile.is_open()) {
    stringstream errmsg;
    errmsg << "Unable to open file " << filename << ".  Quit!";
    g_log.error() << errmsg.str() << '\n';
    throw std::runtime_error(errmsg.str());
  } else {
    g_log.notice() << "Open PDFgetN File " << filename << '\n';
  }

  // 2. Parse
  bool readdata = false;

  char line[256];
  while (ifile.getline(line, 256)) {
    string sline(line);

    if (!readdata && startsWith(sline, "#L")) {
      // a) Find header line for the data segment in the file
      parseColumnNameLine(sline);
      readdata = true;

      // Set up the data structure
      size_t numcols = mColumnNames.size();
      for (size_t i = 0; i < numcols; ++i) {
        std::vector<double> tempvec;
        mData.push_back(tempvec);
      }

    } else if (readdata) {
      // b) Parse data
      parseDataLine(sline);
    } else {
      // c) Do nothing
      ;
    }
  } // ENDWHILE

  if (!readdata) {
    stringstream errmsg;
    errmsg << "Unable to find a line staring with #L as the indicator of data "
              "segment. ";
    g_log.error() << errmsg.str() << '\n';
    throw std::runtime_error(errmsg.str());
  }

  return;
}

//----------------------------------------------------------------------------------------------
/** Check whether the line starts with some specific character
  */
bool LoadPDFgetNFile::startsWith(const std::string &s,
                                 const std::string &header) const {
  bool answer = true;

  if (s.size() < header.size()) {
    answer = false;
  } else {
    size_t numchars = header.size();
    for (size_t i = 0; i < numchars; ++i) {
      char c0 = s[i];
      char c1 = header[i];
      if (c0 != c1) {
        answer = false;
        break;
      }
    }
  }

  return answer;
}

/** Parse column name line staring with \#L
  */
void LoadPDFgetNFile::parseColumnNameLine(std::string line) {
  // 1. Split
  vector<string> terms;
  boost::split(terms, line, is_any_of(" \t\n"), token_compress_on);

  // 2. Validate
  if (terms.empty()) {
    throw std::runtime_error("There is nothing in the input line!");
  }

  string header = terms[0];
  if (header.compare("#L") != 0) {
    stringstream errmsg;
    errmsg << "Expecting header as #L.  Input line has header as " << header
           << ". Unable to proceed. ";
    g_log.error() << errmsg.str() << '\n';
    throw std::runtime_error(errmsg.str());
  }

  // 3. Parse
  size_t numcols = terms.size() - 1;
  stringstream msgss;
  msgss << "Column Names: ";
  for (size_t i = 0; i < numcols; ++i) {
    this->mColumnNames.push_back(terms[i + 1]);
    msgss << setw(-3) << i << ": " << setw(-10) << mColumnNames[i];
  }
  g_log.information() << msgss.str() << '\n';

  return;
}

/** Parse data line
  */
void LoadPDFgetNFile::parseDataLine(string line) {
  // 1. Trim (stripg) and Split
  boost::trim(line);
  vector<string> terms;
  boost::split(terms, line, is_any_of(" \t\n"), token_compress_on);

  // 2. Validate
  size_t numcols = mData.size();
  if (line[0] == '#') {
    // Comment/information line to indicate the start of another section of data
    return;
  } else if (terms.size() != numcols) {
    // Data line with incorrect number of columns
    stringstream warnss;
    warnss << "Line (" << line
           << ") has incorrect number of columns other than " << numcols
           << " as expected. ";
    g_log.warning(warnss.str());
    return;
  }

  // 3. Parse
  for (size_t i = 0; i < numcols; ++i) {
    string temps = terms[i];
    double tempvalue;
    if (temps.compare("NaN") == 0) {
      // FIXME:  Need to discuss with Peter about how to treat NaN value
      // tempvalue = DBL_MAX-1.0;
      tempvalue = 0.0;
    } else if (temps.compare("-NaN") == 0) {
      // tempvalue = -DBL_MAX+1.0;
      // FIXME:  Need to discuss with Peter about how to treat NaN value
      tempvalue = 0.0;
    } else {
      tempvalue = atof(temps.c_str());
    }

    mData[i].push_back(tempvalue);
  }

  return;
}

//----------------------------------------------------------------------------------------------
void LoadPDFgetNFile::setUnit(Workspace2D_sptr ws) {
  // 1. Set X
  string xcolname = mColumnNames[0];

  if (xcolname.compare("Q") == 0) {
    string unit = "MomentumTransfer";
    ws->getAxis(0)->setUnit(unit);
  } else if (xcolname.compare("r") == 0) {
    ws->getAxis(0)->unit() = UnitFactory::Instance().create("Label");
    Unit_sptr unit = ws->getAxis(0)->unit();
    boost::shared_ptr<Units::Label> label =
        boost::dynamic_pointer_cast<Units::Label>(unit);
    label->setLabel("AtomicDistance", "Angstrom");
  } else {
    stringstream errss;
    errss << "X axis " << xcolname << " is not supported for unit. \n";
    g_log.warning() << errss.str() << '\n';
  }

  // 2. Set Y
  string ycolname = mColumnNames[1];
  string ylabel("");
  if (ycolname.compare("G(r)") == 0) {
    ylabel = "PDF";
  } else if (ycolname.compare("S") == 0) {
    ylabel = "S";
  } else {
    ylabel = "Intensity";
  }
  ws->setYUnitLabel(ylabel);

  return;
}

/** Generate output data workspace
  * Assumption.  One data set must contain more than 1 element.
  */
void LoadPDFgetNFile::generateDataWorkspace() {
  // 0. Check
  if (mData.empty()) {
    throw runtime_error("Data set has not been initialized. Quit!");
  }

  // 1. Figure out direction of X and number of data set
  bool xascend = true;
  if (mData.size() >= 2 && mData[0][1] < mData[0][0]) {
    xascend = false;
  }

  size_t numsets = 0;
  vector<size_t> numptsvec;
  size_t arraysize = mData[0].size();
  if (arraysize <= 1) {
    throw runtime_error("Number of columns in data is less and equal to 1.  It "
                        "is unphysically too small.");
  }

  double prex = mData[0][0];
  size_t vecsize = 1;
  for (size_t i = 1; i < arraysize; ++i) {
    double curx = mData[0][i];
    if (((xascend) && (curx < prex)) || ((!xascend) && (curx > prex))) {
      // X in ascending order and hit the end of one set of data
      // X in descending order and hit the end of one set of data
      // Record the current data set information and start the next data set
      numsets += 1;
      numptsvec.push_back(vecsize);
      vecsize = 1;
    } else {
      // In the middle of a set of data
      ++vecsize;
    }
    // Loop variable udpate
    prex = curx;
  } // ENDFOR
  // Record the last data set information
  ++numsets;
  numptsvec.push_back(vecsize);

  bool samesize = true;
  for (size_t i = 0; i < numsets; ++i) {
    if (i > 0) {
      if (numptsvec[i] != numptsvec[i - 1]) {
        samesize = false;
      }
    }
    g_log.information() << "Set " << i
                        << ":  Number of Points = " << numptsvec[i] << '\n';
  }
  if (!samesize) {
    stringstream errmsg;
    errmsg << "Multiple bank (number of banks = " << numsets
           << ") have different size of data array.  Unable to handle this "
              "situation.";
    g_log.error() << errmsg.str() << '\n';
    throw std::runtime_error(errmsg.str());
  }
  size_t size = numptsvec[0];

  // 2. Generate workspace2D object and set the unit
  outWS = boost::dynamic_pointer_cast<Workspace2D>(
      API::WorkspaceFactory::Instance().create("Workspace2D", numsets, size,
                                               size));

  setUnit(outWS);

  // 3. Set number
  size_t numspec = outWS->getNumberHistograms();
  for (size_t i = 0; i < numspec; ++i) {
    MantidVec &X = outWS->dataX(i);
    MantidVec &Y = outWS->dataY(i);
    MantidVec &E = outWS->dataE(i);

    size_t baseindex = i * size;
    for (size_t j = 0; j < size; ++j) {
      size_t index;
      if (xascend)
        index = j;
      else
        index = (size - 1) - j;

      X[index] = mData[0][baseindex + j];
      Y[index] = mData[1][baseindex + j];
      E[index] = mData[2][baseindex + j];
    }
  }

  return;
}

} // namespace DataHandling
} // namespace Mantid