//---------------------------------------------------------------------- // Includes //---------------------------------------------------------------------- #include "MantidDataHandling/LoadAscii.h" #include "MantidDataObjects/Workspace2D.h" #include "MantidKernel/UnitFactory.h" #include "MantidAPI/FileProperty.h" #include "MantidAPI/LoadAlgorithmFactory.h" #include <fstream> #include <boost/tokenizer.hpp> #include <Poco/StringTokenizer.h> // String utilities #include <boost/algorithm/string.hpp> namespace Mantid { namespace DataHandling { // Register the algorithm into the algorithm factory DECLARE_ALGORITHM(LoadAscii) //register the algorithm into loadalgorithm factory DECLARE_LOADALGORITHM(LoadAscii) /// Sets documentation strings for this algorithm void LoadAscii::initDocs() { this->setWikiSummary("Loads data from a text file and stores it in a 2D [[workspace]] ([[Workspace2D]] class). "); this->setOptionalMessage("Loads data from a text file and stores it in a 2D workspace (Workspace2D class)."); } using namespace Kernel; using namespace API; /// Empty constructor LoadAscii::LoadAscii() : m_columnSep(), m_separatorIndex() { } /** This method does a quick file check by checking the no.of bytes read nread params and header buffer * @param filePath :: path of the file including name. * @param nread :: no.of bytes read * @param header :: The first 100 bytes of the file as a union * @return true if the given file is of type which can be loaded by this algorithm */ bool LoadAscii::quickFileCheck(const std::string& filePath,size_t nread,const file_header& header) { std::string extn=extension(filePath); bool bascii(false); (!extn.compare("dat")||!extn.compare("csv")|| extn.compare("txt")|| extn.compare(""))?bascii=true:bascii=false; bool is_ascii (true); for(size_t i=0; i<nread; i++) { if (!isascii(header.full_hdr[i])) is_ascii =false; } return(is_ascii|| bascii?true:false); } /** * Checks the file by opening it and reading few lines * @param filePath name of the file including its path * @return an integer value how much this algorithm can load the file */ int LoadAscii::fileCheck(const std::string& filePath) { std::ifstream file(filePath.c_str()); if (!file) { g_log.error("Unable to open file: " + filePath); throw Exception::FileError("Unable to open file: " , filePath); } std::string separators(","); int ncols=0; typedef boost::tokenizer<boost::char_separator<char> > tokenizer; boost::char_separator<char> seps(separators.c_str()); std::string line; int confidence(0); while(getline(file,line)) { if (line.empty()||line[0] == '#') { continue; } else { //break at a non empty/non comment line is teh 1st data line break; } } // iterate through the first line columns boost::tokenizer<boost::char_separator<char> > values(line, seps); for (tokenizer::iterator it = values.begin(); it != values.end(); ++it) { ++ncols; } bool bloadAscii(true); //if the data is of double type this file can be loaded by loadascci double data; for (tokenizer::iterator it = values.begin(); it != values.end(); ++it) { std::istringstream is(*it); is>>data; if(is.fail()) { bloadAscii=false; break; } } //if the line has odd number of coulmns with mantid supported separators // this is considered as ascci file if (ncols % 2 == 1 && ncols > 2 && bloadAscii) { confidence = 80; } return confidence; } //-------------------------------------------------------------------------- // Protected methods //-------------------------------------------------------------------------- /** * Process the header information. This implementation just skips it entirely. * @param file :: A reference to the file stream */ void LoadAscii::processHeader(std::ifstream & file) const { // Most files will have some sort of header. If we've haven't been told how many lines to // skip then try and guess int numToSkip = getProperty("SkipNumLines"); if( numToSkip == EMPTY_INT() ) { const int rowsToMatch(5); // Have a guess where the data starts. Basically say, when we have say "rowsToMatch" lines of pure numbers // in a row then the line that started block is the top of the data int numCols(-1), matchingRows(0), row(0); std::string line; std::vector<double> values; while( getline(file,line) ) { ++row; //int nchars = (int)line.length(); TODO dead code? boost::trim(line); if( this->skipLine(line) ) { continue; } std::list<std::string> columns; int lineCols = this->splitIntoColumns(columns, line); try { fillInputValues(values, columns); } catch(boost::bad_lexical_cast&) { continue; } if( numCols < 0 ) numCols = lineCols; if( lineCols == numCols ) { ++matchingRows; if( matchingRows == rowsToMatch ) break; } else { numCols = lineCols; matchingRows = 1; } } // Seek the file pointer back to the start. // NOTE: Originally had this as finding the stream position of the data and then moving the file pointer // back to the start of the data. This worked when a file was read on the same platform it was written // but failed when read on a different one due to underlying differences in the stream translation. file.seekg(0,std::ios::beg); // We've read the header plus the number of rowsToMatch numToSkip = row - rowsToMatch; } int i(0); std::string line; while( i < numToSkip && getline(file, line) ) { ++i; } g_log.information() << "Skipped " << numToSkip << " line(s) of header information()\n"; } /** * Reads the data from the file. It is assumed that the provided file stream has its position * set such that the first call to getline will be give the first line of data * @param file :: A reference to a file stream * @returns A pointer to a new workspace */ API::Workspace_sptr LoadAscii::readData(std::ifstream & file) const { // Get the first line and find the number of spectra from the number of columns std::string line; getline(file,line); boost::trim(line); std::list<std::string> columns; const int numCols = splitIntoColumns(columns, line); if( numCols < 2 ) { g_log.error() << "Invalid data format found in file \"" << getPropertyValue("Filename") << "\"\n"; throw std::runtime_error("Invalid data format. Fewer than 2 columns found."); } size_t numSpectra(0); bool haveErrors(false); // Assume single data set with no errors if( numCols == 2 ) { numSpectra = numCols/2; } // Data with errors else if( (numCols-1) % 2 == 0 ) { numSpectra = (numCols - 1)/2; haveErrors = true; } else { g_log.error() << "Invalid data format found in file \"" << getPropertyValue("Filename") << "\"\n"; g_log.error() << "LoadAscii requires the number of columns to be an even multiple of either 2 or 3."; throw std::runtime_error("Invalid data format."); } // A quick check at the number of lines won't be accurate enough as potentially there // could be blank lines and comment lines int numBins(0), lineNo(0); std::vector<DataObjects::Histogram1D> spectra(numSpectra); std::vector<double> values(numCols, 0.); do { ++lineNo; boost::trim(line); if( this->skipLine(line) ) continue; columns.clear(); int lineCols = this->splitIntoColumns(columns, line); if( lineCols != numCols ) { std::ostringstream ostr; ostr << "Number of columns changed at line " << lineNo; throw std::runtime_error(ostr.str()); } try { fillInputValues(values, columns); } catch(boost::bad_lexical_cast&) { g_log.error() << "Invalid value on line " << lineNo << " of \"" << getPropertyValue("Filename") << "\"\n"; throw std::runtime_error("Invalid value encountered."); } for (size_t i = 0; i < numSpectra; ++i) { spectra[i].dataX().push_back(values[0]); spectra[i].dataY().push_back(values[i*2+1]); if( haveErrors ) { spectra[i].dataE().push_back(values[i*2+2]); } else { spectra[i].dataE().push_back(0.0); } } ++numBins; } while(getline(file,line)); MatrixWorkspace_sptr localWorkspace = boost::dynamic_pointer_cast<MatrixWorkspace> (WorkspaceFactory::Instance().create("Workspace2D",numSpectra,numBins,numBins)); try { localWorkspace->getAxis(0)->unit() = UnitFactory::Instance().create(getProperty("Unit")); } catch (Exception::NotFoundError&) { // Asked for dimensionless workspace (obviously not in unit factory) } for (size_t i = 0; i < numSpectra; ++i) { localWorkspace->dataX(i) = spectra[i].dataX(); localWorkspace->dataY(i) = spectra[i].dataY(); localWorkspace->dataE(i) = spectra[i].dataE(); // Just have spectrum number start at 1 and count up localWorkspace->getAxis(1)->spectraNo(i) = static_cast<specid_t>(i+1); } return localWorkspace; } /** * Peek at a line without extracting it from the stream */ void LoadAscii::peekLine(std::ifstream & is, std::string & str) const { getline(is, str); is.seekg(-(int)str.length(),std::ios::cur); boost::trim(str); } /** * Return true if the line is to be skipped. * @param line :: The line to be checked * @return True if the line should be skipped */ bool LoadAscii::skipLine(const std::string & line) const { // Empty or comment return ( line.empty() || boost::starts_with(line, "#") ); } /** * Split the data into columns based on the input separator * @param[out] columns :: A reference to a list to store the column data * @param[in] str :: The input string * @returns The number of columns */ int LoadAscii::splitIntoColumns(std::list<std::string> & columns, const std::string & str) const { boost::split(columns, str, boost::is_any_of(m_columnSep), boost::token_compress_on); return static_cast<int>(columns.size()); } /** * Fill the given vector with the data values. Its size is assumed to be correct * @param[out] values :: The data vector fill * @param columns :: The list of strings denoting columns */ void LoadAscii::fillInputValues(std::vector<double> &values, const std::list<std::string>& columns) const { values.resize(columns.size()); std::list<std::string>::const_iterator iend = columns.end(); int i = 0; for( std::list<std::string>::const_iterator itr = columns.begin(); itr != iend; ++itr ) { std::string value = *itr; boost::trim(value); values[i] = boost::lexical_cast<double>(value); ++i; } } //-------------------------------------------------------------------------- // Private methods //-------------------------------------------------------------------------- /// Initialisation method. void LoadAscii::init() { std::vector<std::string> exts; exts.push_back(".dat"); exts.push_back(".txt"); exts.push_back(".csv"); exts.push_back(""); declareProperty(new FileProperty("Filename", "", FileProperty::Load, exts), "A comma separated Ascii file"); declareProperty(new WorkspaceProperty<Workspace>("OutputWorkspace", "",Direction::Output), "The name of the workspace that will be created."); std::string spacers[5][2] = { {"CSV", ","}, {"Tab", "\t"}, {"Space", " "}, {"Colon", ":"}, {"SemiColon", ";"} }; // For the ListValidator std::vector<std::string> sepOptions; for( size_t i = 0; i < 5; ++i ) { std::string option = spacers[i][0]; m_separatorIndex.insert(std::pair<std::string,std::string>(option, spacers[i][1])); sepOptions.push_back(option); } declareProperty("Separator", "CSV", new ListValidator(sepOptions), "The column separator character (default: CSV)"); std::vector<std::string> units = UnitFactory::Instance().getKeys(); units.insert(units.begin(),"Dimensionless"); declareProperty("Unit","Energy",new Kernel::ListValidator(units), "The unit to assign to the X axis (default: Energy)"); BoundedValidator<int> * mustBePosInt = new BoundedValidator<int>(); mustBePosInt->setLower(0); declareProperty("SkipNumLines", EMPTY_INT(), mustBePosInt, "If set, this number of lines from the top of the file are ignored."); } /** * Executes the algorithm. */ void LoadAscii::exec() { std::string filename = getProperty("Filename"); std::ifstream file(filename.c_str()); if (!file) { g_log.error("Unable to open file: " + filename); throw Exception::FileError("Unable to open file: " , filename); } std::string sepOption = getProperty("Separator"); m_columnSep = m_separatorIndex[sepOption]; // Process the header information. processHeader(file); // Read the data Workspace_sptr outputWS = readData(file); setProperty("OutputWorkspace", outputWS); } } // namespace DataHandling } // namespace Mantid