Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
//----------------------------------------------------------------------
// Includes
//----------------------------------------------------------------------
#include "MantidKernel/MultiFileNameParser.h"
#include <algorithm>
#include <numeric>
#include <exception>
#include <sstream>
#include <boost/regex.hpp>
#include <boost/tokenizer.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/bind.hpp>
#include <Poco/Path.h>
namespace Mantid
{
namespace Kernel
{
////////////////////////////////////////////////////////////////////////////////////////
// STATIC CONSTANTS - regexs used to parse multi runs, built up and nested step by step.
////////////////////////////////////////////////////////////////////////////////////////
const std::string MultiFileNameParser::INST = "([A-Za-z]+|PG3|pg3)";
const std::string MultiFileNameParser::UNDERSCORE = "(_{0,1})";
const std::string MultiFileNameParser::SPACE = "(\\s*)";
const std::string MultiFileNameParser::COMMA = "(" + SPACE + "," + SPACE + ")";
const std::string MultiFileNameParser::PLUS = "(" + SPACE + "\\+" + SPACE + ")";
const std::string MultiFileNameParser::MINUS = "(" + SPACE + "\\-" + SPACE + ")";
const std::string MultiFileNameParser::COLON = "(" + SPACE + ":" + SPACE + ")";
const std::string MultiFileNameParser::SINGLE = "([0-9]+)";
const std::string MultiFileNameParser::RANGE = "(" + SINGLE + COLON + SINGLE + ")";
const std::string MultiFileNameParser::STEP_RANGE = "(" + SINGLE + COLON + SINGLE + COLON + SINGLE + ")";
const std::string MultiFileNameParser::ADD_LIST = "(" + SINGLE + "(" + PLUS + SINGLE + ")+" + ")";
const std::string MultiFileNameParser::ADD_RANGE = "(" + SINGLE + MINUS + SINGLE + ")";
const std::string MultiFileNameParser::ADD_STEP_RANGE = "(" + SINGLE + MINUS + SINGLE + COLON + SINGLE + ")";
const std::string MultiFileNameParser::ANY = "(" + ADD_STEP_RANGE + "|" + ADD_RANGE + "|" + ADD_LIST + "|" + STEP_RANGE + "|" + RANGE + "|" + SINGLE + ")";
const std::string MultiFileNameParser::LIST = "(" + ANY + "(" + COMMA + ANY + ")*" + ")";
namespace
{
/**
* Functor to convert a vector of run numbers, which only contains
* a single run, into a string containing a cast of that run.
*
* @param run - a single run contained in a vector of
*/
struct convertUIntVect2String
{
std::string operator()(std::vector<unsigned int> run)
{
if(run.size() != 1)
throw std::exception("An unexpected run number was found during parsing.");
return boost::lexical_cast<std::string>(run.at(0));
}
};
/**
* Functor that takes in a map of run information parsed so far and a runString, parses
* parses the runString then appends the info to the map, returning it to the accumulate
* STL algo that called it.
*
* @param parsedRuns - the map of info relating to the runs parsed so far
* @param runString - the string containing new runs to be added to the map
* @returns the updated map, appended with the data from the runString
*/
struct parseRunRange
{
VectOfUInt2StringMap & operator()(VectOfUInt2StringMap & parsedRuns, const std::string & runString)
{
// Regex to separate non-added runs from the added runs.
boost::regex regex("(" +
MultiFileNameParser::STEP_RANGE + "|" +
MultiFileNameParser::RANGE + "|" +
MultiFileNameParser::SINGLE + ")");
// Deal with the case where we have an added run range or list to append to the map.
if(!boost::regex_match(runString,regex))
{
// Use Sofia's run parser here, but it's only ever going to produce a vector with a
// single vector inside, since we only ever feed in one added bunch of runs.
std::vector<unsigned int> runUInts = UserStringParser().parse(runString).at(0);
// Add the newly parsed runs and the ws name to the map, and return it.
parsedRuns.insert(
parsedRuns.end(),
std::pair<std::vector<unsigned int>, std::string>
(runUInts, runString));
return parsedRuns;
}
// We are left with the case where we have a NON-added run or run range to append to the map.
else
{
// In this case the run parser will spit out a vector of vectors, each of the inner vectors
// containing only one run number.
std::vector<std::vector<unsigned int>> runUInts = UserStringParser().parse(runString);
// Convert the unsigned ints into strings.
std::vector<std::string> runStrings;
runStrings.resize(runUInts.size());
std::transform(
runUInts.begin(), runUInts.end(),
runStrings.begin(),
convertUIntVect2String());
// @TODO:
// Here would be the best place to insert code to make sure the run strings mantain any
// of the original trailing zeros entered by the user.
// Add the newly parsed runs and the ws name to the map, and return it.
std::vector<std::string>::const_iterator s_it = runStrings.cbegin();
std::vector<std::vector<unsigned int>>::const_iterator vUI_it = runUInts.cbegin();
for( ; s_it != runStrings.cend(); ++s_it, ++vUI_it)
{
parsedRuns.insert(
parsedRuns.end(),
std::pair<std::vector<unsigned int>, std::string>
(*vUI_it, *s_it));
}
return parsedRuns;
}
}
};
} // Anonymous namespace
/**
* Constructor
*/
MultiFileNameParser::MultiFileNameParser() :
m_multiFileName(),
m_dir(),
m_inst(),
m_runs(),
m_ext(),
m_fileNamesToWsNameMap(),
m_parser()
{
}
/**
* Destructor
*/
MultiFileNameParser::~MultiFileNameParser()
{
}
/**
* Main entry point to the class. Parses the given multiFileName string
* by calling all the helper private functions, and the functors.
*
* @param multiFileName - the string to parse into seperate file names
* @returns a string containing any errors, "" if successful
*/
std::string MultiFileNameParser::parse(const std::string & multiFileName)
{
try
{
m_multiFileName = multiFileName;
split();
// Split the run string into tokens, tokenised by commas.
std::vector<std::string> tokens;
boost::split(tokens, m_runs, boost::is_any_of(","));
// Do some further tokenising of the tokens where necessary, and parse into
// a UIntVect2StringMap which maps vectors of unsigned int run numbers to
// the eventual workspace name of that vector of runs.
VectOfUInt2StringMap runUIntsToWsNameMap;
runUIntsToWsNameMap = std::accumulate(
tokens.begin(), tokens.end(),
runUIntsToWsNameMap,
parseRunRange());
// Finally, convert the unsigned int run numbers back into full file names
// to get our finished map, by passing each pair in runUIntsToWsNameMap
// to populateMap.
std::for_each(
runUIntsToWsNameMap.begin(), runUIntsToWsNameMap.end(),
boost::bind(&MultiFileNameParser::populateMap, this, _1));
return "";
}
catch(const std::exception &e)
{
std::stringstream error;
error << "Could not parse \"" << multiFileName << "\": " << e.what() << ".";
clear();
return error.str();
}
}
/**
* Getter for the m_fileNamesToWsNameMap.
*
* @returns the map
*/
VectOfStrings2StringMap MultiFileNameParser::getFileNamesToWsNameMap() const
{
return m_fileNamesToWsNameMap;
}
/**
* Clears all member variables, to be called when parsing exception caught.
*/
void MultiFileNameParser::clear()
{
m_multiFileName.clear();
m_dir.clear();
m_inst.clear();
m_runs.clear();
m_ext.clear();
m_fileNamesToWsNameMap.clear();
}
/**
* Splits the string found at m_multiFileName into it's basic
* dir/inst/run/ext components.
*/
void MultiFileNameParser::split()
{
if(m_multiFileName.empty())
throw std::exception("No file name to parse.");
Poco::Path fullPath(m_multiFileName);
// The extension is easy to parse, let's do it first.
m_ext = fullPath.getExtension();
if(!m_ext.empty()) m_ext = "." + m_ext;
// Parse instrument name and runs from the base name using regexs. Throw if
// not found since these are required.
std::string baseName = fullPath.getBaseName();
m_inst = getMatchingString("^" + INST + UNDERSCORE, baseName);
if(m_inst.empty()) throw std::exception("Cannot parse instrument name.");
m_runs = getMatchingString(LIST + "$", baseName);
if(m_runs.empty()) throw std::exception("Cannot parse run numbers.");
// Lop off what we've found and we're left with the directory.
m_dir = m_multiFileName.substr(0, m_multiFileName.size() - (baseName.size() + m_ext.size()));
}
/**
* Takes in a pair consisting of a vector of run numbers and a ws name,
* converts the run numbers to full file names, and appends a new pair
* to the filename to wsName map.
*
* @param pair - a std::pair consisting of a vector of run numbers and a ws name
*/
void MultiFileNameParser::populateMap(const VectOfUInt2StringPair & pair)
{
// Convert vector of run numbers to vector of filenames
std::vector<unsigned int> runs = pair.first;
std::vector<std::string> fileNames;
fileNames.resize(runs.size());
std::transform(
runs.begin(), runs.end(),
fileNames.begin(),
boost::bind(&MultiFileNameParser::createFileName, this, _1));
// Append to map
m_fileNamesToWsNameMap.insert(std::make_pair(fileNames, pair.second));
}
/**
* Uses the run parameter and the other parts of the file name that have
* already been parsed to construct a new file name for this specific run.
*
* @param run - the run number of the file
* @returns the full file name of the file with given run number
*/
std::string MultiFileNameParser::createFileName(unsigned int run)
{
std::stringstream fileName;
fileName << m_dir << m_inst << boost::lexical_cast<std::string>(run) << m_ext;
return fileName.str();
}
/**
* Finds a given regex in a given string, and returns the part of the string
* that matches the regex. Returns "" if there is no occurance of the regex.
*
* @param regex - the regular expression to find within the string
* @param toParse - the string within to find the regex
* @returns the part (if any) of the given string that matches the given regex
*/
std::string MultiFileNameParser::getMatchingString(const std::string & regex, const std::string & toParse)
{
return boost::sregex_iterator(
toParse.begin(), toParse.end(),
boost::regex(regex)
)->str();
}
}
}