FileFinder.cpp 18.9 KB
Newer Older
1
2
3
4
//----------------------------------------------------------------------
// Includes
//----------------------------------------------------------------------
#include "MantidAPI/FileFinder.h"
5
6
#include "MantidAPI/IArchiveSearch.h"
#include "MantidAPI/ArchiveSearchFactory.h"
7
#include "MantidKernel/ConfigService.h"
8
#include "MantidKernel/Exception.h"
9
10
#include "MantidKernel/FacilityInfo.h"
#include "MantidKernel/InstrumentInfo.h"
11
12
#include "MantidKernel/LibraryManager.h"
#include "MantidKernel/Glob.h"
13

Campbell, Stuart's avatar
Campbell, Stuart committed
14
15
16
#include <Poco/Path.h>
#include <Poco/File.h>
#include <Poco/StringTokenizer.h>
17
18
19
20
21
#include <boost/lexical_cast.hpp>

#include <cctype>
#include <algorithm>

22
23
#include <boost/algorithm/string.hpp>

24
25
namespace Mantid
{
26
  namespace API
27
  {
28
29
    using std::string;

30
31
32
33
34
35
36
37
    // Suffix that is allowed to follow a run number in a file name. Only this one
    // suffix is supported at present (it could be generalised to an array of allowed
    // suffixes); it is currently used only by the ISIS SANS group.
    const std::string FileFinderImpl::ALLOWED_SUFFIX = "-add";
    //----------------------------------------------------------------------
    // Public member functions
    //----------------------------------------------------------------------
    /**
     * Default constructor
     */
Peterson, Peter's avatar
Peterson, Peter committed
38
    FileFinderImpl::FileFinderImpl() : g_log(Mantid::Kernel::Logger::get("FileFinderImpl"))
39
    {
40
41
42
      // Make sure plugins are loaded
      std::string libpath = Kernel::ConfigService::Instance().getString("plugins.directory");
      if (!libpath.empty())
43
      {
44
        Kernel::LibraryManager::Instance().OpenAllLibraries(libpath);
45
      }
46
47
48

      // determine from Mantid property how sensitive Mantid should be
      std::string casesensitive = Mantid::Kernel::ConfigService::Instance().getString("filefinder.casesensitive");
49
      if ( boost::iequals("Off",casesensitive) )
50
51
52
        globOption = Poco::Glob::GLOB_CASELESS;
      else
        globOption = Poco::Glob::GLOB_DEFAULT;
53
    }
54

55
56
57

    /**
     * Choose whether file searches performed through globbing are case sensitive.
     * @param cs :: If true the default (case sensitive) glob option is used,
     *  otherwise the caseless glob option is used
     */
    void FileFinderImpl::setCaseSensitive(const bool cs)
    {
      globOption = cs ? Poco::Glob::GLOB_DEFAULT : Poco::Glob::GLOB_CASELESS;
    }

68
69
70
71
72
73
74
    /**
     * Return the full path to the file given its name
     * @param fName :: A full file name (without path) including extension
     * @return The full path if the file exists and can be found in one of the search locations
     *  or an empty string otherwise.
     */
    std::string FileFinderImpl::getFullPath(const std::string& fName) const
75
    {
Peterson, Peter's avatar
Peterson, Peter committed
76
      g_log.debug() << "getFullPath(" << fName << ")\n";
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
      // If this is already a full path, nothing to do
      if (Poco::Path(fName).isAbsolute())
        return fName;

      // First try the path relative to the current directory. Can throw in some circumstances with extensions that have wild cards
      try
      {
        Poco::File fullPath(Poco::Path().resolve(fName));
        if (fullPath.exists())
          return fullPath.path();
      }
      catch (std::exception&)
      {
      }

      const std::vector<std::string>& searchPaths =
          Kernel::ConfigService::Instance().getDataSearchDirs();
      std::vector<std::string>::const_iterator it = searchPaths.begin();
      for (; it != searchPaths.end(); ++it)
96
      {
97
98
99
100
101
102
103
104
// On windows globbing is note working properly with network drives 
// for example a network drive containing a $ 
// For this reason, and since windows is case insensitive anyway
// a special case is made for windows
#ifdef _WIN32
          if (fName.find("*") != std::string::npos)
          {
#endif
105
106
107
          Poco::Path path(*it, fName);
          Poco::Path pathPattern(path);
          std::set < std::string > files;
108
          Kernel::Glob::glob(pathPattern, files, globOption);
109
110
111
112
          if (!files.empty())
          {
            return *files.begin();
          }
113
114
115
116
117
118
119
120
121
122
123
124
#ifdef _WIN32
          }
          else
          {
            Poco::Path path(*it, fName);
            Poco::File file(path);
            if (file.exists())
            {
              return path.toString();
            }
          }
#endif
125
      }
126
      return "";
127
128
    }

129
130
131
132
133
134
    /** Run numbers can be followed by an allowed string. Check if there is
     *  one, remove it from the name and return the string, else return empty
     *  @param userString run number that may have a suffix
     *  @return the suffix, if there was one
     */
    std::string FileFinderImpl::extractAllowedSuffix(std::string & userString) const
135
    {
136
137
138
139
140
141
142
143
144
      if (userString.find(ALLOWED_SUFFIX) == std::string::npos)
      {
        //short cut processing as normally there is no suffix
        return "";
      }

      // ignore any file extension in checking if a suffix is present
      Poco::Path entry(userString);
      std::string noExt(entry.getBaseName());
Peterson, Peter's avatar
Peterson, Peter committed
145
      const size_t repNumChars = ALLOWED_SUFFIX.size();
146
147
148
149
150
151
      if (noExt.find(ALLOWED_SUFFIX) == noExt.size() - repNumChars)
      {
        userString.replace(userString.size() - repNumChars, repNumChars, "");
        return ALLOWED_SUFFIX;
      }
      return "";
152
    }
153

154
155
156
157
158
159
160
161
162
163
    /**
     * Return the facility as determined from the hint.
     *
     * The instrument name is taken from the file-name part of the hint: everything
     * before the first digit, with trailing non-alphabetic characters (such as a
     * delimiter) dropped. Names starting with "PG3" or "pg3" are treated as "PG3".
     *
     * @param hint :: The name hint.
     * @return The facility of the instrument named in the hint. The default facility
     *  is returned if the hint is empty, starts with a digit, or names an
     *  instrument that cannot be found.
     */
    const Kernel::FacilityInfo FileFinderImpl::getFacility(const string& hint) const
    {
      // Nothing to go on: no hint at all or just a run number
      if (hint.empty() || isdigit(hint[0]))
      {
        return Kernel::ConfigService::Instance().getFacility();
      }

      string name = Poco::Path(hint).getFileName();
      if ((name.compare(0, 3, "PG3") == 0) || (name.compare(0, 3, "pg3") == 0))
      {
        name = "PG3";
      }
      else
      {
        // go forwards looking for the run number to start
        string::size_type runStart = 0;
        while (runStart < name.size() && !isdigit(name[runStart]))
        {
          ++runStart;
        }
        name.erase(runStart);

        // go backwards looking for the instrument name to end - gets around delimiters
        string::size_type nameEnd = name.size();
        while (nameEnd > 0 && !isalpha(name[nameEnd - 1]))
        {
          --nameEnd;
        }
        name.erase(nameEnd);
      }

      try
      {
        return Kernel::ConfigService::Instance().getInstrument(name).facility();
      }
      catch (Kernel::Exception::NotFoundError &e)
      {
        // Unknown instrument: report it and fall back to the default facility
        g_log.debug() << e.what() << "\n";
      }
      return Kernel::ConfigService::Instance().getFacility();
    }

200
201
202
203
204
205
206
207
208
209
210
211
    /**
     * Extracts the instrument name and run number from a hint
     * @param hint :: The name hint
     * @return A pair of instrument name and run number
     */
    std::pair<std::string, std::string> FileFinderImpl::toInstrumentAndNumber(const std::string& hint) const
    {
      std::string instrPart;
      std::string runPart;

      if (isdigit(hint[0]))
      {
212
        instrPart = Kernel::ConfigService::Instance().getInstrument().shortName();
213
214
215
216
        runPart = hint;
      }
      else
      {
217
218
219
220
221
222
223
224
225
226
        /// Find the last non-digit as the instrument name can contain numbers
        std::string::const_reverse_iterator it = std::find_if(hint.rbegin(), hint.rend(),
            std::not1(std::ptr_fun(isdigit)));
        // No non-digit or all non-digits
        if (it == hint.rend() || it == hint.rbegin())
        {
          throw std::invalid_argument("Malformed hint to FileFinderImpl::makeFileName: " + hint);
        }
        std::string::size_type nChars = std::distance(it, hint.rend());

227
228
        // PG3 is a special case (name ends in a number)- don't trust them
        if ((hint.find("PG3") == 0) || (hint.find("pg3") == 0)) {
229
230
231
          instrPart = "PG3";
          if (nChars < 3)
            nChars++;
232
233
234
        }
        else {
          instrPart = hint.substr(0, nChars);
235
        }
236
        runPart = hint.substr(nChars);
237
238
      }

239
      Kernel::InstrumentInfo instr = Kernel::ConfigService::Instance().getInstrument(instrPart);
240
241
242
243
244
245
246
247
248
249
      size_t nZero = instr.zeroPadding();
      // remove any leading zeros in case there are too many of them
      std::string::size_type i = runPart.find_first_not_of('0');
      runPart.erase(0, i);
      while (runPart.size() < nZero)
        runPart.insert(0, "0");
      if (runPart.size() > nZero && nZero != 0)
      {
        throw std::invalid_argument("Run number does not match instrument's zero padding");
      }
250

251
      instrPart = instr.shortName();
252

253
      return std::make_pair(instrPart, runPart);
254

255
    }
256

257
258
259
260
261
    /**
     * Make a data file name (without extension) from a hint. The hint can be either a run number or
     * a run number prefixed with an instrument name/short name. If the instrument
     * name is absent the default one is used.
     * @param hint :: The name hint
262
     * @param facility :: The current facility object
263
264
265
266
     * @return The file name
     * @throw NotFoundError if a required default is not set
     * @throw std::invalid_argument if the argument is malformed or run number is too long
     */
267
    std::string FileFinderImpl::makeFileName(const std::string& hint, const Kernel::FacilityInfo& facility) const
268
269
270
    {
      if (hint.empty())
        return "";
271

272
273
      std::string filename(hint);
      const std::string suffix = extractAllowedSuffix(filename);
274

275
      std::pair < std::string, std::string > p = toInstrumentAndNumber(filename);
276
      std::string delimiter = facility.delimiter();
277

278
279
280
281
282
283
284
285
      filename = p.first;
      if (!delimiter.empty())
      {
        filename += delimiter;
      }
      filename += p.second;

      if (!suffix.empty())
286
      {
287
        filename += suffix;
288
      }
289
290

      return filename;
291
    }
292

293
294
295
    /**
     * Find the file given a hint. If the name contains a dot(.) then it is assumed that it is already a file stem
     * otherwise calls makeFileName internally.
296
     * @param hint :: The name hint, format: [INSTR]1234[.ext]
297
     * @param exts :: Optional list of allowed extensions. Only those extensions found in both
298
299
     *  facilities extension list and exts will be used in the search. If an extension is given in hint 
     *  this argument is ignored.
300
301
302
     * @return The full path to the file or empty string if not found
     */
    std::string FileFinderImpl::findRun(const std::string& hint, const std::set<std::string> *exts) const
303
304
305
306
307
308
309
310
311
312
313
    {
      if (hint.empty())
        return "";
      std::vector<std::string> exts_v;
      if (exts != NULL && exts->size() > 0)
        exts_v.assign(exts->begin(), exts->end());

      return this->findRun(hint, exts_v);
    }

    std::string FileFinderImpl::findRun(const std::string& hint,const std::vector<std::string> &exts)const
314
    {
315
      g_log.debug() << "findRun(\'" << hint << "\', exts[" << exts.size() << "])\n";
316
317
318
      if (hint.empty())
        return "";

319
320
321
322
323
324
      // if it looks like a full filename just do a quick search for it
      Poco::Path hintPath(hint);
      if (!hintPath.getExtension().empty())
      {
        // check in normal search locations
        std::string path = getFullPath(hint);
Roman Tolchenov's avatar
Roman Tolchenov committed
325
        try
326
        {
Roman Tolchenov's avatar
Roman Tolchenov committed
327
328
329
330
331
332
333
334
335
          if (!path.empty() && Poco::File(path).exists())
          {
            return path;
          }
        }
        catch(std::exception& e)
        {
          g_log.error() << "Cannot open file " << path << ": " << e.what() << '\n';
          return "";
336
337
338
        }
      }

339
340
341
342
343
344
345
346
347
348
349
350
351
352
      // so many things depend on the facility just get it now
      const Kernel::FacilityInfo facility = this->getFacility(hint);
      // initialize the archive searcher
      IArchiveSearch_sptr arch;
      { // hide in a local namespace so things fall out of scope
        std::string archiveOpt = Kernel::ConfigService::Instance().getString("datasearch.searcharchive");
        std::transform(archiveOpt.begin(), archiveOpt.end(), archiveOpt.begin(), tolower);
        if (!archiveOpt.empty() && archiveOpt != "off" && !facility.archiveSearch().empty())
        {
          g_log.debug() << "Starting archive search..." << *facility.archiveSearch().begin() << "\n";
          arch = ArchiveSearchFactory::Instance().create(*facility.archiveSearch().begin());
        }
      }

353
      // ask the archive search for help
354
355
      if (!hintPath.getExtension().empty())
      {
356
357
        if (arch)
        {
358
          try
359
          {
360
361
            std::string path = arch->getPath(hint);
            if (!path.empty())
362
            {
363
364
365
366
367
              Poco::File file(path);
              if (file.exists())
              {
                return file.path();
              }
368
            }
369
          }
370
371
372
373
          catch(...)
          {
            g_log.error() << "Archive search could not find '" << hint << "'\n";
          }
374
        }
375
      }
376

377
378
      // Do we need to try and form a filename from our preset rules
      std::string filename(hint);
379
      std::string extension;
380
      if (hintPath.depth() == 0)
381
      {
Peterson, Peter's avatar
Peterson, Peter committed
382
        std::size_t i = filename.find_last_of('.');
383
384
385
386
387
        if (i != std::string::npos)
        {
          extension = filename.substr(i);
          filename.erase(i);
        }
388
389
390
391
        try
        {
          filename = makeFileName(filename, facility);
        }
392
        catch(std::invalid_argument&)
393
394
395
        {
          g_log.error() << "Could not find file '" << filename << "'\n";
        }
396
397
      }

Peterson, Peter's avatar
Peterson, Peter committed
398
      // work through the extensions
399
      const std::vector<std::string> facility_extensions = facility.extensions();
400
401
      // select allowed extensions
      std::vector < std::string > extensions;
402
403
404
405
      if (!extension.empty())
      {
        extensions.push_back(extension);
      }
406
      else if (!exts.empty())
407
      {
408
        extensions.insert(extensions.end(), exts.begin(), exts.end());
409
410
411
        // find intersection of facility_extensions and exts, preserving the order of facility_extensions
        std::vector<std::string>::const_iterator it = facility_extensions.begin();
        for (; it != facility_extensions.end(); ++it)
412
        {
413
          if (std::find(exts.begin(), exts.end(), *it) == exts.end())
414
          {
415
            extensions.push_back(*it);
416
          }
417
418
419
420
421
422
        }
      }
      else
      {
        extensions.assign(facility_extensions.begin(), facility_extensions.end());
      }
423
424
425
426
427
      // Look first at the original filename then for case variations. This is important
      // on platforms where file names ARE case sensitive.
      std::vector<std::string> filenames(3,filename);
      std::transform(filename.begin(),filename.end(),filenames[1].begin(),toupper);
      std::transform(filename.begin(),filename.end(),filenames[2].begin(),tolower);
428
429
430
      std::vector<std::string>::const_iterator ext = extensions.begin();
      for (; ext != extensions.end(); ++ext)
      {
431
432
433
434
435
436
        for(size_t i = 0; i < filenames.size(); ++i)
        {
          std::string path = getFullPath(filenames[i] + *ext);
          if (!path.empty())
            return path;
        }
437
438
439
      }

      // Search the archive of the default facility
440
      if (arch)
441
      {
442
443
444
        std::string path;
        std::vector<std::string>::const_iterator ext = extensions.begin();
        for (; ext != extensions.end(); ++ext)
445
        {
446
          for(size_t i = 0; i < filenames.size(); ++i)
447
          {
Roman Tolchenov's avatar
Roman Tolchenov committed
448
449
450
451
452
453
454
455
            try
            {
              path = arch->getPath(filenames[i] + *ext);
            }
            catch(...)
            {
              return "";
            }
456
            if( path.empty() ) return "";
457
458
            Poco::Path pathPattern(path);
            if (ext->find("*") != std::string::npos)
459
            {
460
461
              continue;
              std::set < std::string > files;
462
              Kernel::Glob::glob(pathPattern, files, globOption);
463
464
465
466
467
            }
            else
            {
              Poco::File file(pathPattern);
              if (file.exists())
468
              {
469
                return file.path();
470
              }
471
472
473
474
475
            }
          } // i
        }  // ext
      } // arch

476
      return "";
477
    }
478

479
480
481
482
483
484
485
486
487
    /**
     * Find a list of files file given a hint. Calls findRun internally.
     * @param hint :: Comma separated list of hints to findRun method.
     *  Can also include ranges of runs, e.g. 123-135 or equivalently 123-35.
     *  Only the beginning of a range can contain an instrument name.
     * @return A vector of full paths or empty vector
     * @throw std::invalid_argument if the argument is malformed
     */
    std::vector<std::string> FileFinderImpl::findRuns(const std::string& hint) const
488
    {
489
490
491
492
      std::vector < std::string > res;
      Poco::StringTokenizer hints(hint, ",",
          Poco::StringTokenizer::TOK_TRIM | Poco::StringTokenizer::TOK_IGNORE_EMPTY);
      Poco::StringTokenizer::Iterator h = hints.begin();
493

494
      for (; h != hints.end(); ++h)
495
      {
496
497
498
499
        // Quick check for a filename
        bool fileSuspected = false;
        // Assume if the hint contains either a "/" or "\" it is a filename..
        if ((*h).find("\\") != std::string::npos)
500
        {
501
502
503
504
505
          fileSuspected = true;
        }
        if ((*h).find("/") != std::string::npos)
        {
          fileSuspected = true;
506
        }
507
508
509
510
        if ((*h).find(ALLOWED_SUFFIX) != std::string::npos)
        {
          fileSuspected = true;
        }
511

512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
        Poco::StringTokenizer range(*h, "-",
            Poco::StringTokenizer::TOK_TRIM | Poco::StringTokenizer::TOK_IGNORE_EMPTY);
        if ((range.count() > 2) && (!fileSuspected))
        {
          throw std::invalid_argument("Malformed range of runs: " + *h);
        }
        else if ((range.count() == 2) && (!fileSuspected))
        {
          std::pair < std::string, std::string > p1 = toInstrumentAndNumber(range[0]);
          std::string run = p1.second;
          size_t nZero = run.size(); // zero padding
          if (range[1].size() > nZero)
          {
            ("Malformed range of runs: " + *h
                + ". The end of string value is longer than the instrument's zero padding");
          }
          int runNumber = boost::lexical_cast<int>(run);
          std::string runEnd = run;
          runEnd.replace(runEnd.end() - range[1].size(), runEnd.end(), range[1]);
          int runEndNumber = boost::lexical_cast<int>(runEnd);
          if (runEndNumber < runNumber)
          {
            throw std::invalid_argument("Malformed range of runs: " + *h);
          }
          for (int irun = runNumber; irun <= runEndNumber; ++irun)
          {
            run = boost::lexical_cast<std::string>(irun);
            while (run.size() < nZero)
              run.insert(0, "0");
            std::string path = findRun(p1.first + run);
            if (!path.empty())
            {
              res.push_back(path);
            }
          }

        }
        else
        {
          std::string path = findRun(*h);
          if (!path.empty())
          {
            res.push_back(path);
          }
        }
557
      }
558

559
560
      return res;
    }
561

562
  }// API
563
564
565

}// Mantid