LoadBankFromDiskTask.cpp 19.3 KB
Newer Older
1
2
3
// Mantid Repository : https://github.com/mantidproject/mantid
//
// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4
5
//   NScD Oak Ridge National Laboratory, European Spallation Source,
//   Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6
// SPDX - License - Identifier: GPL - 3.0 +
LamarMoore's avatar
LamarMoore committed
7
#include "MantidDataHandling/LoadBankFromDiskTask.h"
8
9
#include "MantidDataHandling/BankPulseTimes.h"
#include "MantidDataHandling/DefaultEventLoader.h"
10
#include "MantidDataHandling/LoadEventNexus.h"
11
#include "MantidDataHandling/ProcessBankData.h"
12
13
#include "MantidKernel/Unit.h"
#include "MantidNexus/NexusIOHelper.h"
Neil Vaytet's avatar
Neil Vaytet committed
14
#include <algorithm>
15
#include <utility>
16

17
namespace Mantid::DataHandling {
18
19

/** Constructor
LamarMoore's avatar
LamarMoore committed
20
21
22
23
24
25
26
27
28
29
30
 *
 * @param loader :: Handle to the main loader
 * @param entry_name :: The pathname of the bank to load
 * @param entry_type :: The classtype of the entry to load
 * @param numEvents :: The number of events in the bank.
 * @param oldNeXusFileNames :: Identify if file is of old variety.
 * @param prog :: an optional Progress object
 * @param ioMutex :: a mutex shared for all Disk I-O tasks
 * @param scheduler :: the ThreadScheduler that runs this task.
 * @param framePeriodNumbers :: Period numbers corresponding to each frame
 */
31
32
33
34
35
36
37
LoadBankFromDiskTask::LoadBankFromDiskTask(DefaultEventLoader &loader, std::string entry_name, std::string entry_type,
                                           const std::size_t numEvents, const bool oldNeXusFileNames,
                                           API::Progress *prog, std::shared_ptr<std::mutex> ioMutex,
                                           Kernel::ThreadScheduler &scheduler, std::vector<int> framePeriodNumbers)
    : m_loader(loader), entry_name(std::move(entry_name)), entry_type(std::move(entry_type)), prog(prog),
      scheduler(scheduler), m_loadError(false), m_oldNexusFileNames(oldNeXusFileNames), m_have_weight(false),
      m_framePeriodNumbers(std::move(framePeriodNumbers)) {
38
39
40
41
42
43
44
  setMutex(ioMutex);
  m_cost = static_cast<double>(numEvents);
  m_min_id = std::numeric_limits<uint32_t>::max();
  m_max_id = 0;
}

/** Load the pulse times, if needed. This sets
LamarMoore's avatar
LamarMoore committed
45
46
 * thisBankPulseTimes to the right pointer.
 * */
47
48
49
50
51
52
53
void LoadBankFromDiskTask::loadPulseTimes(::NeXus::File &file) {
  try {
    // First, get info about the event_time_zero field in this bank
    file.openData("event_time_zero");
  } catch (::NeXus::Exception &) {
    // Field not found error is most likely.
    // Use the "proton_charge" das logs.
54
    thisBankPulseTimes = m_loader.alg->m_allBanksPulseTimes;
55
56
57
    return;
  }
  std::string thisStartTime;
58
  size_t thispulseTimes = 0;
Neil Vaytet's avatar
Neil Vaytet committed
59
  // If the offset is not present, use Unix epoch
60
61
  if (!file.hasAttr("offset")) {
    thisStartTime = "1970-01-01T00:00:00Z";
Samuel Jones's avatar
Samuel Jones committed
62
63
    m_loader.alg->getLogger().warning() << "In loadPulseTimes: no ISO8601 offset attribute provided for "
                                           "event_time_zero, using UNIX epoch instead\n";
64
65
66
67
  } else {
    file.getAttr("offset", thisStartTime);
  }

68
  if (!file.getInfo().dims.empty())
69
    thispulseTimes = file.getInfo().dims[0];
70
71
72
73
  file.closeData();

  // Now, we look through existing ones to see if it is already loaded
  // thisBankPulseTimes = NULL;
74
  for (auto &bankPulseTime : m_loader.m_bankPulseTimes) {
75
    if (bankPulseTime->equals(thispulseTimes, thisStartTime)) {
76
77
78
79
80
81
      thisBankPulseTimes = bankPulseTime;
      return;
    }
  }

  // Not found? Need to load and add it
Samuel Jones's avatar
Samuel Jones committed
82
  thisBankPulseTimes = std::make_shared<BankPulseTimes>(boost::ref(file), m_framePeriodNumbers);
83
  m_loader.m_bankPulseTimes.emplace_back(thisBankPulseTimes);
84
85
86
}

/** Load the event_index field
87
88
89
90
 * (a list of size of # of pulses giving the index in the event list for that
    pulse)
 * @param file :: File handle for the NeXus file
 */
Samuel Jones's avatar
Samuel Jones committed
91
std::vector<uint64_t> LoadBankFromDiskTask::loadEventIndex(::NeXus::File &file) {
92
  // Get the event_index (a list of size of # of pulses giving the index in
93
94
95
  // the event list for that pulse) as a uint64 vector.
  // The Nexus standard does not specify if this is to be 32-bit or 64-bit
  // integers, so we use the NeXusIOHelper to do the conversion on the fly.
Samuel Jones's avatar
Samuel Jones committed
96
  auto event_index = NeXus::NeXusIOHelper::readNexusVector<uint64_t>(file, "event_index");
97
98

  // Look for the sign that the bank is empty
99
100
  if (event_index.size() == 1) {
    if (event_index[0] == 0) {
101
102
      // One entry, only zero. This means NO events in this bank.
      m_loadError = true;
Samuel Jones's avatar
Samuel Jones committed
103
      m_loader.alg->getLogger().debug() << "Bank " << entry_name << " is empty.\n";
104
105
    }
  }
106
  return event_index;
107
108
109
}

/** Open the event_id field and validate the contents
LamarMoore's avatar
LamarMoore committed
110
111
112
113
114
115
116
 *
 * @param file :: File handle for the NeXus file
 * @param start_event :: set to the index of the first event
 * @param stop_event :: set to the index of the last event + 1
 * @param event_index ::  (a list of size of # of pulses giving the index in
 *the event list for that pulse)
 */
Samuel Jones's avatar
Samuel Jones committed
117
118
void LoadBankFromDiskTask::prepareEventId(::NeXus::File &file, int64_t &start_event, int64_t &stop_event,
                                          const std::vector<uint64_t> &event_index) {
119
120
121
122
123
124
125
126
127
128
129
130
  // Get the list of pixel ID's
  if (m_oldNexusFileNames)
    file.openData("event_pixel_id");
  else
    file.openData("event_id");

  // By default, use all available indices
  start_event = 0;
  ::NeXus::Info id_info = file.getInfo();
  // dims[0] can be negative in ISIS meaning 2^32 + dims[0]. Take that into
  // account
  int64_t dim0 = recalculateDataSize(id_info.dims[0]);
131
  stop_event = dim0;
132
133

  // Handle the time filtering by changing the start/end offsets.
134
  for (size_t i = 0; i < thisBankPulseTimes->pulseTimes.size(); i++) {
135
    if (thisBankPulseTimes->pulseTimes[i] >= m_loader.alg->filter_time_start) {
136
      start_event = static_cast<int64_t>(event_index[i]);
137
138
139
140
      break; // stop looking
    }
  }

141
  if (start_event > dim0) {
142
143
144
    // If the frame indexes are bad then we can't construct the times of the
    // events properly and filtering by time
    // will not work on this data
Samuel Jones's avatar
Samuel Jones committed
145
146
147
148
149
    m_loader.alg->getLogger().warning() << this->entry_name
                                        << "'s field 'event_index' seems to be invalid (start_index > than "
                                           "the number of events in the bank)."
                                        << "All events will appear in the same frame and filtering by time "
                                           "will not be possible on this data.\n";
150
    start_event = 0;
151
    stop_event = dim0;
152
  } else {
153
    for (size_t i = 0; i < thisBankPulseTimes->pulseTimes.size(); i++) {
154
      if (thisBankPulseTimes->pulseTimes[i] > m_loader.alg->filter_time_stop) {
155
156
157
158
159
160
        stop_event = event_index[i];
        break;
      }
    }
  }
  // We are loading part - work out the event number range
161
  if (m_loader.chunk != EMPTY_INT()) {
Samuel Jones's avatar
Samuel Jones committed
162
163
    start_event = static_cast<int64_t>(m_loader.chunk - m_loader.firstChunkForBank) *
                  static_cast<int64_t>(m_loader.eventsPerChunk);
164
    // Don't change stop_event for the final chunk
Samuel Jones's avatar
Samuel Jones committed
165
    if (start_event + static_cast<int64_t>(m_loader.eventsPerChunk) < stop_event)
166
      stop_event = start_event + static_cast<int64_t>(m_loader.eventsPerChunk);
167
168
169
  }

  // Make sure it is within range
170
  if (stop_event > dim0)
171
172
    stop_event = dim0;

Samuel Jones's avatar
Samuel Jones committed
173
174
  m_loader.alg->getLogger().debug() << entry_name << ": start_event " << start_event << " stop_event " << stop_event
                                    << "\n";
175
176
}

177
178
179
/** Load the event_id field, which has been opened
 * @param file An NeXus::File object opened at the correct group
 * @returns A new array containing the event Ids for this bank
LamarMoore's avatar
LamarMoore committed
180
 */
Samuel Jones's avatar
Samuel Jones committed
181
std::unique_ptr<std::vector<uint32_t>> LoadBankFromDiskTask::loadEventId(::NeXus::File &file) {
182
183
184
185
186
  // This is the data size
  ::NeXus::Info id_info = file.getInfo();
  int64_t dim0 = recalculateDataSize(id_info.dims[0]);

  // Now we allocate the required arrays
187
  auto event_id = std::make_unique<std::vector<uint32_t>>(m_loadSize[0]);
188
189
190

  // Check that the required space is there in the file.
  if (dim0 < m_loadSize[0] + m_loadStart[0]) {
Samuel Jones's avatar
Samuel Jones committed
191
192
193
    m_loader.alg->getLogger().warning() << "Entry " << entry_name << "'s event_id field is too small (" << dim0
                                        << ") to load the desired data size (" << m_loadSize[0] + m_loadStart[0]
                                        << ").\n";
194
195
196
    m_loadError = true;
  }

197
  if (m_loader.alg->getCancel())
198
199
200
201
202
    m_loadError = true; // To allow cancelling the algorithm

  if (!m_loadError) {
    // Must be uint32
    if (id_info.type == ::NeXus::UINT32)
203
      file.getSlab(event_id->data(), m_loadStart, m_loadSize);
204
    else {
205
      m_loader.alg->getLogger().warning()
Samuel Jones's avatar
Samuel Jones committed
206
          << "Entry " << entry_name << "'s event_id field is not UINT32! It will be skipped.\n";
207
208
209
210
211
      m_loadError = true;
    }
    file.closeData();

    // determine the range of pixel ids
Samuel Jones's avatar
Samuel Jones committed
212
213
    m_min_id = *(std::min_element(event_id->data(), event_id->data() + m_loadSize[0]));
    m_max_id = *(std::max_element(event_id->data(), event_id->data() + m_loadSize[0]));
214

215
    if (m_min_id > static_cast<uint32_t>(m_loader.eventid_max)) {
216
217
218
219
220
221
222
223
224
225
      // All the detector IDs in the bank are higher than the highest 'known'
      // (from the IDF)
      // ID. Setting this will abort the loading of the bank.
      m_loadError = true;
    }
    // fixup the minimum pixel id in the case that it's lower than the lowest
    // 'known' id. We test this by checking that when we add the offset we
    // would not get a negative index into the vector. Note that m_min_id is
    // a uint so we have to be cautious about adding it to an int which may be
    // negative.
226
227
    if (static_cast<int32_t>(m_min_id) + m_loader.pixelID_to_wi_offset < 0) {
      m_min_id = static_cast<uint32_t>(abs(m_loader.pixelID_to_wi_offset));
228
229
230
    }
    // fixup the maximum pixel id in the case that it's higher than the
    // highest 'known' id
231
232
    if (m_max_id > static_cast<uint32_t>(m_loader.eventid_max))
      m_max_id = static_cast<uint32_t>(m_loader.eventid_max);
233
  }
234
  return event_id;
235
236
237
}

/** Open and load the times-of-flight data
238
239
 * @param file An NeXus::File object opened at the correct group
 * @returns A new array containing the time of flights for this bank
LamarMoore's avatar
LamarMoore committed
240
 */
Samuel Jones's avatar
Samuel Jones committed
241
std::unique_ptr<std::vector<float>> LoadBankFromDiskTask::loadTof(::NeXus::File &file) {
242
  // Allocate the array
Samuel Jones's avatar
Samuel Jones committed
243
  auto event_time_of_flight = std::make_unique<std::vector<float>>(m_loadSize[0]);
244
245

  // Get the list of event_time_of_flight's
246
  std::string key, tof_unit;
247
  if (!m_oldNexusFileNames)
248
    key = "event_time_offset";
249
  else
250
251
    key = "event_time_of_flight";
  file.openData(key);
252
253
254
255
256

  // Check that the required space is there in the file.
  ::NeXus::Info tof_info = file.getInfo();
  int64_t tof_dim0 = recalculateDataSize(tof_info.dims[0]);
  if (tof_dim0 < m_loadSize[0] + m_loadStart[0]) {
Samuel Jones's avatar
Samuel Jones committed
257
258
259
    m_loader.alg->getLogger().warning() << "Entry " << entry_name
                                        << "'s event_time_offset field is too small "
                                           "to load the desired data.\n";
260
261
262
    m_loadError = true;
  }

263
264
265
  // Mantid assumes event_time_offset to be float.
  // Nexus only requires event_time_offset to be a NXNumber.
  // We thus have to consider 32-bit or 64-bit options, and we
266
267
  // explicitly allow downcasting using the additional AllowDowncasting
  // template argument.
Samuel Jones's avatar
Samuel Jones committed
268
269
  auto vec = NeXus::NeXusIOHelper::readNexusSlab<float, NeXus::NeXusIOHelper::AllowNarrowing>(file, key, m_loadStart,
                                                                                              m_loadSize);
270
  file.getAttr("units", tof_unit);
271
  file.closeData();
272
  // Convert Tof to microseconds
273
  Kernel::Units::timeConversionVector(vec, tof_unit, "microseconds");
274
  std::copy(vec.begin(), vec.end(), event_time_of_flight->data());
275

276
  return event_time_of_flight;
277
278
}

279
280
281
282
/** Load weight of weigthed events if they exist
 * @param file An NeXus::File object opened at the correct group
 * @returns A new array containing the weights or a nullptr if the weights
 * are not present
LamarMoore's avatar
LamarMoore committed
283
 */
Samuel Jones's avatar
Samuel Jones committed
284
std::unique_ptr<std::vector<float>> LoadBankFromDiskTask::loadEventWeights(::NeXus::File &file) {
285
286
287
288
289
290
  try {
    // First, get info about the event_weight field in this bank
    file.openData("event_weight");
  } catch (::NeXus::Exception &) {
    // Field not found error is most likely.
    m_have_weight = false;
291
    return std::unique_ptr<std::vector<float>>();
292
293
294
295
296
  }
  // OK, we've got them
  m_have_weight = true;

  // Allocate the array
297
  auto event_weight = std::make_unique<std::vector<float>>(m_loadSize[0]);
298
299
300
301

  ::NeXus::Info weight_info = file.getInfo();
  int64_t weight_dim0 = recalculateDataSize(weight_info.dims[0]);
  if (weight_dim0 < m_loadSize[0] + m_loadStart[0]) {
Samuel Jones's avatar
Samuel Jones committed
302
303
    m_loader.alg->getLogger().warning() << "Entry " << entry_name
                                        << "'s event_weight field is too small to load the desired data.\n";
304
305
306
307
308
    m_loadError = true;
  }

  // Check that the type is what it is supposed to be
  if (weight_info.type == ::NeXus::FLOAT32)
309
    file.getSlab(event_weight->data(), m_loadStart, m_loadSize);
310
  else {
Samuel Jones's avatar
Samuel Jones committed
311
312
    m_loader.alg->getLogger().warning() << "Entry " << entry_name
                                        << "'s event_weight field is not FLOAT32! It will be skipped.\n";
313
314
315
316
317
318
    m_loadError = true;
  }

  if (!m_loadError) {
    file.closeData();
  }
319
  return event_weight;
320
321
322
323
324
325
326
327
328
}

void LoadBankFromDiskTask::run() {
  // These give the limits in each file as to which events we actually load
  // (when filtering by time).
  m_loadStart.resize(1, 0);
  m_loadSize.resize(1, 0);

  m_loadError = false;
329
  m_have_weight = m_loader.m_haveWeights;
330
331
332

  prog->report(entry_name + ": load from disk");

333
  // arrays to load into
334
335
336
  std::unique_ptr<std::vector<uint32_t>> event_id;
  std::unique_ptr<std::vector<float>> event_time_of_flight;
  std::unique_ptr<std::vector<float>> event_weight;
337
  std::vector<uint64_t> event_index;
338

339
  // Open the file
340
  ::NeXus::File file(m_loader.alg->m_filename);
341
342
  try {
    // Navigate into the file
343
    file.openGroup(m_loader.alg->m_top_entry_name, "NXentry");
344
345
346
347
    // Open the bankN_event group
    file.openGroup(entry_name, entry_type);

    // Load the event_index field.
348
    event_index = this->loadEventIndex(file);
349
350
351
352
353
354
355

    if (!m_loadError) {
      // Load and validate the pulse times
      this->loadPulseTimes(file);

      // The event_index should be the same length as the pulse times from DAS
      // logs.
356
      if (event_index.size() != thisBankPulseTimes->pulseTimes.size())
Samuel Jones's avatar
Samuel Jones committed
357
358
359
        m_loader.alg->getLogger().warning() << "Bank " << entry_name
                                            << " has a mismatch between the number of event_index entries "
                                               "and the number of pulse times in event_time_zero.\n";
360
361

      // Open and validate event_id field.
362
363
      int64_t start_event = 0;
      int64_t stop_event = 0;
364
      this->prepareEventId(file, start_event, stop_event, event_index);
365
366

      // These are the arguments to getSlab()
367
368
      m_loadStart[0] = start_event;
      m_loadSize[0] = stop_event - start_event;
369
370
371

      if ((m_loadSize[0] > 0) && (m_loadStart[0] >= 0)) {
        // Load pixel IDs
372
        event_id = this->loadEventId(file);
373
        if (m_loader.alg->getCancel()) {
Samuel Jones's avatar
Samuel Jones committed
374
          m_loader.alg->getLogger().error() << "Loading bank " << entry_name << " is cancelled.\n";
375
          m_loadError = true; // To allow cancelling the algorithm
376
        }
377
378
379

        // And TOF.
        if (!m_loadError) {
380
          event_time_of_flight = this->loadTof(file);
381
          if (m_have_weight) {
382
            event_weight = this->loadEventWeights(file);
383
384
385
386
387
          }
        }
      } // Size is at least 1
      else {
        // Found a size that was 0 or less; stop processing
388
        m_loader.alg->getLogger().error()
Samuel Jones's avatar
Samuel Jones committed
389
390
            << "Loading bank " << entry_name << " is stopped due to either zero/negative loading size ("
            << m_loadStart[0] << ") or negative load start index (" << m_loadStart[0] << ")\n";
391
392
393
394
395
396
397
        m_loadError = true;
      }

    } // no error

  } // try block
  catch (std::exception &e) {
Samuel Jones's avatar
Samuel Jones committed
398
    m_loader.alg->getLogger().error() << "Error while loading bank " << entry_name << ":\n";
399
    m_loader.alg->getLogger().error() << e.what() << '\n';
400
401
    m_loadError = true;
  } catch (...) {
Samuel Jones's avatar
Samuel Jones committed
402
    m_loader.alg->getLogger().error() << "Unspecified error while loading bank " << entry_name << '\n';
403
404
405
406
407
408
409
410
411
412
413
414
415
    m_loadError = true;
  }

  // Close up the file even if errors occured.
  file.closeGroup();
  file.close();

  // Abort if anything failed
  if (m_loadError) {
    return;
  }

  const auto bank_size = m_max_id - m_min_id;
416
417
418
  const auto minSpectraToLoad = static_cast<uint32_t>(m_loader.alg->m_specMin);
  const auto maxSpectraToLoad = static_cast<uint32_t>(m_loader.alg->m_specMax);
  const auto emptyInt = static_cast<uint32_t>(EMPTY_INT());
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
  // check that if a range of spectra were requested that these fit within
  // this bank
  if (minSpectraToLoad != emptyInt && m_min_id < minSpectraToLoad) {
    if (minSpectraToLoad > m_max_id) { // the minimum spectra to load is more
                                       // than the max of this bank
      return;
    }
    // the min spectra to load is higher than the min for this bank
    m_min_id = minSpectraToLoad;
  }
  if (maxSpectraToLoad != emptyInt && m_max_id > maxSpectraToLoad) {
    if (maxSpectraToLoad < m_min_id) {
      // the maximum spectra to load is less than the minimum of this bank
      return;
    }
    // the max spectra to load is lower than the max for this bank
    m_max_id = maxSpectraToLoad;
  }
  if (m_min_id > m_max_id) {
    // the min is now larger than the max, this means the entire block of
    // spectra to load is outside this bank
    return;
  }

  // schedule the job to generate the event lists
  auto mid_id = m_max_id;
445
  if (m_loader.splitProcessing && m_max_id > (m_min_id + (bank_size / 4)))
446
447
448
449
450
    // only split if told to and the section to load is at least 1/4 the size
    // of the whole bank
    mid_id = (m_max_id + m_min_id) / 2;

  // No error? Launch a new task to process that data.
451
452
  auto numEvents = static_cast<size_t>(m_loadSize[0]);
  auto startAt = static_cast<size_t>(m_loadStart[0]);
453

454
  // convert things to shared_arrays to share between tasks
455
  std::shared_ptr<std::vector<uint32_t>> event_id_shrd(event_id.release());
Samuel Jones's avatar
Samuel Jones committed
456
  std::shared_ptr<std::vector<float>> event_time_of_flight_shrd(event_time_of_flight.release());
457
  std::shared_ptr<std::vector<float>> event_weight_shrd(event_weight.release());
Samuel Jones's avatar
Samuel Jones committed
458
  auto event_index_shrd = std::make_shared<std::vector<uint64_t>>(std::move(event_index));
459

460
  std::shared_ptr<Task> newTask1 = std::make_shared<ProcessBankData>(
Samuel Jones's avatar
Samuel Jones committed
461
462
      m_loader, entry_name, prog, event_id_shrd, event_time_of_flight_shrd, numEvents, startAt, event_index_shrd,
      thisBankPulseTimes, m_have_weight, event_weight_shrd, m_min_id, mid_id);
463
464
  scheduler.push(newTask1);
  if (m_loader.splitProcessing && (mid_id < m_max_id)) {
465
    std::shared_ptr<Task> newTask2 = std::make_shared<ProcessBankData>(
Samuel Jones's avatar
Samuel Jones committed
466
467
        m_loader, entry_name, prog, event_id_shrd, event_time_of_flight_shrd, numEvents, startAt, event_index_shrd,
        thisBankPulseTimes, m_have_weight, event_weight_shrd, (mid_id + 1), m_max_id);
468
    scheduler.push(newTask2);
469
470
471
472
  }
}

/**
LamarMoore's avatar
LamarMoore committed
473
474
475
476
477
 * Interpret the value describing the number of events. If the number is
 * positive return it unchanged.
 * If the value is negative (can happen at ISIS) add 2^32 to it.
 * @param size :: The size of events value.
 */
478
479
480
481
482
483
484
485
int64_t LoadBankFromDiskTask::recalculateDataSize(const int64_t &size) {
  if (size < 0) {
    const int64_t shift = int64_t(1) << 32;
    return shift + size;
  }
  return size;
}

486
} // namespace Mantid::DataHandling