BinaryFile.h 8.61 KB
Newer Older
1
2
3
4
5
6
#ifndef BINARYFILE_H_
#define BINARYFILE_H_

#include <fstream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include "MantidKernel/DllConfig.h"

#include <Poco/File.h>
#include <Poco/Path.h>
10

11
12
namespace Mantid {
namespace Kernel {
13
14

/// Default number of items to read in from any of the files.
15
16
17
18
19
20
21
static const size_t DEFAULT_BLOCK_SIZE = 100 * 1000; // one hundred thousand

/// Upper bound on the size of a block read from a file (memory limitations)
static const size_t MAX_BLOCK_SIZE = 100 * 1000 * 1000; // one hundred million

/// Lower bound on the size of a block (reading too little at once is
/// inefficient)
static const size_t MIN_BLOCK_SIZE = 1000; // one thousand
22
23
24
25

/**
 * The BinaryFile template is a helper function for loading simple binary files.
 *  - The file format must be a simple sequence of objects of type T.
26
 *  - To load, first create an object of type BinaryFile<T>
27
28
 *  - The file provided when opening is checked so that its size is an even
 *multiple
29
30
 *    of sizeof(T); an error is thrown otherwise.
 *
31
32
 * NOTE: Data saving and loading is little-endian (on Win, Linux, and Intel
 *Mac).
33
34
 *       Converting from a byte buffer loaded from disk to
 *       the T type is done with a reinterpret_cast<T>.
35
36
 *
 */
37
template <typename T> class DLLExport BinaryFile {
38
39
40
public:
  //------------------------------------------------------------------------------------
  /// Empty constructor
41
  BinaryFile() : handle(NULL), num_elements(0), offset(0) {}
42
43
44

  //------------------------------------------------------------------------------------
  /// Constructor - open a file
45
  BinaryFile(std::string filename) { this->open(filename); }
46

47
  /// Destructor, close the file if needed
48
  ~BinaryFile() { this->close(); }
49

50
51
  //------------------------------------------------------------------------------------
  /** Open a file and keep a handle to the file
52
   * @param filename :: full path to open
53
54
   * @throw runtime_error if the file size is not an even multiple of the type
   * size
55
   * @throw invalid_argument if the file does not exist
56
   * */
57
  void open(const std::string &filename) {
58
    this->handle = NULL;
59
    if (!Poco::File(filename).exists()) {
60
      std::stringstream msg;
61
62
63
      msg << "BinaryFile::open: File " << filename << " was not found.";
      throw std::invalid_argument("File does not exist.");
    }
64
    // Open the file
65
    this->handle = new std::ifstream(filename.c_str(), std::ios::binary);
66
    // Count the # of elements.
67
    this->num_elements = this->getFileSize();
68
    // Make sure we are starting at 0
69
70
71
72
73
74
    this->offset = 0;
  }

  //------------------------------------------------------------------------------------
  /** Close the file
   * */
75
  void close() {
76
77
    delete handle;
    handle = NULL;
78
79
80
  }

  //-----------------------------------------------------------------------------
81
  /// Returns the # of elements in the file (cached result of getFileSize)
82
  size_t getNumElements() const { return this->num_elements; }
83

84
  /// Returns the current offset into the file.
85
  size_t getOffset() const { return this->offset; }
86
87
88

  //-----------------------------------------------------------------------------
  /** Get a buffer size for loading blocks of data.
89
90
   *  @param num_items
   *  @return the buffer size
91
   */
92
  size_t getBufferSize(const size_t num_items) const {
93
94
95
96
97
98
99
100
101
    if (num_items < DEFAULT_BLOCK_SIZE)
      return num_items;
    else
      return DEFAULT_BLOCK_SIZE;
  }

  //-----------------------------------------------------------------------------
  /**
   * Loads the entire contents of the file into a pointer to a std::vector.
102
   * The file is closed once done.
103
   * @return file contents in a vector
104
   */
105
  std::vector<T> *loadAll() {
106
    if (!handle) {
107
      throw std::runtime_error("BinaryFile: file is not open.");
108
109
    }

110
111
    // Initialize the pointer
    std::vector<T> *data = new std::vector<T>;
112

113
    // A buffer to load from
114
    size_t buffer_size = getBufferSize(num_elements);
115
    T *buffer = new T[buffer_size];
116

117
    // Make sure we are at the beginning
118
119
120
121
    offset = 0;
    handle->seekg(0, std::ios::beg);

    size_t loaded_size;
122
123
    while (offset < num_elements) {
      // Load that block of data
124
125
126
127
128
      loaded_size = loadBlock(buffer, buffer_size);
      // Insert into the data
      data->insert(data->end(), buffer, (buffer + loaded_size));
    }

129
    // Close the file, since we are done.
130
    this->close();
131
132
    // Free memory
    delete[] buffer;
133

134
    // Here's your vector!
135
136
137
    return data;
  }

138
139
140
141
  //-----------------------------------------------------------------------------
  /**
   * Loads the entire contents of the file into a std::vector.
   * The file is closed once done.
142
   * @param data :: The contents to load into the file
143
   * @returns :: vector with contents of the file
144
   */
145
  std::vector<T> loadAllIntoVector() {
146
    if (!handle) {
147
      throw std::runtime_error("BinaryFile: file is not open.");
148
    }
149
    std::vector<T> data;
150

151
    // A buffer to load from
152
    size_t buffer_size = getBufferSize(num_elements);
153
    T *buffer = new T[buffer_size];
154

155
    // Make sure we are at the beginning
156
157
158
159
    offset = 0;
    handle->seekg(0, std::ios::beg);

    size_t loaded_size;
160
161
    while (offset < num_elements) {
      // Load that block of data
162
163
164
165
166
      loaded_size = loadBlock(buffer, buffer_size);
      // Insert into the data
      data.insert(data.end(), buffer, (buffer + loaded_size));
    }

167
    // Close the file, since we are done.
168
    this->close();
169
170
    // Free memory
    delete[] buffer;
171
172

    return data;
173
174
  }

175
176
  //-----------------------------------------------------------------------------
  /**
177
178
   * Loads a single block from file and returns a pointer to a vector containing
   *it.
179
180
   *  This can be called repeatedly to load an entire file.
   *
181
182
   * @param block_size: how many elements to load in the block. If there are not
   *enough elements,
183
184
   *  the vector returned is smaller than block_size
   * @param buffer: array of block_size[] of T; must have been allocated before.
185
   * @return loaded_size, actually how many elements were loaded.
186
   */
187
  size_t loadBlock(T *buffer, size_t block_size) {
188
    if (!handle) {
189
      throw std::runtime_error("BinaryFile: file is not open.");
190
191
    }

192
    size_t loaded_size;
193
    // Limit how much is loaded
194
195
196
    loaded_size = block_size;
    if (offset + loaded_size > num_elements)
      loaded_size = num_elements - offset;
197
    // Read it right into the buffer
198
199
200
201
202
    handle->read(reinterpret_cast<char *>(buffer), loaded_size * obj_size);
    offset += loaded_size;
    return loaded_size;
  }

203
  //-----------------------------------------------------------------------------
204
205
  /** Loads a single block from file and returns a pointer to a vector
   *containing it.
206
207
208
   *  This can be called repeatedly to load an entire file.
   *
   * @param buffer: array of block_size[] of T; must have been allocated before.
209
210
211
212
   * @param newOffset: offset (in # of elements) of where to start loading in
   *the file.
   * @param block_size: how many elements to load in the block. If there are not
   *enough elements,
213
214
215
   *  the vector returned is smaller than block_size
   * @return loaded_size, actually how many elements were loaded.
   */
216
  size_t loadBlockAt(T *buffer, size_t newOffset, size_t block_size) {
217
218
219
220
221
    if (!handle) {
      throw std::runtime_error("BinaryFile: file is not open.");
    }
    // Change our offset to the new spot.
    offset = newOffset;
222
    handle->seekg(sizeof(T) * offset, std::ios::beg);
223
224
225
    return loadBlock(buffer, block_size);
  }

226
private:
227
228
229
230
231
  /** Get the size of a file as a multiple of a particular data type
   *  @return the size of the file normalized to the data type
   *  @throw runtime_error if the file size is not compatible
   *  @throw runtime_error if the handle is not open.
   */
232
  size_t getFileSize() {
233
234
235
    this->obj_size = sizeof(T);

    if (!handle) {
236
237
      throw std::runtime_error("BinaryFile::getFileSize: Cannot find the size "
                               "of a file from a null handle");
238
239
    }

240
241
    // get the size of the file in bytes and reset the handle back to the
    // beginning
242
243
244
245
246
247
248
    handle->seekg(0, std::ios::end);
    size_t filesize = static_cast<size_t>(handle->tellg());
    handle->seekg(0, std::ios::beg);

    // check the file is a compatible size
    if (filesize % obj_size != 0) {
      std::stringstream msg;
249
250
      msg << "BinaryFile::getFileSize: File size is not compatible with data "
             "size ";
251
252
253
254
255
256
257
258
      msg << filesize << "%" << obj_size << "=";
      msg << filesize % obj_size;
      throw std::runtime_error(msg.str());
    }

    return filesize / sizeof(T);
  }

259
  /// File stream
260
  std::ifstream *handle;
261
  /// Size of each object.
262
263
264
265
266
267
268
  size_t obj_size;
  /// Number of elements of size T in the file
  size_t num_elements;
  /// Offset into the file, if loading in blocks.
  size_t offset;
};

269
} // Namespace Kernel
270

271
} // Namespace Mantid
272
273

#endif /* BINARYFILE_H_ */