Expression.cpp 15.6 KB
Newer Older
1
2
3
4
5
6
// Mantid Repository : https://github.com/mantidproject/mantid
//
// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
//     NScD Oak Ridge National Laboratory, European Spallation Source
//     & Institut Laue - Langevin
// SPDX - License - Identifier: GPL - 3.0 +
7
8
9
10
#include <iostream>
#include <locale>
#include <sstream>

11
12
#include "MantidAPI/Expression.h"

13
#include <MantidKernel/StringTokenizer.h>
14

15
16
namespace Mantid {
namespace API {
17

18
using tokenizer = Mantid::Kernel::StringTokenizer;
19

LamarMoore's avatar
LamarMoore committed
20
21
const std::string DEFAULT_OPS_STR[] = {
    ";", ",", "=", "== != > < <= >=", "&& || ^^", "+ -", "* /", "^"};
22

23
24
const std::string EMPTY_EXPRESSION_NAME = "EMPTY";

25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
namespace {
/// Make the full text of the error message
/// @param msg :: The text of the error message.
/// @param expr :: An expression string that caused the error.
/// @param i :: An index of a symbol in expr that may help identify the location
///             of the error.
std::string makeErrorMessage(const std::string &msg, const std::string &expr,
                             size_t i) {
  const size_t MAX_LEFT_SIZE = 10;
  const size_t MAX_RIGHT_SIZE = 10;
  std::ostringstream res;
  res << msg << " at\n\n";
  size_t j = i;
  size_t skip = 0;
  size_t n = expr.size();
  std::string leftEllipsis = "";
  if (i > MAX_LEFT_SIZE) {
    skip = i - MAX_LEFT_SIZE;
    leftEllipsis = "...";
    j = MAX_LEFT_SIZE + leftEllipsis.size();
    n -= skip;
  }
  std::string rightEllipsis = "";
  if (n - j > MAX_RIGHT_SIZE) {
    n = i + MAX_RIGHT_SIZE;
    rightEllipsis = "...";
  }
  // Write a substring of expr around the error indicator at symbol #i.
  res << leftEllipsis << expr.substr(skip, n) << rightEllipsis << '\n';
  res << std::string(j, ' ') << '^' << '\n';
  return res.str();
}

} // namespace

/// Constructor
/// @param msg :: The text of the error message.
/// @param expr :: An expression string that caused the error.
/// @param i :: An index of a symbol in expr that may help identify the location
///             of the error.
Expression::ParsingError::ParsingError(const std::string &msg,
                                       const std::string &expr, size_t i)
    : std::runtime_error(makeErrorMessage(msg, expr, i)) {}

/// Constructor
/// @param msg :: The text of the error message.
Expression::ParsingError::ParsingError(const std::string &msg)
    : std::runtime_error(msg) {}

74
Expression::Expression() {
75
76

  m_operators.reset(new Operators());
77
78
79
  // Define binary operators. Put them in the reverse precedence order (from
  // lower to higher prec.)
  std::vector<std::string> ops(DEFAULT_OPS_STR, DEFAULT_OPS_STR + 8);
80
81
82
83

  add_operators(ops);

  // Define unary operators
84
  std::unordered_set<std::string> unary;
85
86
87
88
89
90
91
  unary.insert("+");
  unary.insert("-");

  add_unary(unary);
}

/// contructor
92
Expression::Expression(const std::vector<std::string> &ops) {
93
94
95
96
97
  m_operators.reset(new Operators());
  add_operators(ops);
}

/// contructor
98
Expression::Expression(const std::vector<std::string> &binary,
99
                       const std::unordered_set<std::string> &unary) {
100
101
102
103
104
  m_operators.reset(new Operators());
  add_operators(binary);
  add_unary(unary);
}

105
106
107
Expression::Expression(const Expression &expr)
    : // m_tokens(expr.m_tokens),
      // m_expr(expr.m_expr),
LamarMoore's avatar
LamarMoore committed
108
109
      m_funct(expr.m_funct), m_op(expr.m_op), m_terms(expr.m_terms),
      m_operators(expr.m_operators) {}
110
111
Expression::Expression(const Expression *pexpr)
    : m_operators(pexpr->m_operators) {}
112
113

/// Assignment operator
114
Expression &Expression::operator=(const Expression &expr) {
115
116
117
118
  m_operators = expr.m_operators;
  m_funct = expr.m_funct;
  m_op = expr.m_op;
  m_terms = expr.m_terms;
119
120
  // m_expr = expr.m_expr;
  // m_tokens = expr.m_tokens;
121
122
123
  return *this;
}

124
void Expression::add_operators(const std::vector<std::string> &ops) {
125
126
  m_operators->binary = ops;
  // Fill in the precedence table (m_op_precedence)
127
  for (size_t i = 0; i < m_operators->binary.size(); i++) {
128
    char j = 0;
129
130
    tokenizer tkz(m_operators->binary[i], " ",
                  tokenizer::TOK_IGNORE_EMPTY | tokenizer::TOK_TRIM);
Hahn, Steven's avatar
Hahn, Steven committed
131
132
133
    for (const auto &index : tkz) {
      m_operators->precedence[index] = i + 1;
      m_operators->op_number[index] = j++;
134
135
136
    }
  }

137
138
  for (auto str : ops) {
    for (char c : str) {
139
140
      if (c == ' ')
        continue;
141
142
143
144
145
      m_operators->symbols.insert(c);
    }
  }
}

146
void Expression::add_unary(const std::unordered_set<std::string> &ops) {
147
  m_operators->unary = ops;
148
149
  for (const auto &op : ops) {
    m_operators->symbols.insert(op.cbegin(), op.cend());
150
151
152
  }
}

153
154
155
156
157
size_t Expression::op_prec(const std::string &op) const {
  std::map<std::string, size_t>::const_iterator i =
      m_operators->precedence.find(op);
  if (i == m_operators->precedence.end())
    return 0;
158
159
160
  return i->second;
}

161
bool Expression::is_unary(const std::string &op) const {
162
  return m_operators->unary.find(op) != m_operators->unary.end();
163
164
}

165
bool Expression::is_op_symbol(const char c) const {
166
  return m_operators->symbols.find(c) != m_operators->symbols.end();
167
168
}

169
void Expression::trim(std::string &str) {
170
171
  size_t i = str.find_first_not_of(" \t\n\r");
  size_t j = str.find_last_not_of(" \t\n\r");
172
  if (i == std::string::npos || j == std::string::npos || j < i) {
173
    str = "";
174
175
  } else {
    str = str.substr(i, j - i + 1);
176
177
178
  }
}

179
void Expression::parse(const std::string &str) {
180
181
182
  m_expr = str;
  trim(m_expr);

183
  if (m_expr.size() > 1 && m_expr.front() == '(' && m_expr.back() == ')') {
184
185
186
    if (m_expr.find('(', 1) == std::string::npos) {
      m_expr.erase(0, 1);
      m_expr.erase(m_expr.size() - 1, 1);
187
188
189
190
191
192
      trim(m_expr);
    }
  }

  tokenize();

193
  if (m_tokens.empty()) {
194
195
196
197
198
    setFunct(m_expr);
    return;
  }

  std::string op = GetOp(0);
199
  // size_t prec = m_operators->precedence[op];
200
  size_t prec = op_prec(op);
201
202
  tokenizer tkz(m_operators->binary[prec - 1], " ",
                tokenizer::TOK_IGNORE_EMPTY | tokenizer::TOK_TRIM);
203
204
205

  setFunct(*tkz.begin());

206
  for (size_t i = 0; i <= m_tokens.size(); i++) {
207
    m_terms.push_back(Expression(this));
208
    Expression &t = m_terms.back();
209
    if (i)
210
      t.m_op = GetOp(i - 1);
211
212
213
214
215
216
    t.parse(GetToken(i));
  }
  m_expr = "";
  m_tokens.clear();
}

217
void Expression::tokenize() {
218
219
  m_tokens.clear();

Peterson, Peter's avatar
Peterson, Peter committed
220
  size_t min_prec = 1000;
221
222
223
224
225
226
227
  size_t is = 0;
  size_t is1 = 0;
  unsigned int lvl = 0;
  size_t last = m_expr.size() - 1;
  bool inString = false;
  int skip = 0;
  bool canBeBinary = false;
228
229
  bool isNumber =
      false; // if parser is inside a number (important case is 123.45e+67)
230
231
232
  bool canDotBeAdded = false;
  bool canEBeAdded = false;
  bool canPlusBeAdded = false;
233
  Tokens tokens;
234
  for (size_t i = 0; i < m_expr.size(); i++) {
235
    char c = m_expr[i];
236

237
238
239
240
    if (!inString && skip == 0) {
      if (isNumber) {
        if (c == '.') {
          if (canDotBeAdded) {
241
            canDotBeAdded = false;
242
          } else {
243
244
            isNumber = false;
          }
245
246
        } else if (c == 'e' || c == 'E') {
          if (canEBeAdded) {
247
248
249
            canEBeAdded = false;
            canDotBeAdded = false;
            canPlusBeAdded = true;
250
          } else {
251
252
            isNumber = false;
          }
253
254
        } else if (c == '+' || c == '-') {
          if (canPlusBeAdded) {
255
256
257
            canPlusBeAdded = false;
            canEBeAdded = false;
            canDotBeAdded = false;
258
          } else {
259
260
            isNumber = false;
          }
261
        } else if (!isdigit(c)) {
262
263
          isNumber = false;
        }
264
      } else if (isdigit(c)) {
265
266
267
268
269
        isNumber = true;
        canDotBeAdded = true;
        canEBeAdded = true;
        canPlusBeAdded = false;
      }
270
      if (lvl == 0 && !isNumber && is_op_symbol(c)) // insert new token
271
      {
272
        if (i == last) {
273
274
275
276
          if (c == ',' || c == ';') {
            m_expr.resize(last);
            break;
          } else {
277
278
            throw ParsingError("A binary operator isn't followed by a value",
                               m_expr, i);
279
          }
280
281
        }

282
        if (is_op_symbol(m_expr[i + 1])) {
283
          is1 = i + 2;
284
        } else {
285
286
287
          is1 = i + 1;
        }

288
        if (is1 > last) {
289
          throw ParsingError("Syntax error", m_expr, last);
290
291
        }

292
293
        std::string op = m_expr.substr(i, is1 - i);
        size_t prec = canBeBinary ? m_operators->precedence[op] : 0;
294
295
296
297
        if (!prec) // operator does not exist
        {
          bool error = true;
          // check if it's a binary and a unary operators together
298
299
          if (op.size() == 2) {
            if (is_unary(op)) {
300
301
302
303
              is1 -= 2;
              skip = 2;
              prec = min_prec + 1; // do not add token
              error = false;
304
            } else {
305
              is1 -= 1;
306
              std::string uop = op.substr(1, 1);
307
              op = op[0];
308
309
310
              if (is_op_symbol(m_expr[is1 + 1])) {
                uop += m_expr[is1 + 1];
                if (is1 + 2 > last) {
311
                  throw ParsingError("Syntax error", m_expr, is1 + 1);
312
313
                }
              }
314
              if (is_unary(uop)) {
315
                prec = m_operators->precedence[op];
316
317
                if (prec) { // we don't want to create a new token with unary
                            // operator. it is processed in SetFunct()
318
319
320
321
322
                  skip = 1;
                  error = false;
                }
              }
            }
323
324
325
          } // op.size == 2
          else if (op.size() == 1) {
            // skip = 1;
326
327
328
            prec = min_prec + 1; // do not add token
            error = false;
          }
329
          if (error) {
330
            throw ParsingError("Unrecognized operator", m_expr, i);
331
332
333
          }
        }

334
        if (prec <= min_prec) {
Peterson, Peter's avatar
Peterson, Peter committed
335
          if (prec < min_prec)
336
337
            min_prec = prec;
          Token tok(is, i - 1, is1, prec);
338
339
340
341
342
343
344
345
          tokens.push_back(tok);
          is = is1;
        }

        i = is1 - 1;

        canBeBinary = false;

346
347
      } // insert new token
      else if (c != ' ' && c != '\t' && c != '\r' && c != '\n') {
348
349
350
        canBeBinary = true;
      }

351
352
353
354
355
356
      if (c == '(')
        lvl++;
      if (c == ')') {
        if (lvl)
          lvl--;
        else {
357
          throw ParsingError("Unmatched bracket", m_expr, 0);
358
359
360
        }
      }
    } // !inString || skip
361
362
    else if (skip > 0) {
      skip--;
363
364
    }

365
    if (c == '"') {
366
      inString = !inString;
367
368
369
370
    }

  } // for i

371
  if (!tokens.empty()) {
372
    // remove operators of higher prec
373
    m_tokens.emplace_back(tokens[0]);
374
375
376
377
378
    for (size_t i = 0; i < tokens.size(); i++) {
      Token &tok = tokens[i];
      std::string op = m_expr.substr(tok.ie + 1, tok.is1 - tok.ie - 1); //?
      if (m_operators->precedence[op] == min_prec) {
        Token &last_tok = m_tokens.back();
379
        last_tok.ie = tok.ie;
380
381
        last_tok.is1 = tok.is1;
        if (i != tokens.size() - 1)
382
          m_tokens.emplace_back(tokens[i + 1]);
383
384
385
386
387
      }
    }
  }
}

388
std::string Expression::GetToken(size_t i) {
389
  if (m_tokens.empty())
390
    return m_expr;
391

392
393
394
  if (i < m_tokens.size()) {
    Token &tok = m_tokens[i];
    return m_expr.substr(tok.is, tok.ie - tok.is + 1);
395
396
  }

397
398
  if (i == m_tokens.size()) {
    Token &tok = m_tokens[i - 1];
399
400
401
402
403
404
    return m_expr.substr(tok.is1);
  }

  return "";
}

405
std::string Expression::GetOp(size_t i) {
406
  if (m_tokens.empty() || i >= m_tokens.size())
407
    return "";
408

409
410
  Token &tok = m_tokens[i];
  return m_expr.substr(tok.ie + 1, tok.is1 - tok.ie - 1);
411
412
}

413
void Expression::logPrint(const std::string &pads) const {
414
  std::string myPads = pads + "   ";
415
  if (!m_terms.empty()) {
Hahn, Steven's avatar
Hahn, Steven committed
416
    std::cerr << myPads << m_op << '[' << m_funct << ']' << "(\n";
Hahn, Steven's avatar
Hahn, Steven committed
417
418
    for (const auto &term : m_terms)
      term.logPrint(myPads);
Hahn, Steven's avatar
Hahn, Steven committed
419
    std::cerr << myPads << ")\n";
420
421
  } else
    std::cerr << myPads << m_op << m_funct << '\n';
422
423
}

424
425
void Expression::setFunct(const std::string &name) {
  if (!op_prec(name)) {
426
    std::string op;
427
428
429
    if (name.size() > 1 && is_op_symbol(name[0])) {
      op = name.substr(0, 1);
      if (name.size() > 2 && is_op_symbol(name[1])) {
430
431
432
        op += name[1];
      }
    }
433
    if (!op.empty() && is_unary(op)) {
434
435
436
437
438
439
440
441
442
443
      m_funct = op;
      Expression tmp(this);
      tmp.parse(name.substr(op.size()));
      m_terms.push_back(tmp);
      return;
    }
  }

  m_funct = name;
  trim(m_funct);
444

445
  if (m_funct.empty()) {
446
447
    m_funct = EMPTY_EXPRESSION_NAME;
    return;
448
449
450
451
452
453
  }

  // Check if the function has arguments
  std::string::size_type i = std::string::npos;

  bool inQuotes = false;
454
455
  for (std::string::const_iterator c = name.begin(); c != name.end(); ++c) {
    if (*c == '"') {
456
      inQuotes = !inQuotes;
457
458
459
      continue;
    }

460
    if (!inQuotes && *c == '(') {
461
462
463
464
465
      i = c - name.begin();
      break;
    }
  }

466
  if (i != std::string::npos) {
467
    std::string::size_type j = name.find_last_of(')');
468
    if (j == std::string::npos || j < i) {
469
      throw ParsingError("Unmatched bracket", name, i);
470
471
    }

472
    if (j > i + 1) // nonzero argument list
473
    {
474
      std::string args = name.substr(i + 1, j - i - 1); //?
475
      trim(args);
476
      std::string f = name.substr(0, i);
477
478
      Expression tmp(this);
      tmp.parse(args);
479
480
      if (tmp.name() != EMPTY_EXPRESSION_NAME &&
          (!tmp.isFunct() || tmp.name() != ",")) {
481
        m_terms.push_back(tmp);
482
      } else {
483
484
485
        if (f.empty() && tmp.name() == ",") {
          f = ",";
        }
486
487
488
489
490
        std::string my_op = m_op;
        *this = tmp;
        m_op = my_op;
      }
      m_funct = f;
491
492
493
      if (m_funct.empty() && m_terms.empty()) {
        m_funct = EMPTY_EXPRESSION_NAME;
      }
494
495
496
497
    }
  }
}

498
std::string Expression::str() const {
499
500
501
  bool brackets = false;
  std::ostringstream res;
  size_t prec = op_prec(m_funct);
502
  if (size() == 1 && is_unary(m_funct)) { // unary operator
503
    res << m_funct;
504
    if (op_prec(m_terms[0].m_funct) > 0) {
505
506
      brackets = true;
    }
507
  } else if (!prec) { // function with a name
508
509
    res << m_funct;
    brackets = true;
510
511
  } else if (m_op == "-" && m_funct == "+") {
    brackets = true;
512
513
  } else if (m_op == "/" && m_funct == "*") {
    brackets = true;
514
515
  }

516
  if (!m_terms.empty()) {
517
518
    if (brackets)
      res << '(';
Hahn, Steven's avatar
Hahn, Steven committed
519
520
521
    for (const auto &term : m_terms) {
      res << term.operator_name();
      size_t prec1 = op_prec(term.m_funct);
522
      bool isItUnary = false;
Hahn, Steven's avatar
Hahn, Steven committed
523
      if (term.size() == 1 && is_unary(term.m_funct)) {
524
525
526
        prec1 = 0; // unary operator
        isItUnary = true;
      }
527
528
529
530
531
      bool bk = prec > 0 && prec1 > 0 && prec > prec1;
      if (bk)
        res << '(';
      if (isItUnary)
        res << ' ';
Hahn, Steven's avatar
Hahn, Steven committed
532
      res << term.str();
533
534
      if (bk)
        res << ')';
535
    }
536
537
    if (brackets)
      res << ')';
538
539
540
541
  }
  return res.str();
}

542
543
544
const Expression &Expression::bracketsRemoved() const {
  const Expression *e = this;
  while (e->name().empty() && e->size() == 1) {
545
546
547
548
549
    e = &e->m_terms[0];
  }
  return *e;
}

550
551
552
/**
 * Return a list of all variable names in this expression
 */
553
554
std::unordered_set<std::string> Expression::getVariables() const {
  std::unordered_set<std::string> out;
555
  if (!isFunct()) {
556
    std::string s = name();
557
    if (!s.empty() && !isdigit(s[0])) {
558
559
      out.insert(s);
    }
560
  } else {
561
562
    for (const auto &e : *this) {
      if (e.isFunct()) {
563
        std::unordered_set<std::string> tout = e.getVariables();
564
565
        out.insert(tout.begin(), tout.end());
      } else {
566
        std::string s = e.name();
567
        if (!s.empty() && !isdigit(s[0])) {
568
569
          out.insert(s);
        }
570
571
572
573
574
575
      }
    }
  }
  return out;
}

576
void Expression::rename(const std::string &newName) { m_funct = newName; }
577

578
579
580
void Expression::renameAll(const std::string &oldName,
                           const std::string &newName) {
  if (!isFunct() && name() == oldName) {
581
    rename(newName);
582
  } else {
583
584
    for (auto &term : m_terms) {
      term.renameAll(oldName, newName);
585
586
587
588
    }
  }
}

589
590
591
void Expression::toList(const std::string &sep) {
  if (name() == sep)
    return;
592
593
594
595
596
597
  Expression term(*this);
  m_terms.resize(1);
  m_terms[0] = term;
  setFunct(sep);
}

LamarMoore's avatar
LamarMoore committed
598
599
} // namespace API
} // namespace Mantid