Expression.cpp 13.9 KB
Newer Older
1
2
3
4
#include <iostream>
#include <locale>
#include <sstream>

5
6
7
8
#include "MantidAPI/Expression.h"

#include <Poco/StringTokenizer.h>

9
10
namespace Mantid {
namespace API {
11
12
13

typedef Poco::StringTokenizer tokenizer;

14
15
const std::string DEFAULT_OPS_STR[] = {";", ",", "=", "== != > < <= >=",
                                       "&& || ^^", "+ -", "* /", "^"};
16

17
18
const std::string EMPTY_EXPRESSION_NAME = "EMPTY";

19
Expression::Expression() {
20
21

  m_operators.reset(new Operators());
22
23
24
  // Define binary operators. Put them in the reverse precedence order (from
  // lower to higher prec.)
  std::vector<std::string> ops(DEFAULT_OPS_STR, DEFAULT_OPS_STR + 8);
25
26
27
28
29
30
31
32
33
34
35
36

  add_operators(ops);

  // Define unary operators
  std::set<std::string> unary;
  unary.insert("+");
  unary.insert("-");

  add_unary(unary);
}

/// contructor
37
Expression::Expression(const std::vector<std::string> &ops) {
38
39
40
41
42
  m_operators.reset(new Operators());
  add_operators(ops);
}

/// contructor
43
44
Expression::Expression(const std::vector<std::string> &binary,
                       const std::set<std::string> &unary) {
45
46
47
48
49
  m_operators.reset(new Operators());
  add_operators(binary);
  add_unary(unary);
}

50
51
52
53
54
55
56
Expression::Expression(const Expression &expr)
    : // m_tokens(expr.m_tokens),
      // m_expr(expr.m_expr),
      m_funct(expr.m_funct),
      m_op(expr.m_op), m_terms(expr.m_terms), m_operators(expr.m_operators) {}
Expression::Expression(const Expression *pexpr)
    : m_operators(pexpr->m_operators) {}
57
58

/// Assignment operator
59
Expression &Expression::operator=(const Expression &expr) {
60
61
62
63
  m_operators = expr.m_operators;
  m_funct = expr.m_funct;
  m_op = expr.m_op;
  m_terms = expr.m_terms;
64
65
  // m_expr = expr.m_expr;
  // m_tokens = expr.m_tokens;
66
67
68
  return *this;
}

69
void Expression::add_operators(const std::vector<std::string> &ops) {
70
71
  m_operators->binary = ops;
  // Fill in the precedence table (m_op_precedence)
72
  for (size_t i = 0; i < m_operators->binary.size(); i++) {
73
    char j = 0;
74
75
76
    tokenizer tkz(m_operators->binary[i], " ",
                  tokenizer::TOK_IGNORE_EMPTY | tokenizer::TOK_TRIM);
    for (tokenizer::Iterator it = tkz.begin(); it != tkz.end(); it++) {
77
78
79
80
81
      m_operators->precedence[*it] = i + 1;
      m_operators->op_number[*it] = j++;
    }
  }

82
  for (size_t i = 0; i < ops.size(); i++) {
83
    std::string str = ops[i];
84
    for (size_t j = 0; j < str.size(); j++) {
85
      char c = str[j];
86
87
      if (c == ' ')
        continue;
88
89
90
91
92
      m_operators->symbols.insert(c);
    }
  }
}

93
void Expression::add_unary(const std::set<std::string> &ops) {
94
  m_operators->unary = ops;
95
96
97
  for (std::set<std::string>::const_iterator it = ops.begin(); it != ops.end();
       ++it) {
    for (std::string::const_iterator c = it->begin(); c != it->end(); ++c) {
98
99
100
101
102
      m_operators->symbols.insert(*c);
    }
  }
}

103
104
105
106
107
size_t Expression::op_prec(const std::string &op) const {
  std::map<std::string, size_t>::const_iterator i =
      m_operators->precedence.find(op);
  if (i == m_operators->precedence.end())
    return 0;
108
109
110
  return i->second;
}

111
bool Expression::is_unary(const std::string &op) const {
112
  return m_operators->unary.find(op) != m_operators->unary.end();
113
114
}

115
bool Expression::is_op_symbol(const char c) const {
116
  return m_operators->symbols.find(c) != m_operators->symbols.end();
117
118
}

119
void Expression::trim(std::string &str) {
120
121
  size_t i = str.find_first_not_of(" \t\n\r");
  size_t j = str.find_last_not_of(" \t\n\r");
122
  if (i == std::string::npos || j == std::string::npos || j < i) {
123
    str = "";
124
125
  } else {
    str = str.substr(i, j - i + 1);
126
127
128
  }
}

129
void Expression::parse(const std::string &str) {
130
131
132
  m_expr = str;
  trim(m_expr);

133
134
135
136
137
  if (m_expr.size() > 1 && m_expr[0] == '(' &&
      m_expr[m_expr.size() - 1] == ')') {
    if (m_expr.find('(', 1) == std::string::npos) {
      m_expr.erase(0, 1);
      m_expr.erase(m_expr.size() - 1, 1);
138
139
140
141
142
143
      trim(m_expr);
    }
  }

  tokenize();

144
  if (m_tokens.size() == 0) {
145
146
147
148
149
    setFunct(m_expr);
    return;
  }

  std::string op = GetOp(0);
150
  // size_t prec = m_operators->precedence[op];
151
  size_t prec = op_prec(op);
152
153
  tokenizer tkz(m_operators->binary[prec - 1], " ",
                tokenizer::TOK_IGNORE_EMPTY | tokenizer::TOK_TRIM);
154
155
156

  setFunct(*tkz.begin());

157
  for (size_t i = 0; i <= m_tokens.size(); i++) {
158
    m_terms.push_back(Expression(this));
159
    Expression &t = m_terms.back();
160
    if (i)
161
      t.m_op = GetOp(i - 1);
162
163
164
165
166
167
    t.parse(GetToken(i));
  }
  m_expr = "";
  m_tokens.clear();
}

168
void Expression::tokenize() {
169
170
  m_tokens.clear();

Peterson, Peter's avatar
Peterson, Peter committed
171
  size_t min_prec = 1000;
172
173
174
175
176
177
178
  size_t is = 0;
  size_t is1 = 0;
  unsigned int lvl = 0;
  size_t last = m_expr.size() - 1;
  bool inString = false;
  int skip = 0;
  bool canBeBinary = false;
179
180
  bool isNumber =
      false; // if parser is inside a number (important case is 123.45e+67)
181
182
183
  bool canDotBeAdded = false;
  bool canEBeAdded = false;
  bool canPlusBeAdded = false;
184
  Tokens tokens;
185
  for (size_t i = 0; i < m_expr.size(); i++) {
186
    char c = m_expr[i];
187

188
189
190
191
    if (!inString && skip == 0) {
      if (isNumber) {
        if (c == '.') {
          if (canDotBeAdded) {
192
            canDotBeAdded = false;
193
          } else {
194
195
            isNumber = false;
          }
196
197
        } else if (c == 'e' || c == 'E') {
          if (canEBeAdded) {
198
199
200
            canEBeAdded = false;
            canDotBeAdded = false;
            canPlusBeAdded = true;
201
          } else {
202
203
            isNumber = false;
          }
204
205
        } else if (c == '+' || c == '-') {
          if (canPlusBeAdded) {
206
207
208
            canPlusBeAdded = false;
            canEBeAdded = false;
            canDotBeAdded = false;
209
          } else {
210
211
            isNumber = false;
          }
212
        } else if (!isdigit(c)) {
213
214
          isNumber = false;
        }
215
      } else if (isdigit(c)) {
216
217
218
219
220
        isNumber = true;
        canDotBeAdded = true;
        canEBeAdded = true;
        canPlusBeAdded = false;
      }
221
      if (lvl == 0 && !isNumber && is_op_symbol(c)) // insert new token
222
      {
223
        if (i == last) {
224
          break;
225
          // throw std::runtime_error("Expression: syntax error");
226
227
        }

228
        if (is_op_symbol(m_expr[i + 1])) {
229
          is1 = i + 2;
230
        } else {
231
232
233
          is1 = i + 1;
        }

234
        if (is1 > last) {
235
236
237
          throw std::runtime_error("Expression: syntax error");
        }

238
239
        std::string op = m_expr.substr(i, is1 - i);
        size_t prec = canBeBinary ? m_operators->precedence[op] : 0;
240
241
242
243
244
        if (!prec) // operator does not exist
        {
          std::ostringstream mess;
          bool error = true;
          // check if it's a binary and a unary operators together
245
246
          if (op.size() == 2) {
            if (is_unary(op)) {
247
248
249
250
              is1 -= 2;
              skip = 2;
              prec = min_prec + 1; // do not add token
              error = false;
251
            } else {
252
              is1 -= 1;
253
              std::string uop = op.substr(1, 1);
254
              op = op[0];
255
256
257
258
              if (is_op_symbol(m_expr[is1 + 1])) {
                uop += m_expr[is1 + 1];
                if (is1 + 2 > last) {
                  mess << "Expression: syntax error at " << is1 + 1;
259
260
261
                  throw std::runtime_error(mess.str());
                }
              }
262
              if (is_unary(uop)) {
263
                prec = m_operators->precedence[op];
264
265
                if (prec) { // we don't want to create a new token with unary
                            // operator. it is processed in SetFunct()
266
267
268
269
270
                  skip = 1;
                  error = false;
                }
              }
            }
271
272
273
          } // op.size == 2
          else if (op.size() == 1) {
            // skip = 1;
274
275
276
            prec = min_prec + 1; // do not add token
            error = false;
          }
277
          if (error) {
278
279
280
281
282
            mess << "Expression: unrecognized operator " << op;
            throw std::runtime_error(mess.str());
          }
        }

283
        if (prec <= min_prec) {
Peterson, Peter's avatar
Peterson, Peter committed
284
          if (prec < min_prec)
285
286
            min_prec = prec;
          Token tok(is, i - 1, is1, prec);
287
288
289
290
291
292
293
294
          tokens.push_back(tok);
          is = is1;
        }

        i = is1 - 1;

        canBeBinary = false;

295
296
      } // insert new token
      else if (c != ' ' && c != '\t' && c != '\r' && c != '\n') {
297
298
299
        canBeBinary = true;
      }

300
301
302
303
304
305
      if (c == '(')
        lvl++;
      if (c == ')') {
        if (lvl)
          lvl--;
        else {
306
307
308
309
          throw std::runtime_error("Unmatched brackets");
        }
      }
    } // !inString || skip
310
311
    else if (skip > 0) {
      skip--;
312
313
    }

314
315
    if (c == '"') {
      if (!inString) {
316
        inString = true;
317
      } else {
318
319
320
321
322
323
        inString = false;
      }
    }

  } // for i

324
  if (tokens.size()) {
325
326
    // remove operators of higher prec
    m_tokens.push_back(Token(tokens[0]));
327
328
329
330
331
    for (size_t i = 0; i < tokens.size(); i++) {
      Token &tok = tokens[i];
      std::string op = m_expr.substr(tok.ie + 1, tok.is1 - tok.ie - 1); //?
      if (m_operators->precedence[op] == min_prec) {
        Token &last_tok = m_tokens.back();
332
        last_tok.ie = tok.ie;
333
334
335
        last_tok.is1 = tok.is1;
        if (i != tokens.size() - 1)
          m_tokens.push_back(Token(tokens[i + 1]));
336
337
338
339
340
      }
    }
  }
}

341
342
343
std::string Expression::GetToken(size_t i) {
  if (m_tokens.size() == 0)
    return m_expr;
344

345
346
347
  if (i < m_tokens.size()) {
    Token &tok = m_tokens[i];
    return m_expr.substr(tok.is, tok.ie - tok.is + 1);
348
349
  }

350
351
  if (i == m_tokens.size()) {
    Token &tok = m_tokens[i - 1];
352
353
354
355
356
357
    return m_expr.substr(tok.is1);
  }

  return "";
}

358
359
360
std::string Expression::GetOp(size_t i) {
  if (m_tokens.size() == 0 || i >= m_tokens.size())
    return "";
361

362
363
  Token &tok = m_tokens[i];
  return m_expr.substr(tok.ie + 1, tok.is1 - tok.ie - 1);
364
365
}

366
void Expression::logPrint(const std::string &pads) const {
367
  std::string myPads = pads + "   ";
368
369
370
  if (m_terms.size()) {
    std::cerr << myPads << m_op << '[' << m_funct << ']' << "(" << '\n';
    for (size_t i = 0; i < m_terms.size(); i++)
371
      m_terms[i].logPrint(myPads);
372
373
374
    std::cerr << myPads << ")" << '\n';
  } else
    std::cerr << myPads << m_op << m_funct << '\n';
375
376
}

377
378
void Expression::setFunct(const std::string &name) {
  if (!op_prec(name)) {
379
    std::string op = "";
380
381
382
    if (name.size() > 1 && is_op_symbol(name[0])) {
      op = name.substr(0, 1);
      if (name.size() > 2 && is_op_symbol(name[1])) {
383
384
385
        op += name[1];
      }
    }
386
    if (!op.empty() && is_unary(op)) {
387
388
389
390
391
392
393
394
395
396
      m_funct = op;
      Expression tmp(this);
      tmp.parse(name.substr(op.size()));
      m_terms.push_back(tmp);
      return;
    }
  }

  m_funct = name;
  trim(m_funct);
397

398
  if (m_funct.empty()) {
399
400
    m_funct = EMPTY_EXPRESSION_NAME;
    return;
401
402
403
404
405
406
  }

  // Check if the function has arguments
  std::string::size_type i = std::string::npos;

  bool inQuotes = false;
407
408
409
410
  for (std::string::const_iterator c = name.begin(); c != name.end(); ++c) {
    if (*c == '"') {
      if (inQuotes)
        inQuotes = false;
411
412
413
414
415
      else
        inQuotes = true;
      continue;
    }

416
    if (!inQuotes && *c == '(') {
417
418
419
420
421
      i = c - name.begin();
      break;
    }
  }

422
  if (i != std::string::npos) {
423
    std::string::size_type j = name.find_last_of(')');
424
    if (j == std::string::npos || j < i) {
425
426
427
      throw std::runtime_error("Unmatched brackets");
    }

428
    if (j > i + 1) // nonzero argument list
429
    {
430
      std::string args = name.substr(i + 1, j - i - 1); //?
431
      trim(args);
432
      std::string f = name.substr(0, i);
433
434
      Expression tmp(this);
      tmp.parse(args);
435
      if (tmp.name() != EMPTY_EXPRESSION_NAME && (!tmp.isFunct() || tmp.name() != ",")) {
436
        m_terms.push_back(tmp);
437
      } else {
438
439
440
441
442
        std::string my_op = m_op;
        *this = tmp;
        m_op = my_op;
      }
      m_funct = f;
443
444
445
      if (m_funct.empty() && m_terms.empty()) {
        m_funct = EMPTY_EXPRESSION_NAME;
      }
446
447
448
449
    }
  }
}

450
std::string Expression::str() const {
451
452
453
  bool brackets = false;
  std::ostringstream res;
  size_t prec = op_prec(m_funct);
454
  if (size() == 1 && is_unary(m_funct)) { // unary operator
455
    res << m_funct;
456
    if (op_prec(m_terms[0].m_funct) > 0) {
457
458
      brackets = true;
    }
459
  } else if (!prec) { // function with a name
460
461
462
463
    res << m_funct;
    brackets = true;
  }

464
465
466
467
  if (m_terms.size()) {
    if (brackets)
      res << '(';
    for (size_t i = 0; i < m_terms.size(); i++) {
468
      res << m_terms[i].operator_name();
469
      size_t prec1 = op_prec(m_terms[i].m_funct);
470
      bool isItUnary = false;
471
      if (m_terms[i].size() == 1 && is_unary(m_terms[i].m_funct)) {
472
473
474
        prec1 = 0; // unary operator
        isItUnary = true;
      }
475
476
477
478
479
      bool bk = prec > 0 && prec1 > 0 && prec > prec1;
      if (bk)
        res << '(';
      if (isItUnary)
        res << ' ';
480
      res << m_terms[i].str();
481
482
      if (bk)
        res << ')';
483
    }
484
485
    if (brackets)
      res << ')';
486
487
488
489
  }
  return res.str();
}

490
491
492
const Expression &Expression::bracketsRemoved() const {
  const Expression *e = this;
  while (e->name().empty() && e->size() == 1) {
493
494
495
496
497
    e = &e->m_terms[0];
  }
  return *e;
}

498
499
500
/**
 * Return a list of all variable names in this expression
 */
501
std::set<std::string> Expression::getVariables() const {
502
  std::set<std::string> out;
503
  if (!isFunct()) {
504
    std::string s = name();
505
    if (!s.empty() && !isdigit(s[0])) {
506
507
      out.insert(s);
    }
508
509
510
  } else {
    for (iterator e = begin(); e != end(); ++e) {
      if (e->isFunct()) {
511
        std::set<std::string> tout = e->getVariables();
512
513
        out.insert(tout.begin(), tout.end());
      } else {
514
        std::string s = e->name();
515
        if (!s.empty() && !isdigit(s[0])) {
516
517
          out.insert(s);
        }
518
519
520
521
522
523
      }
    }
  }
  return out;
}

524
void Expression::rename(const std::string &newName) { m_funct = newName; }
525

526
527
528
void Expression::renameAll(const std::string &oldName,
                           const std::string &newName) {
  if (!isFunct() && name() == oldName) {
529
    rename(newName);
530
  } else {
531
    std::vector<Expression>::iterator e = m_terms.begin();
532
533
    for (; e != m_terms.end(); ++e) {
      e->renameAll(oldName, newName);
534
535
536
537
    }
  }
}

538
539
540
void Expression::toList(const std::string &sep) {
  if (name() == sep)
    return;
541
542
543
544
545
546
  Expression term(*this);
  m_terms.resize(1);
  m_terms[0] = term;
  setFunct(sep);
}

547
548
} // API
} // Mantid