Commit ce6f0303 authored by peter klausler
Browse files

[flang] Tuning up binary->decimal conversion

Use short division of big-radix values by powers of two when
converting values with negative unbiased exponents rather than
multiplication by smaller powers of five; this reduces the overall
outer iteration count. This change is a win across the entire range
of inputs.

Reviewed By: tskeith

Differential Revision: https://reviews.llvm.org/D83806
parent 6b476e24
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -69,7 +69,7 @@ enum DecimalConversionFlags {
 * some extra due to the library working internally in base 10**16
 * and computing its output size in multiples of 16.
 */
#define EXTRA_DECIMAL_CONVERSION_SPACE (1 + 1 + 16 - 1)
#define EXTRA_DECIMAL_CONVERSION_SPACE (1 + 1 + 2 * 16 - 1)

#ifdef __cplusplus
template <int PREC>
+37 −6
Original line number Diff line number Diff line
@@ -222,15 +222,46 @@ private:
    return remainder;
  }

  int DivideByPowerOfTwo(int twoPow) { // twoPow <= LOG10RADIX
    int remainder{0};
  void DivideByPowerOfTwo(int twoPow) { // twoPow <= log10Radix
    Digit remainder{0};
    auto mask{(Digit{1} << twoPow) - 1};
    auto coeff{radix >> twoPow};
    for (int j{digits_ - 1}; j >= 0; --j) {
      Digit q{digit_[j] >> twoPow};
      int nrem = digit_[j] - (q << twoPow);
      digit_[j] = q + (radix >> twoPow) * remainder;
      auto nrem{digit_[j] & mask};
      digit_[j] = (digit_[j] >> twoPow) + coeff * remainder;
      remainder = nrem;
    }
    return remainder;
  }

  // Returns true on overflow
  bool DivideByPowerOfTwoInPlace(int twoPow) {
    if (digits_ > 0) {
      while (twoPow > 0) {
        int chunk{twoPow > log10Radix ? log10Radix : twoPow};
        if ((digit_[0] & ((Digit{1} << chunk) - 1)) == 0) {
          DivideByPowerOfTwo(chunk);
          twoPow -= chunk;
          continue;
        }
        twoPow -= chunk;
        if (digit_[digits_ - 1] >> chunk != 0) {
          if (digits_ == digitLimit_) {
            return true; // overflow
          }
          digit_[digits_++] = 0;
        }
        auto remainder{digit_[digits_ - 1]};
        exponent_ -= log10Radix;
        auto coeff{radix >> chunk}; // precise; radix is (5*2)**log10Radix
        auto mask{(Digit{1} << chunk) - 1};
        for (int j{digits_ - 1}; j >= 1; --j) {
          digit_[j] = (digit_[j - 1] >> chunk) + coeff * remainder;
          remainder = digit_[j - 1] & mask;
        }
        digit_[0] = coeff * remainder;
      }
    }
    return false; // no overflow
  }

  int AddCarry(int position = 0, int carry = 1) {
+2 −36
Original line number Diff line number Diff line
@@ -70,42 +70,8 @@ BigRadixFloatingPointNumber<PREC, LOG10RADIX>::BigRadixFloatingPointNumber(
    overflow |= MultiplyBy<2>();
  }

  while (twoPow < 0) {
    int shift{common::TrailingZeroBitCount(digit_[0])};
    if (shift == 0) {
      break;
    }
    if (shift > log10Radix) {
      shift = log10Radix;
    }
    if (shift > -twoPow) {
      shift = -twoPow;
    }
    // (D*(2**S)) * 10.**E * 2.**twoPow -> D * 10.**E * 2.**(twoPow+S)
    DivideByPowerOfTwo(shift);
    twoPow += shift;
  }

  for (; twoPow <= -4; twoPow += 4) {
    // D * 10.**E * 2.**twoPow -> 625D * 10.**(E-4) * 2.**(twoPow+4)
    overflow |= MultiplyBy<(5 * 5 * 5 * 5)>();
    exponent_ -= 4;
  }
  if (twoPow <= -2) {
    // D * 10.**E * 2.**twoPow -> 25D * 10.**(E-2) * 2.**(twoPow+2)
    overflow |= MultiplyBy<5 * 5>();
    twoPow += 2;
    exponent_ -= 2;
  }
  for (; twoPow < 0; ++twoPow) {
    // D * 10.**E * 2.**twoPow -> 5D * 10.**(E-1) * 2.**(twoPow+1)
    overflow |= MultiplyBy<5>();
    --exponent_;
  }

  overflow |= DivideByPowerOfTwoInPlace(-twoPow);
  assert(overflow == 0);

  // twoPow == 0, the decimal encoding is complete.
  Normalize();
}

@@ -153,7 +119,7 @@ BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToDecimal(char *buffer,
    for (int k{0}; k < log10Radix; k += 2) {
      Digit d{common::DivideUnsignedBy<Digit, hundredth>(dig)};
      dig = 100 * (dig - d * hundredth);
      const char *q = lut + 2 * d;
      const char *q{lut + 2 * d};
      *p++ = q[0];
      *p++ = q[1];
    }