Commit 228ea1a4 authored by aartbik's avatar aartbik
Browse files

[mlir] [VectorOps] consolidate all vector utilities to one header/cc file

Reviewers: nicolasvasilache, andydavis1, dcaballe

Reviewed By: andydavis1, dcaballe

Subscribers: dcaballe, merge_guards_bot, mgorny, mehdi_amini, rriddle, jpienaar, burmako, shauheen, antiagainst, arpith-jacob, mgester, lucyrfox, liufengdb, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D73593
parent c5fffa4d
Loading
Loading
Loading
Loading
+25 −1
Original line number Diff line number Diff line
//===- Utils.h - VectorOps Utils ----------------------------*- C++ -*-=======//
//===- VectorUtils.h - VectorOps Utilities ------------------*- C++ -*-=======//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -15,6 +15,7 @@

namespace mlir {

// Forward declarations.
class AffineApplyOp;
class AffineForOp;
class AffineMap;
@@ -25,6 +26,29 @@ class Operation;
class Value;
class VectorType;

/// Given the shape and sizes of a vector, returns the corresponding
/// strides for each dimension.
SmallVector<int64_t, 4> computeStrides(ArrayRef<int64_t> shape,
                                       ArrayRef<int64_t> sizes);

/// Given the slice strides together with a linear index in the dimension
/// space, returns the vector-space offsets in each dimension for a
/// de-linearized index.
SmallVector<int64_t, 4> delinearize(ArrayRef<int64_t> sliceStrides,
                                    int64_t linearIndex);

/// Given the target sizes of a vector, together with vector-space offsets,
/// returns the element-space offsets for each dimension.
SmallVector<int64_t, 4>
computeElementOffsetsFromVectorSliceOffsets(ArrayRef<int64_t> sizes,
                                            ArrayRef<int64_t> vectorOffsets);

/// Given the shape, sizes, and element-space offsets of a vector, returns
/// the slice sizes for each dimension.
SmallVector<int64_t, 4> computeSliceSizes(ArrayRef<int64_t> shape,
                                          ArrayRef<int64_t> sizes,
                                          ArrayRef<int64_t> elementOffsets);

/// Computes and returns the multi-dimensional ratio of `superShape` to
/// `subShape`. This is calculated by performing a traversal from minor to major
/// dimensions (i.e. in reverse shape order). If integral division is not
+0 −1
Original line number Diff line number Diff line
@@ -13,7 +13,6 @@ add_llvm_library(MLIRAnalysis STATIC
  TestMemRefDependenceCheck.cpp
  TestParallelismDetection.cpp
  Utils.cpp
  VectorAnalysis.cpp
  Verifier.cpp

  ADDITIONAL_HEADER_DIRS
+1 −0
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@ add_llvm_library(MLIRVectorOps
  DialectRegistration.cpp
  VectorOps.cpp
  VectorTransforms.cpp
  VectorUtils.cpp

  ADDITIONAL_HEADER_DIRS
  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/VectorOps
+7 −28
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
#include "mlir/Dialect/VectorOps/VectorOps.h"
#include "mlir/Dialect/StandardOps/Ops.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/VectorOps/VectorUtils.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
@@ -524,37 +525,15 @@ isValidExtractOrInsertSlicesType(Operation *op, VectorType vectorType,
  if (sizes.size() != rank || strides.size() != rank)
    return op->emitError("requires sizes and strides of rank ") << rank;

  // Compute the number of slices in each dimension.
  // TODO(andydavis) Move this into a slice generation helper function.
  auto shape = vectorType.getShape();
  SmallVector<int64_t, 4> dimSliceCounts(rank);
  for (unsigned i = 0; i < rank; ++i)
    dimSliceCounts[i] = ceilDiv(shape[i], sizes[i]);
  // Compute the strides between slices in each dimension.
  SmallVector<int64_t, 4> sliceStrides(rank);
  sliceStrides[rank - 1] = 1;
  for (int i = rank - 2; i >= 0; --i)
    sliceStrides[i] = sliceStrides[i + 1] * dimSliceCounts[i + 1];

  // Generate each slice shape based on 'sizes', 'strides' and 'vectorType',
  // and verify that the same matches the corresponding tuple element 'i'.
  auto shape = vectorType.getShape();
  auto sliceStrides = computeStrides(shape, sizes);
  for (int64_t i = 0, e = tupleType.size(); i < e; ++i) {
    // De-linearize w.r.t. 'sliceStrides'.
    SmallVector<int64_t, 4> vectorOffsets(rank);
    int64_t linearIndex = i;
    for (unsigned j = 0; j < rank; ++j) {
      vectorOffsets[j] = linearIndex / sliceStrides[j];
      linearIndex %= sliceStrides[j];
    }
    // Convert from unrolled vector-space offsets to element-space offsets.
    auto offsets = mlir::functional::zipMap(
        [](int64_t v1, int64_t v2) { return v1 * v2; }, vectorOffsets, sizes);
    // Initialize 'sliceSizes' to target 'sizes'
    SmallVector<int64_t, 4> sliceSizes(sizes.begin(), sizes.end());
    for (unsigned j = 0; j < rank; ++j) {
      // Based on 'offsets' and 'shape' clip some dim sizes for partial tiles.
      sliceSizes[j] = std::min(sliceSizes[j], shape[j] - offsets[j]);
    }
    auto vectorOffsets = delinearize(sliceStrides, i);
    auto elementOffsets =
        computeElementOffsetsFromVectorSliceOffsets(sizes, vectorOffsets);
    auto sliceSizes = computeSliceSizes(shape, sizes, elementOffsets);
    // Create slice VectorType type.
    auto sliceVectorType =
        VectorType::get(sliceSizes, vectorType.getElementType());
+26 −81
Original line number Diff line number Diff line
@@ -29,7 +29,6 @@
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/Types.h"
#include "mlir/Support/Functional.h"
#include "mlir/Support/MathExtras.h"
#include "mlir/Support/STLExtras.h"

#include "llvm/Support/CommandLine.h"
@@ -78,22 +77,6 @@ static int64_t linearize(ArrayRef<int64_t> offsets, ArrayRef<int64_t> basis) {
  return linearIndex;
}

/// Splits `linearIndex` into one component per entry of `basis`. Each
/// component is the quotient of the running index by the corresponding basis
/// entry; the remainder is carried over to the next entry. Every entry of
/// `basis` must be greater than 0, and nothing may be left over once the last
/// entry has been processed.
static SmallVector<int64_t, 8> delinearize(int64_t linearIndex,
                                           ArrayRef<int64_t> basis) {
  SmallVector<int64_t, 8> components;
  components.reserve(basis.size());
  int64_t remaining = linearIndex;
  for (int64_t stride : basis) {
    assert(stride > 0);
    // Peel off the component for this entry and keep what is left.
    components.push_back(remaining / stride);
    remaining %= stride;
  }
  // Sanity check.
  assert(remaining == 0 && "linear index remainder must be 0");
  return components;
}

// Clones `op` into a new operation that takes `operands` and returns
// `resultTypes`.
static Operation *cloneOpWithOperandsAndTypes(PatternRewriter &builder,
@@ -128,9 +111,8 @@ static TupleType generateExtractSlicesOpResultType(VectorType vectorType,
                                                   ArrayRef<int64_t> strides,
                                                   PatternRewriter &builder) {
  assert(llvm::all_of(strides, [](int64_t s) { return s == 1; }));
  unsigned rank = vectorType.getRank();
  assert(sizes.size() == rank);
  assert(strides.size() == rank);
  assert(static_cast<int64_t>(sizes.size()) == vectorType.getRank());
  assert(static_cast<int64_t>(strides.size()) == vectorType.getRank());

  // Compute shape ratio of 'shape' and 'sizes'.
  auto shape = vectorType.getShape();
@@ -139,21 +121,14 @@ static TupleType generateExtractSlicesOpResultType(VectorType vectorType,
  auto sliceDimCounts = *maybeDimSliceCounts;

  // Compute strides w.r.t number of slices in each dimension.
  auto basis = computeStrides(sliceDimCounts);
  auto sliceStrides = computeStrides(sliceDimCounts);
  int64_t sliceCount = computeMaxLinearIndex(sliceDimCounts);
  SmallVector<Type, 4> vectorTypes(sliceCount);
  for (unsigned i = 0; i < sliceCount; ++i) {
    // De-linearize w.r.t. 'basis'.
    auto vectorOffsets = delinearize(i, basis);
    // Convert from unrolled vector-space offsets to element-space offsets.
    auto offsets = zipMap([](int64_t v1, int64_t v2) { return v1 * v2; },
                          vectorOffsets, sizes);
    // Initialize 'sliceSizes' to target 'sizes'
    SmallVector<int64_t, 4> sliceSizes(sizes.begin(), sizes.end());
    for (unsigned j = 0; j < rank; ++j) {
      // Based on 'offsets' and 'shape' clip some dim sizes for partial tiles.
      sliceSizes[j] = std::min(sliceSizes[j], shape[j] - offsets[j]);
    }
    auto vectorOffsets = delinearize(sliceStrides, i);
    auto elementOffsets =
        computeElementOffsetsFromVectorSliceOffsets(sizes, vectorOffsets);
    auto sliceSizes = computeSliceSizes(shape, sizes, elementOffsets);
    // Create Vector type and add to 'vectorTypes[i]'.
    vectorTypes[i] = VectorType::get(sliceSizes, vectorType.getElementType());
  }
@@ -333,7 +308,7 @@ static Value unrollSingleResultStructuredOp(Operation *op,
  }
  // Compute number of total unrolled instances.
  auto numUnrolledInstances = computeMaxLinearIndex(unrollFactors);
  auto basis = computeStrides(unrollFactors);
  auto sliceStrides = computeStrides(unrollFactors);

  auto &resultValueState = unrolledVectorState[resultIndex];
  auto unrolledResultType = VectorType::get(resultValueState.unrolledShape,
@@ -346,11 +321,9 @@ static Value unrollSingleResultStructuredOp(Operation *op,

  // Unroll 'numUnrolledInstances' of 'op', storing results in 'caches'.
  for (unsigned i = 0; i < numUnrolledInstances; ++i) {
    // De-linearize w.r.t. 'basis'.
    auto vectorOffsets = delinearize(i, basis);
    // Convert from unrolled vector-space offsets to element-space offsets.
    auto offsets = zipMap([](int64_t v1, int64_t v2) { return v1 * v2; },
                          vectorOffsets, targetShape);
    auto vectorOffsets = delinearize(sliceStrides, i);
    auto elementOffsets =
        computeElementOffsetsFromVectorSliceOffsets(targetShape, vectorOffsets);
    // Get cached slice (or create slice) for each operand at 'elementOffsets'.
    SmallVector<Value, 3> operands;
    operands.resize(op->getNumOperands());
@@ -360,7 +333,7 @@ static Value unrollSingleResultStructuredOp(Operation *op,
        continue; // Output
      auto operand = op->getOperand(operandIndex);
      operands[operandIndex] = getOrCreateUnrolledVectorSlice(
          op->getLoc(), unrolledVectorState[i], vectorOffsets, offsets,
          op->getLoc(), unrolledVectorState[i], vectorOffsets, elementOffsets,
          vectors[i].indexMap, operand, caches[i], builder);
    }
    // Create op on sliced vector arguments.
@@ -498,22 +471,20 @@ generateTransferOpSlices(VectorType vectorType, TupleType tupleType,
  auto maybeDimSliceCounts = shapeRatio(vectorType.getShape(), sizes);
  assert(maybeDimSliceCounts.hasValue());
  auto sliceDimCounts = *maybeDimSliceCounts;
  auto basis = computeStrides(sliceDimCounts);
  auto sliceStrides = computeStrides(sliceDimCounts);

  int64_t numSlices = tupleType.size();
  unsigned numSliceIndices = indices.size();
  auto *ctx = rewriter.getContext();
  for (unsigned i = 0; i < numSlices; ++i) {
    // De-linearize w.r.t. 'basis'.
    auto vectorOffsets = delinearize(i, basis);
    // Convert from unrolled vector-space offsets to element-space offsets.
    auto offsets = zipMap([](int64_t v1, int64_t v2) { return v1 * v2; },
                          vectorOffsets, sizes);
    auto vectorOffsets = delinearize(sliceStrides, i);
    auto elementOffsets =
        computeElementOffsetsFromVectorSliceOffsets(sizes, vectorOffsets);
    // Compute 'sliceIndices' by adding 'elementOffsets[i]' to 'indices[i]'.
    SmallVector<Value, 4> sliceIndices(numSliceIndices);
    for (auto it : llvm::enumerate(indices)) {
      auto expr = getAffineDimExpr(0, ctx) +
                  getAffineConstantExpr(offsets[it.index()], ctx);
                  getAffineConstantExpr(elementOffsets[it.index()], ctx);
      auto map = AffineMap::get(/*dimCount=*/1, /*symbolCount=*/0, expr);
      sliceIndices[it.index()] = rewriter.create<AffineApplyOp>(
          it.value().getLoc(), map, ArrayRef<Value>(it.value()));
@@ -672,13 +643,11 @@ class ExtractSlicesOpLowering
public:
  using OpRewritePattern<vector::ExtractSlicesOp>::OpRewritePattern;

  // TODO(ajcbik): refactor slice utilities out into VectorUtils.h
  PatternMatchResult matchAndRewrite(vector::ExtractSlicesOp op,
                                     PatternRewriter &rewriter) const override {
    auto loc = op.getLoc();

    VectorType vectorType = op.getSourceVectorType();
    int64_t rank = vectorType.getRank();
    auto shape = vectorType.getShape();

    SmallVector<int64_t, 4> sizes;
@@ -686,26 +655,16 @@ public:
    SmallVector<int64_t, 4> strides;
    op.getStrides(strides); // all-ones at the moment

    // Compute the number of slices in each dimension.
    SmallVector<int64_t, 4> sliceDimCounts(rank);
    for (int64_t r = 0; r < rank; ++r)
      sliceDimCounts[r] = ceilDiv(shape[r], sizes[r]);

    // For each element in the tuple, generate the proper strided slice.
    auto basis = computeStrides(sliceDimCounts);
    TupleType tupleType = op.getResultTupleType();
    int64_t tupleSize = tupleType.size();
    SmallVector<Value, 4> tupleValues(tupleSize);
    auto sliceStrides = computeStrides(shape, sizes);
    for (int64_t i = 0; i < tupleSize; ++i) {
      // De-linearize w.r.t. 'basis'.
      auto vectorOffsets = delinearize(i, basis);
      // Convert from unrolled vector-space offsets to element-space offsets.
      auto elementOffsets = mlir::functional::zipMap(
          [](int64_t v1, int64_t v2) { return v1 * v2; }, vectorOffsets, sizes);
      // Compute the size of each slice.
      SmallVector<int64_t, 4> sliceSizes(rank);
      for (int64_t r = 0; r < rank; ++r)
        sliceSizes[r] = std::min(sizes[r], shape[r] - elementOffsets[r]);
      auto vectorOffsets = delinearize(sliceStrides, i);
      auto elementOffsets =
          computeElementOffsetsFromVectorSliceOffsets(sizes, vectorOffsets);
      auto sliceSizes = computeSliceSizes(shape, sizes, elementOffsets);
      // Insert in tuple.
      tupleValues[i] = rewriter.create<vector::StridedSliceOp>(
          loc, op.vector(), elementOffsets, sliceSizes, strides);
@@ -731,13 +690,11 @@ class InsertSlicesOpLowering : public OpRewritePattern<vector::InsertSlicesOp> {
public:
  using OpRewritePattern<vector::InsertSlicesOp>::OpRewritePattern;

  // TODO(ajcbik): refactor slice utilities out into VectorUtils.h
  PatternMatchResult matchAndRewrite(vector::InsertSlicesOp op,
                                     PatternRewriter &rewriter) const override {
    auto loc = op.getLoc();

    VectorType vectorType = op.getResultVectorType();
    int64_t rank = vectorType.getRank();
    auto shape = vectorType.getShape();

    SmallVector<int64_t, 4> sizes;
@@ -745,11 +702,6 @@ public:
    SmallVector<int64_t, 4> strides;
    op.getStrides(strides); // all-ones at the moment

    // Compute the number of slices in each dimension.
    SmallVector<int64_t, 4> sliceDimCounts(rank);
    for (int64_t r = 0; r < rank; ++r)
      sliceDimCounts[r] = ceilDiv(shape[r], sizes[r]);

    // Prepare result.
    auto elemType = vectorType.getElementType();
    Value zero = rewriter.create<ConstantOp>(loc, elemType,
@@ -757,20 +709,13 @@ public:
    Value result = rewriter.create<SplatOp>(loc, vectorType, zero);

    // For each element in the tuple, extract the proper strided slice.
    auto basis = computeStrides(sliceDimCounts);
    TupleType tupleType = op.getSourceTupleType();
    int64_t tupleSize = tupleType.size();
    SmallVector<Value, 4> tupleValues(tupleSize);
    auto sliceStrides = computeStrides(shape, sizes);
    for (int64_t i = 0; i < tupleSize; ++i) {
      // De-linearize w.r.t. 'basis'.
      auto vectorOffsets = delinearize(i, basis);
      // Convert from unrolled vector-space offsets to element-space offsets.
      auto elementOffsets = mlir::functional::zipMap(
          [](int64_t v1, int64_t v2) { return v1 * v2; }, vectorOffsets, sizes);
      // Compute the size of each slice.
      SmallVector<int64_t, 4> sliceSizes(rank);
      for (int64_t r = 0; r < rank; ++r)
        sliceSizes[r] = std::min(sizes[r], shape[r] - elementOffsets[r]);
      auto vectorOffsets = delinearize(sliceStrides, i);
      auto elementOffsets =
          computeElementOffsetsFromVectorSliceOffsets(sizes, vectorOffsets);
      // Extract from tuple into the result.
      auto index = rewriter.getI64IntegerAttr(i);
      auto tupleGet = rewriter.create<vector::TupleGetOp>(
Loading