Commit 5c166f1d authored by Sanjay Patel's avatar Sanjay Patel
Browse files

[x86] make SLM extract vector element more expensive than default

I'm not sure what the effect of this change will be on all of the affected
tests or a larger benchmark, but it fixes the horizontal add/sub problems
noted here:
https://reviews.llvm.org/D59710?vs=227972&id=228095&whitespace=ignore-most#toc

The costs are based on reciprocal throughput numbers in Agner's tables for
PEXTR*; these appear to be very slow ops on Silvermont.

This is a small step towards the larger motivation discussed in PR43605:
https://bugs.llvm.org/show_bug.cgi?id=43605

Also, it seems likely that insert/extract is the source of perf regressions on
other CPUs (up to 30%) that were cited as part of the reason to revert D59710,
so maybe we'll extend the table-based approach to other subtargets.

Differential Revision: https://reviews.llvm.org/D70607
parent 5c5e8605
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -2377,6 +2377,13 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
}

int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
  static const CostTblEntry SLMCostTbl[] = {
     { ISD::EXTRACT_VECTOR_ELT,       MVT::i8,      4 },
     { ISD::EXTRACT_VECTOR_ELT,       MVT::i16,     4 },
     { ISD::EXTRACT_VECTOR_ELT,       MVT::i32,     4 },
     { ISD::EXTRACT_VECTOR_ELT,       MVT::i64,     7 }
   };

  assert(Val->isVectorTy() && "This must be a vector type");

  Type *ScalarType = Val->getScalarType();
@@ -2396,6 +2403,13 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
    // Floating point scalars are already located in index #0.
    if (ScalarType->isFloatingPointTy() && Index == 0)
      return 0;

    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Unexpected vector opcode");
    MVT MScalarTy = LT.second.getScalarType();
    if (ST->isSLM())
      if (auto *Entry = CostTableLookup(SLMCostTbl, ISD, MScalarTy))
        return LT.first * Entry->Cost;
  }

  // Add to the base cost if we know that the extracted element of a vector is
+58 −1
Original line number Diff line number Diff line
@@ -6,7 +6,7 @@
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ
;
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SLM
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2

@@ -39,6 +39,13 @@ define i32 @fptosi_double_i64(i32 %arg) {
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptosi_double_i64'
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64
; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>
; SLM-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
; SLM-NEXT:  Cost Model: Found an estimated cost of 75 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; BTVER2-LABEL: 'fptosi_double_i64'
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>
@@ -75,6 +82,13 @@ define i32 @fptosi_double_i32(i32 %arg) {
; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptosi_double_i32'
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32
; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; BTVER2-LABEL: 'fptosi_double_i32'
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
@@ -111,6 +125,13 @@ define i32 @fptosi_double_i16(i32 %arg) {
; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptosi_double_i16'
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; BTVER2-LABEL: 'fptosi_double_i16'
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
@@ -147,6 +168,13 @@ define i32 @fptosi_double_i8(i32 %arg) {
; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptosi_double_i8'
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
; SLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
; SLM-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; BTVER2-LABEL: 'fptosi_double_i8'
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
@@ -194,6 +222,14 @@ define i32 @fptosi_float_i64(i32 %arg) {
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptosi_float_i64'
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64
; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>
; SLM-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
; SLM-NEXT:  Cost Model: Found an estimated cost of 75 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
; SLM-NEXT:  Cost Model: Found an estimated cost of 151 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; BTVER2-LABEL: 'fptosi_float_i64'
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>
@@ -218,6 +254,13 @@ define i32 @fptosi_float_i32(i32 %arg) {
; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32>
; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptosi_float_i32'
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32>
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32>
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32>
; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; BTVER2-LABEL: 'fptosi_float_i32'
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32>
@@ -254,6 +297,13 @@ define i32 @fptosi_float_i16(i32 %arg) {
; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptosi_float_i16'
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; BTVER2-LABEL: 'fptosi_float_i16'
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
@@ -290,6 +340,13 @@ define i32 @fptosi_float_i8(i32 %arg) {
; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptosi_float_i8'
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
; SLM-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
; SLM-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; BTVER2-LABEL: 'fptosi_float_i8'
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
+58 −1
Original line number Diff line number Diff line
@@ -6,7 +6,7 @@
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ
;
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SLM
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2

@@ -39,6 +39,13 @@ define i32 @fptoui_double_i64(i32 %arg) {
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptoui_double_i64'
; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64
; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
; SLM-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
; SLM-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; BTVER2-LABEL: 'fptoui_double_i64'
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
@@ -75,6 +82,13 @@ define i32 @fptoui_double_i32(i32 %arg) {
; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptoui_double_i32'
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32
; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
; SLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
; SLM-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; BTVER2-LABEL: 'fptoui_double_i32'
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
@@ -111,6 +125,13 @@ define i32 @fptoui_double_i16(i32 %arg) {
; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptoui_double_i16'
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
; SLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
; SLM-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; BTVER2-LABEL: 'fptoui_double_i16'
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
@@ -147,6 +168,13 @@ define i32 @fptoui_double_i8(i32 %arg) {
; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptoui_double_i8'
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
; SLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
; SLM-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; BTVER2-LABEL: 'fptoui_double_i8'
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
@@ -194,6 +222,14 @@ define i32 @fptoui_float_i64(i32 %arg) {
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptoui_float_i64'
; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64
; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
; SLM-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
; SLM-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
; SLM-NEXT:  Cost Model: Found an estimated cost of 199 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; BTVER2-LABEL: 'fptoui_float_i64'
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
@@ -232,6 +268,13 @@ define i32 @fptoui_float_i32(i32 %arg) {
; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptoui_float_i32'
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32
; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
; SLM-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
; SLM-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; BTVER2-LABEL: 'fptoui_float_i32'
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
@@ -268,6 +311,13 @@ define i32 @fptoui_float_i16(i32 %arg) {
; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptoui_float_i16'
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
; SLM-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
; SLM-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; BTVER2-LABEL: 'fptoui_float_i16'
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
@@ -304,6 +354,13 @@ define i32 @fptoui_float_i8(i32 %arg) {
; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptoui_float_i8'
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
; SLM-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
; SLM-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; BTVER2-LABEL: 'fptoui_float_i8'
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
+475 −179

File changed.

Preview size limit exceeded, changes collapsed.

+606 −74

File changed.

Preview size limit exceeded, changes collapsed.

Loading