Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 3 additions & 26 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7921,32 +7921,9 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) {
case Instruction::Shl:
case Instruction::Sub:
case Instruction::Xor:
case Instruction::Freeze: {
SmallVector<VPValue *> NewOps(VPI->operands());
if (Instruction::isBinaryOp(VPI->getOpcode())) {
// The legacy cost model uses SCEV to check if some of the operands are
// constants. To match the legacy cost model's behavior, use SCEV to try
// to replace operands with constants.
ScalarEvolution &SE = *PSE.getSE();
auto GetConstantViaSCEV = [this, &SE](VPValue *Op) {
if (!Op->isLiveIn())
return Op;
Value *V = Op->getUnderlyingValue();
if (isa<Constant>(V) || !SE.isSCEVable(V->getType()))
return Op;
auto *C = dyn_cast<SCEVConstant>(SE.getSCEV(V));
if (!C)
return Op;
return Plan.getOrAddLiveIn(C->getValue());
};
// For Mul, the legacy cost model checks both operands.
if (VPI->getOpcode() == Instruction::Mul)
NewOps[0] = GetConstantViaSCEV(NewOps[0]);
// For other binops, the legacy cost model only checks the second operand.
NewOps[1] = GetConstantViaSCEV(NewOps[1]);
}
return new VPWidenRecipe(*I, NewOps, *VPI, *VPI, VPI->getDebugLoc());
}
case Instruction::Freeze:
return new VPWidenRecipe(*I, VPI->operands(), *VPI, *VPI,
VPI->getDebugLoc());
case Instruction::ExtractValue: {
SmallVector<VPValue *> NewOps(VPI->operands());
auto *EVI = cast<ExtractValueInst>(I);
Expand Down
18 changes: 18 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
Expand Down Expand Up @@ -566,13 +567,30 @@ static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
}
}

/// Replace the live-ins of \p Plan with constants where SCEV can simplify
/// them.
///
/// For each live-in of \p Plan, query its SCEV expression through
/// vputils::getSCEVExprForVPValue using \p SE. If that expression is a
/// SCEVConstant, all uses of the live-in are redirected to the live-in
/// obtained from VPlan::getOrAddLiveIn for the constant's value. Live-ins
/// whose expression is not a SCEVConstant are left untouched.
static void simplifyLiveInsWithSCEV(VPlan &Plan, ScalarEvolution &SE) {
  // Returns the constant live-in that \p VPV simplifies to, or nullptr if the
  // SCEV expression for \p VPV is not a constant.
  auto GetSimplifiedLiveInViaSCEV = [&](VPValue *VPV) -> VPValue * {
    const SCEV *Expr = vputils::getSCEVExprForVPValue(VPV, SE);
    if (auto *C = dyn_cast<SCEVConstant>(Expr))
      return Plan.getOrAddLiveIn(C->getValue());
    return nullptr;
  };

  // Walk a snapshot of the live-ins: getOrAddLiveIn can add a live-in for the
  // simplified constant, which could otherwise invalidate the range returned
  // by getLiveIns() while it is being iterated.
  for (VPValue *LiveIn : to_vector(Plan.getLiveIns())) {
    if (VPValue *SimplifiedLiveIn = GetSimplifiedLiveInViaSCEV(LiveIn))
      LiveIn->replaceAllUsesWith(SimplifiedLiveIn);
  }
}

std::unique_ptr<VPlan>
VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
DebugLoc IVDL, PredicatedScalarEvolution &PSE,
LoopVersioning *LVer) {
PlainCFGBuilder Builder(TheLoop, &LI, LVer);
std::unique_ptr<VPlan> VPlan0 = Builder.buildPlainCFG();
addInitialSkeleton(*VPlan0, InductionTy, IVDL, PSE, TheLoop);
simplifyLiveInsWithSCEV(*VPlan0, *PSE.getSE());
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it better to do this here, rather than as a VPlan-to-VPlan transform, because it is expected to apply only once and can therefore be done at construction?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, I think we shouldn't add simplifiable live-ins later on.

return VPlan0;
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
const SCEV *vputils::getSCEVExprForVPValue(const VPValue *V,
ScalarEvolution &SE, const Loop *L) {
if (V->isLiveIn()) {
if (Value *LiveIn = V->getLiveInIRValue())
Value *LiveIn = V->getLiveInIRValue();
if (LiveIn && SE.isSCEVable(LiveIn->getType()))
return SE.getSCEV(LiveIn);
return SE.getCouldNotCompute();
}
Expand Down
18 changes: 6 additions & 12 deletions llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1067,19 +1067,16 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
; VEC: [[VECTOR_PH]]:
; VEC-NEXT: br label %[[VECTOR_BODY:.*]]
; VEC: [[VECTOR_BODY]]:
; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VEC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; VEC-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1
; VEC-NEXT: [[TMP0:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]]
; VEC-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP1]], align 2
; VEC-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP6]]
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 2
; VEC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
; VEC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
; VEC: [[MIDDLE_BLOCK]]:
; VEC-NEXT: br label %[[E_EXIT:.*]]
; VEC: [[E_EXIT]]:
; VEC-NEXT: ret i32 [[TMP5]]
; VEC-NEXT: ret i32 8
;
; INTERLEAVE-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification(
; INTERLEAVE-SAME: ptr [[DST:%.*]]) {
Expand All @@ -1096,14 +1093,13 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]]
; INTERLEAVE-NEXT: store i16 0, ptr [[TMP2]], align 2
; INTERLEAVE-NEXT: store i16 0, ptr [[TMP3]], align 2
; INTERLEAVE-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP1]]
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; INTERLEAVE-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
; INTERLEAVE-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
; INTERLEAVE: [[MIDDLE_BLOCK]]:
; INTERLEAVE-NEXT: br label %[[E_EXIT:.*]]
; INTERLEAVE: [[E_EXIT]]:
; INTERLEAVE-NEXT: ret i32 [[TMP5]]
; INTERLEAVE-NEXT: ret i32 8
;
entry:
%step.1 = sext i8 0 to i32
Expand Down Expand Up @@ -1131,8 +1127,6 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) {
; VEC-NEXT: [[STEP_2:%.*]] = add nsw i32 [[STEP_1]], 1
; VEC-NEXT: br label %[[VECTOR_PH:.*]]
; VEC: [[VECTOR_PH]]:
; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP_2]], i64 0
; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; VEC-NEXT: br label %[[VECTOR_BODY:.*]]
; VEC: [[VECTOR_BODY]]:
; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
Expand All @@ -1145,7 +1139,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) {
; VEC-NEXT: store i16 0, ptr [[TMP2]], align 2
; VEC-NEXT: store i16 0, ptr [[TMP3]], align 2
; VEC-NEXT: [[TMP4:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 1)
; VEC-NEXT: [[TMP5:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[TMP4]]
; VEC-NEXT: [[TMP5:%.*]] = add <2 x i32> splat (i32 1), [[TMP4]]
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
; VEC-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4
Expand Down Expand Up @@ -1173,7 +1167,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) {
; INTERLEAVE-NEXT: store i16 0, ptr [[TMP2]], align 2
; INTERLEAVE-NEXT: store i16 0, ptr [[TMP3]], align 2
; INTERLEAVE-NEXT: [[TMP4:%.*]] = add i32 [[TMP1]], 1
; INTERLEAVE-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP4]]
; INTERLEAVE-NEXT: [[TMP5:%.*]] = add i32 1, [[TMP4]]
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4
; INTERLEAVE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
Expand Down
36 changes: 14 additions & 22 deletions llvm/test/Transforms/LoopVectorize/lcssa-crashes.ll
Original file line number Diff line number Diff line change
Expand Up @@ -89,46 +89,38 @@ define void @test3(ptr %p) {
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY1:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POS_337:%.*]] = add i32 [[ADD41]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[ADD41]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[ADD41]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[ADD41]], 3
; CHECK-NEXT: [[INC46:%.*]] = add i32 [[POS_337]], 1
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP1]], 1
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP2]], 1
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP3]], 1
; CHECK-NEXT: [[INC46:%.*]] = add i32 6, 1
; CHECK-NEXT: [[TMP5:%.*]] = add i32 7, 1
; CHECK-NEXT: [[TMP6:%.*]] = add i32 8, 1
; CHECK-NEXT: [[TMP7:%.*]] = add i32 9, 1
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[INC46]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP5]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP6]], i32 2
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP7]], i32 3
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: [[IDXPROM4738:%.*]] = add i64 [[IDXPROM4736]], 0
; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P:%.*]], i64 0, i64 [[IDXPROM4738]]
; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P:%.*]], i64 0, i64 6
; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX48]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
; CHECK: pred.store.if2:
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[IDXPROM4736]], 1
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P]], i64 0, i64 [[TMP14]]
; CHECK: pred.store.if1:
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P]], i64 0, i64 7
; CHECK-NEXT: store i8 0, ptr [[TMP15]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE3]]
; CHECK: pred.store.continue3:
; CHECK: pred.store.continue2:
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
; CHECK: pred.store.if4:
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[IDXPROM4736]], 2
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P]], i64 0, i64 [[TMP16]]
; CHECK: pred.store.if3:
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P]], i64 0, i64 8
; CHECK-NEXT: store i8 0, ptr [[TMP17]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]]
; CHECK: pred.store.continue5:
; CHECK: pred.store.continue4:
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
; CHECK: pred.store.if6:
; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[IDXPROM4736]], 3
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P]], i64 0, i64 [[TMP18]]
; CHECK: pred.store.if5:
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P]], i64 0, i64 9
; CHECK-NEXT: store i8 0, ptr [[TMP19]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE7]]
; CHECK: pred.store.continue7:
; CHECK: pred.store.continue6:
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[FIRST_INACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> <i1 false, i1 false, i1 true, i1 true>, i1 false)
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll
Original file line number Diff line number Diff line change
Expand Up @@ -504,11 +504,10 @@ define void @iv_start_from_shl_of_previous_iv(ptr %dst) {
; VF2-NEXT: [[IV_1_SHL:%.*]] = shl i64 1, 1
; VF2-NEXT: br label %[[VECTOR_PH1:.*]]
; VF2: [[VECTOR_PH1]]:
; VF2-NEXT: [[TMP0:%.*]] = add i64 [[IV_1_SHL]], 98
; VF2-NEXT: br label %[[VECTOR_BODY2:.*]]
; VF2: [[VECTOR_BODY2]]:
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY2]] ]
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[IV_1_SHL]], [[INDEX]]
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]]
; VF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX]]
; VF2-NEXT: store <2 x i8> splat (i8 1), ptr [[TMP1]], align 1
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
Expand All @@ -519,7 +518,7 @@ define void @iv_start_from_shl_of_previous_iv(ptr %dst) {
; VF2: [[SCALAR_PH]]:
; VF2-NEXT: br label %[[LOOP_2:.*]]
; VF2: [[LOOP_2]]:
; VF2-NEXT: [[IV_2:%.*]] = phi i64 [ [[TMP0]], %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP_2]] ]
; VF2-NEXT: [[IV_2:%.*]] = phi i64 [ 100, %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP_2]] ]
; VF2-NEXT: [[GEP_2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV_2]]
; VF2-NEXT: store i8 1, ptr [[GEP_2]], align 1
; VF2-NEXT: [[IV_2_NEXT]] = add i64 [[IV_2]], 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ define void @reuse_lcssa_phi_for_add_rec1(ptr %head) {
; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP_1]] ]
; CHECK-NEXT: [[FOR:%.*]] = phi ptr [ [[HEAD]], %[[ENTRY]] ], [ [[L_1:%.*]], %[[LOOP_1]] ]
; CHECK-NEXT: [[L_1]] = load ptr, ptr [[FOR]], align 8
; CHECK-NEXT: [[IV_2_NEXT]] = add nuw nsw i32 [[IV_2]], 1
; CHECK-NEXT: [[IV_2_NEXT]] = add nuw i32 [[IV_2]], 1
; CHECK-NEXT: [[EC_1:%.*]] = icmp eq ptr [[L_1]], null
; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1
; CHECK-NEXT: br i1 [[EC_1]], label %[[PH:.*]], label %[[LOOP_1]]
Expand Down
20 changes: 2 additions & 18 deletions llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,38 +13,22 @@ define i32 @iv_live_out_wide(ptr %dst) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 2
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP5]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP5]], 2
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP4]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 2000, [[TMP6]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 2000, [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[STEP_2]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i32> [[TMP7]], splat (i32 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> zeroinitializer, [[TMP8]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 1
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP13]]
; CHECK-NEXT: store <vscale x 2 x i16> zeroinitializer, ptr [[TMP10]], align 2
; CHECK-NEXT: store <vscale x 2 x i16> zeroinitializer, ptr [[TMP14]], align 2
; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 2 x i32> [[BROADCAST_SPLAT]], [[STEP_ADD]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT2]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 2
; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <vscale x 2 x i32> [[TMP15]], i32 [[TMP19]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 2000, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[E_EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
Expand All @@ -58,7 +42,7 @@ define i32 @iv_live_out_wide(ptr %dst) {
; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 2000
; CHECK-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[E_EXIT]]:
; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP20]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[RES]]
;
entry:
Expand Down