Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class ModulePass;

FunctionPass *createAArch64DeadRegisterDefinitions();
FunctionPass *createAArch64RedundantCopyEliminationPass();
FunctionPass *createAArch64RedundantCondBranchPass();
FunctionPass *createAArch64CondBrTuning();
FunctionPass *createAArch64CompressJumpTablesPass();
FunctionPass *createAArch64ConditionalCompares();
Expand Down Expand Up @@ -103,6 +104,7 @@ void initializeAArch64PostSelectOptimizePass(PassRegistry &);
void initializeAArch64PreLegalizerCombinerPass(PassRegistry &);
void initializeAArch64PromoteConstantPass(PassRegistry&);
void initializeAArch64RedundantCopyEliminationPass(PassRegistry&);
void initializeAArch64RedundantCondBranchPass(PassRegistry &);
void initializeAArch64SIMDInstrOptPass(PassRegistry &);
void initializeAArch64SLSHardeningPass(PassRegistry &);
void initializeAArch64SpeculationHardeningPass(PassRegistry &);
Expand Down
107 changes: 107 additions & 0 deletions llvm/lib/Target/AArch64/AArch64RedundantCondBranchPass.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
//==- AArch64RedundantCondBranchPass.cpp - Remove redundant cond. branches -==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Late in the pipeline, especially with zero phi operands propagated after tail
// duplication, we can end up with CBZ/CBNZ/TBZ/TBNZ testing the zero register
// (WZR/XZR). This simple pass looks at the terminators of each block: a CBZ/TBZ
// on the zero register is always taken and becomes an unconditional B, while a
// CBNZ/TBNZ on the zero register is never taken and is removed.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-redundantcondbranch"

namespace {
/// Machine-function pass that simplifies CBZ/CBNZ/TBZ/TBNZ terminators whose
/// tested register is the zero register (WZR/XZR). The per-block rewriting
/// happens in runOnMachineFunction.
class AArch64RedundantCondBranch : public MachineFunctionPass {
public:
  /// Pass identifier handed to the MachineFunctionPass base class.
  static char ID;

  AArch64RedundantCondBranch() : MachineFunctionPass(ID) {}

  /// Human-readable name of the pass.
  StringRef getPassName() const override {
    return "AArch64 Redundant Conditional Branch Elimination";
  }

  /// The pass declares the NoVRegs property as a requirement.
  MachineFunctionProperties getRequiredProperties() const override {
    MachineFunctionProperties Required;
    Required.setNoVRegs();
    return Required;
  }

  /// Entry point: processes every basic block of \p MF and reports whether
  /// anything was changed.
  bool runOnMachineFunction(MachineFunction &MF) override;
};

char AArch64RedundantCondBranch::ID = 0;
} // end anonymous namespace

// Pass registration: associates AArch64RedundantCondBranch with the
// "aarch64-redundantcondbranch" argument string (the same string as
// DEBUG_TYPE) and a descriptive name.
// NOTE(review): the two trailing `false` arguments are presumably the macro's
// CFG-only and is-analysis flags -- confirm against llvm/PassSupport.h.
INITIALIZE_PASS(AArch64RedundantCondBranch, "aarch64-redundantcondbranch",
"AArch64 Redundant Conditional Branch Elimination pass", false,
false)

static bool optimizeTerminators(MachineBasicBlock *MBB,
const TargetInstrInfo &TII) {
for (MachineInstr &MI : make_early_inc_range(MBB->terminators())) {
unsigned Opc = MI.getOpcode();
switch (Opc) {
case AArch64::CBZW:
case AArch64::CBZX:
case AArch64::TBZW:
case AArch64::TBZX:
// CBZ/TBZ with WZR/XZR -> unconditional B
if (MI.getOperand(0).getReg() == AArch64::WZR ||
MI.getOperand(0).getReg() == AArch64::XZR) {
LLVM_DEBUG(dbgs() << "Removing redundant branch: " << MI);
MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
SmallVector<MachineBasicBlock *> Succs(MBB->successors());
for (auto *S : Succs)
if (S != Target)
MBB->removeSuccessor(S);
DebugLoc DL = MI.getDebugLoc();
while (MBB->rbegin() != &MI)
MBB->rbegin()->eraseFromParent();
MI.eraseFromParent();
BuildMI(MBB, DL, TII.get(AArch64::B)).addMBB(Target);
return true;
}
break;
case AArch64::CBNZW:
case AArch64::CBNZX:
case AArch64::TBNZW:
case AArch64::TBNZX:
// CBNZ/TBNZ with WZR/XZR -> never taken, remove branch and successor
if (MI.getOperand(0).getReg() == AArch64::WZR ||
MI.getOperand(0).getReg() == AArch64::XZR) {
LLVM_DEBUG(dbgs() << "Removing redundant branch: " << MI);
MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
MI.getParent()->removeSuccessor(Target);
MI.eraseFromParent();
return true;
}
break;
}
}
return false;
}

/// Runs the terminator clean-up over every basic block of \p MF.
/// \returns true if any block was modified; false if nothing changed or the
/// function was skipped.
bool AArch64RedundantCondBranch::runOnMachineFunction(MachineFunction &MF) {
  // Honor the pass framework's request to skip this function.
  if (skipFunction(MF.getFunction()))
    return false;

  const TargetInstrInfo *InstrInfo = MF.getSubtarget().getInstrInfo();

  bool AnyBlockChanged = false;
  for (MachineBasicBlock &Block : MF) {
    // Always visit the block, even if an earlier one already changed.
    const bool BlockChanged = optimizeTerminators(&Block, *InstrInfo);
    AnyBlockChanged = AnyBlockChanged || BlockChanged;
  }
  return AnyBlockChanged;
}

/// Factory for the pass: returns a newly allocated AArch64RedundantCondBranch
/// instance as a FunctionPass pointer.
FunctionPass *llvm::createAArch64RedundantCondBranchPass() {
  FunctionPass *NewPass = new AArch64RedundantCondBranch();
  return NewPass;
}
3 changes: 3 additions & 0 deletions llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ LLVMInitializeAArch64Target() {
initializeAArch64PostSelectOptimizePass(PR);
initializeAArch64PromoteConstantPass(PR);
initializeAArch64RedundantCopyEliminationPass(PR);
initializeAArch64RedundantCondBranchPass(PR);
initializeAArch64StorePairSuppressPass(PR);
initializeFalkorHWPFFixPass(PR);
initializeFalkorMarkStridedAccessesLegacyPass(PR);
Expand Down Expand Up @@ -862,6 +863,8 @@ void AArch64PassConfig::addPreEmitPass() {
if (TM->getOptLevel() >= CodeGenOptLevel::Aggressive &&
EnableAArch64CopyPropagation)
addPass(createMachineCopyPropagationPass(true));
if (TM->getOptLevel() != CodeGenOptLevel::None)
addPass(createAArch64RedundantCondBranchPass());

addPass(createAArch64A53Fix835769());

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ add_llvm_target(AArch64CodeGen
AArch64CompressJumpTables.cpp
AArch64ConditionOptimizer.cpp
AArch64RedundantCopyElimination.cpp
AArch64RedundantCondBranchPass.cpp
AArch64ISelDAGToDAG.cpp
AArch64ISelLowering.cpp
AArch64InstrInfo.cpp
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AArch64/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@
; CHECK-NEXT: Implement the 'patchable-function' attribute
; CHECK-NEXT: AArch64 load / store optimization pass
; CHECK-NEXT: Machine Copy Propagation Pass
; CHECK-NEXT: AArch64 Redundant Conditional Branch Elimination
; CHECK-NEXT: Workaround A53 erratum 835769 pass
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: Remove Loads Into Fake Uses
Expand Down
10 changes: 2 additions & 8 deletions llvm/test/CodeGen/AArch64/arm64-rev.ll
Original file line number Diff line number Diff line change
Expand Up @@ -530,28 +530,22 @@ declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone
define void @test_rev16_truncstore() {
; CHECK-SD-LABEL: test_rev16_truncstore:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: cbnz wzr, .LBB38_2
; CHECK-SD-NEXT: .LBB38_1: // %cleanup
; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-SD-NEXT: ldrh w8, [x8]
; CHECK-SD-NEXT: rev16 w8, w8
; CHECK-SD-NEXT: strh w8, [x8]
; CHECK-SD-NEXT: cbz wzr, .LBB38_1
; CHECK-SD-NEXT: .LBB38_2: // %fail
; CHECK-SD-NEXT: ret
; CHECK-SD-NEXT: b .LBB38_1
;
; CHECK-GI-LABEL: test_rev16_truncstore:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: tbnz wzr, #0, .LBB38_2
; CHECK-GI-NEXT: .LBB38_1: // %cleanup
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT: ldrh w8, [x8]
; CHECK-GI-NEXT: rev w8, w8
; CHECK-GI-NEXT: lsr w8, w8, #16
; CHECK-GI-NEXT: strh w8, [x8]
; CHECK-GI-NEXT: tbz wzr, #0, .LBB38_1
; CHECK-GI-NEXT: .LBB38_2: // %fail
; CHECK-GI-NEXT: ret
; CHECK-GI-NEXT: b .LBB38_1
entry:
br label %body

Expand Down
18 changes: 6 additions & 12 deletions llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
Original file line number Diff line number Diff line change
Expand Up @@ -735,7 +735,6 @@ define void @infiniteloop() {
; ENABLE-NEXT: .cfi_offset w29, -16
; ENABLE-NEXT: .cfi_offset w19, -24
; ENABLE-NEXT: .cfi_offset w20, -32
; ENABLE-NEXT: cbnz wzr, LBB10_3
; ENABLE-NEXT: ; %bb.1: ; %if.then
; ENABLE-NEXT: sub x19, sp, #16
; ENABLE-NEXT: mov sp, x19
Expand All @@ -746,7 +745,7 @@ define void @infiniteloop() {
; ENABLE-NEXT: add w20, w0, w20
; ENABLE-NEXT: str w20, [x19]
; ENABLE-NEXT: b LBB10_2
; ENABLE-NEXT: LBB10_3: ; %if.end
; ENABLE-NEXT: ; %bb.3: ; %if.end
; ENABLE-NEXT: sub sp, x29, #16
; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
Expand All @@ -762,7 +761,6 @@ define void @infiniteloop() {
; DISABLE-NEXT: .cfi_offset w29, -16
; DISABLE-NEXT: .cfi_offset w19, -24
; DISABLE-NEXT: .cfi_offset w20, -32
; DISABLE-NEXT: cbnz wzr, LBB10_3
; DISABLE-NEXT: ; %bb.1: ; %if.then
; DISABLE-NEXT: sub x19, sp, #16
; DISABLE-NEXT: mov sp, x19
Expand All @@ -773,7 +771,7 @@ define void @infiniteloop() {
; DISABLE-NEXT: add w20, w0, w20
; DISABLE-NEXT: str w20, [x19]
; DISABLE-NEXT: b LBB10_2
; DISABLE-NEXT: LBB10_3: ; %if.end
; DISABLE-NEXT: ; %bb.3: ; %if.end
; DISABLE-NEXT: sub sp, x29, #16
; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
Expand Down Expand Up @@ -808,7 +806,6 @@ define void @infiniteloop2() {
; ENABLE-NEXT: .cfi_offset w29, -16
; ENABLE-NEXT: .cfi_offset w19, -24
; ENABLE-NEXT: .cfi_offset w20, -32
; ENABLE-NEXT: cbnz wzr, LBB11_3
; ENABLE-NEXT: ; %bb.1: ; %if.then
; ENABLE-NEXT: sub x8, sp, #16
; ENABLE-NEXT: mov sp, x8
Expand All @@ -825,7 +822,7 @@ define void @infiniteloop2() {
; ENABLE-NEXT: nop
; ENABLE-NEXT: ; InlineAsm End
; ENABLE-NEXT: b LBB11_2
; ENABLE-NEXT: LBB11_3: ; %if.end
; ENABLE-NEXT: ; %bb.3: ; %if.end
; ENABLE-NEXT: sub sp, x29, #16
; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
Expand All @@ -841,7 +838,6 @@ define void @infiniteloop2() {
; DISABLE-NEXT: .cfi_offset w29, -16
; DISABLE-NEXT: .cfi_offset w19, -24
; DISABLE-NEXT: .cfi_offset w20, -32
; DISABLE-NEXT: cbnz wzr, LBB11_3
; DISABLE-NEXT: ; %bb.1: ; %if.then
; DISABLE-NEXT: sub x8, sp, #16
; DISABLE-NEXT: mov sp, x8
Expand All @@ -858,7 +854,7 @@ define void @infiniteloop2() {
; DISABLE-NEXT: nop
; DISABLE-NEXT: ; InlineAsm End
; DISABLE-NEXT: b LBB11_2
; DISABLE-NEXT: LBB11_3: ; %if.end
; DISABLE-NEXT: ; %bb.3: ; %if.end
; DISABLE-NEXT: sub sp, x29, #16
; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
Expand Down Expand Up @@ -893,7 +889,6 @@ if.end:
define void @infiniteloop3() {
; ENABLE-LABEL: infiniteloop3:
; ENABLE: ; %bb.0: ; %entry
; ENABLE-NEXT: cbnz wzr, LBB12_5
; ENABLE-NEXT: ; %bb.1: ; %loop2a.preheader
; ENABLE-NEXT: mov x8, xzr
; ENABLE-NEXT: mov x9, xzr
Expand All @@ -912,12 +907,11 @@ define void @infiniteloop3() {
; ENABLE-NEXT: mov x8, x10
; ENABLE-NEXT: mov x11, x10
; ENABLE-NEXT: b LBB12_3
; ENABLE-NEXT: LBB12_5: ; %end
; ENABLE-NEXT: ; %bb.5: ; %end
; ENABLE-NEXT: ret
;
; DISABLE-LABEL: infiniteloop3:
; DISABLE: ; %bb.0: ; %entry
; DISABLE-NEXT: cbnz wzr, LBB12_5
; DISABLE-NEXT: ; %bb.1: ; %loop2a.preheader
; DISABLE-NEXT: mov x8, xzr
; DISABLE-NEXT: mov x9, xzr
Expand All @@ -936,7 +930,7 @@ define void @infiniteloop3() {
; DISABLE-NEXT: mov x8, x10
; DISABLE-NEXT: mov x11, x10
; DISABLE-NEXT: b LBB12_3
; DISABLE-NEXT: LBB12_5: ; %end
; DISABLE-NEXT: ; %bb.5: ; %end
; DISABLE-NEXT: ret
entry:
br i1 undef, label %loop2a, label %body
Expand Down
12 changes: 5 additions & 7 deletions llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,20 @@
define i8 @foo_optsize(i32 %v4) optsize {
; CHECK-LABEL: foo_optsize:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cbz wzr, .LBB0_2
; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_2: // %b1
; CHECK-NEXT: cbnz w0, .LBB0_4
; CHECK-NEXT: .LBB0_3: // %b2
; CHECK-NEXT: // %bb.3: // %b2
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_4: // %b1
; CHECK-NEXT: cmp w0, #1
; CHECK-NEXT: b.ne .LBB0_1
; CHECK-NEXT: // %bb.5: // %b3
; CHECK-NEXT: cbz wzr, .LBB0_1
; CHECK-NEXT: b .LBB0_3
; CHECK-NEXT: b .LBB0_1
entry:
%v2 = icmp eq i32 0, 0
br i1 %v2, label %b1, label %b4
Expand All @@ -48,20 +47,19 @@ b4:
define i8 @foo_optspeed(i32 %v4) {
; CHECK-LABEL: foo_optspeed:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cbz wzr, .LBB1_2
; CHECK-NEXT: b .LBB1_2
; CHECK-NEXT: .LBB1_1:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB1_2: // %b1
; CHECK-NEXT: cbnz w0, .LBB1_4
; CHECK-NEXT: .LBB1_3: // %b2
; CHECK-NEXT: // %bb.3: // %b2
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB1_4: // %b1
; CHECK-NEXT: cmp w0, #1
; CHECK-NEXT: b.ne .LBB1_1
; CHECK-NEXT: // %bb.5: // %b3
; CHECK-NEXT: cbnz wzr, .LBB1_3
; CHECK-NEXT: b .LBB1_1
entry:
%v2 = icmp eq i32 0, 0
Expand Down
Loading
Loading