Skip to content

Commit 0105646

Browse files
committed
[AArch64] Optimize CBZ wzr and friends.
In certain situations, especially when zero phi operands are propagated after tail duplication, we can end up with a CBZ/CBNZ/TBZ/TBNZ that tests a zero register (WZR/XZR). This only happens late in the pipeline. This patch adds a basic simplifyInstruction implementation that folds such instructions away, either by replacing them with a direct branch (CBZ/TBZ, which is always taken) or by removing the instruction entirely (CBNZ/TBNZ, which is never taken). It needs some fixups, but seems to work.
1 parent fef7753 commit 0105646

File tree

15 files changed

+136
-151
lines changed

15 files changed

+136
-151
lines changed

llvm/include/llvm/CodeGen/TargetInstrInfo.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -550,7 +550,10 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
550550
/// MachineCopyPropagation, where their mutation of the MI operands may
551551
/// expose opportunities to convert the instruction to a simpler form (e.g.
552552
/// a load of 0).
553-
virtual bool simplifyInstruction(MachineInstr &MI) const { return false; }
553+
virtual bool simplifyInstruction(MachineInstr &MI,
554+
bool &AlteredTerminators) const {
555+
return false;
556+
}
554557

555558
/// A pair composed of a register and a sub-register index.
556559
/// Used to give some type checking when modeling Reg:SubReg.

llvm/lib/CodeGen/MachineCopyPropagation.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -928,9 +928,13 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
928928

929929
// Attempt to canonicalize/optimize the instruction now its arguments have
930930
// been mutated. This may convert MI from a non-copy to a copy instruction.
931-
if (TII->simplifyInstruction(MI)) {
931+
bool AlteredTerminators = false;
932+
if (TII->simplifyInstruction(MI, AlteredTerminators)) {
932933
Changed = true;
933-
LLVM_DEBUG(dbgs() << "MCP: After simplifyInstruction: " << MI);
934+
if (AlteredTerminators)
935+
break;
936+
else
937+
LLVM_DEBUG(dbgs() << "MCP: After simplifyInstruction: " << MI);
934938
}
935939

936940
CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr);

llvm/lib/CodeGen/ShrinkWrap.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,8 @@ bool ShrinkWrapImpl::postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
618618

619619
DenseSet<const MachineBasicBlock *> DirtyBBs;
620620
for (MachineBasicBlock &MBB : MF) {
621+
if (!MDT->isReachableFromEntry(&MBB))
622+
continue;
621623
if (MBB.isEHPad()) {
622624
DirtyBBs.insert(&MBB);
623625
continue;

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -685,6 +685,55 @@ unsigned AArch64InstrInfo::insertBranch(
685685
return 2;
686686
}
687687

688+
/// Fold compare-and-branch / test-and-branch instructions whose tested
/// register is the zero register (WZR/XZR). Their outcome is known statically:
///   CBZ/TBZ   zr -> always taken: replaced by an unconditional `B Target`,
///                   and every terminator that followed it is deleted.
///   CBNZ/TBNZ zr -> never taken: the instruction is simply erased.
///
/// \param MI The instruction to simplify. When a fold happens MI is erased
///        from its parent block and must not be touched by the caller.
/// \param AlteredTerminators Set to true when the block's terminators (and
///        possibly its successor list) were rewritten; left untouched
///        otherwise.
/// \returns true if the instruction was folded, false if nothing changed.
bool AArch64InstrInfo::simplifyInstruction(MachineInstr &MI,
                                           bool &AlteredTerminators) const {
  // Classify the opcode: which operand holds the branch target, and whether
  // the branch is always taken or never taken when the register is zero.
  unsigned TargetIdx = 0;
  bool AlwaysTaken = false;
  switch (MI.getOpcode()) {
  case AArch64::CBZW:
  case AArch64::CBZX:
    TargetIdx = 1;
    AlwaysTaken = true;
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
    // TBZ/TBNZ carry the tested bit number in operand 1.
    TargetIdx = 2;
    AlwaysTaken = true;
    break;
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    TargetIdx = 1;
    break;
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    TargetIdx = 2;
    break;
  default:
    return false;
  }

  const Register Reg = MI.getOperand(0).getReg();
  if (Reg != AArch64::WZR && Reg != AArch64::XZR)
    return false;

  MachineBasicBlock *MBB = MI.getParent();
  MachineBasicBlock *Target = MI.getOperand(TargetIdx).getMBB();

  if (AlwaysTaken) {
    // CBZ XZR -> B
    // Control can only leave through Target now, so drop the other branch
    // successors. Exception landing pads are kept: those edges belong to
    // earlier calls in the block, not to the terminators being rewritten.
    SmallVector<MachineBasicBlock *> Succs(MBB->successors());
    for (MachineBasicBlock *S : Succs)
      if (S != Target && !S->isEHPad())
        MBB->removeSuccessor(S);

    // Collect MI and everything after it before appending the new branch,
    // so that the freshly built B is not part of the set to delete.
    SmallVector<MachineInstr *> DeadInstrs;
    for (auto It = MI.getIterator(), E = MBB->instr_end(); It != E; ++It)
      DeadInstrs.push_back(&*It);
    BuildMI(MBB, MI.getDebugLoc(), get(AArch64::B)).addMBB(Target);
    for (MachineInstr *Dead : DeadInstrs)
      Dead->eraseFromParent();
  } else {
    // CBNZ XZR -> nop
    // The successor list holds Target only once, even if a later terminator
    // or the fall-through path also leads there. Only remove the CFG edge
    // when this branch was the sole way of reaching Target.
    bool StillReachesTarget = false;
    bool EndsInBarrier = false;
    auto It = MI.getIterator();
    for (++It; It != MBB->instr_end(); ++It) {
      if (It->isBarrier())
        EndsInBarrier = true;
      for (const MachineOperand &MO : It->operands())
        if (MO.isMBB() && MO.getMBB() == Target)
          StillReachesTarget = true;
    }
    // With no barrier after MI the block falls through to its layout
    // successor once the conditional branch is gone.
    if (!EndsInBarrier && MBB->isLayoutSuccessor(Target))
      StillReachesTarget = true;

    if (!StillReachesTarget)
      MBB->removeSuccessor(Target);
    MI.eraseFromParent();
  }

  AlteredTerminators = true;
  return true;
}
736+
688737
// Find the original register that VReg is copied from.
689738
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
690739
while (Register::isVirtualRegister(VReg)) {

llvm/lib/Target/AArch64/AArch64InstrInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,9 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
401401
const DebugLoc &DL,
402402
int *BytesAdded = nullptr) const override;
403403

404+
bool simplifyInstruction(MachineInstr &MI,
405+
bool &AlteredTerminators) const override;
406+
404407
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
405408
analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
406409

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4169,7 +4169,8 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
41694169
#undef CASE_VFMA_OPCODE_VV
41704170
#undef CASE_VFMA_SPLATS
41714171

4172-
bool RISCVInstrInfo::simplifyInstruction(MachineInstr &MI) const {
4172+
bool RISCVInstrInfo::simplifyInstruction(MachineInstr &MI,
4173+
bool &AlteredTerminators) const {
41734174
switch (MI.getOpcode()) {
41744175
default:
41754176
break;

llvm/lib/Target/RISCV/RISCVInstrInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
239239
unsigned OpIdx1,
240240
unsigned OpIdx2) const override;
241241

242-
bool simplifyInstruction(MachineInstr &MI) const override;
242+
bool simplifyInstruction(MachineInstr &MI,
243+
bool &AlteredTerminators) const override;
243244

244245
MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
245246
LiveIntervals *LIS) const override;

llvm/test/CodeGen/AArch64/arm64-rev.ll

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -530,28 +530,22 @@ declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone
530530
define void @test_rev16_truncstore() {
531531
; CHECK-SD-LABEL: test_rev16_truncstore:
532532
; CHECK-SD: // %bb.0: // %entry
533-
; CHECK-SD-NEXT: cbnz wzr, .LBB38_2
534533
; CHECK-SD-NEXT: .LBB38_1: // %cleanup
535534
; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1
536535
; CHECK-SD-NEXT: ldrh w8, [x8]
537536
; CHECK-SD-NEXT: rev16 w8, w8
538537
; CHECK-SD-NEXT: strh w8, [x8]
539-
; CHECK-SD-NEXT: cbz wzr, .LBB38_1
540-
; CHECK-SD-NEXT: .LBB38_2: // %fail
541-
; CHECK-SD-NEXT: ret
538+
; CHECK-SD-NEXT: b .LBB38_1
542539
;
543540
; CHECK-GI-LABEL: test_rev16_truncstore:
544541
; CHECK-GI: // %bb.0: // %entry
545-
; CHECK-GI-NEXT: tbnz wzr, #0, .LBB38_2
546542
; CHECK-GI-NEXT: .LBB38_1: // %cleanup
547543
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
548544
; CHECK-GI-NEXT: ldrh w8, [x8]
549545
; CHECK-GI-NEXT: rev w8, w8
550546
; CHECK-GI-NEXT: lsr w8, w8, #16
551547
; CHECK-GI-NEXT: strh w8, [x8]
552-
; CHECK-GI-NEXT: tbz wzr, #0, .LBB38_1
553-
; CHECK-GI-NEXT: .LBB38_2: // %fail
554-
; CHECK-GI-NEXT: ret
548+
; CHECK-GI-NEXT: b .LBB38_1
555549
entry:
556550
br label %body
557551

llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll

Lines changed: 22 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -735,22 +735,15 @@ define void @infiniteloop() {
735735
; ENABLE-NEXT: .cfi_offset w29, -16
736736
; ENABLE-NEXT: .cfi_offset w19, -24
737737
; ENABLE-NEXT: .cfi_offset w20, -32
738-
; ENABLE-NEXT: cbnz wzr, LBB10_3
739-
; ENABLE-NEXT: ; %bb.1: ; %if.then
740738
; ENABLE-NEXT: sub x19, sp, #16
741739
; ENABLE-NEXT: mov sp, x19
742740
; ENABLE-NEXT: mov w20, wzr
743-
; ENABLE-NEXT: LBB10_2: ; %for.body
741+
; ENABLE-NEXT: LBB10_1: ; %for.body
744742
; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
745743
; ENABLE-NEXT: bl _something
746744
; ENABLE-NEXT: add w20, w0, w20
747745
; ENABLE-NEXT: str w20, [x19]
748-
; ENABLE-NEXT: b LBB10_2
749-
; ENABLE-NEXT: LBB10_3: ; %if.end
750-
; ENABLE-NEXT: sub sp, x29, #16
751-
; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
752-
; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
753-
; ENABLE-NEXT: ret
746+
; ENABLE-NEXT: b LBB10_1
754747
;
755748
; DISABLE-LABEL: infiniteloop:
756749
; DISABLE: ; %bb.0: ; %entry
@@ -762,22 +755,15 @@ define void @infiniteloop() {
762755
; DISABLE-NEXT: .cfi_offset w29, -16
763756
; DISABLE-NEXT: .cfi_offset w19, -24
764757
; DISABLE-NEXT: .cfi_offset w20, -32
765-
; DISABLE-NEXT: cbnz wzr, LBB10_3
766-
; DISABLE-NEXT: ; %bb.1: ; %if.then
767758
; DISABLE-NEXT: sub x19, sp, #16
768759
; DISABLE-NEXT: mov sp, x19
769760
; DISABLE-NEXT: mov w20, wzr
770-
; DISABLE-NEXT: LBB10_2: ; %for.body
761+
; DISABLE-NEXT: LBB10_1: ; %for.body
771762
; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
772763
; DISABLE-NEXT: bl _something
773764
; DISABLE-NEXT: add w20, w0, w20
774765
; DISABLE-NEXT: str w20, [x19]
775-
; DISABLE-NEXT: b LBB10_2
776-
; DISABLE-NEXT: LBB10_3: ; %if.end
777-
; DISABLE-NEXT: sub sp, x29, #16
778-
; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
779-
; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
780-
; DISABLE-NEXT: ret
766+
; DISABLE-NEXT: b LBB10_1
781767
entry:
782768
br i1 undef, label %if.then, label %if.end
783769

@@ -808,12 +794,10 @@ define void @infiniteloop2() {
808794
; ENABLE-NEXT: .cfi_offset w29, -16
809795
; ENABLE-NEXT: .cfi_offset w19, -24
810796
; ENABLE-NEXT: .cfi_offset w20, -32
811-
; ENABLE-NEXT: cbnz wzr, LBB11_3
812-
; ENABLE-NEXT: ; %bb.1: ; %if.then
813797
; ENABLE-NEXT: sub x8, sp, #16
814798
; ENABLE-NEXT: mov sp, x8
815799
; ENABLE-NEXT: mov w9, wzr
816-
; ENABLE-NEXT: LBB11_2: ; %for.body
800+
; ENABLE-NEXT: LBB11_1: ; %for.body
817801
; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
818802
; ENABLE-NEXT: ; InlineAsm Start
819803
; ENABLE-NEXT: mov x10, #0 ; =0x0
@@ -824,12 +808,7 @@ define void @infiniteloop2() {
824808
; ENABLE-NEXT: ; InlineAsm Start
825809
; ENABLE-NEXT: nop
826810
; ENABLE-NEXT: ; InlineAsm End
827-
; ENABLE-NEXT: b LBB11_2
828-
; ENABLE-NEXT: LBB11_3: ; %if.end
829-
; ENABLE-NEXT: sub sp, x29, #16
830-
; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
831-
; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
832-
; ENABLE-NEXT: ret
811+
; ENABLE-NEXT: b LBB11_1
833812
;
834813
; DISABLE-LABEL: infiniteloop2:
835814
; DISABLE: ; %bb.0: ; %entry
@@ -841,12 +820,10 @@ define void @infiniteloop2() {
841820
; DISABLE-NEXT: .cfi_offset w29, -16
842821
; DISABLE-NEXT: .cfi_offset w19, -24
843822
; DISABLE-NEXT: .cfi_offset w20, -32
844-
; DISABLE-NEXT: cbnz wzr, LBB11_3
845-
; DISABLE-NEXT: ; %bb.1: ; %if.then
846823
; DISABLE-NEXT: sub x8, sp, #16
847824
; DISABLE-NEXT: mov sp, x8
848825
; DISABLE-NEXT: mov w9, wzr
849-
; DISABLE-NEXT: LBB11_2: ; %for.body
826+
; DISABLE-NEXT: LBB11_1: ; %for.body
850827
; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
851828
; DISABLE-NEXT: ; InlineAsm Start
852829
; DISABLE-NEXT: mov x10, #0 ; =0x0
@@ -857,12 +834,7 @@ define void @infiniteloop2() {
857834
; DISABLE-NEXT: ; InlineAsm Start
858835
; DISABLE-NEXT: nop
859836
; DISABLE-NEXT: ; InlineAsm End
860-
; DISABLE-NEXT: b LBB11_2
861-
; DISABLE-NEXT: LBB11_3: ; %if.end
862-
; DISABLE-NEXT: sub sp, x29, #16
863-
; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
864-
; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
865-
; DISABLE-NEXT: ret
837+
; DISABLE-NEXT: b LBB11_1
866838
entry:
867839
br i1 undef, label %if.then, label %if.end
868840

@@ -893,51 +865,43 @@ if.end:
893865
define void @infiniteloop3() {
894866
; ENABLE-LABEL: infiniteloop3:
895867
; ENABLE: ; %bb.0: ; %entry
896-
; ENABLE-NEXT: cbnz wzr, LBB12_5
897-
; ENABLE-NEXT: ; %bb.1: ; %loop2a.preheader
898868
; ENABLE-NEXT: mov x8, xzr
899869
; ENABLE-NEXT: mov x9, xzr
900870
; ENABLE-NEXT: mov x11, xzr
901-
; ENABLE-NEXT: b LBB12_3
902-
; ENABLE-NEXT: LBB12_2: ; %loop2b
903-
; ENABLE-NEXT: ; in Loop: Header=BB12_3 Depth=1
871+
; ENABLE-NEXT: b LBB12_2
872+
; ENABLE-NEXT: LBB12_1: ; %loop2b
873+
; ENABLE-NEXT: ; in Loop: Header=BB12_2 Depth=1
904874
; ENABLE-NEXT: str x10, [x11]
905875
; ENABLE-NEXT: mov x11, x10
906-
; ENABLE-NEXT: LBB12_3: ; %loop1
876+
; ENABLE-NEXT: LBB12_2: ; %loop1
907877
; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
908878
; ENABLE-NEXT: mov x10, x9
909879
; ENABLE-NEXT: ldr x9, [x8]
910-
; ENABLE-NEXT: cbnz x8, LBB12_2
911-
; ENABLE-NEXT: ; %bb.4: ; in Loop: Header=BB12_3 Depth=1
880+
; ENABLE-NEXT: cbnz x8, LBB12_1
881+
; ENABLE-NEXT: ; %bb.3: ; in Loop: Header=BB12_2 Depth=1
912882
; ENABLE-NEXT: mov x8, x10
913883
; ENABLE-NEXT: mov x11, x10
914-
; ENABLE-NEXT: b LBB12_3
915-
; ENABLE-NEXT: LBB12_5: ; %end
916-
; ENABLE-NEXT: ret
884+
; ENABLE-NEXT: b LBB12_2
917885
;
918886
; DISABLE-LABEL: infiniteloop3:
919887
; DISABLE: ; %bb.0: ; %entry
920-
; DISABLE-NEXT: cbnz wzr, LBB12_5
921-
; DISABLE-NEXT: ; %bb.1: ; %loop2a.preheader
922888
; DISABLE-NEXT: mov x8, xzr
923889
; DISABLE-NEXT: mov x9, xzr
924890
; DISABLE-NEXT: mov x11, xzr
925-
; DISABLE-NEXT: b LBB12_3
926-
; DISABLE-NEXT: LBB12_2: ; %loop2b
927-
; DISABLE-NEXT: ; in Loop: Header=BB12_3 Depth=1
891+
; DISABLE-NEXT: b LBB12_2
892+
; DISABLE-NEXT: LBB12_1: ; %loop2b
893+
; DISABLE-NEXT: ; in Loop: Header=BB12_2 Depth=1
928894
; DISABLE-NEXT: str x10, [x11]
929895
; DISABLE-NEXT: mov x11, x10
930-
; DISABLE-NEXT: LBB12_3: ; %loop1
896+
; DISABLE-NEXT: LBB12_2: ; %loop1
931897
; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
932898
; DISABLE-NEXT: mov x10, x9
933899
; DISABLE-NEXT: ldr x9, [x8]
934-
; DISABLE-NEXT: cbnz x8, LBB12_2
935-
; DISABLE-NEXT: ; %bb.4: ; in Loop: Header=BB12_3 Depth=1
900+
; DISABLE-NEXT: cbnz x8, LBB12_1
901+
; DISABLE-NEXT: ; %bb.3: ; in Loop: Header=BB12_2 Depth=1
936902
; DISABLE-NEXT: mov x8, x10
937903
; DISABLE-NEXT: mov x11, x10
938-
; DISABLE-NEXT: b LBB12_3
939-
; DISABLE-NEXT: LBB12_5: ; %end
940-
; DISABLE-NEXT: ret
904+
; DISABLE-NEXT: b LBB12_2
941905
entry:
942906
br i1 undef, label %loop2a, label %body
943907

llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll

Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -8,21 +8,14 @@
88
define i8 @foo_optsize(i32 %v4) optsize {
99
; CHECK-LABEL: foo_optsize:
1010
; CHECK: // %bb.0: // %entry
11-
; CHECK-NEXT: cbz wzr, .LBB0_2
12-
; CHECK-NEXT: .LBB0_1:
13-
; CHECK-NEXT: mov w0, wzr
14-
; CHECK-NEXT: ret
15-
; CHECK-NEXT: .LBB0_2: // %b1
16-
; CHECK-NEXT: cbnz w0, .LBB0_4
17-
; CHECK-NEXT: .LBB0_3: // %b2
11+
; CHECK-NEXT: cbnz w0, .LBB0_2
12+
; CHECK-NEXT: // %bb.1: // %b2
1813
; CHECK-NEXT: mov w0, #1 // =0x1
1914
; CHECK-NEXT: ret
20-
; CHECK-NEXT: .LBB0_4: // %b1
15+
; CHECK-NEXT: .LBB0_2: // %b1
2116
; CHECK-NEXT: cmp w0, #1
22-
; CHECK-NEXT: b.ne .LBB0_1
23-
; CHECK-NEXT: // %bb.5: // %b3
24-
; CHECK-NEXT: cbz wzr, .LBB0_1
25-
; CHECK-NEXT: b .LBB0_3
17+
; CHECK-NEXT: mov w0, wzr
18+
; CHECK-NEXT: ret
2619
entry:
2720
%v2 = icmp eq i32 0, 0
2821
br i1 %v2, label %b1, label %b4
@@ -48,21 +41,14 @@ b4:
4841
define i8 @foo_optspeed(i32 %v4) {
4942
; CHECK-LABEL: foo_optspeed:
5043
; CHECK: // %bb.0: // %entry
51-
; CHECK-NEXT: cbz wzr, .LBB1_2
52-
; CHECK-NEXT: .LBB1_1:
53-
; CHECK-NEXT: mov w0, wzr
54-
; CHECK-NEXT: ret
55-
; CHECK-NEXT: .LBB1_2: // %b1
56-
; CHECK-NEXT: cbnz w0, .LBB1_4
57-
; CHECK-NEXT: .LBB1_3: // %b2
44+
; CHECK-NEXT: cbnz w0, .LBB1_2
45+
; CHECK-NEXT: // %bb.1: // %b2
5846
; CHECK-NEXT: mov w0, #1 // =0x1
5947
; CHECK-NEXT: ret
60-
; CHECK-NEXT: .LBB1_4: // %b1
48+
; CHECK-NEXT: .LBB1_2: // %b1
6149
; CHECK-NEXT: cmp w0, #1
62-
; CHECK-NEXT: b.ne .LBB1_1
63-
; CHECK-NEXT: // %bb.5: // %b3
64-
; CHECK-NEXT: cbnz wzr, .LBB1_3
65-
; CHECK-NEXT: b .LBB1_1
50+
; CHECK-NEXT: mov w0, wzr
51+
; CHECK-NEXT: ret
6652
entry:
6753
%v2 = icmp eq i32 0, 0
6854
br i1 %v2, label %b1, label %b4

0 commit comments

Comments
 (0)