Skip to content

Commit 3d25933

Browse files
committed
[RegisterCoalescer] Improve register allocation for return values by limiting rematerialization
1 parent b6bf196 commit 3d25933

File tree

71 files changed: +1633 -708 lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

71 files changed: +1633 -708 lines changed

clang/test/CodeGen/msp430-abi-complex.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -202,7 +202,7 @@ float _Complex complex_float_res(void) {
202202
// CHECK-DAG: clr r12
203203
// CHECK-DAG: mov #16256, r13
204204
__imag__ res = -1;
205-
// CHECK-DAG: clr r14
205+
// CHECK-DAG: mov r12, r14
206206
// CHECK-DAG: mov #-16512, r15
207207
return res;
208208
// CHECK: ret

llvm/lib/CodeGen/RegisterCoalescer.cpp

Lines changed: 63 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1326,6 +1326,69 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
13261326
if (!TII->isAsCheapAsAMove(*DefMI))
13271327
return false;
13281328

1329+
// Skip rematerialization for physical registers used as return values within
1330+
// the same basic block to enable better coalescing.
1331+
if (DstReg.isPhysical()) {
1332+
MachineBasicBlock *MBB = CopyMI->getParent();
1333+
if (DefMI->getParent() == MBB) {
1334+
// Check if there's already an identical instruction before CopyMI
1335+
// If so, allow rematerialization to avoid redundant instructions
1336+
bool FoundCopy = false;
1337+
for (MachineInstr &MI : *MBB) {
1338+
if (&MI == CopyMI) {
1339+
FoundCopy = true;
1340+
continue;
1341+
}
1342+
1343+
// Before CopyMI: check for duplicate instructions
1344+
if (!FoundCopy && &MI != DefMI &&
1345+
MI.isIdenticalTo(*DefMI, MachineInstr::IgnoreDefs)) {
1346+
break; // Found duplicate, allow rematerialization
1347+
} else if (FoundCopy) {
1348+
// After CopyMI: check if used as return register
1349+
// If the register is redefined, it's not a return register
1350+
if (MI.modifiesRegister(DstReg, TRI))
1351+
break;
1352+
// If there's a return instruction that uses this register, skip remat
1353+
if (MI.isReturn() && MI.readsRegister(DstReg, TRI)) {
1354+
// Exception: if DefMI is moving a constant and SrcReg has no other
1355+
// uses (besides copies), rematerialization is beneficial to
1356+
// eliminate the def
1357+
if (DefMI->isMoveImmediate()) {
1358+
// Quick check: if there's only one use and it's this copy,
1359+
// definitely remat
1360+
if (MRI->hasOneNonDBGUse(SrcReg)) {
1361+
LLVM_DEBUG(dbgs()
1362+
<< "\tAllow remat: single use constant move\n");
1363+
break;
1364+
}
1365+
1366+
// Check all uses to see if they're all copies
1367+
bool OnlyUsedByCopies = true;
1368+
unsigned UseCount = 0;
1369+
for (const MachineOperand &MO : MRI->use_operands(SrcReg)) {
1370+
const MachineInstr *UseMI = MO.getParent();
1371+
if (!UseMI->isCopy() && !UseMI->isSubregToReg()) {
1372+
OnlyUsedByCopies = false;
1373+
break;
1374+
}
1375+
UseCount++;
1376+
}
1377+
1378+
if (OnlyUsedByCopies && UseCount > 0) {
1379+
break;
1380+
}
1381+
}
1382+
1383+
LLVM_DEBUG(dbgs() << "\tSkip remat for return register: "
1384+
<< printReg(DstReg, TRI) << '\n');
1385+
return false;
1386+
}
1387+
}
1388+
}
1389+
}
1390+
}
1391+
13291392
if (!TII->isReMaterializable(*DefMI))
13301393
return false;
13311394

llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -52,9 +52,8 @@ define win64cc ptr @f9(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64
5252
; CHECK-LABEL: f9:
5353
; CHECK: // %bb.0: // %entry
5454
; CHECK-NEXT: str x18, [sp, #-16]! // 8-byte Folded Spill
55-
; CHECK-NEXT: add x8, sp, #24
5655
; CHECK-NEXT: add x0, sp, #24
57-
; CHECK-NEXT: str x8, [sp, #8]
56+
; CHECK-NEXT: str x0, [sp, #8]
5857
; CHECK-NEXT: ldr x18, [sp], #16 // 8-byte Folded Reload
5958
; CHECK-NEXT: ret
6059
;
@@ -78,9 +77,8 @@ define win64cc ptr @f8(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64
7877
; CHECK-LABEL: f8:
7978
; CHECK: // %bb.0: // %entry
8079
; CHECK-NEXT: str x18, [sp, #-16]! // 8-byte Folded Spill
81-
; CHECK-NEXT: add x8, sp, #16
8280
; CHECK-NEXT: add x0, sp, #16
83-
; CHECK-NEXT: str x8, [sp, #8]
81+
; CHECK-NEXT: str x0, [sp, #8]
8482
; CHECK-NEXT: ldr x18, [sp], #16 // 8-byte Folded Reload
8583
; CHECK-NEXT: ret
8684
;
@@ -104,10 +102,9 @@ define win64cc ptr @f7(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64
104102
; CHECK-LABEL: f7:
105103
; CHECK: // %bb.0: // %entry
106104
; CHECK-NEXT: str x18, [sp, #-32]! // 8-byte Folded Spill
107-
; CHECK-NEXT: add x8, sp, #24
108105
; CHECK-NEXT: add x0, sp, #24
109106
; CHECK-NEXT: str x7, [sp, #24]
110-
; CHECK-NEXT: str x8, [sp, #8]
107+
; CHECK-NEXT: str x0, [sp, #8]
111108
; CHECK-NEXT: ldr x18, [sp], #32 // 8-byte Folded Reload
112109
; CHECK-NEXT: ret
113110
;

llvm/test/CodeGen/AArch64/arm64-neon-copy.ll

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2156,6 +2156,7 @@ define <4 x i16> @concat_vector_v4i16_const() {
21562156
; CHECK-LABEL: concat_vector_v4i16_const:
21572157
; CHECK: // %bb.0:
21582158
; CHECK-NEXT: movi v0.2d, #0000000000000000
2159+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
21592160
; CHECK-NEXT: ret
21602161
%r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
21612162
ret <4 x i16> %r
@@ -2183,6 +2184,7 @@ define <8 x i8> @concat_vector_v8i8_const() {
21832184
; CHECK-LABEL: concat_vector_v8i8_const:
21842185
; CHECK: // %bb.0:
21852186
; CHECK-NEXT: movi v0.2d, #0000000000000000
2187+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
21862188
; CHECK-NEXT: ret
21872189
%r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
21882190
ret <8 x i8> %r

llvm/test/CodeGen/AArch64/arm64-vector-ext.ll

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ define void @func30(%T0_30 %v0, ptr %p1) {
2020
define <1 x i32> @autogen_SD7918() {
2121
; CHECK-LABEL: autogen_SD7918
2222
; CHECK: movi.2d v0, #0000000000000000
23+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
2324
; CHECK-NEXT: ret
2425
%I29 = insertelement <1 x i1> zeroinitializer, i1 false, i32 0
2526
%ZE = zext <1 x i1> %I29 to <1 x i32>

llvm/test/CodeGen/AArch64/arm64-vshuffle.ll

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ define <8 x i1> @test1() {
44
; CHECK-LABEL: test1:
55
; CHECK: ; %bb.0: ; %entry
66
; CHECK-NEXT: movi.16b v0, #0
7+
; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
78
; CHECK-NEXT: ret
89
entry:
910
%Shuff = shufflevector <8 x i1> <i1 0, i1 1, i1 2, i1 3, i1 4, i1 5, i1 6,

llvm/test/CodeGen/AArch64/bitcast.ll

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@ define <4 x i16> @foo1(<2 x i32> %a) {
88
; CHECK-SD-LABEL: foo1:
99
; CHECK-SD: // %bb.0:
1010
; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
11+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
1112
; CHECK-SD-NEXT: ret
1213
;
1314
; CHECK-GI-LABEL: foo1:
@@ -28,6 +29,7 @@ define <4 x i16> @foo2(<2 x i32> %a) {
2829
; CHECK-SD-LABEL: foo2:
2930
; CHECK-SD: // %bb.0:
3031
; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
32+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
3133
; CHECK-SD-NEXT: ret
3234
;
3335
; CHECK-GI-LABEL: foo2:

llvm/test/CodeGen/AArch64/combine-mul.ll

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ define <4 x i1> @PR48683_vec(<4 x i32> %x) {
1818
; CHECK-LABEL: PR48683_vec:
1919
; CHECK: // %bb.0:
2020
; CHECK-NEXT: movi v0.2d, #0000000000000000
21+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
2122
; CHECK-NEXT: ret
2223
%a = mul <4 x i32> %x, %x
2324
%b = and <4 x i32> %a, <i32 2, i32 2, i32 2, i32 2>
@@ -29,6 +30,7 @@ define <4 x i1> @PR48683_vec_undef(<4 x i32> %x) {
2930
; CHECK-LABEL: PR48683_vec_undef:
3031
; CHECK: // %bb.0:
3132
; CHECK-NEXT: movi v0.2d, #0000000000000000
33+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
3234
; CHECK-NEXT: ret
3335
%a = mul <4 x i32> %x, %x
3436
%b = and <4 x i32> %a, <i32 2, i32 2, i32 2, i32 undef>

llvm/test/CodeGen/AArch64/ext-narrow-index.ll

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -251,6 +251,7 @@ define <8 x i8> @i8_zero_off22(<16 x i8> %arg1) {
251251
; CHECK-SD-LABEL: i8_zero_off22:
252252
; CHECK-SD: // %bb.0: // %entry
253253
; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
254+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
254255
; CHECK-SD-NEXT: ret
255256
;
256257
; CHECK-GISEL-LABEL: i8_zero_off22:
@@ -302,6 +303,7 @@ define <4 x i16> @i16_zero_off8(<8 x i16> %arg1) {
302303
; CHECK-LABEL: i16_zero_off8:
303304
; CHECK: // %bb.0: // %entry
304305
; CHECK-NEXT: movi v0.2d, #0000000000000000
306+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
305307
; CHECK-NEXT: ret
306308
entry:
307309
%shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
@@ -346,6 +348,7 @@ define <2 x i32> @i32_zero_off4(<4 x i32> %arg1) {
346348
; CHECK-LABEL: i32_zero_off4:
347349
; CHECK: // %bb.0: // %entry
348350
; CHECK-NEXT: movi v0.2d, #0000000000000000
351+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
349352
; CHECK-NEXT: ret
350353
entry:
351354
%shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 4, i32 5>

llvm/test/CodeGen/AArch64/fast-isel-const-float.ll

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ define float @select_fp_const() {
99
; GISEL-LABEL: select_fp_const:
1010
; GISEL: // %bb.0: // %entry
1111
; GISEL-NEXT: movi v0.2s, #79, lsl #24
12+
; GISEL-NEXT: // kill: def $s0 killed $s0 killed $d0
1213
; GISEL-NEXT: ret
1314
;
1415
; FISEL-LABEL: select_fp_const:

0 commit comments

Comments
 (0)