Skip to content

Commit 3075cdd

Browse files
committed
[RegisterCoalescer] Improve register allocation for return values by limiting rematerialization
1 parent b6bf196 commit 3075cdd

File tree

87 files changed

+498
-356
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

87 files changed

+498
-356
lines changed

llvm/lib/CodeGen/RegisterCoalescer.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1326,6 +1326,41 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
13261326
if (!TII->isAsCheapAsAMove(*DefMI))
13271327
return false;
13281328

1329+
// Skip rematerialization for physical registers used as return values within
1330+
// the same basic block to enable better coalescing.
1331+
if (DstReg.isPhysical()) {
1332+
MachineBasicBlock *MBB = CopyMI->getParent();
1333+
if (DefMI->getParent() == MBB) {
1334+
// Check if there's already an identical instruction before CopyMI
1335+
// If so, allow rematerialization to avoid redundant instructions
1336+
bool FoundCopy = false;
1337+
for (MachineInstr &MI : *MBB) {
1338+
if (&MI == CopyMI) {
1339+
FoundCopy = true;
1340+
continue;
1341+
}
1342+
1343+
// Before CopyMI: check for duplicate instructions
1344+
if (!FoundCopy && &MI != DefMI &&
1345+
MI.isIdenticalTo(*DefMI, MachineInstr::IgnoreDefs)) {
1346+
break; // Found duplicate, allow rematerialization
1347+
} else if (FoundCopy) {
1348+
// After CopyMI: check if used as return register
1349+
// If the register is redefined, it's not a return register
1350+
if (MI.modifiesRegister(DstReg, TRI))
1351+
break;
1352+
1353+
// If there's a return instruction that uses this register, skip remat
1354+
if (MI.isReturn() && MI.readsRegister(DstReg, TRI)) {
1355+
LLVM_DEBUG(dbgs() << "\tSkip remat for return register: "
1356+
<< printReg(DstReg, TRI) << '\n');
1357+
return false;
1358+
}
1359+
}
1360+
}
1361+
}
1362+
}
1363+
13291364
if (!TII->isReMaterializable(*DefMI))
13301365
return false;
13311366

llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,8 @@ define win64cc ptr @f9(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64
5252
; CHECK-LABEL: f9:
5353
; CHECK: // %bb.0: // %entry
5454
; CHECK-NEXT: str x18, [sp, #-16]! // 8-byte Folded Spill
55-
; CHECK-NEXT: add x8, sp, #24
5655
; CHECK-NEXT: add x0, sp, #24
57-
; CHECK-NEXT: str x8, [sp, #8]
56+
; CHECK-NEXT: str x0, [sp, #8]
5857
; CHECK-NEXT: ldr x18, [sp], #16 // 8-byte Folded Reload
5958
; CHECK-NEXT: ret
6059
;
@@ -78,9 +77,8 @@ define win64cc ptr @f8(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64
7877
; CHECK-LABEL: f8:
7978
; CHECK: // %bb.0: // %entry
8079
; CHECK-NEXT: str x18, [sp, #-16]! // 8-byte Folded Spill
81-
; CHECK-NEXT: add x8, sp, #16
8280
; CHECK-NEXT: add x0, sp, #16
83-
; CHECK-NEXT: str x8, [sp, #8]
81+
; CHECK-NEXT: str x0, [sp, #8]
8482
; CHECK-NEXT: ldr x18, [sp], #16 // 8-byte Folded Reload
8583
; CHECK-NEXT: ret
8684
;
@@ -104,10 +102,9 @@ define win64cc ptr @f7(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64
104102
; CHECK-LABEL: f7:
105103
; CHECK: // %bb.0: // %entry
106104
; CHECK-NEXT: str x18, [sp, #-32]! // 8-byte Folded Spill
107-
; CHECK-NEXT: add x8, sp, #24
108105
; CHECK-NEXT: add x0, sp, #24
109106
; CHECK-NEXT: str x7, [sp, #24]
110-
; CHECK-NEXT: str x8, [sp, #8]
107+
; CHECK-NEXT: str x0, [sp, #8]
111108
; CHECK-NEXT: ldr x18, [sp], #32 // 8-byte Folded Reload
112109
; CHECK-NEXT: ret
113110
;

llvm/test/CodeGen/AArch64/arm64-neon-copy.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2156,6 +2156,7 @@ define <4 x i16> @concat_vector_v4i16_const() {
21562156
; CHECK-LABEL: concat_vector_v4i16_const:
21572157
; CHECK: // %bb.0:
21582158
; CHECK-NEXT: movi v0.2d, #0000000000000000
2159+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
21592160
; CHECK-NEXT: ret
21602161
%r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
21612162
ret <4 x i16> %r
@@ -2183,6 +2184,7 @@ define <8 x i8> @concat_vector_v8i8_const() {
21832184
; CHECK-LABEL: concat_vector_v8i8_const:
21842185
; CHECK: // %bb.0:
21852186
; CHECK-NEXT: movi v0.2d, #0000000000000000
2187+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
21862188
; CHECK-NEXT: ret
21872189
%r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
21882190
ret <8 x i8> %r

llvm/test/CodeGen/AArch64/arm64-vector-ext.ll

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
12
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
23

3-
;CHECK: @func30
4-
;CHECK: movi.4h v1, #1
5-
;CHECK: and.8b v0, v0, v1
6-
;CHECK: ushll.4s v0, v0, #0
7-
;CHECK: str q0, [x0]
8-
;CHECK: ret
9-
104
%T0_30 = type <4 x i1>
115
%T1_30 = type <4 x i32>
126
define void @func30(%T0_30 %v0, ptr %p1) {
7+
; CHECK-LABEL: func30:
8+
; CHECK: // %bb.0:
9+
; CHECK-NEXT: movi.4h v1, #1
10+
; CHECK-NEXT: and.8b v0, v0, v1
11+
; CHECK-NEXT: ushll.4s v0, v0, #0
12+
; CHECK-NEXT: str q0, [x0]
13+
; CHECK-NEXT: ret
1314
%r = zext %T0_30 %v0 to %T1_30
1415
store %T1_30 %r, ptr %p1
1516
ret void
@@ -18,9 +19,11 @@ define void @func30(%T0_30 %v0, ptr %p1) {
1819
; Extend from v1i1 was crashing things (PR20791). Make sure we do something
1920
; sensible instead.
2021
define <1 x i32> @autogen_SD7918() {
21-
; CHECK-LABEL: autogen_SD7918
22-
; CHECK: movi.2d v0, #0000000000000000
23-
; CHECK-NEXT: ret
22+
; CHECK-LABEL: autogen_SD7918:
23+
; CHECK: // %bb.0:
24+
; CHECK-NEXT: movi.2d v0, #0000000000000000
25+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
26+
; CHECK-NEXT: ret
2427
%I29 = insertelement <1 x i1> zeroinitializer, i1 false, i32 0
2528
%ZE = zext <1 x i1> %I29 to <1 x i32>
2629
ret <1 x i32> %ZE

llvm/test/CodeGen/AArch64/arm64-vshuffle.ll

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
12
; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -mcpu=cyclone | FileCheck %s
23

34
define <8 x i1> @test1() {
45
; CHECK-LABEL: test1:
56
; CHECK: ; %bb.0: ; %entry
67
; CHECK-NEXT: movi.16b v0, #0
8+
; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
79
; CHECK-NEXT: ret
810
entry:
911
%Shuff = shufflevector <8 x i1> <i1 0, i1 1, i1 2, i1 3, i1 4, i1 5, i1 6,
@@ -58,9 +60,14 @@ bb:
5860
; CHECK: .byte 0 ; 0x0
5961
; CHECK: .byte 0 ; 0x0
6062
define <16 x i1> @test4(ptr %ptr, i32 %v) {
61-
; CHECK-LABEL: _test4:
62-
; CHECK: adrp x[[REG3:[0-9]+]], lCPI3_0@PAGE
63-
; CHECK: ldr q[[REG2:[0-9]+]], [x[[REG3]], lCPI3_0@PAGEOFF]
63+
; CHECK-LABEL: test4:
64+
; CHECK: ; %bb.0: ; %bb
65+
; CHECK-NEXT: Lloh0:
66+
; CHECK-NEXT: adrp x8, lCPI3_0@PAGE
67+
; CHECK-NEXT: Lloh1:
68+
; CHECK-NEXT: ldr q0, [x8, lCPI3_0@PAGEOFF]
69+
; CHECK-NEXT: ret
70+
; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1
6471
bb:
6572
%Shuff = shufflevector <16 x i1> zeroinitializer,
6673
<16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1,

llvm/test/CodeGen/AArch64/bitcast.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ define <4 x i16> @foo1(<2 x i32> %a) {
88
; CHECK-SD-LABEL: foo1:
99
; CHECK-SD: // %bb.0:
1010
; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
11+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
1112
; CHECK-SD-NEXT: ret
1213
;
1314
; CHECK-GI-LABEL: foo1:
@@ -28,6 +29,7 @@ define <4 x i16> @foo2(<2 x i32> %a) {
2829
; CHECK-SD-LABEL: foo2:
2930
; CHECK-SD: // %bb.0:
3031
; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
32+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
3133
; CHECK-SD-NEXT: ret
3234
;
3335
; CHECK-GI-LABEL: foo2:

llvm/test/CodeGen/AArch64/combine-mul.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ define <4 x i1> @PR48683_vec(<4 x i32> %x) {
1818
; CHECK-LABEL: PR48683_vec:
1919
; CHECK: // %bb.0:
2020
; CHECK-NEXT: movi v0.2d, #0000000000000000
21+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
2122
; CHECK-NEXT: ret
2223
%a = mul <4 x i32> %x, %x
2324
%b = and <4 x i32> %a, <i32 2, i32 2, i32 2, i32 2>
@@ -29,6 +30,7 @@ define <4 x i1> @PR48683_vec_undef(<4 x i32> %x) {
2930
; CHECK-LABEL: PR48683_vec_undef:
3031
; CHECK: // %bb.0:
3132
; CHECK-NEXT: movi v0.2d, #0000000000000000
33+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
3234
; CHECK-NEXT: ret
3335
%a = mul <4 x i32> %x, %x
3436
%b = and <4 x i32> %a, <i32 2, i32 2, i32 2, i32 undef>

llvm/test/CodeGen/AArch64/ext-narrow-index.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ define <8 x i8> @i8_zero_off22(<16 x i8> %arg1) {
251251
; CHECK-SD-LABEL: i8_zero_off22:
252252
; CHECK-SD: // %bb.0: // %entry
253253
; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
254+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
254255
; CHECK-SD-NEXT: ret
255256
;
256257
; CHECK-GISEL-LABEL: i8_zero_off22:
@@ -302,6 +303,7 @@ define <4 x i16> @i16_zero_off8(<8 x i16> %arg1) {
302303
; CHECK-LABEL: i16_zero_off8:
303304
; CHECK: // %bb.0: // %entry
304305
; CHECK-NEXT: movi v0.2d, #0000000000000000
306+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
305307
; CHECK-NEXT: ret
306308
entry:
307309
%shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
@@ -346,6 +348,7 @@ define <2 x i32> @i32_zero_off4(<4 x i32> %arg1) {
346348
; CHECK-LABEL: i32_zero_off4:
347349
; CHECK: // %bb.0: // %entry
348350
; CHECK-NEXT: movi v0.2d, #0000000000000000
351+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
349352
; CHECK-NEXT: ret
350353
entry:
351354
%shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 4, i32 5>

llvm/test/CodeGen/AArch64/fast-isel-const-float.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ define float @select_fp_const() {
99
; GISEL-LABEL: select_fp_const:
1010
; GISEL: // %bb.0: // %entry
1111
; GISEL-NEXT: movi v0.2s, #79, lsl #24
12+
; GISEL-NEXT: // kill: def $s0 killed $s0 killed $d0
1213
; GISEL-NEXT: ret
1314
;
1415
; FISEL-LABEL: select_fp_const:

llvm/test/CodeGen/AArch64/movi64_sve.ll

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ define <2 x i64> @movi_1_v2i64() {
1212
; SVE-LABEL: movi_1_v2i64:
1313
; SVE: // %bb.0:
1414
; SVE-NEXT: mov z0.d, #1 // =0x1
15+
; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
1516
; SVE-NEXT: ret
1617
ret <2 x i64> splat (i64 1)
1718
}
@@ -26,6 +27,7 @@ define <2 x i64> @movi_127_v2i64() {
2627
; SVE-LABEL: movi_127_v2i64:
2728
; SVE: // %bb.0:
2829
; SVE-NEXT: mov z0.d, #127 // =0x7f
30+
; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
2931
; SVE-NEXT: ret
3032
ret <2 x i64> splat (i64 127)
3133
}
@@ -40,6 +42,7 @@ define <2 x i64> @movi_m128_v2i64() {
4042
; SVE-LABEL: movi_m128_v2i64:
4143
; SVE: // %bb.0:
4244
; SVE-NEXT: mov z0.d, #-128 // =0xffffffffffffff80
45+
; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
4346
; SVE-NEXT: ret
4447
ret <2 x i64> splat (i64 -128)
4548
}
@@ -54,6 +57,7 @@ define <2 x i64> @movi_256_v2i64() {
5457
; SVE-LABEL: movi_256_v2i64:
5558
; SVE: // %bb.0:
5659
; SVE-NEXT: mov z0.d, #256 // =0x100
60+
; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
5761
; SVE-NEXT: ret
5862
ret <2 x i64> splat (i64 256)
5963
}
@@ -68,6 +72,7 @@ define <2 x i64> @movi_32512_v2i64() {
6872
; SVE-LABEL: movi_32512_v2i64:
6973
; SVE: // %bb.0:
7074
; SVE-NEXT: mov z0.d, #32512 // =0x7f00
75+
; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
7176
; SVE-NEXT: ret
7277
ret <2 x i64> splat (i64 32512)
7378
}
@@ -82,6 +87,7 @@ define <2 x i64> @movi_m32768_v2i64() {
8287
; SVE-LABEL: movi_m32768_v2i64:
8388
; SVE: // %bb.0:
8489
; SVE-NEXT: mov z0.d, #-32768 // =0xffffffffffff8000
90+
; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
8591
; SVE-NEXT: ret
8692
ret <2 x i64> splat (i64 -32768)
8793
}
@@ -98,6 +104,7 @@ define <4 x i32> @movi_v4i32_1() {
98104
; SVE-LABEL: movi_v4i32_1:
99105
; SVE: // %bb.0:
100106
; SVE-NEXT: mov z0.d, #127 // =0x7f
107+
; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
101108
; SVE-NEXT: ret
102109
ret <4 x i32> <i32 127, i32 0, i32 127, i32 0>
103110
}
@@ -112,6 +119,7 @@ define <4 x i32> @movi_v4i32_2() {
112119
; SVE-LABEL: movi_v4i32_2:
113120
; SVE: // %bb.0:
114121
; SVE-NEXT: mov z0.d, #32512 // =0x7f00
122+
; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
115123
; SVE-NEXT: ret
116124
ret <4 x i32> <i32 32512, i32 0, i32 32512, i32 0>
117125
}
@@ -126,6 +134,7 @@ define <8 x i16> @movi_v8i16_1() {
126134
; SVE-LABEL: movi_v8i16_1:
127135
; SVE: // %bb.0:
128136
; SVE-NEXT: mov z0.d, #127 // =0x7f
137+
; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
129138
; SVE-NEXT: ret
130139
ret <8 x i16> <i16 127, i16 0, i16 0, i16 0, i16 127, i16 0, i16 0, i16 0>
131140
}
@@ -140,6 +149,7 @@ define <8 x i16> @movi_v8i16_2() {
140149
; SVE-LABEL: movi_v8i16_2:
141150
; SVE: // %bb.0:
142151
; SVE-NEXT: mov z0.d, #32512 // =0x7f00
152+
; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
143153
; SVE-NEXT: ret
144154
ret <8 x i16> <i16 32512, i16 0, i16 0, i16 0, i16 32512, i16 0, i16 0, i16 0>
145155
}
@@ -154,6 +164,7 @@ define <16 x i8> @movi_v16i8_1() {
154164
; SVE-LABEL: movi_v16i8_1:
155165
; SVE: // %bb.0:
156166
; SVE-NEXT: mov z0.d, #127 // =0x7f
167+
; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
157168
; SVE-NEXT: ret
158169
ret <16 x i8> <i8 127, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 127, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
159170
}
@@ -168,6 +179,7 @@ define <16 x i8> @movi_v16i8_2() {
168179
; SVE-LABEL: movi_v16i8_2:
169180
; SVE: // %bb.0:
170181
; SVE-NEXT: mov z0.d, #32512 // =0x7f00
182+
; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
171183
; SVE-NEXT: ret
172184
ret <16 x i8> <i8 0, i8 127, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 127, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
173185
}

0 commit comments

Comments
 (0)