Skip to content

Commit 757fc0a

Browse files
committed
Simplify logic
1 parent 3d25933 commit 757fc0a

File tree

2 files changed

+68
-44
lines changed

2 files changed

+68
-44
lines changed

llvm/lib/CodeGen/RegisterCoalescer.cpp

Lines changed: 58 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1330,59 +1330,73 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
13301330
// the same basic block to enable better coalescing.
13311331
if (DstReg.isPhysical()) {
13321332
MachineBasicBlock *MBB = CopyMI->getParent();
1333-
if (DefMI->getParent() == MBB) {
1334-
// Check if there's already an identical instruction before CopyMI
1335-
// If so, allow rematerialization to avoid redundant instructions
1336-
bool FoundCopy = false;
1337-
for (MachineInstr &MI : *MBB) {
1338-
if (&MI == CopyMI) {
1339-
FoundCopy = true;
1340-
continue;
1341-
}
1333+
if (DefMI->getParent() == MBB && !MBB->empty()) {
1334+
// Quick check: is the last instruction a return using DstReg?
1335+
const MachineInstr &LastInstr = MBB->back();
1336+
if (LastInstr.isReturn() && LastInstr.readsRegister(DstReg, TRI)) {
1337+
// The block's return reads DstReg; run the checks below before allowing remat
1338+
1339+
// Exception: allow rematerialization for zero-idiom instructions
1340+
// (e.g., xorps %xmm0, %xmm0) because rematerialization produces
1341+
// independent zero-latency instructions, which is better than copying
1342+
const TargetSubtargetInfo &STI = MF->getSubtarget();
1343+
APInt Mask;
1344+
if (STI.isZeroIdiom(DefMI, Mask)) {
1345+
LLVM_DEBUG(dbgs() << "\tAllow remat: zero-idiom instruction\n");
1346+
} else {
1347+
// Look for another instruction before CopyMI that is identical to DefMI (ignoring defs)
1348+
bool HasDuplicateDef = false;
1349+
for (MachineBasicBlock::iterator I = MBB->begin(); &*I != CopyMI;
1350+
++I) {
1351+
if (&*I != DefMI &&
1352+
I->isIdenticalTo(*DefMI, MachineInstr::IgnoreDefs)) {
1353+
HasDuplicateDef = true;
1354+
break;
1355+
}
1356+
}
13421357

1343-
// Before CopyMI: check for duplicate instructions
1344-
if (!FoundCopy && &MI != DefMI &&
1345-
MI.isIdenticalTo(*DefMI, MachineInstr::IgnoreDefs)) {
1346-
break; // Found duplicate, allow rematerialization
1347-
} else if (FoundCopy) {
1348-
// After CopyMI: check if used as return register
1349-
// If the register is redefined, it's not a return register
1350-
if (MI.modifiesRegister(DstReg, TRI))
1351-
break;
1352-
// If there's a return instruction that uses this register, skip remat
1353-
if (MI.isReturn() && MI.readsRegister(DstReg, TRI)) {
1354-
// Exception: if DefMI is moving a constant and SrcReg has no other
1355-
// uses (besides copies), rematerialization is beneficial to
1356-
// eliminate the def
1358+
// Check whether DstReg is redefined after CopyMI, scanning up to the return
1359+
bool RegRedefinedAfterCopy = false;
1360+
for (MachineBasicBlock::iterator I = std::next(CopyMI->getIterator());
1361+
I != MBB->end(); ++I) {
1362+
if (I->modifiesRegister(DstReg, TRI)) {
1363+
RegRedefinedAfterCopy = true;
1364+
break;
1365+
}
1366+
if (I->isReturn())
1367+
break;
1368+
}
1369+
1370+
// Skip remat only if: no duplicate def AND reg not redefined
1371+
if (!HasDuplicateDef && !RegRedefinedAfterCopy) {
1372+
// Exception: allow remat for a move-immediate whose SrcReg has a single non-debug use or is used only by copies
13571373
if (DefMI->isMoveImmediate()) {
1358-
// Quick check: if there's only one use and it's this copy,
1359-
// definitely remat
13601374
if (MRI->hasOneNonDBGUse(SrcReg)) {
13611375
LLVM_DEBUG(dbgs()
13621376
<< "\tAllow remat: single use constant move\n");
1363-
break;
1364-
}
1365-
1366-
// Check all uses to see if they're all copies
1367-
bool OnlyUsedByCopies = true;
1368-
unsigned UseCount = 0;
1369-
for (const MachineOperand &MO : MRI->use_operands(SrcReg)) {
1370-
const MachineInstr *UseMI = MO.getParent();
1371-
if (!UseMI->isCopy() && !UseMI->isSubregToReg()) {
1372-
OnlyUsedByCopies = false;
1373-
break;
1377+
} else {
1378+
// Check whether every use of SrcReg is a COPY or SUBREG_TO_REG
1379+
bool OnlyUsedByCopies = true;
1380+
for (const MachineOperand &MO : MRI->use_operands(SrcReg)) {
1381+
const MachineInstr *UseMI = MO.getParent();
1382+
if (!UseMI->isCopy() && !UseMI->isSubregToReg()) {
1383+
OnlyUsedByCopies = false;
1384+
break;
1385+
}
13741386
}
1375-
UseCount++;
1376-
}
13771387

1378-
if (OnlyUsedByCopies && UseCount > 0) {
1379-
break;
1388+
if (!OnlyUsedByCopies || MRI->use_empty(SrcReg)) {
1389+
LLVM_DEBUG(dbgs() << "\tSkip remat for return register: "
1390+
<< printReg(DstReg, TRI) << '\n');
1391+
return false;
1392+
}
1393+
LLVM_DEBUG(dbgs() << "\tAllow remat: only used by copies\n");
13801394
}
1395+
} else {
1396+
LLVM_DEBUG(dbgs() << "\tSkip remat for return register: "
1397+
<< printReg(DstReg, TRI) << '\n');
1398+
return false;
13811399
}
1382-
1383-
LLVM_DEBUG(dbgs() << "\tSkip remat for return register: "
1384-
<< printReg(DstReg, TRI) << '\n');
1385-
return false;
13861400
}
13871401
}
13881402
}

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.ll

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,21 @@ define i64 @test_s_wqm_constant_i64() {
9696
}
9797

9898
define i64 @test_s_wqm_constant_zero_i64() {
99+
; GFX11-LABEL: test_s_wqm_constant_zero_i64:
100+
; GFX11: ; %bb.0:
101+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102+
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
103+
; GFX11-NEXT: s_setpc_b64 s[30:31]
99104
%br = call i64 @llvm.amdgcn.s.wqm.i64(i64 0)
100105
ret i64 %br
101106
}
102107

103108
define i64 @test_s_wqm_constant_neg_one_i64() {
109+
; GFX11-LABEL: test_s_wqm_constant_neg_one_i64:
110+
; GFX11: ; %bb.0:
111+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112+
; GFX11-NEXT: v_dual_mov_b32 v0, -1 :: v_dual_mov_b32 v1, -1
113+
; GFX11-NEXT: s_setpc_b64 s[30:31]
104114
%br = call i64 @llvm.amdgcn.s.wqm.i64(i64 -1)
105115
ret i64 %br
106116
}

0 commit comments

Comments
 (0)