@@ -1375,11 +1375,9 @@ define i32 @test_usdot_v8i8_double(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i
13751375; CHECK-SD-LABEL: test_usdot_v8i8_double:
13761376; CHECK-SD: // %bb.0: // %entry
13771377; CHECK-SD-NEXT: movi v4.2d, #0000000000000000
1378- ; CHECK-SD-NEXT: movi v5.2d, #0000000000000000
1379- ; CHECK-SD-NEXT: usdot v5.2s, v0.8b, v1.8b
13801378; CHECK-SD-NEXT: usdot v4.2s, v2.8b, v3.8b
1381- ; CHECK-SD-NEXT: add v0.2s, v5.2s, v4.2s
1382- ; CHECK-SD-NEXT: addp v0.2s, v0.2s, v0.2s
1379+ ; CHECK-SD-NEXT: usdot v4.2s, v0.8b, v1.8b
1380+ ; CHECK-SD-NEXT: addp v0.2s, v4.2s, v4.2s
13831381; CHECK-SD-NEXT: fmov w0, s0
13841382; CHECK-SD-NEXT: ret
13851383;
@@ -1416,11 +1414,9 @@ define i32 @test_usdot_swapped_operands_v8i8_double(<8 x i8> %a, <8 x i8> %b, <8
14161414; CHECK-SD-LABEL: test_usdot_swapped_operands_v8i8_double:
14171415; CHECK-SD: // %bb.0: // %entry
14181416; CHECK-SD-NEXT: movi v4.2d, #0000000000000000
1419- ; CHECK-SD-NEXT: movi v5.2d, #0000000000000000
1420- ; CHECK-SD-NEXT: usdot v5.2s, v1.8b, v0.8b
14211417; CHECK-SD-NEXT: usdot v4.2s, v3.8b, v2.8b
1422- ; CHECK-SD-NEXT: add v0.2s, v5.2s, v4.2s
1423- ; CHECK-SD-NEXT: addp v0.2s, v0.2s, v0.2s
1418+ ; CHECK-SD-NEXT: usdot v4.2s, v1.8b, v0.8b
1419+ ; CHECK-SD-NEXT: addp v0.2s, v4.2s, v4.2s
14241420; CHECK-SD-NEXT: fmov w0, s0
14251421; CHECK-SD-NEXT: ret
14261422;
@@ -1457,11 +1453,9 @@ define i32 @test_usdot_v16i8_double(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <1
14571453; CHECK-SD-LABEL: test_usdot_v16i8_double:
14581454; CHECK-SD: // %bb.0: // %entry
14591455; CHECK-SD-NEXT: movi v4.2d, #0000000000000000
1460- ; CHECK-SD-NEXT: movi v5.2d, #0000000000000000
1461- ; CHECK-SD-NEXT: usdot v5.4s, v0.16b, v1.16b
14621456; CHECK-SD-NEXT: usdot v4.4s, v2.16b, v3.16b
1463- ; CHECK-SD-NEXT: add v0.4s, v5.4s, v4.4s
1464- ; CHECK-SD-NEXT: addv s0, v0.4s
1457+ ; CHECK-SD-NEXT: usdot v4.4s, v0.16b, v1.16b
1458+ ; CHECK-SD-NEXT: addv s0, v4.4s
14651459; CHECK-SD-NEXT: fmov w0, s0
14661460; CHECK-SD-NEXT: ret
14671461;
@@ -1509,11 +1503,9 @@ define i32 @test_usdot_swapped_operands_v16i8_double(<16 x i8> %a, <16 x i8> %b,
15091503; CHECK-SD-LABEL: test_usdot_swapped_operands_v16i8_double:
15101504; CHECK-SD: // %bb.0: // %entry
15111505; CHECK-SD-NEXT: movi v4.2d, #0000000000000000
1512- ; CHECK-SD-NEXT: movi v5.2d, #0000000000000000
1513- ; CHECK-SD-NEXT: usdot v5.4s, v1.16b, v0.16b
15141506; CHECK-SD-NEXT: usdot v4.4s, v3.16b, v2.16b
1515- ; CHECK-SD-NEXT: add v0.4s, v5.4s, v4.4s
1516- ; CHECK-SD-NEXT: addv s0, v0.4s
1507+ ; CHECK-SD-NEXT: usdot v4.4s, v1.16b, v0.16b
1508+ ; CHECK-SD-NEXT: addv s0, v4.4s
15171509; CHECK-SD-NEXT: fmov w0, s0
15181510; CHECK-SD-NEXT: ret
15191511;
@@ -4384,12 +4376,10 @@ define i32 @test_usdot_v32i8(ptr nocapture readonly %a, ptr nocapture readonly %
43844376; CHECK-SD-LABEL: test_usdot_v32i8:
43854377; CHECK-SD: // %bb.0: // %entry
43864378; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
4387- ; CHECK-SD-NEXT: movi v1.2d, #0000000000000000
4388- ; CHECK-SD-NEXT: ldp q2, q3, [x0]
4389- ; CHECK-SD-NEXT: ldp q4, q5, [x1]
4390- ; CHECK-SD-NEXT: usdot v1.4s, v3.16b, v5.16b
4391- ; CHECK-SD-NEXT: usdot v0.4s, v2.16b, v4.16b
4392- ; CHECK-SD-NEXT: add v0.4s, v0.4s, v1.4s
4379+ ; CHECK-SD-NEXT: ldp q1, q3, [x0]
4380+ ; CHECK-SD-NEXT: ldp q2, q4, [x1]
4381+ ; CHECK-SD-NEXT: usdot v0.4s, v3.16b, v4.16b
4382+ ; CHECK-SD-NEXT: usdot v0.4s, v1.16b, v2.16b
43934383; CHECK-SD-NEXT: addv s0, v0.4s
43944384; CHECK-SD-NEXT: fmov w8, s0
43954385; CHECK-SD-NEXT: add w0, w8, w2
@@ -4438,15 +4428,11 @@ define i32 @test_usdot_v32i8_double(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <3
44384428; CHECK-SD: // %bb.0: // %entry
44394429; CHECK-SD-NEXT: movi v16.2d, #0000000000000000
44404430; CHECK-SD-NEXT: movi v17.2d, #0000000000000000
4441- ; CHECK-SD-NEXT: movi v18.2d, #0000000000000000
4442- ; CHECK-SD-NEXT: movi v19.2d, #0000000000000000
4443- ; CHECK-SD-NEXT: usdot v16.4s, v1.16b, v3.16b
4444- ; CHECK-SD-NEXT: usdot v18.4s, v0.16b, v2.16b
4445- ; CHECK-SD-NEXT: usdot v17.4s, v4.16b, v6.16b
4446- ; CHECK-SD-NEXT: usdot v19.4s, v5.16b, v7.16b
4447- ; CHECK-SD-NEXT: add v0.4s, v18.4s, v16.4s
4448- ; CHECK-SD-NEXT: add v1.4s, v17.4s, v19.4s
4449- ; CHECK-SD-NEXT: add v0.4s, v0.4s, v1.4s
4431+ ; CHECK-SD-NEXT: usdot v17.4s, v1.16b, v3.16b
4432+ ; CHECK-SD-NEXT: usdot v16.4s, v5.16b, v7.16b
4433+ ; CHECK-SD-NEXT: usdot v17.4s, v0.16b, v2.16b
4434+ ; CHECK-SD-NEXT: usdot v16.4s, v4.16b, v6.16b
4435+ ; CHECK-SD-NEXT: add v0.4s, v17.4s, v16.4s
44504436; CHECK-SD-NEXT: addv s0, v0.4s
44514437; CHECK-SD-NEXT: fmov w0, s0
44524438; CHECK-SD-NEXT: ret
@@ -8781,20 +8767,16 @@ define i32 @test_usdot_v64i8(ptr nocapture readonly %a, ptr nocapture readonly %
87818767; CHECK-SD-LABEL: test_usdot_v64i8:
87828768; CHECK-SD: // %bb.0: // %entry
87838769; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
8784- ; CHECK-SD-NEXT: movi v3.2d, #0000000000000000
8785- ; CHECK-SD-NEXT: movi v4.2d, #0000000000000000
8786- ; CHECK-SD-NEXT: movi v5.2d, #0000000000000000
8787- ; CHECK-SD-NEXT: ldp q1, q2, [x0, #32]
8788- ; CHECK-SD-NEXT: ldp q6, q7, [x1, #32]
8789- ; CHECK-SD-NEXT: ldp q16, q17, [x0]
8790- ; CHECK-SD-NEXT: ldp q18, q19, [x1]
8791- ; CHECK-SD-NEXT: usdot v0.4s, v2.16b, v7.16b
8792- ; CHECK-SD-NEXT: usdot v5.4s, v1.16b, v6.16b
8793- ; CHECK-SD-NEXT: usdot v4.4s, v17.16b, v19.16b
8794- ; CHECK-SD-NEXT: usdot v3.4s, v16.16b, v18.16b
8795- ; CHECK-SD-NEXT: add v0.4s, v4.4s, v0.4s
8796- ; CHECK-SD-NEXT: add v1.4s, v3.4s, v5.4s
8797- ; CHECK-SD-NEXT: add v0.4s, v1.4s, v0.4s
8770+ ; CHECK-SD-NEXT: movi v1.2d, #0000000000000000
8771+ ; CHECK-SD-NEXT: ldp q2, q3, [x0, #32]
8772+ ; CHECK-SD-NEXT: ldp q4, q5, [x1, #32]
8773+ ; CHECK-SD-NEXT: usdot v1.4s, v3.16b, v5.16b
8774+ ; CHECK-SD-NEXT: usdot v0.4s, v2.16b, v4.16b
8775+ ; CHECK-SD-NEXT: ldp q2, q3, [x0]
8776+ ; CHECK-SD-NEXT: ldp q4, q5, [x1]
8777+ ; CHECK-SD-NEXT: usdot v1.4s, v3.16b, v5.16b
8778+ ; CHECK-SD-NEXT: usdot v0.4s, v2.16b, v4.16b
8779+ ; CHECK-SD-NEXT: add v0.4s, v0.4s, v1.4s
87988780; CHECK-SD-NEXT: addv s0, v0.4s
87998781; CHECK-SD-NEXT: fmov w8, s0
88008782; CHECK-SD-NEXT: add w0, w8, w2
@@ -8863,32 +8845,24 @@ entry:
88638845define i32 @test_usdot_v64i8_double (<64 x i8 > %a , <64 x i8 > %b , <64 x i8 > %c , <64 x i8 > %d ) {
88648846; CHECK-SD-LABEL: test_usdot_v64i8_double:
88658847; CHECK-SD: // %bb.0: // %entry
8848+ ; CHECK-SD-NEXT: movi v16.2d, #0000000000000000
8849+ ; CHECK-SD-NEXT: movi v17.2d, #0000000000000000
88668850; CHECK-SD-NEXT: movi v18.2d, #0000000000000000
8867- ; CHECK-SD-NEXT: movi v21.2d, #0000000000000000
8868- ; CHECK-SD-NEXT: movi v22.2d, #0000000000000000
8869- ; CHECK-SD-NEXT: movi v23.2d, #0000000000000000
8870- ; CHECK-SD-NEXT: ldp q16, q17, [sp, #64]
8871- ; CHECK-SD-NEXT: movi v24.2d, #0000000000000000
8872- ; CHECK-SD-NEXT: movi v25.2d, #0000000000000000
8873- ; CHECK-SD-NEXT: movi v26.2d, #0000000000000000
8874- ; CHECK-SD-NEXT: movi v27.2d, #0000000000000000
8875- ; CHECK-SD-NEXT: ldp q19, q20, [sp, #96]
8876- ; CHECK-SD-NEXT: usdot v18.4s, v3.16b, v7.16b
8877- ; CHECK-SD-NEXT: ldp q3, q7, [sp, #32]
8878- ; CHECK-SD-NEXT: usdot v21.4s, v1.16b, v5.16b
8879- ; CHECK-SD-NEXT: ldp q1, q5, [sp]
8880- ; CHECK-SD-NEXT: usdot v22.4s, v2.16b, v6.16b
8881- ; CHECK-SD-NEXT: usdot v23.4s, v0.16b, v4.16b
8882- ; CHECK-SD-NEXT: usdot v24.4s, v7.16b, v20.16b
8883- ; CHECK-SD-NEXT: usdot v27.4s, v3.16b, v19.16b
8884- ; CHECK-SD-NEXT: usdot v26.4s, v5.16b, v17.16b
8885- ; CHECK-SD-NEXT: usdot v25.4s, v1.16b, v16.16b
8886- ; CHECK-SD-NEXT: add v0.4s, v21.4s, v18.4s
8887- ; CHECK-SD-NEXT: add v1.4s, v23.4s, v22.4s
8888- ; CHECK-SD-NEXT: add v2.4s, v26.4s, v24.4s
8889- ; CHECK-SD-NEXT: add v3.4s, v25.4s, v27.4s
8890- ; CHECK-SD-NEXT: add v0.4s, v1.4s, v0.4s
8891- ; CHECK-SD-NEXT: add v1.4s, v3.4s, v2.4s
8851+ ; CHECK-SD-NEXT: movi v19.2d, #0000000000000000
8852+ ; CHECK-SD-NEXT: ldp q20, q21, [sp, #96]
8853+ ; CHECK-SD-NEXT: ldp q22, q23, [sp, #32]
8854+ ; CHECK-SD-NEXT: usdot v16.4s, v3.16b, v7.16b
8855+ ; CHECK-SD-NEXT: usdot v18.4s, v2.16b, v6.16b
8856+ ; CHECK-SD-NEXT: usdot v19.4s, v23.16b, v21.16b
8857+ ; CHECK-SD-NEXT: usdot v17.4s, v22.16b, v20.16b
8858+ ; CHECK-SD-NEXT: ldp q2, q3, [sp, #64]
8859+ ; CHECK-SD-NEXT: ldp q6, q7, [sp]
8860+ ; CHECK-SD-NEXT: usdot v16.4s, v1.16b, v5.16b
8861+ ; CHECK-SD-NEXT: usdot v18.4s, v0.16b, v4.16b
8862+ ; CHECK-SD-NEXT: usdot v19.4s, v7.16b, v3.16b
8863+ ; CHECK-SD-NEXT: usdot v17.4s, v6.16b, v2.16b
8864+ ; CHECK-SD-NEXT: add v0.4s, v18.4s, v16.4s
8865+ ; CHECK-SD-NEXT: add v1.4s, v17.4s, v19.4s
88928866; CHECK-SD-NEXT: add v0.4s, v0.4s, v1.4s
88938867; CHECK-SD-NEXT: addv s0, v0.4s
88948868; CHECK-SD-NEXT: fmov w0, s0
0 commit comments