-
Notifications
You must be signed in to change notification settings - Fork 15.4k
Open
Labels
Description
; @src: reproducer input. Two independent 4-lane FMA chains (one over the
; %x operands, one over the %y operands) whose results are concatenated into
; a single 8-lane vector at the very end.
define <8 x float> @src(<4 x float> %x0, <4 x float> %x1, <4 x float> %y0, <4 x float> %y1) {
  ; Low half: l1 = x1*x1 + (x0*x0 + 0), computed per lane.
  %l0 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x0, <4 x float> zeroinitializer)
  %l1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x1, <4 x float> %x1, <4 x float> %l0)
  ; High half: h1 = y1*y1 + (y0*y0 + 0), computed per lane.
  %h0 = call <4 x float> @llvm.fma.v4f32(<4 x float> %y0, <4 x float> %y0, <4 x float> zeroinitializer)
  %h1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %y1, <4 x float> %y1, <4 x float> %h0)
  ; Mask 0..7 selects all four lanes of %l1 followed by all four lanes of %h1,
  ; i.e. a plain concatenation of the two halves.
  %res = shufflevector <4 x float> %l1, <4 x float> %h1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %res
}
We fail to fold @src to @tgt as the cost analysis assumes the shuffles of each fma operand are independent:
; @tgt: the desired form of @src. The operands are concatenated first, so the
; two 4-lane FMA chains become a single 8-lane FMA chain.
define <8 x float> @tgt(<4 x float> %x0, <4 x float> %x1, <4 x float> %y0, <4 x float> %y1) {
  ; Widen the inputs: each mask 0..7 places the %x operand in lanes 0-3 and
  ; the matching %y operand in lanes 4-7.
  %cat0 = shufflevector <4 x float> %x0, <4 x float> %y0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %cat1 = shufflevector <4 x float> %x1, <4 x float> %y1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ; acc0 = cat0*cat0 + 0, then acc1 = cat1*cat1 + acc0, on all 8 lanes at once.
  %acc0 = call <8 x float> @llvm.fma.v8f32(<8 x float> %cat0, <8 x float> %cat0, <8 x float> zeroinitializer)
  %acc1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %cat1, <8 x float> %cat1, <8 x float> %acc0)
  ret <8 x float> %acc1
}