; RUN: llc < %s -march=x86 -mattr=+sse2 | grep -- -86
;
; Codegen regression test: the module is compiled for 32-bit x86 with SSE2
; enabled and the test passes when the emitted assembly contains the text
; "-86".
; NOTE(review): what the "-86" operand represents (presumably a stack offset
; or immediate) is not stated in this file - confirm against the commit that
; added the test before changing any instruction below.
;
; The trailing "<type> [#uses=N]" annotations are disassembler output. The use
; counts do not match the uses visible in this file (presumably they predate
; the reduction of the test case), so do not rely on them.

define i16 @f(<4 x float>* %tmp116117.i1061.i) nounwind {
entry:
	; Three unnamed stack slots; they are numbered implicitly as %0, %1, %2.
	; %2 is never referenced again in this function.
	alloca [4 x <4 x float>]		; <[4 x <4 x float>]*>:0 [#uses=167]
	alloca [4 x <4 x float>]		; <[4 x <4 x float>]*>:1 [#uses=170]
	alloca [4 x <4 x i32>]		; <[4 x <4 x i32>]*>:2 [#uses=12]
	; Pointers to the first <4 x float> element of %0 and %1.
	%.sub6235.i = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0		; <<4 x float>*> [#uses=76]
	%.sub.i = getelementptr [4 x <4 x float>]* %1, i32 0, i32 0		; <<4 x float>*> [#uses=59]
	; Load the vector at index 63 from the pointer argument.
	%tmp124.i1062.i = getelementptr <4 x float>* %tmp116117.i1061.i, i32 63		; <<4 x float>*> [#uses=1]
	%tmp125.i1063.i = load <4 x float>* %tmp124.i1062.i		; <<4 x float>> [#uses=5]
	; Broadcast lane 1 of the loaded vector to all four lanes.
	%tmp828.i1077.i = shufflevector <4 x float> %tmp125.i1063.i, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >		; <<4 x float>> [#uses=4]
	; %0[0] is read here without any earlier store in this function.
	%tmp704.i1085.i = load <4 x float>* %.sub6235.i		; <<4 x float>> [#uses=1]
	%tmp712.i1086.i = call <4 x float> @llvm.x86.sse.max.ps( <4 x float> %tmp704.i1085.i, <4 x float> %tmp828.i1077.i )		; <<4 x float>> [#uses=1]
	store <4 x float> %tmp712.i1086.i, <4 x float>* %.sub.i
	; Scalar reload of lane 2 of the vector that was just stored to %1[0].
	%tmp2587.i1145.gep.i = getelementptr [4 x <4 x float>]* %1, i32 0, i32 0, i32 2		; <float*> [#uses=1]
	%tmp5334.i = load float* %tmp2587.i1145.gep.i		; <float> [#uses=5]
	; Build a vector with only lane 2 defined and store it to %0[0].
	%tmp2723.i1170.i = insertelement <4 x float> undef, float %tmp5334.i, i32 2		; <<4 x float>> [#uses=5]
	store <4 x float> %tmp2723.i1170.i, <4 x float>* %.sub6235.i
	; Broadcast lane 2, reload %0[0], multiply, and store the product to %1[0].
	%tmp1406.i1367.i = shufflevector <4 x float> %tmp2723.i1170.i, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 >		; <<4 x float>> [#uses=1]
	%tmp84.i1413.i = load <4 x float>* %.sub6235.i		; <<4 x float>> [#uses=1]
	%tmp89.i1415.i = fmul <4 x float> %tmp84.i1413.i, %tmp1406.i1367.i		; <<4 x float>> [#uses=1]
	store <4 x float> %tmp89.i1415.i, <4 x float>* %.sub.i
	ret i16 0
}

declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>)