; RUN: llc < %s -march=x86-64
; PR3886
;
; Regression test for PR3886. The run line above only invokes llc for x86-64
; and does not check the generated output, so the test passes when llc
; compiles this module and exits successfully.
;
; NOTE(review): the redundant bitcast/extractelement round trips below look
; like unoptimized front-end output and are presumably what triggered the
; original bug -- keep the instruction sequence exactly as is.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "x86_64-pc-linux-gnu"

; Loads one i32 from %src, interleaves its four bytes with zero bytes inside a
; 64-bit vector, passes the resulting i64 to @store8888, and stores the i32
; returned by that call back to %src. %mask is never read or written.
define void @mmxCombineMaskU(i32* nocapture %src, i32* nocapture %mask) nounwind {
entry:
  ; Build <2 x i32> { *src, 0 }: the loaded value in lane 0, zero in lane 1.
  %tmp1 = load i32* %src                                        ; <i32> [#uses=1]
  %0 = insertelement <2 x i32> undef, i32 %tmp1, i32 0          ; <<2 x i32>> [#uses=1]
  %1 = insertelement <2 x i32> %0, i32 0, i32 1                 ; <<2 x i32>> [#uses=1]

  ; Round-trip the 64-bit value through <1 x i64> and i64; each step is a
  ; no-op reinterpretation of the same 64 bits.
  %conv.i.i = bitcast <2 x i32> %1 to <1 x i64>                 ; <<1 x i64>> [#uses=1]
  %tmp2.i.i = extractelement <1 x i64> %conv.i.i, i32 0         ; <i64> [#uses=1]
  %tmp22.i = bitcast i64 %tmp2.i.i to <1 x i64>                 ; <<1 x i64>> [#uses=1]
  %tmp15.i = extractelement <1 x i64> %tmp22.i, i32 0           ; <i64> [#uses=1]

  ; Reinterpret as eight bytes, then interleave the low four bytes of the
  ; first operand (mask indices 0-3) with the four zero bytes of the second
  ; operand (mask indices 8-11). The result is b0,0,b1,0,b2,0,b3,0; the
  ; undef lanes of the second operand are never selected.
  %conv.i26.i = bitcast i64 %tmp15.i to <8 x i8>                ; <<8 x i8>> [#uses=1]
  %shuffle.i.i = shufflevector <8 x i8> %conv.i26.i, <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef>, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> ; <<8 x i8>> [#uses=1]

  ; Convert the shuffled vector back to a scalar i64, again through several
  ; redundant <1 x i64> <-> i64 round trips.
  %conv6.i.i = bitcast <8 x i8> %shuffle.i.i to <1 x i64>       ; <<1 x i64>> [#uses=1]
  %tmp12.i.i = extractelement <1 x i64> %conv6.i.i, i32 0       ; <i64> [#uses=1]
  %tmp10.i = bitcast i64 %tmp12.i.i to <1 x i64>                ; <<1 x i64>> [#uses=1]
  %tmp24.i = extractelement <1 x i64> %tmp10.i, i32 0           ; <i64> [#uses=1]
  %tmp10 = bitcast i64 %tmp24.i to <1 x i64>                    ; <<1 x i64>> [#uses=1]
  %tmp7 = extractelement <1 x i64> %tmp10, i32 0                ; <i64> [#uses=1]

  ; Tail-call the variadic external function with the i64 and write its i32
  ; result back through %src.
  %call6 = tail call i32 (...)* @store8888(i64 %tmp7)           ; <i32> [#uses=1]
  store i32 %call6, i32* %src
  ret void
}

; External variadic function; only declared here, so its behavior is not
; visible in this file.
declare i32 @store8888(...)