; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& grep {Number of 3-address instructions sunk}

define void @t2(<2 x i64>* %vDct, <2 x i64>* %vYp, i8* %skiplist, <2 x i64> %a1) nounwind  {
entry:
	%tmp25 = bitcast <2 x i64> %a1 to <8 x i16>		; <<8 x i16>> [#uses=1]
	br label %bb
bb:		; preds = %bb, %entry
	%skiplist_addr.0.rec = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=3]
	%vYp_addr.0.rec = shl i32 %skiplist_addr.0.rec, 3		; <i32> [#uses=3]
	%vDct_addr.0 = getelementptr <2 x i64>* %vDct, i32 %vYp_addr.0.rec		; <<2 x i64>*> [#uses=1]
	%vYp_addr.0 = getelementptr <2 x i64>* %vYp, i32 %vYp_addr.0.rec		; <<2 x i64>*> [#uses=1]
	%skiplist_addr.0 = getelementptr i8* %skiplist, i32 %skiplist_addr.0.rec		; <i8*> [#uses=1]
	%vDct_addr.0.sum43 = or i32 %vYp_addr.0.rec, 1		; <i32> [#uses=1]
	%tmp7 = getelementptr <2 x i64>* %vDct, i32 %vDct_addr.0.sum43		; <<2 x i64>*> [#uses=1]
	%tmp8 = load <2 x i64>* %tmp7, align 16		; <<2 x i64>> [#uses=1]
	%tmp11 = load <2 x i64>* %vDct_addr.0, align 16		; <<2 x i64>> [#uses=1]
	%tmp13 = bitcast <2 x i64> %tmp8 to <8 x i16>		; <<8 x i16>> [#uses=1]
	%tmp15 = bitcast <2 x i64> %tmp11 to <8 x i16>		; <<8 x i16>> [#uses=1]
	%tmp16 = shufflevector <8 x i16> %tmp15, <8 x i16> %tmp13, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 >		; <<8 x i16>> [#uses=1]
	%tmp26 = mul <8 x i16> %tmp25, %tmp16		; <<8 x i16>> [#uses=1]
	%tmp27 = bitcast <8 x i16> %tmp26 to <2 x i64>		; <<2 x i64>> [#uses=1]
	store <2 x i64> %tmp27, <2 x i64>* %vYp_addr.0, align 16
	%tmp37 = load i8* %skiplist_addr.0, align 1		; <i8> [#uses=1]
	%tmp38 = icmp eq i8 %tmp37, 0		; <i1> [#uses=1]
	%indvar.next = add i32 %skiplist_addr.0.rec, 1		; <i32> [#uses=1]
	br i1 %tmp38, label %return, label %bb
return:		; preds = %bb
	ret void
}