; This checks to ensure that Subzero aligns spill slots.

; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 \
; RUN:   -allow-externally-defined-symbols | FileCheck %s
; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 \
; RUN:   -allow-externally-defined-symbols | FileCheck %s

; The location of the stack slot for a variable is inferred from the
; return sequence: the load of the return value into xmm0 just before
; the epilogue reveals the slot's address.

; In this file, "global" refers to a variable with a live range across
; multiple basic blocks (not an LLVM global variable) and "local"
; refers to a variable that is live in only a single basic block.

; Test: a vector whose live range spans two basic blocks ("global" in this
; file's terminology) is reloaded from an esp-relative slot on return.
define internal <4 x i32> @align_global_vector(i32 %arg) {
entry:
  ; Defined in %entry but returned from %block, so it is live across blocks.
  %vec.global = insertelement <4 x i32> undef, i32 %arg, i32 0
  br label %block
block:
  ; %vec.global is live across this call; presumably the callee forces it
  ; out of the xmm registers (inferred from the callee's name only).
  call void @ForceXmmSpills()
  ret <4 x i32> %vec.global
; Expected return sequence: the vector is loaded into xmm0 from [esp], then
; esp is bumped by 0x1c and the function returns.
; NOTE(review): [esp] is 16-byte aligned here only if esp+4 was 16-byte
; aligned on entry (0x1c + a 4-byte return address = 0x20) - confirm
; against the target ABI.
; CHECK-LABEL: align_global_vector
; CHECK: movups xmm0,XMMWORD PTR [esp]
; CHECK-NEXT: add esp,0x1c
; CHECK-NEXT: ret
}

; Test: a vector that is defined and used within a single basic block
; ("local" in this file's terminology) is reloaded from an esp-relative
; slot on return.
define internal <4 x i32> @align_local_vector(i32 %arg) {
entry:
  ; %entry only branches, so everything below lives in %block alone.
  br label %block
block:
  %vec.local = insertelement <4 x i32> undef, i32 %arg, i32 0
  ; %vec.local is live across this call; presumably the callee forces it
  ; out of the xmm registers (inferred from the callee's name only).
  call void @ForceXmmSpills()
  ret <4 x i32> %vec.local
; Expected return sequence: the vector is loaded into xmm0 from [esp], then
; esp is bumped by 0x1c and the function returns.
; NOTE(review): [esp] is 16-byte aligned here only if esp+4 was 16-byte
; aligned on entry (0x1c + a 4-byte return address = 0x20) - confirm
; against the target ABI.
; CHECK-LABEL: align_local_vector
; CHECK: movups xmm0,XMMWORD PTR [esp]
; CHECK-NEXT: add esp,0x1c
; CHECK-NEXT: ret
}

; Declared without a body; the test invocations at the top of this file pass
; -allow-externally-defined-symbols so that such declarations are accepted.
; Called by align_global_vector and align_local_vector while a vector value
; is live. Presumably its purpose is to make the caller spill xmm registers
; (inferred from the name - its definition is not part of this file).
declare void @ForceXmmSpills()

; Test: same as the cross-block ("global") vector case, but the function
; also contains an alloca, and the expected output addresses the vector's
; slot relative to ebp instead of esp.
define internal <4 x i32> @align_global_vector_ebp_based(i32 %arg) {
entry:
  br label %eblock  ; Disable alloca optimization
eblock:
  ; The alloca sits in a non-entry block; its address is passed to the
  ; callee below so that it is actually used.
  %alloc = alloca i8, i32 1, align 1
  ; Defined in %eblock but returned from %block, so it is live across blocks.
  %vec.global = insertelement <4 x i32> undef, i32 %arg, i32 0
  br label %block
block:
  call void @ForceXmmSpillsAndUseAlloca(i8* %alloc)
  ret <4 x i32> %vec.global
; Expected return sequence: the vector is loaded into xmm0 from [ebp-0x18],
; then the frame is torn down with `mov esp,ebp` / `pop ebp`. The final
; `ret` is a plain match rather than a next-line match, so other
; instructions may appear between `pop ebp` and `ret`.
; NOTE(review): ebp-0x18 is 16-byte aligned only if ebp is 8 modulo 16,
; which holds when esp+4 was 16-byte aligned on entry and ebp was set right
; after pushing the old ebp - confirm against the emitted prologue.
; CHECK-LABEL: align_global_vector_ebp_based
; CHECK: movups xmm0,XMMWORD PTR [ebp-0x18]
; CHECK-NEXT: mov esp,ebp
; CHECK-NEXT: pop ebp
; CHECK: ret
}

; Test: same as the single-block ("local") vector case, but the function
; also contains an alloca, and the expected output addresses the vector's
; slot relative to ebp instead of esp.
define internal <4 x i32> @align_local_vector_ebp_based(i32 %arg) {
entry:
  br label %eblock  ; Disable alloca optimization
eblock:
  ; The alloca sits in a non-entry block; its address is passed to the
  ; callee below so that it is actually used.
  %alloc = alloca i8, i32 1, align 1
  ; Defined, live across the call, and returned all within %eblock.
  %vec.local = insertelement <4 x i32> undef, i32 %arg, i32 0
  call void @ForceXmmSpillsAndUseAlloca(i8* %alloc)
  ret <4 x i32> %vec.local
; Expected return sequence: the vector is loaded into xmm0 from [ebp-0x18],
; then the frame is torn down with `mov esp,ebp` / `pop ebp`. The final
; `ret` is a plain match rather than a next-line match, so other
; instructions may appear between `pop ebp` and `ret`.
; NOTE(review): ebp-0x18 is 16-byte aligned only if ebp is 8 modulo 16,
; which holds when esp+4 was 16-byte aligned on entry and ebp was set right
; after pushing the old ebp - confirm against the emitted prologue.
; CHECK-LABEL: align_local_vector_ebp_based
; CHECK: movups xmm0,XMMWORD PTR [ebp-0x18]
; CHECK-NEXT: mov esp,ebp
; CHECK-NEXT: pop ebp
; CHECK: ret
}

; Test: a single-block ("local") vector shares the frame with a cross-block
; ("global") float; the expected output pins where each one is stored.
define internal <4 x i32> @align_local_vector_and_global_float(i32 %arg) {
entry:
  ; Defined in %entry and used again in %block, so it is live across blocks.
  %float.global = sitofp i32 %arg to float
  call void @ForceXmmSpillsAndUseFloat(float %float.global)
  br label %block
block:
  ; Defined and returned within %block only. Unlike the sibling tests in
  ; this file, the inserted element is undef rather than %arg.
  %vec.local = insertelement <4 x i32> undef, i32 undef, i32 0
  call void @ForceXmmSpillsAndUseFloat(float %float.global)
  ret <4 x i32> %vec.local
; Expected output: the converted float is stored at esp+0x1c or esp+0x2c
; immediately after the cvtsi2ss; later the vector is loaded into xmm0 from
; esp+0x10 or esp+0x20 (both multiples of 16 from esp), then esp is bumped
; by 0x3c and the function returns.
; NOTE(review): the two accepted offsets for each value presumably
; correspond to the two optimization levels this test is run at - confirm.
; CHECK-LABEL: align_local_vector_and_global_float
; CHECK: cvtsi2ss xmm0,eax
; CHECK-NEXT: movss DWORD PTR [esp+{{0x1c|0x2c}}],xmm0
; CHECK: movups xmm0,XMMWORD PTR [{{esp\+0x10|esp\+0x20}}]
; CHECK-NEXT: add esp,0x3c
; CHECK-NEXT: ret
}

; Declared without bodies; the test invocations at the top of this file pass
; -allow-externally-defined-symbols so that such declarations are accepted.
; The first receives the address of the alloca in the ebp-based tests; the
; second receives the cross-block float in the mixed vector/float test.
; Presumably both also make the caller spill xmm registers (inferred from
; the names - their definitions are not part of this file).
declare void @ForceXmmSpillsAndUseAlloca(i8*)
declare void @ForceXmmSpillsAndUseFloat(float)