// REQUIRES: x86-registered-target
// REQUIRES: nvptx-registered-target

// Make sure we handle target overloads correctly.
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
// RUN:     -fcuda-target-overloads -emit-llvm -o - %s \
// RUN:     | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST %s
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device \
// RUN:     -fcuda-target-overloads -emit-llvm -o - %s \
// RUN:     | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE %s

// Check target overloads handling with disabled call target checks.
// RUN: %clang_cc1 -DNOCHECKS -triple x86_64-unknown-linux-gnu -emit-llvm \
// RUN:    -fcuda-disable-target-call-checks -fcuda-target-overloads -o - %s \
// RUN:     | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST \
// RUN:    -check-prefix=CHECK-BOTH-NC -check-prefix=CHECK-HOST-NC %s
// RUN: %clang_cc1 -DNOCHECKS -triple nvptx64-nvidia-cuda -emit-llvm \
// RUN:    -fcuda-disable-target-call-checks -fcuda-target-overloads \
// RUN:    -fcuda-is-device -o - %s \
// RUN:     | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE \
// RUN:    -check-prefix=CHECK-BOTH-NC -check-prefix=CHECK-DEVICE-NC %s

#include "Inputs/cuda.h"

typedef int (*fp_t)(void);
typedef void (*gp_t)(void);

// CHECK-HOST: @hp = global i32 ()* @_Z1hv
// CHECK-HOST: @chp = global i32 ()* @ch
// CHECK-HOST: @dhp = global i32 ()* @_Z2dhv
// CHECK-HOST: @cdhp = global i32 ()* @cdh
// CHECK-HOST: @gp = global void ()* @_Z1gv

// CHECK-BOTH-LABEL: define i32 @_Z2dhv()
__device__ int dh(void) { return 1; }
// CHECK-DEVICE: ret i32 1
__host__ int dh(void) { return 2; }
// CHECK-HOST:   ret i32 2

// CHECK-BOTH-LABEL: define i32 @_Z2hdv()
__host__ __device__ int hd(void) { return 3; }
// CHECK-BOTH:   ret i32 3

// CHECK-DEVICE-LABEL: define i32 @_Z1dv()
__device__ int d(void) { return 8; }
// CHECK-DEVICE:   ret i32 8

// CHECK-HOST-LABEL: define i32 @_Z1hv()
__host__ int h(void) { return 9; }
// CHECK-HOST:   ret i32 9

// CHECK-BOTH-LABEL: define void @_Z1gv()
__global__ void g(void) {}
// CHECK-BOTH:   ret void

// mangled names of extern "C" __host__ __device__ functions clash
// with those of their __host__/__device__ counterparts, so
// overloading of extern "C" functions can only happen for __host__
// and __device__ functions -- we never codegen them in the same
// compilation and therefore mangled name conflict is not a problem.

// CHECK-BOTH-LABEL: define i32 @cdh()
extern "C" __device__ int cdh(void) {return 10;}
// CHECK-DEVICE:   ret i32 10
extern "C" __host__ int cdh(void) {return 11;}
// CHECK-HOST:     ret i32 11

// CHECK-DEVICE-LABEL: define i32 @cd()
extern "C" __device__ int cd(void) {return 12;}
// CHECK-DEVICE:   ret i32 12

// CHECK-HOST-LABEL: define i32 @ch()
extern "C" __host__ int ch(void) {return 13;}
// CHECK-HOST:     ret i32 13

// CHECK-BOTH-LABEL: define i32 @chd()
extern "C" __host__ __device__ int chd(void) {return 14;}
// CHECK-BOTH:     ret i32 14

// CHECK-HOST-LABEL: define void @_Z5hostfv()
__host__ void hostf(void) {
#if defined (NOCHECKS)
  fp_t dp = d;   // CHECK-HOST-NC: store {{.*}} @_Z1dv, {{.*}} %dp,
  fp_t cdp = cd; // CHECK-HOST-NC: store {{.*}} @cd, {{.*}} %cdp,
#endif
  fp_t hp = h; // CHECK-HOST: store {{.*}} @_Z1hv, {{.*}} %hp,
  fp_t chp = ch; // CHECK-HOST: store {{.*}} @ch, {{.*}} %chp,
  fp_t dhp = dh; // CHECK-HOST: store {{.*}} @_Z2dhv, {{.*}} %dhp,
  fp_t cdhp = cdh; // CHECK-HOST: store {{.*}} @cdh, {{.*}} %cdhp,
  fp_t hdp = hd; // CHECK-HOST: store {{.*}} @_Z2hdv, {{.*}} %hdp,
  fp_t chdp = chd; // CHECK-HOST: store {{.*}} @chd, {{.*}} %chdp,
  gp_t gp = g; // CHECK-HOST: store {{.*}} @_Z1gv, {{.*}} %gp,

#if defined (NOCHECKS)
  d();     // CHECK-HOST-NC: call i32 @_Z1dv()
  cd();    // CHECK-HOST-NC: call i32 @cd()
#endif
  h();     // CHECK-HOST: call i32 @_Z1hv()
  ch();    // CHECK-HOST: call i32 @ch()
  dh();    // CHECK-HOST: call i32 @_Z2dhv()
  cdh();   // CHECK-HOST: call i32 @cdh()
  g<<<0,0>>>();  // CHECK-HOST: call void @_Z1gv()
}

// CHECK-DEVICE-LABEL: define void @_Z7devicefv()
__device__ void devicef(void) {
  fp_t dp = d;   // CHECK-DEVICE: store {{.*}} @_Z1dv, {{.*}} %dp,
  fp_t cdp = cd; // CHECK-DEVICE: store {{.*}} @cd, {{.*}} %cdp,
#if defined (NOCHECKS)
  fp_t hp = h; // CHECK-DEVICE-NC: store {{.*}} @_Z1hv, {{.*}} %hp,
  fp_t chp = ch; // CHECK-DEVICE-NC: store {{.*}} @ch, {{.*}} %chp,
#endif
  fp_t dhp = dh; // CHECK-DEVICE: store {{.*}} @_Z2dhv, {{.*}} %dhp,
  fp_t cdhp = cdh; // CHECK-DEVICE: store {{.*}} @cdh, {{.*}} %cdhp,
  fp_t hdp = hd; // CHECK-DEVICE: store {{.*}} @_Z2hdv, {{.*}} %hdp,
  fp_t chdp = chd; // CHECK-DEVICE: store {{.*}} @chd, {{.*}} %chdp,

  d();     // CHECK-DEVICE: call i32 @_Z1dv()
  cd();    // CHECK-DEVICE: call i32 @cd()
#if defined (NOCHECKS)
  h();     // CHECK-DEVICE-NC: call i32 @_Z1hv()
  ch();    // CHECK-DEVICE-NC: call i32 @ch()
#endif
  dh();    // CHECK-DEVICE: call i32 @_Z2dhv()
  cdh();   // CHECK-DEVICE: call i32 @cdh()
}

// CHECK-BOTH-LABEL: define void @_Z11hostdevicefv()
__host__ __device__ void hostdevicef(void) {
#if defined (NOCHECKS)
  fp_t dp = d;   // CHECK-BOTH-NC: store {{.*}} @_Z1dv, {{.*}} %dp,
  fp_t cdp = cd; // CHECK-BOTH-NC: store {{.*}} @cd, {{.*}} %cdp,
  fp_t hp = h; // CHECK-BOTH-NC: store {{.*}} @_Z1hv, {{.*}} %hp,
  fp_t chp = ch; // CHECK-BOTH-NC: store {{.*}} @ch, {{.*}} %chp,
#endif
  fp_t dhp = dh; // CHECK-BOTH: store {{.*}} @_Z2dhv, {{.*}} %dhp,
  fp_t cdhp = cdh; // CHECK-BOTH: store {{.*}} @cdh, {{.*}} %cdhp,
  fp_t hdp = hd; // CHECK-BOTH: store {{.*}} @_Z2hdv, {{.*}} %hdp,
  fp_t chdp = chd; // CHECK-BOTH: store {{.*}} @chd, {{.*}} %chdp,
#if defined (NOCHECKS) && !defined(__CUDA_ARCH__)
  gp_t gp = g; // CHECK-HOST-NC: store {{.*}} @_Z1gv, {{.*}} %gp,
#endif

#if defined (NOCHECKS)
  d();     // CHECK-BOTH-NC: call i32 @_Z1dv()
  cd();    // CHECK-BOTH-NC: call i32 @cd()
  h();     // CHECK-BOTH-NC: call i32 @_Z1hv()
  ch();    // CHECK-BOTH-NC: call i32 @ch()
#endif
  dh();    // CHECK-BOTH: call i32 @_Z2dhv()
  cdh();   // CHECK-BOTH: call i32 @cdh()
#if defined (NOCHECKS) && !defined(__CUDA_ARCH__)
  g<<<0,0>>>();  // CHECK-HOST-NC: call void @_Z1gv()
#endif
}

// Test for address of overloaded function resolution in the global context.
fp_t hp = h;
fp_t chp = ch;
fp_t dhp = dh;
fp_t cdhp = cdh;
gp_t gp = g;

int x;
// Check constructors/destructors for D/H functions
struct s_cd_dh {
  __host__ s_cd_dh() { x = 11; }
  __device__ s_cd_dh() { x = 12; }
  __host__ ~s_cd_dh() { x = 21; }
  __device__ ~s_cd_dh() { x = 22; }
};

struct s_cd_hd {
  __host__ __device__ s_cd_hd() { x = 31; }
  __host__ __device__ ~s_cd_hd() { x = 32; }
};

// CHECK-BOTH: define void @_Z7wrapperv
#if defined(__CUDA_ARCH__)
__device__
#else
__host__
#endif
void wrapper() {
  s_cd_dh scddh;
  // CHECK-BOTH: call void @_ZN7s_cd_dhC1Ev(
  s_cd_hd scdhd;
  // CHECK-BOTH: call void @_ZN7s_cd_hdC1Ev

  // CHECK-BOTH: call void @_ZN7s_cd_hdD1Ev(
  // CHECK-BOTH: call void @_ZN7s_cd_dhD1Ev(
}
// CHECK-BOTH: ret void

// Now it's time to check what's been generated for the methods we used.

// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_dhC2Ev(
// CHECK-HOST:   store i32 11,
// CHECK-DEVICE: store i32 12,
// CHECK-BOTH: ret void

// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdC2Ev(
// CHECK-BOTH:   store i32 31,
// CHECK-BOTH: ret void

// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdD2Ev(
// CHECK-BOTH: store i32 32,
// CHECK-BOTH: ret void

// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_dhD2Ev(
// CHECK-HOST:   store i32 21,
// CHECK-DEVICE: store i32 22,
// CHECK-BOTH: ret void