//===- IntrinsicsAMDGPU.td - Defines AMDGPU intrinsics -----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines all of the R600-specific intrinsics. // //===----------------------------------------------------------------------===// class AMDGPUReadPreloadRegisterIntrinsic : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>; class AMDGPUReadPreloadRegisterIntrinsicNamed<string name> : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<name>; let TargetPrefix = "r600" in { multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz { def _x : AMDGPUReadPreloadRegisterIntrinsic; def _y : AMDGPUReadPreloadRegisterIntrinsic; def _z : AMDGPUReadPreloadRegisterIntrinsic; } multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz_named<string prefix> { def _x : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_x")>; def _y : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_y")>; def _z : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_z")>; } defm int_r600_read_global_size : AMDGPUReadPreloadRegisterIntrinsic_xyz_named <"__builtin_r600_read_global_size">; defm int_r600_read_ngroups : AMDGPUReadPreloadRegisterIntrinsic_xyz_named <"__builtin_r600_read_ngroups">; defm int_r600_read_tgid : AMDGPUReadPreloadRegisterIntrinsic_xyz_named <"__builtin_r600_read_tgid">; defm int_r600_read_local_size : AMDGPUReadPreloadRegisterIntrinsic_xyz; defm int_r600_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz; def int_r600_group_barrier : GCCBuiltin<"__builtin_r600_group_barrier">, Intrinsic<[], [], [IntrConvergent]>; // AS 7 is PARAM_I_ADDRESS, used for kernel arguments def int_r600_implicitarg_ptr : GCCBuiltin<"__builtin_r600_implicitarg_ptr">, Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 7>], [], [IntrNoMem, IntrSpeculatable]>; def int_r600_rat_store_typed : // 1st parameter: Data // 2nd parameter: Index // 3rd parameter: Constant RAT ID Intrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], []>, GCCBuiltin<"__builtin_r600_rat_store_typed">; def int_r600_recipsqrt_ieee : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_r600_recipsqrt_clamped : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_r600_cube : Intrinsic< [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable] >; } // End TargetPrefix = "r600" let TargetPrefix = "amdgcn" in { //===----------------------------------------------------------------------===// // ABI Special Intrinsics //===----------------------------------------------------------------------===// defm int_amdgcn_workitem_id : AMDGPUReadPreloadRegisterIntrinsic_xyz; defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz_named <"__builtin_amdgcn_workgroup_id">; def int_amdgcn_dispatch_ptr : GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">, Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem, IntrSpeculatable]>; def int_amdgcn_queue_ptr : GCCBuiltin<"__builtin_amdgcn_queue_ptr">, Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem, IntrSpeculatable]>; def int_amdgcn_kernarg_segment_ptr : GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">, Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem, IntrSpeculatable]>; def int_amdgcn_implicitarg_ptr : GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">, Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem, IntrSpeculatable]>; def int_amdgcn_groupstaticsize : GCCBuiltin<"__builtin_amdgcn_groupstaticsize">, Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>; def int_amdgcn_dispatch_id : GCCBuiltin<"__builtin_amdgcn_dispatch_id">, Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>; def int_amdgcn_implicit_buffer_ptr : GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">, Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem, IntrSpeculatable]>; // Set EXEC to the 64-bit value given. // This is always moved to the beginning of the basic block. def int_amdgcn_init_exec : Intrinsic<[], [llvm_i64_ty], // 64-bit literal constant [IntrConvergent]>; // Set EXEC according to a thread count packed in an SGPR input: // thread_count = (input >> bitoffset) & 0x7f; // This is always moved to the beginning of the basic block. def int_amdgcn_init_exec_from_input : Intrinsic<[], [llvm_i32_ty, // 32-bit SGPR input llvm_i32_ty], // bit offset of the thread count [IntrConvergent]>; //===----------------------------------------------------------------------===// // Instruction Intrinsics //===----------------------------------------------------------------------===// // The first parameter is s_sendmsg immediate (i16), // the second one is copied to m0 def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">, Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>; def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">, Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>; def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">, Intrinsic<[], [], [IntrConvergent]>; def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">, Intrinsic<[], [], [IntrConvergent]>; def int_amdgcn_s_waitcnt : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">, Intrinsic<[], [llvm_i32_ty], []>; def int_amdgcn_div_scale : Intrinsic< // 1st parameter: Numerator // 2nd parameter: Denominator // 3rd parameter: Constant to select select between first and // second. (0 = first, 1 = second). [llvm_anyfloat_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_div_fmas : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_div_fixup : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_trig_preop : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_sin : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cos : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_log_clamp : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_fmul_legacy : GCCBuiltin<"__builtin_amdgcn_fmul_legacy">, Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_rcp : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_rcp_legacy : GCCBuiltin<"__builtin_amdgcn_rcp_legacy">, Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_rsq : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_rsq_legacy : GCCBuiltin<"__builtin_amdgcn_rsq_legacy">, Intrinsic< [llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_rsq_clamp : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>; def int_amdgcn_ldexp : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_frexp_mant : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_frexp_exp : Intrinsic< [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem, IntrSpeculatable] >; // v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0 // and always uses rtz, so is not suitable for implementing the OpenCL // fract function. It should be ok on VI. def int_amdgcn_fract : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cvt_pkrtz : Intrinsic< [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_class : Intrinsic< [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_fmed3 : GCCBuiltin<"__builtin_amdgcn_fmed3">, Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cubeid : GCCBuiltin<"__builtin_amdgcn_cubeid">, Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cubema : GCCBuiltin<"__builtin_amdgcn_cubema">, Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cubesc : GCCBuiltin<"__builtin_amdgcn_cubesc">, Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cubetc : GCCBuiltin<"__builtin_amdgcn_cubetc">, Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] >; // v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz // should be used. def int_amdgcn_sffbh : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; // Fields should mirror atomicrmw class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty], [llvm_anyptr_ty, LLVMMatchType<0>, llvm_i32_ty, // ordering llvm_i32_ty, // scope llvm_i1_ty], // isVolatile [IntrArgMemOnly, NoCapture<0>] >; def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin; def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin; class AMDGPUImageLoad : Intrinsic < [llvm_anyfloat_ty], // vdata(VGPR) [llvm_anyint_ty, // vaddr(VGPR) llvm_anyint_ty, // rsrc(SGPR) llvm_i32_ty, // dmask(imm) llvm_i1_ty, // glc(imm) llvm_i1_ty, // slc(imm) llvm_i1_ty, // lwe(imm) llvm_i1_ty], // da(imm) [IntrReadMem]>; def int_amdgcn_image_load : AMDGPUImageLoad; def int_amdgcn_image_load_mip : AMDGPUImageLoad; def int_amdgcn_image_getresinfo : AMDGPUImageLoad; class AMDGPUImageStore : Intrinsic < [], [llvm_anyfloat_ty, // vdata(VGPR) llvm_anyint_ty, // vaddr(VGPR) llvm_anyint_ty, // rsrc(SGPR) llvm_i32_ty, // dmask(imm) llvm_i1_ty, // glc(imm) llvm_i1_ty, // slc(imm) llvm_i1_ty, // lwe(imm) llvm_i1_ty], // da(imm) []>; def int_amdgcn_image_store : AMDGPUImageStore; def int_amdgcn_image_store_mip : AMDGPUImageStore; class AMDGPUImageSample : Intrinsic < [llvm_anyfloat_ty], // vdata(VGPR) [llvm_anyfloat_ty, // vaddr(VGPR) llvm_anyint_ty, // rsrc(SGPR) llvm_v4i32_ty, // sampler(SGPR) llvm_i32_ty, // dmask(imm) llvm_i1_ty, // unorm(imm) llvm_i1_ty, // glc(imm) llvm_i1_ty, // slc(imm) llvm_i1_ty, // lwe(imm) llvm_i1_ty], // da(imm) [IntrReadMem]>; // Basic sample def int_amdgcn_image_sample : AMDGPUImageSample; def int_amdgcn_image_sample_cl : AMDGPUImageSample; def int_amdgcn_image_sample_d : AMDGPUImageSample; def int_amdgcn_image_sample_d_cl : AMDGPUImageSample; def int_amdgcn_image_sample_l : AMDGPUImageSample; def int_amdgcn_image_sample_b : AMDGPUImageSample; def int_amdgcn_image_sample_b_cl : AMDGPUImageSample; def int_amdgcn_image_sample_lz : AMDGPUImageSample; def int_amdgcn_image_sample_cd : AMDGPUImageSample; def int_amdgcn_image_sample_cd_cl : AMDGPUImageSample; // Sample with comparison def int_amdgcn_image_sample_c : AMDGPUImageSample; def int_amdgcn_image_sample_c_cl : AMDGPUImageSample; def int_amdgcn_image_sample_c_d : AMDGPUImageSample; def int_amdgcn_image_sample_c_d_cl : AMDGPUImageSample; def int_amdgcn_image_sample_c_l : AMDGPUImageSample; def int_amdgcn_image_sample_c_b : AMDGPUImageSample; def int_amdgcn_image_sample_c_b_cl : AMDGPUImageSample; def int_amdgcn_image_sample_c_lz : AMDGPUImageSample; def int_amdgcn_image_sample_c_cd : AMDGPUImageSample; def int_amdgcn_image_sample_c_cd_cl : AMDGPUImageSample; // Sample with offsets def int_amdgcn_image_sample_o : AMDGPUImageSample; def int_amdgcn_image_sample_cl_o : AMDGPUImageSample; def int_amdgcn_image_sample_d_o : AMDGPUImageSample; def int_amdgcn_image_sample_d_cl_o : AMDGPUImageSample; def int_amdgcn_image_sample_l_o : AMDGPUImageSample; def int_amdgcn_image_sample_b_o : AMDGPUImageSample; def int_amdgcn_image_sample_b_cl_o : AMDGPUImageSample; def int_amdgcn_image_sample_lz_o : AMDGPUImageSample; def int_amdgcn_image_sample_cd_o : AMDGPUImageSample; def int_amdgcn_image_sample_cd_cl_o : AMDGPUImageSample; // Sample with comparison and offsets def int_amdgcn_image_sample_c_o : AMDGPUImageSample; def int_amdgcn_image_sample_c_cl_o : AMDGPUImageSample; def int_amdgcn_image_sample_c_d_o : AMDGPUImageSample; def int_amdgcn_image_sample_c_d_cl_o : AMDGPUImageSample; def int_amdgcn_image_sample_c_l_o : AMDGPUImageSample; def int_amdgcn_image_sample_c_b_o : AMDGPUImageSample; def int_amdgcn_image_sample_c_b_cl_o : AMDGPUImageSample; def int_amdgcn_image_sample_c_lz_o : AMDGPUImageSample; def int_amdgcn_image_sample_c_cd_o : AMDGPUImageSample; def int_amdgcn_image_sample_c_cd_cl_o : AMDGPUImageSample; // Basic gather4 def int_amdgcn_image_gather4 : AMDGPUImageSample; def int_amdgcn_image_gather4_cl : AMDGPUImageSample; def int_amdgcn_image_gather4_l : AMDGPUImageSample; def int_amdgcn_image_gather4_b : AMDGPUImageSample; def int_amdgcn_image_gather4_b_cl : AMDGPUImageSample; def int_amdgcn_image_gather4_lz : AMDGPUImageSample; // Gather4 with comparison def int_amdgcn_image_gather4_c : AMDGPUImageSample; def int_amdgcn_image_gather4_c_cl : AMDGPUImageSample; def int_amdgcn_image_gather4_c_l : AMDGPUImageSample; def int_amdgcn_image_gather4_c_b : AMDGPUImageSample; def int_amdgcn_image_gather4_c_b_cl : AMDGPUImageSample; def int_amdgcn_image_gather4_c_lz : AMDGPUImageSample; // Gather4 with offsets def int_amdgcn_image_gather4_o : AMDGPUImageSample; def int_amdgcn_image_gather4_cl_o : AMDGPUImageSample; def int_amdgcn_image_gather4_l_o : AMDGPUImageSample; def int_amdgcn_image_gather4_b_o : AMDGPUImageSample; def int_amdgcn_image_gather4_b_cl_o : AMDGPUImageSample; def int_amdgcn_image_gather4_lz_o : AMDGPUImageSample; // Gather4 with comparison and offsets def int_amdgcn_image_gather4_c_o : AMDGPUImageSample; def int_amdgcn_image_gather4_c_cl_o : AMDGPUImageSample; def int_amdgcn_image_gather4_c_l_o : AMDGPUImageSample; def int_amdgcn_image_gather4_c_b_o : AMDGPUImageSample; def int_amdgcn_image_gather4_c_b_cl_o : AMDGPUImageSample; def int_amdgcn_image_gather4_c_lz_o : AMDGPUImageSample; def int_amdgcn_image_getlod : AMDGPUImageSample; class AMDGPUImageAtomic : Intrinsic < [llvm_i32_ty], [llvm_i32_ty, // vdata(VGPR) llvm_anyint_ty, // vaddr(VGPR) llvm_v8i32_ty, // rsrc(SGPR) llvm_i1_ty, // r128(imm) llvm_i1_ty, // da(imm) llvm_i1_ty], // slc(imm) []>; def int_amdgcn_image_atomic_swap : AMDGPUImageAtomic; def int_amdgcn_image_atomic_add : AMDGPUImageAtomic; def int_amdgcn_image_atomic_sub : AMDGPUImageAtomic; def int_amdgcn_image_atomic_smin : AMDGPUImageAtomic; def int_amdgcn_image_atomic_umin : AMDGPUImageAtomic; def int_amdgcn_image_atomic_smax : AMDGPUImageAtomic; def int_amdgcn_image_atomic_umax : AMDGPUImageAtomic; def int_amdgcn_image_atomic_and : AMDGPUImageAtomic; def int_amdgcn_image_atomic_or : AMDGPUImageAtomic; def int_amdgcn_image_atomic_xor : AMDGPUImageAtomic; def int_amdgcn_image_atomic_inc : AMDGPUImageAtomic; def int_amdgcn_image_atomic_dec : AMDGPUImageAtomic; def int_amdgcn_image_atomic_cmpswap : Intrinsic < [llvm_i32_ty], [llvm_i32_ty, // src(VGPR) llvm_i32_ty, // cmp(VGPR) llvm_anyint_ty, // vaddr(VGPR) llvm_v8i32_ty, // rsrc(SGPR) llvm_i1_ty, // r128(imm) llvm_i1_ty, // da(imm) llvm_i1_ty], // slc(imm) []>; class AMDGPUBufferLoad : Intrinsic < [llvm_anyfloat_ty], [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) [IntrReadMem]>; def int_amdgcn_buffer_load_format : AMDGPUBufferLoad; def int_amdgcn_buffer_load : AMDGPUBufferLoad; class AMDGPUBufferStore : Intrinsic < [], [llvm_anyfloat_ty, // vdata(VGPR) -- can currently only select f32, v2f32, v4f32 llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) [IntrWriteMem]>; def int_amdgcn_buffer_store_format : AMDGPUBufferStore; def int_amdgcn_buffer_store : AMDGPUBufferStore; def int_amdgcn_tbuffer_load : Intrinsic < [llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32 [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // voffset(VGPR) llvm_i32_ty, // soffset(SGPR) llvm_i32_ty, // offset(imm) llvm_i32_ty, // dfmt(imm) llvm_i32_ty, // nfmt(imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) []>; def int_amdgcn_tbuffer_store : Intrinsic < [], [llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32 llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // voffset(VGPR) llvm_i32_ty, // soffset(SGPR) llvm_i32_ty, // offset(imm) llvm_i32_ty, // dfmt(imm) llvm_i32_ty, // nfmt(imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) []>; class AMDGPUBufferAtomic : Intrinsic < [llvm_i32_ty], [llvm_i32_ty, // vdata(VGPR) llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty], // slc(imm) []>; def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_smin : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_umin : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_smax : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_umax : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_and : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_or : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_xor : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_cmpswap : Intrinsic< [llvm_i32_ty], [llvm_i32_ty, // src(VGPR) llvm_i32_ty, // cmp(VGPR) llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty], // slc(imm) []>; // Uses that do not set the done bit should set IntrWriteMem on the // call site. def int_amdgcn_exp : Intrinsic <[], [ llvm_i32_ty, // tgt, llvm_i32_ty, // en llvm_any_ty, // src0 (f32 or i32) LLVMMatchType<0>, // src1 LLVMMatchType<0>, // src2 LLVMMatchType<0>, // src3 llvm_i1_ty, // done llvm_i1_ty // vm ], [] >; // exp with compr bit set. def int_amdgcn_exp_compr : Intrinsic <[], [ llvm_i32_ty, // tgt, llvm_i32_ty, // en llvm_anyvector_ty, // src0 (v2f16 or v2i16) LLVMMatchType<0>, // src1 llvm_i1_ty, // done llvm_i1_ty], // vm [] >; def int_amdgcn_buffer_wbinvl1_sc : GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">, Intrinsic<[], [], []>; def int_amdgcn_buffer_wbinvl1 : GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1">, Intrinsic<[], [], []>; def int_amdgcn_s_dcache_inv : GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">, Intrinsic<[], [], []>; def int_amdgcn_s_memtime : GCCBuiltin<"__builtin_amdgcn_s_memtime">, Intrinsic<[llvm_i64_ty], [], []>; def int_amdgcn_s_sleep : GCCBuiltin<"__builtin_amdgcn_s_sleep">, Intrinsic<[], [llvm_i32_ty], []> { } def int_amdgcn_s_incperflevel : GCCBuiltin<"__builtin_amdgcn_s_incperflevel">, Intrinsic<[], [llvm_i32_ty], []> { } def int_amdgcn_s_decperflevel : GCCBuiltin<"__builtin_amdgcn_s_decperflevel">, Intrinsic<[], [llvm_i32_ty], []> { } def int_amdgcn_s_getreg : GCCBuiltin<"__builtin_amdgcn_s_getreg">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem, IntrSpeculatable] >; // int_amdgcn_s_getpc is provided to allow a specific style of position // independent code to determine the high part of its address when it is // known (through convention) that the code and any data of interest does // not cross a 4Gb address boundary. Use for any other purpose may not // produce the desired results as optimizations may cause code movement, // especially as we explicitly use IntrNoMem to allow optimizations. def int_amdgcn_s_getpc : GCCBuiltin<"__builtin_amdgcn_s_getpc">, Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>; // __builtin_amdgcn_interp_mov <param>, <attr_chan>, <attr>, <m0> // param values: 0 = P10, 1 = P20, 2 = P0 def int_amdgcn_interp_mov : GCCBuiltin<"__builtin_amdgcn_interp_mov">, Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; // __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0> // This intrinsic reads from lds, but the memory values are constant, // so it behaves like IntrNoMem. def int_amdgcn_interp_p1 : GCCBuiltin<"__builtin_amdgcn_interp_p1">, Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; // __builtin_amdgcn_interp_p2 <p1>, <j>, <attr_chan>, <attr>, <m0> def int_amdgcn_interp_p2 : GCCBuiltin<"__builtin_amdgcn_interp_p2">, Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; // See int_amdgcn_v_interp_p1 for why this is IntrNoMem. // Pixel shaders only: whether the current pixel is live (i.e. not a helper // invocation for derivative computation). def int_amdgcn_ps_live : Intrinsic < [llvm_i1_ty], [], [IntrNoMem]>; def int_amdgcn_mbcnt_lo : GCCBuiltin<"__builtin_amdgcn_mbcnt_lo">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_amdgcn_mbcnt_hi : GCCBuiltin<"__builtin_amdgcn_mbcnt_hi">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; // llvm.amdgcn.ds.swizzle src offset def int_amdgcn_ds_swizzle : GCCBuiltin<"__builtin_amdgcn_ds_swizzle">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>; def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_lerp : GCCBuiltin<"__builtin_amdgcn_lerp">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_sad_u8 : GCCBuiltin<"__builtin_amdgcn_sad_u8">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_msad_u8 : GCCBuiltin<"__builtin_amdgcn_msad_u8">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_sad_hi_u8 : GCCBuiltin<"__builtin_amdgcn_sad_hi_u8">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_sad_u16 : GCCBuiltin<"__builtin_amdgcn_sad_u16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_qsad_pk_u16_u8 : GCCBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_mqsad_pk_u16_u8 : GCCBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_mqsad_u32_u8 : GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">, Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cvt_pk_u8_f32 : GCCBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">, Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_icmp : Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem, IntrConvergent]>; def int_amdgcn_fcmp : Intrinsic<[llvm_i64_ty], [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem, IntrConvergent]>; def int_amdgcn_readfirstlane : GCCBuiltin<"__builtin_amdgcn_readfirstlane">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrConvergent]>; // The lane argument must be uniform across the currently active threads of the // current wave. Otherwise, the result is undefined. def int_amdgcn_readlane : GCCBuiltin<"__builtin_amdgcn_readlane">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>; def int_amdgcn_alignbit : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_alignbyte : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] >; // Copies the source value to the destination value, with the guarantee that // the source value is computed as if the entire program were executed in WQM. def int_amdgcn_wqm : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; // Copies the active channels of the source value to the destination value, // with the guarantee that the source value is computed as if the entire // program were executed in Whole Wavefront Mode, i.e. with all channels // enabled, with a few exceptions: - Phi nodes with require WWM return an // undefined value. def int_amdgcn_wwm : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; // Given a value, copies it while setting all the inactive lanes to a given // value. Note that OpenGL helper lanes are considered active, so if the // program ever uses WQM, then the instruction and the first source will be // computed in WQM. def int_amdgcn_set_inactive : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, // value to be copied LLVMMatchType<0>], // value for the inactive lanes to take [IntrNoMem, IntrConvergent]>; //===----------------------------------------------------------------------===// // CI+ Intrinsics //===----------------------------------------------------------------------===// def int_amdgcn_s_dcache_inv_vol : GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">, Intrinsic<[], [], []>; def int_amdgcn_buffer_wbinvl1_vol : GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">, Intrinsic<[], [], []>; //===----------------------------------------------------------------------===// // VI Intrinsics //===----------------------------------------------------------------------===// // llvm.amdgcn.mov.dpp.i32 <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl> def int_amdgcn_mov_dpp : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent]>; // llvm.amdgcn.update.dpp.i32 <old> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl> // Should be equivalent to: // v_mov_b32 <dest> <old> // v_mov_b32 <dest> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl> def int_amdgcn_update_dpp : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent]>; def int_amdgcn_s_dcache_wb : GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">, Intrinsic<[], [], []>; def int_amdgcn_s_dcache_wb_vol : GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">, Intrinsic<[], [], []>; def int_amdgcn_s_memrealtime : GCCBuiltin<"__builtin_amdgcn_s_memrealtime">, Intrinsic<[llvm_i64_ty], [], []>; // llvm.amdgcn.ds.permute <index> <src> def int_amdgcn_ds_permute : GCCBuiltin<"__builtin_amdgcn_ds_permute">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>; // llvm.amdgcn.ds.bpermute <index> <src> def int_amdgcn_ds_bpermute : GCCBuiltin<"__builtin_amdgcn_ds_bpermute">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>; //===----------------------------------------------------------------------===// // Special Intrinsics for backend internal use only. No frontend // should emit calls to these. // ===----------------------------------------------------------------------===// def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_i64_ty], [llvm_i1_ty], [IntrConvergent] >; def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_i64_ty], [llvm_i64_ty], [IntrConvergent] >; def int_amdgcn_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem, IntrConvergent] >; def int_amdgcn_if_break : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent] >; def int_amdgcn_else_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent] >; def int_amdgcn_loop : Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrConvergent] >; def int_amdgcn_end_cf : Intrinsic<[], [llvm_i64_ty], [IntrConvergent]>; // Represent unreachable in a divergent region. def int_amdgcn_unreachable : Intrinsic<[], [], [IntrConvergent]>; // Emit 2.5 ulp, no denormal division. Should only be inserted by // pass based on !fpmath metadata. def int_amdgcn_fdiv_fast : Intrinsic< [llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] >; }