//===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // /// \file /// AMDGPU HSA Metadata Streamer. /// // //===----------------------------------------------------------------------===// #include "AMDGPUHSAMetadataStreamer.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "SIMachineFunctionInfo.h" #include "SIProgramInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Module.h" #include "llvm/Support/raw_ostream.h" namespace llvm { static cl::opt<bool> DumpHSAMetadata( "amdgpu-dump-hsa-metadata", cl::desc("Dump AMDGPU HSA Metadata")); static cl::opt<bool> VerifyHSAMetadata( "amdgpu-verify-hsa-metadata", cl::desc("Verify AMDGPU HSA Metadata")); namespace AMDGPU { namespace HSAMD { void MetadataStreamer::dump(StringRef HSAMetadataString) const { errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; } void MetadataStreamer::verify(StringRef HSAMetadataString) const { errs() << "AMDGPU HSA Metadata Parser Test: "; HSAMD::Metadata FromHSAMetadataString; if (fromString(HSAMetadataString, FromHSAMetadataString)) { errs() << "FAIL\n"; return; } std::string ToHSAMetadataString; if (toString(FromHSAMetadataString, ToHSAMetadataString)) { errs() << "FAIL\n"; return; } errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL") << '\n'; if (HSAMetadataString != ToHSAMetadataString) { errs() << "Original input: " << HSAMetadataString << '\n' << "Produced output: " << ToHSAMetadataString << '\n'; } } AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const { if (AccQual.empty()) return AccessQualifier::Unknown; return StringSwitch<AccessQualifier>(AccQual) .Case("read_only", AccessQualifier::ReadOnly) .Case("write_only", AccessQualifier::WriteOnly) .Case("read_write", AccessQualifier::ReadWrite) .Default(AccessQualifier::Default); } AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer( unsigned AddressSpace) const { if (AddressSpace == AMDGPUASI.PRIVATE_ADDRESS) return AddressSpaceQualifier::Private; if (AddressSpace == AMDGPUASI.GLOBAL_ADDRESS) return AddressSpaceQualifier::Global; if (AddressSpace == AMDGPUASI.CONSTANT_ADDRESS) return AddressSpaceQualifier::Constant; if (AddressSpace == AMDGPUASI.LOCAL_ADDRESS) return AddressSpaceQualifier::Local; if (AddressSpace == AMDGPUASI.FLAT_ADDRESS) return AddressSpaceQualifier::Generic; if (AddressSpace == AMDGPUASI.REGION_ADDRESS) return AddressSpaceQualifier::Region; llvm_unreachable("Unknown address space qualifier"); } ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual, StringRef BaseTypeName) const { if (TypeQual.find("pipe") != StringRef::npos) return ValueKind::Pipe; return StringSwitch<ValueKind>(BaseTypeName) .Case("image1d_t", ValueKind::Image) .Case("image1d_array_t", ValueKind::Image) .Case("image1d_buffer_t", ValueKind::Image) .Case("image2d_t", ValueKind::Image) .Case("image2d_array_t", ValueKind::Image) .Case("image2d_array_depth_t", ValueKind::Image) .Case("image2d_array_msaa_t", ValueKind::Image) .Case("image2d_array_msaa_depth_t", ValueKind::Image) .Case("image2d_depth_t", ValueKind::Image) .Case("image2d_msaa_t", ValueKind::Image) .Case("image2d_msaa_depth_t", ValueKind::Image) .Case("image3d_t", ValueKind::Image) .Case("sampler_t", ValueKind::Sampler) .Case("queue_t", ValueKind::Queue) .Default(isa<PointerType>(Ty) ? (Ty->getPointerAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ? ValueKind::DynamicSharedPointer : ValueKind::GlobalBuffer) : ValueKind::ByValue); } ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const { switch (Ty->getTypeID()) { case Type::IntegerTyID: { auto Signed = !TypeName.startswith("u"); switch (Ty->getIntegerBitWidth()) { case 8: return Signed ? ValueType::I8 : ValueType::U8; case 16: return Signed ? ValueType::I16 : ValueType::U16; case 32: return Signed ? ValueType::I32 : ValueType::U32; case 64: return Signed ? ValueType::I64 : ValueType::U64; default: return ValueType::Struct; } } case Type::HalfTyID: return ValueType::F16; case Type::FloatTyID: return ValueType::F32; case Type::DoubleTyID: return ValueType::F64; case Type::PointerTyID: return getValueType(Ty->getPointerElementType(), TypeName); case Type::VectorTyID: return getValueType(Ty->getVectorElementType(), TypeName); default: return ValueType::Struct; } } std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const { switch (Ty->getTypeID()) { case Type::IntegerTyID: { if (!Signed) return (Twine('u') + getTypeName(Ty, true)).str(); auto BitWidth = Ty->getIntegerBitWidth(); switch (BitWidth) { case 8: return "char"; case 16: return "short"; case 32: return "int"; case 64: return "long"; default: return (Twine('i') + Twine(BitWidth)).str(); } } case Type::HalfTyID: return "half"; case Type::FloatTyID: return "float"; case Type::DoubleTyID: return "double"; case Type::VectorTyID: { auto VecTy = cast<VectorType>(Ty); auto ElTy = VecTy->getElementType(); auto NumElements = VecTy->getVectorNumElements(); return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); } default: return "unknown"; } } std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions( MDNode *Node) const { std::vector<uint32_t> Dims; if (Node->getNumOperands() != 3) return Dims; for (auto &Op : Node->operands()) Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue()); return Dims; } Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps( const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const { const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); HSAMD::Kernel::CodeProps::Metadata HSACodeProps; const Function &F = MF.getFunction(); assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || F.getCallingConv() == CallingConv::SPIR_KERNEL); unsigned MaxKernArgAlign; HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F, MaxKernArgAlign); HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; HSACodeProps.mKernargSegmentAlign = std::max(MaxKernArgAlign, 4u); HSACodeProps.mWavefrontSize = STM.getWavefrontSize(); HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR; HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR; HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize(); HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack; HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled(); HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs(); HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs(); return HSACodeProps; } Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps( const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const { const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); HSAMD::Kernel::DebugProps::Metadata HSADebugProps; if (!STM.debuggerSupported()) return HSADebugProps; HSADebugProps.mDebuggerABIVersion.push_back(1); HSADebugProps.mDebuggerABIVersion.push_back(0); if (STM.debuggerEmitPrologue()) { HSADebugProps.mPrivateSegmentBufferSGPR = ProgramInfo.DebuggerPrivateSegmentBufferSGPR; HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR = ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR; } return HSADebugProps; } void MetadataStreamer::emitVersion() { auto &Version = HSAMetadata.mVersion; Version.push_back(VersionMajor); Version.push_back(VersionMinor); } void MetadataStreamer::emitPrintf(const Module &Mod) { auto &Printf = HSAMetadata.mPrintf; auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); if (!Node) return; for (auto Op : Node->operands()) if (Op->getNumOperands()) Printf.push_back(cast<MDString>(Op->getOperand(0))->getString()); } void MetadataStreamer::emitKernelLanguage(const Function &Func) { auto &Kernel = HSAMetadata.mKernels.back(); // TODO: What about other languages? auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); if (!Node || !Node->getNumOperands()) return; auto Op0 = Node->getOperand(0); if (Op0->getNumOperands() <= 1) return; Kernel.mLanguage = "OpenCL C"; Kernel.mLanguageVersion.push_back( mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()); Kernel.mLanguageVersion.push_back( mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()); } void MetadataStreamer::emitKernelAttrs(const Function &Func) { auto &Attrs = HSAMetadata.mKernels.back().mAttrs; if (auto Node = Func.getMetadata("reqd_work_group_size")) Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node); if (auto Node = Func.getMetadata("work_group_size_hint")) Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node); if (auto Node = Func.getMetadata("vec_type_hint")) { Attrs.mVecTypeHint = getTypeName( cast<ValueAsMetadata>(Node->getOperand(0))->getType(), mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()); } if (Func.hasFnAttribute("runtime-handle")) { Attrs.mRuntimeHandle = Func.getFnAttribute("runtime-handle").getValueAsString().str(); } } void MetadataStreamer::emitKernelArgs(const Function &Func) { for (auto &Arg : Func.args()) emitKernelArg(Arg); emitHiddenKernelArgs(Func); } void MetadataStreamer::emitKernelArg(const Argument &Arg) { auto Func = Arg.getParent(); auto ArgNo = Arg.getArgNo(); const MDNode *Node; StringRef Name; Node = Func->getMetadata("kernel_arg_name"); if (Node && ArgNo < Node->getNumOperands()) Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); else if (Arg.hasName()) Name = Arg.getName(); StringRef TypeName; Node = Func->getMetadata("kernel_arg_type"); if (Node && ArgNo < Node->getNumOperands()) TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); StringRef BaseTypeName; Node = Func->getMetadata("kernel_arg_base_type"); if (Node && ArgNo < Node->getNumOperands()) BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); StringRef AccQual; if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && Arg.hasNoAliasAttr()) { AccQual = "read_only"; } else { Node = Func->getMetadata("kernel_arg_access_qual"); if (Node && ArgNo < Node->getNumOperands()) AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); } StringRef TypeQual; Node = Func->getMetadata("kernel_arg_type_qual"); if (Node && ArgNo < Node->getNumOperands()) TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); Type *Ty = Arg.getType(); const DataLayout &DL = Func->getParent()->getDataLayout(); unsigned PointeeAlign = 0; if (auto PtrTy = dyn_cast<PointerType>(Ty)) { if (PtrTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) { PointeeAlign = Arg.getParamAlignment(); if (PointeeAlign == 0) PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType()); } } emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName), PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual); } void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty, ValueKind ValueKind, unsigned PointeeAlign, StringRef Name, StringRef TypeName, StringRef BaseTypeName, StringRef AccQual, StringRef TypeQual) { HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); auto &Arg = HSAMetadata.mKernels.back().mArgs.back(); Arg.mName = Name; Arg.mTypeName = TypeName; Arg.mSize = DL.getTypeAllocSize(Ty); Arg.mAlign = DL.getABITypeAlignment(Ty); Arg.mValueKind = ValueKind; Arg.mValueType = getValueType(Ty, BaseTypeName); Arg.mPointeeAlign = PointeeAlign; if (auto PtrTy = dyn_cast<PointerType>(Ty)) Arg.mAddrSpaceQual = getAddressSpaceQualifer(PtrTy->getAddressSpace()); Arg.mAccQual = getAccessQualifier(AccQual); // TODO: Emit Arg.mActualAccQual. SmallVector<StringRef, 1> SplitTypeQuals; TypeQual.split(SplitTypeQuals, " ", -1, false); for (StringRef Key : SplitTypeQuals) { auto P = StringSwitch<bool*>(Key) .Case("const", &Arg.mIsConst) .Case("restrict", &Arg.mIsRestrict) .Case("volatile", &Arg.mIsVolatile) .Case("pipe", &Arg.mIsPipe) .Default(nullptr); if (P) *P = true; } } void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) { int HiddenArgNumBytes = getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); if (!HiddenArgNumBytes) return; auto &DL = Func.getParent()->getDataLayout(); auto Int64Ty = Type::getInt64Ty(Func.getContext()); if (HiddenArgNumBytes >= 8) emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX); if (HiddenArgNumBytes >= 16) emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY); if (HiddenArgNumBytes >= 24) emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ); auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), AMDGPUASI.GLOBAL_ADDRESS); // Emit "printf buffer" argument if printf is used, otherwise emit dummy // "none" argument. if (HiddenArgNumBytes >= 32) { if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer); else emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); } // Emit "default queue" and "completion action" arguments if enqueue kernel is // used, otherwise emit dummy "none" arguments. if (HiddenArgNumBytes >= 48) { if (Func.hasFnAttribute("calls-enqueue-kernel")) { emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue); emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction); } else { emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); } } } void MetadataStreamer::begin(const Module &Mod) { AMDGPUASI = getAMDGPUAS(Mod); emitVersion(); emitPrintf(Mod); } void MetadataStreamer::end() { std::string HSAMetadataString; if (toString(HSAMetadata, HSAMetadataString)) return; if (DumpHSAMetadata) dump(HSAMetadataString); if (VerifyHSAMetadata) verify(HSAMetadataString); } void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) { auto &Func = MF.getFunction(); if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) return; auto CodeProps = getHSACodeProps(MF, ProgramInfo); auto DebugProps = getHSADebugProps(MF, ProgramInfo); HSAMetadata.mKernels.push_back(Kernel::Metadata()); auto &Kernel = HSAMetadata.mKernels.back(); Kernel.mName = Func.getName(); Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str(); emitKernelLanguage(Func); emitKernelAttrs(Func); emitKernelArgs(Func); HSAMetadata.mKernels.back().mCodeProps = CodeProps; HSAMetadata.mKernels.back().mDebugProps = DebugProps; } } // end namespace HSAMD } // end namespace AMDGPU } // end namespace llvm