//===- subzero/src/IceASanInstrumentation.cpp - ASan ------------*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the AddressSanitizer instrumentation class.
///
//===----------------------------------------------------------------------===//
#include "IceASanInstrumentation.h"
#include "IceBuildDefs.h"
#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceGlobalInits.h"
#include "IceInst.h"
#include "IceTargetLowering.h"
#include "IceTypes.h"
#include <sstream>
#include <unordered_map>
#include <unordered_set>
#include <vector>
namespace Ice {
namespace {
// Number of bytes in a 32-bit word. instrumentFuncStart packs this many shadow
// bytes into every poisoning store.
constexpr SizeT BytesPerWord = sizeof(uint32_t);
// Base size in bytes of a redzone (globals and stack variables).
constexpr SizeT RzSize = 32;
// Addresses and sizes are shifted right by this amount to obtain shadow
// indices / shadow byte counts.
constexpr SizeT ShadowScaleLog2 = 3;
// Number of application bytes covered by one shadow byte.
constexpr SizeT ShadowScale = 1 << ShadowScaleLog2;
// Constant added to a shadow index to form the shadow location that
// instrumentFuncStart writes to.
constexpr SizeT ShadowLength32 = 1 << (32 - ShadowScaleLog2);
// Shadow byte value written for a fully poisoned stack granule.
constexpr int32_t StackPoisonVal = -1;
// Functions whose names start with this prefix are never instrumented.
constexpr const char *ASanPrefix = "__asan";
// Prefix of the generated redzone names (see nextRzName).
constexpr const char *RzPrefix = "__$rz";
// Name of the global that holds a pointer to every global redzone.
constexpr const char *RzArrayName = "__$rz_array";
// Name of the global that holds the size of every global redzone.
constexpr const char *RzSizesName = "__$rz_sizes";
// RzSize bytes of 'R'. NOTE(review): not referenced anywhere in the visible
// part of this file -- confirm whether it is still needed.
const llvm::NaClBitcodeRecord::RecordVector RzContents =
llvm::NaClBitcodeRecord::RecordVector(RzSize, 'R');
// In order to instrument the code correctly, the .pexe must not have had its
// symbols stripped.
using StringMap = std::unordered_map<std::string, std::string>;
using StringSet = std::unordered_set<std::string>;
// Maps the name of an allocation function to the name of the function that
// replaces it in calls, stored function pointers and global initializers.
// TODO(tlively): Handle all allocation functions
const StringMap FuncSubstitutions = {{"malloc", "__asan_malloc"},
{"free", "__asan_free"},
{"calloc", "__asan_calloc"},
{"__asan_dummy_calloc", "__asan_calloc"},
{"realloc", "__asan_realloc"}};
// Names of functions that isInstrumentable always rejects.
const StringSet FuncBlackList = {"_Balloc"};
// Split Size into sizeof(Size) individual bytes, least significant byte
// first, and return them as a record vector.
llvm::NaClBitcodeRecord::RecordVector sizeToByteVec(SizeT Size) {
  constexpr SizeT ByteMask = (1u << CHAR_BIT) - 1;
  llvm::NaClBitcodeRecord::RecordVector Bytes;
  unsigned Remaining = sizeof(Size);
  while (Remaining-- > 0) {
    // Emit the current low byte, then move the next byte into its place.
    Bytes.emplace_back(Size & ByteMask);
    Size >>= CHAR_BIT;
  }
  return Bytes;
}
} // end of anonymous namespace
// Definitions of the ASanInstrumentation fields accessed through
// ICE_TLS_GET_FIELD / ICE_TLS_SET_FIELD (presumably thread-local, going by the
// macro names -- confirm against the macro definition).
//
// LocalVars: maps the destination of each instrumented alloca to the size of
// the variable it allocates.
ICE_TLS_DEFINE_FIELD(VarSizeMap *, ASanInstrumentation, LocalVars);
// LocalDtors: stores that zero out the shadow words poisoned at function
// start; instrumentRet re-emits them before every return.
ICE_TLS_DEFINE_FIELD(std::vector<InstStore *> *, ASanInstrumentation,
LocalDtors);
// CurNode: the node for which CheckedVars is currently valid.
ICE_TLS_DEFINE_FIELD(CfgNode *, ASanInstrumentation, CurNode);
// CheckedVars: operands already checked in CurNode, with the checked size.
ICE_TLS_DEFINE_FIELD(VarSizeMap *, ASanInstrumentation, CheckedVars);
// Decide whether Func should be instrumented. A function without a name is
// always instrumented; a named function is instrumented unless it is listed
// in FuncBlackList or its name begins with ASanPrefix.
bool ASanInstrumentation::isInstrumentable(Cfg *Func) {
  const std::string Name = Func->getFunctionName().toStringOrEmpty();
  if (Name.empty())
    return true;
  if (FuncBlackList.count(Name) != 0)
    return false;
  // rfind anchored at position 0 can only match a prefix.
  const bool HasASanPrefix = Name.rfind(ASanPrefix, 0) == 0;
  return !HasASanPrefix;
}
// Surround every global variable with a left and a right redzone. The
// redzones get the same kind of initializer (zero vs. data) and the same
// constness as the global they guard, so that they are laid out together in
// memory. While doing so:
//   - relocations in global initializers that point at a function listed in
//     FuncSubstitutions are redirected to the replacement function;
//   - the address and size of every redzone are appended to the RzArrayName
//     and RzSizesName globals;
//   - the size of every global is recorded in GlobalSizes.
// The work is done once; later calls return immediately.
void ASanInstrumentation::instrumentGlobals(VariableDeclarationList &Globals) {
  std::unique_lock<std::mutex> _(GlobalsMutex);
  if (DidProcessGlobals)
    return;

  using InitT = VariableDeclaration::Initializer;
  using ZeroInitT = VariableDeclaration::ZeroInitializer;
  using DataInitT = VariableDeclaration::DataInitializer;
  using RelocInitT = VariableDeclaration::RelocInitializer;
  using ByteVec = llvm::NaClBitcodeRecord::RecordVector;
  using PrototypeMap = std::unordered_map<std::string, FunctionDeclaration *>;

  VariableDeclarationList Instrumented;
  // Global holding pointers to all redzones
  auto *RzArray = VariableDeclaration::create(&Instrumented);
  // Global holding sizes of all redzones
  auto *RzSizes = VariableDeclaration::create(&Instrumented);
  RzArray->setName(Ctx, RzArrayName);
  RzSizes->setName(Ctx, RzSizesName);
  RzArray->setIsConstant(true);
  RzSizes->setIsConstant(true);
  Instrumented.push_back(RzArray);
  Instrumented.push_back(RzSizes);

  // Prototypes created so far for replacement functions, keyed by name.
  PrototypeMap SubstProtos;
  for (VariableDeclaration *Global : Globals) {
    assert(Global->getAlignment() <= RzSize);
    VariableDeclaration *RzLeft = VariableDeclaration::create(&Instrumented);
    VariableDeclaration *Guarded = Global;
    VariableDeclaration *RzRight = VariableDeclaration::create(&Instrumented);
    RzLeft->setName(Ctx, nextRzName());
    RzRight->setName(Ctx, nextRzName());

    const SizeT Alignment = std::max(RzSize, Global->getAlignment());
    const SizeT RzLeftSize = Alignment;
    // The right redzone also absorbs the padding up to the next alignment
    // boundary after the global.
    const SizeT RzRightSize =
        RzSize + Utils::OffsetToAlignment(Global->getNumBytes(), Alignment);

    if (Global->hasNonzeroInitializer()) {
      RzLeft->addInitializer(
          DataInitT::create(&Instrumented, ByteVec(RzLeftSize, 'R')));
      RzRight->addInitializer(
          DataInitT::create(&Instrumented, ByteVec(RzRightSize, 'R')));

      // Rebuild the global so that pointers to allocator functions can be
      // replaced.
      Guarded = VariableDeclaration::create(&Instrumented);
      Guarded->setName(Global->getName());
      std::vector<InitT *> OldInits = Global->getInitializers();
      for (InitT *OldInit : OldInits) {
        // Look for "relocation -> function -> substitutable name"; any
        // initializer that fails one of these steps is copied as is.
        const FunctionDeclaration *TargetFunc = nullptr;
        StringMap::const_iterator Subst = FuncSubstitutions.end();
        if (auto *RelocInit = llvm::dyn_cast<RelocInitT>(OldInit)) {
          const GlobalDeclaration *TargetDecl = RelocInit->getDeclaration();
          TargetFunc = llvm::dyn_cast<FunctionDeclaration>(TargetDecl);
          if (TargetFunc != nullptr)
            Subst =
                FuncSubstitutions.find(TargetDecl->getName().toStringOrEmpty());
        }
        if (Subst == FuncSubstitutions.end()) {
          Guarded->addInitializer(OldInit);
          continue;
        }

        // Reuse the prototype of the replacement if one already exists,
        // otherwise create it with the signature of the original target.
        FunctionDeclaration *&SubstProto = SubstProtos[Subst->second];
        if (SubstProto == nullptr) {
          constexpr bool IsProto = true;
          SubstProto = FunctionDeclaration::create(
              Ctx, TargetFunc->getSignature(), TargetFunc->getCallingConv(),
              llvm::GlobalValue::ExternalLinkage, IsProto);
          SubstProto->setName(Ctx, Subst->second);
        }
        Guarded->addInitializer(
            RelocInitT::create(&Instrumented, SubstProto, RelocOffsetArray(0)));
      }
    } else {
      RzLeft->addInitializer(ZeroInitT::create(&Instrumented, RzLeftSize));
      RzRight->addInitializer(ZeroInitT::create(&Instrumented, RzRightSize));
    }

    const bool IsConst = Global->getIsConstant();
    RzLeft->setIsConstant(IsConst);
    Guarded->setIsConstant(IsConst);
    RzRight->setIsConstant(IsConst);
    RzLeft->setAlignment(Alignment);
    Guarded->setAlignment(Alignment);
    RzRight->setAlignment(1);

    // Register both redzones with the tables handed to the runtime.
    RzArray->addInitializer(
        RelocInitT::create(&Instrumented, RzLeft, RelocOffsetArray(0)));
    RzArray->addInitializer(
        RelocInitT::create(&Instrumented, RzRight, RelocOffsetArray(0)));
    RzSizes->addInitializer(
        DataInitT::create(&Instrumented, sizeToByteVec(RzLeftSize)));
    RzSizes->addInitializer(
        DataInitT::create(&Instrumented, sizeToByteVec(RzRightSize)));

    Instrumented.push_back(RzLeft);
    Instrumented.push_back(Guarded);
    Instrumented.push_back(RzRight);
    RzGlobalsNum += 2;
    GlobalSizes.insert({Guarded->getName(), Guarded->getNumBytes()});
  }

  // Replace old list of globals, without messing up arena allocators
  Globals.clear();
  Globals.merge(&Instrumented);
  DidProcessGlobals = true;

  // Log the new set of globals
  if (BuildDefs::dump() && (getFlags().getVerbose() & IceV_GlobalInit)) {
    OstreamLocker _(Ctx);
    Ctx->getStrDump() << "========= Instrumented Globals =========\n";
    for (VariableDeclaration *Dumped : Globals) {
      Dumped->dump(Ctx->getStrDump());
    }
  }
}
// Return a fresh redzone name of the form RzPrefix followed by the current
// value of the RzNum counter, and advance the counter.
std::string ASanInstrumentation::nextRzName() {
  std::string Name(RzPrefix);
  Name += std::to_string(RzNum++);
  return Name;
}
// Check the node held by Context for allocas with a constant size, which
// signal the presence of local variables, and surround them with redzones.
//
// Every such alloca is deleted and replaced by a larger one that reserves the
// variable plus a trailing redzone; one extra alloca placed in front of all of
// them forms the leftmost redzone. The replacement allocas, the computation of
// the first shadow location and the stores that poison the shadow memory are
// inserted at the start of the node. For every poisoning store a matching
// store of zero is recorded in LocalDtors so that instrumentRet can unpoison
// the shadow memory again.
//
// If no constant-size alloca is found nothing is inserted.
void ASanInstrumentation::instrumentFuncStart(LoweringContext &Context) {
  if (ICE_TLS_GET_FIELD(LocalDtors) == nullptr) {
    ICE_TLS_SET_FIELD(LocalDtors, new std::vector<InstStore *>());
    ICE_TLS_SET_FIELD(LocalVars, new VarSizeMap());
  }
  Cfg *Func = Context.getNode()->getCfg();
  // Run-length encoded shadow contents: (number of consecutive shadow bytes,
  // value of each of those bytes).
  using Entry = std::pair<SizeT, int32_t>;
  // Alignment requested for every alloca created here.
  constexpr SizeT Alignment = 8;
  std::vector<InstAlloca *> NewAllocas;
  std::vector<Entry> PoisonVals;
  // These three are created together with the leftmost redzone, i.e. exactly
  // when PoisonVals receives its first entry.
  Variable *FirstShadowLocVar = nullptr;
  InstArithmetic *ShadowIndexCalc = nullptr;
  InstArithmetic *ShadowLocCalc = nullptr;

  for (; !Context.atEnd(); Context.advanceCur(), Context.advanceNext()) {
    auto *Cur = llvm::dyn_cast<InstAlloca>(iteratorToInst(Context.getCur()));
    if (Cur == nullptr)
      continue;
    // Only allocas whose size is a compile-time constant are instrumented.
    auto *VarSizeOp = llvm::dyn_cast<ConstantInteger32>(Cur->getSizeInBytes());
    if (VarSizeOp == nullptr)
      continue;
    Cur->setDeleted();

    if (PoisonVals.empty()) {
      // insert leftmost redzone
      auto *LastRzVar = Func->makeVariable(IceType_i32);
      LastRzVar->setName(Func, nextRzName());
      auto *RzByteCount = ConstantInteger32::create(Ctx, IceType_i32, RzSize);
      NewAllocas.emplace_back(
          InstAlloca::create(Func, LastRzVar, RzByteCount, Alignment));
      PoisonVals.emplace_back(Entry{RzSize >> ShadowScaleLog2, StackPoisonVal});

      // Calculate starting address for poisoning
      FirstShadowLocVar = Func->makeVariable(IceType_i32);
      FirstShadowLocVar->setName(Func, "firstShadowLoc");
      auto *ShadowIndexVar = Func->makeVariable(IceType_i32);
      ShadowIndexVar->setName(Func, "shadowIndex");
      auto *ShadowScaleLog2Const =
          ConstantInteger32::create(Ctx, IceType_i32, ShadowScaleLog2);
      auto *ShadowMemLocConst =
          ConstantInteger32::create(Ctx, IceType_i32, ShadowLength32);
      ShadowIndexCalc =
          InstArithmetic::create(Func, InstArithmetic::Lshr, ShadowIndexVar,
                                 LastRzVar, ShadowScaleLog2Const);
      ShadowLocCalc =
          InstArithmetic::create(Func, InstArithmetic::Add, FirstShadowLocVar,
                                 ShadowIndexVar, ShadowMemLocConst);
    }

    // create the new alloca that includes a redzone
    const SizeT VarSize = VarSizeOp->getValue();
    Variable *Dest = Cur->getDest();
    ICE_TLS_GET_FIELD(LocalVars)->insert({Dest, VarSize});
    const SizeT RzPadding = RzSize + Utils::OffsetToAlignment(VarSize, RzSize);
    auto *VarByteCount =
        ConstantInteger32::create(Ctx, IceType_i32, VarSize + RzPadding);
    NewAllocas.emplace_back(
        InstAlloca::create(Func, Dest, VarByteCount, Alignment));

    // Shadow layout of this alloca: Zeros fully addressable shadow bytes, one
    // byte for the granule that is partially addressable (or already part of
    // the redzone when VarSize is a multiple of ShadowScale), then the rest of
    // the redzone.
    const SizeT Zeros = VarSize >> ShadowScaleLog2;
    const SizeT Offset = VarSize % ShadowScale;
    const SizeT PoisonBytes =
        ((VarSize + RzPadding) >> ShadowScaleLog2) - Zeros - 1;
    const int32_t BoundaryVal =
        (Offset == 0) ? StackPoisonVal : static_cast<int32_t>(Offset);
    if (Zeros > 0)
      PoisonVals.emplace_back(Entry{Zeros, 0});
    PoisonVals.emplace_back(Entry{1, BoundaryVal});
    PoisonVals.emplace_back(Entry{PoisonBytes, StackPoisonVal});
  }

  Context.rewind();
  if (PoisonVals.empty()) {
    Context.advanceNext();
    return;
  }
  assert(FirstShadowLocVar != nullptr && ShadowIndexCalc != nullptr &&
         ShadowLocCalc != nullptr);
  for (InstAlloca *RzAlloca : NewAllocas) {
    Context.insert(RzAlloca);
  }
  Context.insert(ShadowIndexCalc);
  Context.insert(ShadowLocCalc);

  // Poison redzones. Each iteration consumes up to BytesPerWord shadow bytes
  // from the run-length list and writes them with a single 32-bit store.
  std::vector<Entry>::iterator Iter = PoisonVals.begin();
  for (SizeT Offset = 0; Iter != PoisonVals.end(); Offset += BytesPerWord) {
    int32_t CurVals[BytesPerWord] = {0};
    for (uint32_t i = 0; i < BytesPerWord; ++i) {
      if (Iter == PoisonVals.end())
        break;
      Entry Val = *Iter;
      CurVals[i] = Val.second;
      --Val.first;
      if (Val.first > 0)
        *Iter = Val;
      else
        ++Iter;
    }
    // CurVals[0] ends up in the least significant byte of the stored word.
    int32_t Poison = ((CurVals[3] & 0xff) << 24) | ((CurVals[2] & 0xff) << 16) |
                     ((CurVals[1] & 0xff) << 8) | (CurVals[0] & 0xff);
    // An all-zero word needs neither a poisoning nor an unpoisoning store.
    if (Poison == 0)
      continue;
    auto *PoisonConst = ConstantInteger32::create(Ctx, IceType_i32, Poison);
    auto *ZeroConst = ConstantInteger32::create(Ctx, IceType_i32, 0);
    auto *OffsetConst = ConstantInteger32::create(Ctx, IceType_i32, Offset);
    auto *PoisonAddrVar = Func->makeVariable(IceType_i32);
    Context.insert(InstArithmetic::create(Func, InstArithmetic::Add,
                                          PoisonAddrVar, FirstShadowLocVar,
                                          OffsetConst));
    Context.insert(InstStore::create(Func, PoisonConst, PoisonAddrVar));
    ICE_TLS_GET_FIELD(LocalDtors)
        ->emplace_back(InstStore::create(Func, ZeroConst, PoisonAddrVar));
  }
  Context.advanceNext();
}
// If Instr is a direct call to a function listed in FuncSubstitutions, insert
// an equivalent call (same destination, arguments and tail-call flag) to the
// replacement function and mark the original call as deleted. Any other call
// is left untouched.
void ASanInstrumentation::instrumentCall(LoweringContext &Context,
                                         InstCall *Instr) {
  auto *Target = llvm::dyn_cast<ConstantRelocatable>(Instr->getCallTarget());
  if (Target == nullptr)
    return;

  const auto Found =
      FuncSubstitutions.find(Target->getName().toStringOrEmpty());
  if (Found == FuncSubstitutions.end())
    return;

  const std::string ReplacementName = Found->second;
  Constant *Replacement =
      Ctx->getConstantExternSym(Ctx->getGlobalString(ReplacementName));
  auto *Redirected =
      InstCall::create(Context.getNode()->getCfg(), Instr->getNumArgs(),
                       Instr->getDest(), Replacement, Instr->isTailcall());
  const SizeT ArgCount = Instr->getNumArgs();
  for (SizeT ArgIdx = 0; ArgIdx != ArgCount; ++ArgIdx)
    Redirected->addArg(Instr->getArg(ArgIdx));

  Context.insert(Redirected);
  Instr->setDeleted();
}
// Instrument a load. When the source address is a relocatable symbol the load
// is first replaced by one that goes through instrumentReloc; afterwards an
// "__asan_check_load" check for the loaded width is requested through
// instrumentAccess.
void ASanInstrumentation::instrumentLoad(LoweringContext &Context,
                                         InstLoad *Instr) {
  auto *SrcReloc =
      llvm::dyn_cast<ConstantRelocatable>(Instr->getSourceAddress());
  if (SrcReloc != nullptr) {
    auto *Replacement = InstLoad::create(
        Context.getNode()->getCfg(), Instr->getDest(), instrumentReloc(SrcReloc));
    Instr->setDeleted();
    Context.insert(Replacement);
    // From here on the replacement is the instruction being checked.
    Instr = Replacement;
  }

  Constant *CheckLoad =
      Ctx->getConstantExternSym(Ctx->getGlobalString("__asan_check_load"));
  Operand *Address = Instr->getSourceAddress();
  const SizeT AccessBytes = typeWidthInBytes(Instr->getDest()->getType());
  instrumentAccess(Context, Address, AccessBytes, CheckLoad);
}
// Instrument a store. When the stored value is a relocatable symbol the store
// is first replaced by one whose data goes through instrumentReloc; afterwards
// an "__asan_check_store" check for the stored width is requested through
// instrumentAccess.
void ASanInstrumentation::instrumentStore(LoweringContext &Context,
                                          InstStore *Instr) {
  auto *DataReloc = llvm::dyn_cast<ConstantRelocatable>(Instr->getData());
  if (DataReloc != nullptr) {
    auto *Replacement =
        InstStore::create(Context.getNode()->getCfg(),
                          instrumentReloc(DataReloc), Instr->getAddr());
    Instr->setDeleted();
    Context.insert(Replacement);
    // From here on the replacement is the instruction being checked.
    Instr = Replacement;
  }

  Constant *CheckStore =
      Ctx->getConstantExternSym(Ctx->getGlobalString("__asan_check_store"));
  const SizeT AccessBytes = typeWidthInBytes(Instr->getData()->getType());
  instrumentAccess(Context, Instr->getAddr(), AccessBytes, CheckStore);
}
// Return the relocatable that should be used in place of Reloc. If Reloc
// names a function listed in FuncSubstitutions, a new relocatable with the
// same type, offset and emit string but naming the replacement function is
// created; otherwise Reloc itself is returned.
//
// The name is read with toStringOrEmpty(), like every other name lookup in
// this file, so a relocatable without a plain string name simply finds no
// substitution. NOTE(review): this assumes toString() is not safe to call on
// such names -- confirm against the string-pool API.
ConstantRelocatable *
ASanInstrumentation::instrumentReloc(ConstantRelocatable *Reloc) {
  const std::string DataName = Reloc->getName().toStringOrEmpty();
  const StringMap::const_iterator DataSub = FuncSubstitutions.find(DataName);
  if (DataSub == FuncSubstitutions.end())
    return Reloc;
  return ConstantRelocatable::create(
      Ctx, Reloc->getType(),
      RelocatableTuple(Reloc->getOffset(), RelocOffsetArray(0),
                       Ctx->getGlobalString(DataSub->second),
                       Reloc->getEmitString()));
}
// Insert a call to CheckFunc(Op, Size) in front of the current instruction
// unless the access is already known to be fine. No call is emitted when
//   - the same operand was already checked in this node for at least Size
//     bytes,
//   - Op is a local variable recorded in LocalVars with at least Size bytes, or
//   - isOkGlobalAccess accepts the access.
void ASanInstrumentation::instrumentAccess(LoweringContext &Context,
                                           Operand *Op, SizeT Size,
                                           Constant *CheckFunc) {
  // Skip redundant checks within basic blocks
  VarSizeMap *Checked = ICE_TLS_GET_FIELD(CheckedVars);
  if (ICE_TLS_GET_FIELD(CurNode) != Context.getNode()) {
    // Entering a different node: start with an empty cache.
    ICE_TLS_SET_FIELD(CurNode, Context.getNode());
    if (Checked == nullptr) {
      Checked = new VarSizeMap();
      ICE_TLS_SET_FIELD(CheckedVars, Checked);
    }
    Checked->clear();
  }
  VarSizeMap::iterator PrevCheck = Checked->find(Op);
  if (PrevCheck == Checked->end()) {
    Checked->insert({Op, Size});
  } else if (PrevCheck->second >= Size) {
    return;
  } else {
    // insert() would leave the existing, smaller size in place; record the
    // wider size explicitly so later accesses of this width are recognized as
    // already handled.
    PrevCheck->second = Size;
  }

  // check for known good local access
  VarSizeMap::iterator LocalSize = ICE_TLS_GET_FIELD(LocalVars)->find(Op);
  if (LocalSize != ICE_TLS_GET_FIELD(LocalVars)->end() &&
      LocalSize->second >= Size)
    return;
  if (isOkGlobalAccess(Op, Size))
    return;

  constexpr SizeT NumArgs = 2;
  constexpr Variable *Void = nullptr;
  constexpr bool NoTailCall = false;
  auto *Call = InstCall::create(Context.getNode()->getCfg(), NumArgs, Void,
                                CheckFunc, NoTailCall);
  Call->addArg(Op);
  Call->addArg(ConstantInteger32::create(Ctx, IceType_i32, Size));

  // play games to insert the call before the access instruction: temporarily
  // move the insertion point to the current instruction, then restore it.
  InstList::iterator Next = Context.getNext();
  Context.setInsertPoint(Context.getCur());
  Context.insert(Call);
  Context.setNext(Next);
}
// Return true if an access of Size bytes through Op is statically known to
// stay inside a global recorded in GlobalSizes. Op must be a relocatable
// whose name is a known global and whose offset is non-negative; the range
// [offset, offset + Size) must not extend past the end of that global.
//
// The bounds test is done on widened unsigned values after rejecting negative
// offsets, so an offset in front of or beyond the global cannot wrap around
// and be mistaken for a valid access.
// TODO(tlively): Trace back load and store addresses to find their real offsets
bool ASanInstrumentation::isOkGlobalAccess(Operand *Op, SizeT Size) {
  auto *Reloc = llvm::dyn_cast<ConstantRelocatable>(Op);
  if (Reloc == nullptr)
    return false;
  const RelocOffsetT Offset = Reloc->getOffset();
  if (Offset < 0)
    return false;
  GlobalSizeMap::iterator GlobalSize = GlobalSizes.find(Reloc->getName());
  if (GlobalSize == GlobalSizes.end())
    return false;
  const auto GlobalBytes = static_cast<uint64_t>(GlobalSize->second);
  const auto Start = static_cast<uint64_t>(Offset);
  return Start <= GlobalBytes && GlobalBytes - Start >= Size;
}
// In front of the return at the current position, insert a copy of every
// store recorded in LocalDtors (same data, same address), then step the
// context past the return.
void ASanInstrumentation::instrumentRet(LoweringContext &Context, InstRet *) {
  Cfg *Func = Context.getNode()->getCfg();
  Context.setInsertPoint(Context.getCur());

  auto &Unpoisons = *ICE_TLS_GET_FIELD(LocalDtors);
  for (auto It = Unpoisons.begin(), End = Unpoisons.end(); It != End; ++It) {
    InstStore *Recorded = *It;
    auto *Copy =
        InstStore::create(Func, Recorded->getData(), Recorded->getAddr());
    Context.insert(Copy);
  }

  Context.advanceCur();
  Context.advanceNext();
}
// Put a call to "__asan_init" at the very front of Func's entry node. The
// globals are instrumented before the arguments are attached, so that the
// first argument (the number of global redzones) is read after they have been
// processed; the other two arguments are the symbols of the redzone pointer
// table and the redzone size table.
void ASanInstrumentation::instrumentStart(Cfg *Func) {
  constexpr SizeT InitArgCount = 3;
  constexpr Variable *NoDest = nullptr;
  constexpr bool IsTailCall = false;

  Constant *InitFunc =
      Ctx->getConstantExternSym(Ctx->getGlobalString("__asan_init"));
  auto *InitCall =
      InstCall::create(Func, InitArgCount, NoDest, InitFunc, IsTailCall);
  Func->getEntryNode()->getInsts().push_front(InitCall);

  instrumentGlobals(*getGlobals());

  auto *RzCount = ConstantInteger32::create(Ctx, IceType_i32, RzGlobalsNum);
  auto *RzTable = Ctx->getConstantSym(0, Ctx->getGlobalString(RzArrayName));
  auto *RzSizeTable =
      Ctx->getConstantSym(0, Ctx->getGlobalString(RzSizesName));
  InitCall->addArg(RzCount);
  InitCall->addArg(RzTable);
  InitCall->addArg(RzSizeTable);
}
// Forget the per-function state (recorded local variables and unpoisoning
// stores) once a function has been processed. The containers themselves are
// kept and reused.
// TODO(tlively): make this more efficient with swap idiom
void ASanInstrumentation::finishFunc(Cfg *) {
  auto *Vars = ICE_TLS_GET_FIELD(LocalVars);
  auto *Dtors = ICE_TLS_GET_FIELD(LocalDtors);
  Vars->clear();
  Dtors->clear();
}
} // end of namespace Ice