MemCpyOptimizer.cpp [plain text]
#define DEBUG_TYPE "memcpyopt"
#include "llvm/Transforms/Scalar.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
#include "llvm/LLVMContext.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include <list>
using namespace llvm;
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
static Value *isBytewiseValue(Value *V) {
LLVMContext &Context = V->getContext();
if (V->getType()->isIntegerTy(8)) return V;
if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
if (CFP->getType()->isFloatTy())
V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(Context));
if (CFP->getType()->isDoubleTy())
V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(Context));
}
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
unsigned Width = CI->getBitWidth();
if (isPowerOf2_32(Width) && Width > 8) {
APInt Val = CI->getValue();
APInt Val2;
while (Val.getBitWidth() != 8) {
unsigned NextWidth = Val.getBitWidth()/2;
Val2 = Val.lshr(NextWidth);
Val2.trunc(Val.getBitWidth()/2);
Val.trunc(Val.getBitWidth()/2);
if (Val != Val2)
return 0;
}
return ConstantInt::get(Context, Val);
}
}
return 0;
}
static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
bool &VariableIdxFound, TargetData &TD) {
gep_type_iterator GTI = gep_type_begin(GEP);
for (unsigned i = 1; i != Idx; ++i, ++GTI)
;
int64_t Offset = 0;
for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
if (OpC == 0)
return VariableIdxFound = true;
if (OpC->isZero()) continue;
if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
continue;
}
uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
Offset += Size*OpC->getSExtValue();
}
return Offset;
}
static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
TargetData &TD) {
GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
return false;
unsigned Idx = 1;
for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
break;
bool VariableIdxFound = false;
int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
if (VariableIdxFound) return false;
Offset = Offset2-Offset1;
return true;
}
namespace {
struct MemsetRange {
int64_t Start, End;
Value *StartPtr;
unsigned Alignment;
SmallVector<StoreInst*, 16> TheStores;
bool isProfitableToUseMemset(const TargetData &TD) const;
};
}
bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
if (TheStores.size() >= 8 || End-Start >= 64) return true;
if (TheStores.size() <= 2) return false;
unsigned Bytes = unsigned(End-Start);
unsigned NumPointerStores = Bytes/TD.getPointerSize();
unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize();
return TheStores.size() > NumPointerStores+NumByteStores;
}
namespace {
class MemsetRanges {
std::list<MemsetRange> Ranges;
typedef std::list<MemsetRange>::iterator range_iterator;
TargetData &TD;
public:
MemsetRanges(TargetData &td) : TD(td) {}
typedef std::list<MemsetRange>::const_iterator const_iterator;
const_iterator begin() const { return Ranges.begin(); }
const_iterator end() const { return Ranges.end(); }
bool empty() const { return Ranges.empty(); }
void addStore(int64_t OffsetFromFirst, StoreInst *SI);
};
}
void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
int64_t End = Start+TD.getTypeStoreSize(SI->getOperand(0)->getType());
range_iterator I = Ranges.begin(), E = Ranges.end();
while (I != E && Start > I->End)
++I;
if (I == E || End < I->Start) {
MemsetRange &R = *Ranges.insert(I, MemsetRange());
R.Start = Start;
R.End = End;
R.StartPtr = SI->getPointerOperand();
R.Alignment = SI->getAlignment();
R.TheStores.push_back(SI);
return;
}
I->TheStores.push_back(SI);
if (I->Start <= Start && I->End >= End)
return;
if (Start < I->Start) {
I->Start = Start;
I->StartPtr = SI->getPointerOperand();
I->Alignment = SI->getAlignment();
}
if (End > I->End) {
I->End = End;
range_iterator NextI = I;
while (++NextI != E && End >= NextI->Start) {
I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
if (NextI->End > I->End)
I->End = NextI->End;
Ranges.erase(NextI);
NextI = I;
}
}
}
namespace {
class MemCpyOpt : public FunctionPass {
bool runOnFunction(Function &F);
public:
static char ID; MemCpyOpt() : FunctionPass(&ID) {}
private:
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<DominatorTree>();
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
bool processMemCpy(MemCpyInst *M);
bool processMemMove(MemMoveInst *M);
bool performCallSlotOptzn(MemCpyInst *cpy, CallInst *C);
bool iterateOnFunction(Function &F);
};
char MemCpyOpt::ID = 0;
}
FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
static RegisterPass<MemCpyOpt> X("memcpyopt",
"MemCpy Optimization");
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (SI->isVolatile()) return false;
LLVMContext &Context = SI->getContext();
Value *ByteVal = isBytewiseValue(SI->getOperand(0));
if (!ByteVal)
return false;
TargetData *TD = getAnalysisIfAvailable<TargetData>();
if (!TD) return false;
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
Module *M = SI->getParent()->getParent()->getParent();
MemsetRanges Ranges(*TD);
Value *StartPtr = SI->getPointerOperand();
BasicBlock::iterator BI = SI;
for (++BI; !isa<TerminatorInst>(BI); ++BI) {
if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
if (AA.getModRefBehavior(CallSite::get(BI)) ==
AliasAnalysis::DoesNotAccessMemory)
continue;
break;
} else if (isa<VAArgInst>(BI) || isa<LoadInst>(BI))
break;
StoreInst *NextStore = dyn_cast<StoreInst>(BI);
if (NextStore == 0) continue;
if (NextStore->isVolatile()) break;
if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
break;
int64_t Offset;
if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))
break;
Ranges.addStore(Offset, NextStore);
}
if (Ranges.empty())
return false;
Ranges.addStore(0, SI);
bool MadeChange = false;
for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
I != E; ++I) {
const MemsetRange &Range = *I;
if (Range.TheStores.size() == 1) continue;
if (!Range.isProfitableToUseMemset(*TD))
continue;
BasicBlock::iterator InsertPt = BI;
StartPtr = Range.StartPtr;
unsigned Alignment = Range.Alignment;
if (Alignment == 0) {
const Type *EltType =
cast<PointerType>(StartPtr->getType())->getElementType();
Alignment = TD->getABITypeAlignment(EltType);
}
const PointerType* StartPTy = cast<PointerType>(StartPtr->getType());
const PointerType *i8Ptr = Type::getInt8PtrTy(Context,
StartPTy->getAddressSpace());
if (StartPTy!= i8Ptr)
StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(),
InsertPt);
Value *Ops[] = {
StartPtr, ByteVal, ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start),
ConstantInt::get(Type::getInt32Ty(Context), Alignment),
ConstantInt::get(Type::getInt1Ty(Context), 0),
};
const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
Function *MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt);
DEBUG(dbgs() << "Replace stores:\n";
for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
dbgs() << *Range.TheStores[i];
dbgs() << "With: " << *C); C=C;
BBI = BI;
for (SmallVector<StoreInst*, 16>::const_iterator
SI = Range.TheStores.begin(),
SE = Range.TheStores.end(); SI != SE; ++SI)
(*SI)->eraseFromParent();
++NumMemSetInfer;
MadeChange = true;
}
return MadeChange;
}
bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
Value *cpyDest = cpy->getDest();
Value *cpySrc = cpy->getSource();
CallSite CS = CallSite::get(C);
ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
if (!cpyLength)
return false;
AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
if (!srcAlloca)
return false;
TargetData *TD = getAnalysisIfAvailable<TargetData>();
if (!TD) return false;
ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
if (!srcArraySize)
return false;
uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
srcArraySize->getZExtValue();
if (cpyLength->getZExtValue() < srcSize)
return false;
if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
if (!destArraySize)
return false;
uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) *
destArraySize->getZExtValue();
if (destSize < srcSize)
return false;
} else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
if (!A->hasStructRetAttr())
return false;
const Type *StructTy = cast<PointerType>(A->getType())->getElementType();
uint64_t destSize = TD->getTypeAllocSize(StructTy);
if (destSize < srcSize)
return false;
} else {
return false;
}
SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
srcAlloca->use_end());
while (!srcUseList.empty()) {
User *UI = srcUseList.pop_back_val();
if (isa<BitCastInst>(UI)) {
for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
I != E; ++I)
srcUseList.push_back(*I);
} else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) {
if (G->hasAllZeroIndices())
for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
I != E; ++I)
srcUseList.push_back(*I);
else
return false;
} else if (UI != C && UI != cpy) {
return false;
}
}
DominatorTree &DT = getAnalysis<DominatorTree>();
if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
if (!DT.dominates(cpyDestInst, C))
return false;
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
AliasAnalysis::NoModRef)
return false;
bool changedArgument = false;
for (unsigned i = 0; i < CS.arg_size(); ++i)
if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
if (cpySrc->getType() != cpyDest->getType())
cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
cpyDest->getName(), C);
changedArgument = true;
if (CS.getArgument(i)->getType() == cpyDest->getType())
CS.setArgument(i, cpyDest);
else
CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,
CS.getArgument(i)->getType(), cpyDest->getName(), C));
}
if (!changedArgument)
return false;
MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
MD.removeInstruction(C);
MD.removeInstruction(cpy);
cpy->eraseFromParent();
NumMemCpyInstr++;
return true;
}
bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
MemDepResult dep = MD.getDependency(M);
if (!dep.isClobber())
return false;
if (!isa<MemCpyInst>(dep.getInst())) {
if (CallInst *C = dyn_cast<CallInst>(dep.getInst()))
return performCallSlotOptzn(M, C);
return false;
}
MemCpyInst *MDep = cast<MemCpyInst>(dep.getInst());
if (M->getSource() != MDep->getDest())
return false;
ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
ConstantInt *C2 = dyn_cast<ConstantInt>(M->getLength());
if (!C1 || !C2)
return false;
uint64_t DepSize = C1->getValue().getZExtValue();
uint64_t CpySize = C2->getValue().getZExtValue();
if (DepSize < CpySize)
return false;
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
AliasAnalysis::NoAlias)
return false;
else if (AA.alias(M->getRawDest(), CpySize, M->getRawSource(), CpySize) !=
AliasAnalysis::NoAlias)
return false;
else if (AA.alias(MDep->getRawDest(), DepSize, MDep->getRawSource(), DepSize)
!= AliasAnalysis::NoAlias)
return false;
const Type *ArgTys[3] = { M->getRawDest()->getType(),
MDep->getRawSource()->getType(),
M->getLength()->getType() };
Function *MemCpyFun = Intrinsic::getDeclaration(
M->getParent()->getParent()->getParent(),
M->getIntrinsicID(), ArgTys, 3);
Value *Args[5] = {
M->getRawDest(), MDep->getRawSource(), M->getLength(),
M->getAlignmentCst(), M->getVolatileCst()
};
CallInst *C = CallInst::Create(MemCpyFun, Args, Args+5, "", M);
if (MD.getDependency(C) == dep) {
MD.removeInstruction(M);
M->eraseFromParent();
NumMemCpyInstr++;
return true;
}
MD.removeInstruction(C);
C->eraseFromParent();
return false;
}
bool MemCpyOpt::processMemMove(MemMoveInst *M) {
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
uint64_t MemMoveSize = ~0ULL;
if (ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength()))
MemMoveSize = Len->getZExtValue();
if (AA.alias(M->getRawDest(), MemMoveSize, M->getRawSource(), MemMoveSize) !=
AliasAnalysis::NoAlias)
return false;
DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
Module *Mod = M->getParent()->getParent()->getParent();
const Type *ArgTys[3] = { M->getRawDest()->getType(),
M->getRawSource()->getType(),
M->getLength()->getType() };
M->setOperand(0,Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, ArgTys, 3));
getAnalysis<MemoryDependenceAnalysis>().removeInstruction(M);
++NumMoveToCpy;
return true;
}
bool MemCpyOpt::iterateOnFunction(Function &F) {
bool MadeChange = false;
for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
BI != BE;) {
Instruction *I = BI++;
if (StoreInst *SI = dyn_cast<StoreInst>(I))
MadeChange |= processStore(SI, BI);
else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
MadeChange |= processMemCpy(M);
else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) {
if (processMemMove(M)) {
--BI; MadeChange = true;
}
}
}
}
return MadeChange;
}
bool MemCpyOpt::runOnFunction(Function &F) {
bool MadeChange = false;
while (1) {
if (!iterateOnFunction(F))
break;
MadeChange = true;
}
return MadeChange;
}