LegalizeVectorOps.cpp [plain text]
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
namespace {
class VectorLegalizer {
SelectionDAG& DAG;
const TargetLowering &TLI;
bool Changed;
DenseMap<SDValue, SDValue> LegalizedNodes;
void AddLegalizedOperand(SDValue From, SDValue To) {
LegalizedNodes.insert(std::make_pair(From, To));
if (From != To)
LegalizedNodes.insert(std::make_pair(To, To));
}
SDValue LegalizeOp(SDValue Op);
SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
SDValue UnrollVSETCC(SDValue Op);
SDValue ExpandUINT_TO_FLOAT(SDValue Op);
SDValue ExpandVSELECT(SDValue Op);
SDValue ExpandSELECT(SDValue Op);
SDValue ExpandLoad(SDValue Op);
SDValue ExpandStore(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
SDValue PromoteVectorOp(SDValue Op);
SDValue PromoteVectorOpINT_TO_FP(SDValue Op);
public:
bool Run();
VectorLegalizer(SelectionDAG& dag) :
DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
};
bool VectorLegalizer::Run() {
DAG.AssignTopologicalOrder();
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
LegalizeOp(SDValue(I, 0));
SDValue OldRoot = DAG.getRoot();
assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
DAG.setRoot(LegalizedNodes[OldRoot]);
LegalizedNodes.clear();
DAG.RemoveDeadNodes();
return Changed;
}
SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
return Result.getValue(Op.getResNo());
}
SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
if (I != LegalizedNodes.end()) return I->second;
SDNode* Node = Op.getNode();
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
Ops.push_back(LegalizeOp(Node->getOperand(i)));
SDValue Result =
SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0);
if (Op.getOpcode() == ISD::LOAD) {
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
ISD::LoadExtType ExtType = LD->getExtensionType();
if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
if (TLI.isLoadExtLegal(LD->getExtensionType(), LD->getMemoryVT()))
return TranslateLegalizeResults(Op, Result);
Changed = true;
return LegalizeOp(ExpandLoad(Op));
}
} else if (Op.getOpcode() == ISD::STORE) {
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
EVT StVT = ST->getMemoryVT();
EVT ValVT = ST->getValue().getValueType();
if (StVT.isVector() && ST->isTruncatingStore())
switch (TLI.getTruncStoreAction(ValVT, StVT)) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
return TranslateLegalizeResults(Op, Result);
case TargetLowering::Custom:
Changed = true;
return LegalizeOp(TLI.LowerOperation(Result, DAG));
case TargetLowering::Expand:
Changed = true;
return LegalizeOp(ExpandStore(Op));
}
}
bool HasVectorValue = false;
for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
J != E;
++J)
HasVectorValue |= J->isVector();
if (!HasVectorValue)
return TranslateLegalizeResults(Op, Result);
EVT QueryType;
switch (Op.getOpcode()) {
default:
return TranslateLegalizeResults(Op, Result);
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM:
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR:
case ISD::CTLZ:
case ISD::CTTZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP:
case ISD::SELECT:
case ISD::VSELECT:
case ISD::SELECT_CC:
case ISD::SETCC:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::TRUNCATE:
case ISD::SIGN_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FNEG:
case ISD::FABS:
case ISD::FSQRT:
case ISD::FSIN:
case ISD::FCOS:
case ISD::FPOWI:
case ISD::FPOW:
case ISD::FLOG:
case ISD::FLOG2:
case ISD::FLOG10:
case ISD::FEXP:
case ISD::FEXP2:
case ISD::FCEIL:
case ISD::FTRUNC:
case ISD::FRINT:
case ISD::FNEARBYINT:
case ISD::FFLOOR:
case ISD::FMA:
case ISD::SIGN_EXTEND_INREG:
QueryType = Node->getValueType(0);
break;
case ISD::FP_ROUND_INREG:
QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
QueryType = Node->getOperand(0).getValueType();
break;
}
switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
case TargetLowering::Promote:
switch (Op.getOpcode()) {
default:
Result = PromoteVectorOp(Op);
Changed = true;
break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
Result = PromoteVectorOpINT_TO_FP(Op);
Changed = true;
break;
}
break;
case TargetLowering::Legal: break;
case TargetLowering::Custom: {
SDValue Tmp1 = TLI.LowerOperation(Op, DAG);
if (Tmp1.getNode()) {
Result = Tmp1;
break;
}
}
case TargetLowering::Expand:
if (Node->getOpcode() == ISD::VSELECT)
Result = ExpandVSELECT(Op);
else if (Node->getOpcode() == ISD::SELECT)
Result = ExpandSELECT(Op);
else if (Node->getOpcode() == ISD::UINT_TO_FP)
Result = ExpandUINT_TO_FLOAT(Op);
else if (Node->getOpcode() == ISD::FNEG)
Result = ExpandFNEG(Op);
else if (Node->getOpcode() == ISD::SETCC)
Result = UnrollVSETCC(Op);
else
Result = DAG.UnrollVectorOp(Op.getNode());
break;
}
if (Result != Op) {
Result = LegalizeOp(Result);
Changed = true;
}
AddLegalizedOperand(Op, Result);
return Result;
}
SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
EVT VT = Op.getValueType();
assert(Op.getNode()->getNumValues() == 1 &&
"Can't promote a vector with multiple results!");
EVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
DebugLoc dl = Op.getDebugLoc();
SmallVector<SDValue, 4> Operands(Op.getNumOperands());
for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
if (Op.getOperand(j).getValueType().isVector())
Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
else
Operands[j] = Op.getOperand(j);
}
Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size());
return DAG.getNode(ISD::BITCAST, dl, VT, Op);
}
SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) {
EVT VT = Op.getOperand(0).getValueType();
assert(Op.getNode()->getNumValues() == 1 &&
"Can't promote a vector with multiple results!");
unsigned NumElts = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
EltVT = EVT::getIntegerVT(*DAG.getContext(), 2 * EltVT.getSizeInBits());
assert(EltVT.isSimple() && "Promoting to a non-simple vector type!");
MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
DebugLoc dl = Op.getDebugLoc();
SmallVector<SDValue, 4> Operands(Op.getNumOperands());
unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND :
ISD::SIGN_EXTEND;
for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
if (Op.getOperand(j).getValueType().isVector())
Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j));
else
Operands[j] = Op.getOperand(j);
}
return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), &Operands[0],
Operands.size());
}
SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
DebugLoc dl = Op.getDebugLoc();
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
SDValue Chain = LD->getChain();
SDValue BasePTR = LD->getBasePtr();
EVT SrcVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = LD->getExtensionType();
SmallVector<SDValue, 8> LoadVals;
SmallVector<SDValue, 8> LoadChains;
unsigned NumElem = SrcVT.getVectorNumElements();
unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
for (unsigned Idx=0; Idx<NumElem; Idx++) {
SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl,
Op.getNode()->getValueType(0).getScalarType(),
Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
SrcVT.getScalarType(),
LD->isVolatile(), LD->isNonTemporal(),
LD->getAlignment());
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
DAG.getIntPtrConstant(Stride));
LoadVals.push_back(ScalarLoad.getValue(0));
LoadChains.push_back(ScalarLoad.getValue(1));
}
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&LoadChains[0], LoadChains.size());
SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
Op.getNode()->getValueType(0), &LoadVals[0], LoadVals.size());
AddLegalizedOperand(Op.getValue(0), Value);
AddLegalizedOperand(Op.getValue(1), NewChain);
return (Op.getResNo() ? NewChain : Value);
}
SDValue VectorLegalizer::ExpandStore(SDValue Op) {
DebugLoc dl = Op.getDebugLoc();
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
SDValue Chain = ST->getChain();
SDValue BasePTR = ST->getBasePtr();
SDValue Value = ST->getValue();
EVT StVT = ST->getMemoryVT();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
unsigned NumElem = StVT.getVectorNumElements();
EVT RegVT = Value.getValueType();
EVT RegSclVT = RegVT.getScalarType();
EVT MemSclVT = StVT.getScalarType();
unsigned ScalarSize = MemSclVT.getSizeInBits();
if (!isPowerOf2_32(ScalarSize))
ScalarSize = NextPowerOf2(ScalarSize);
unsigned Stride = ScalarSize/8;
SmallVector<SDValue, 8> Stores;
for (unsigned Idx = 0; Idx < NumElem; Idx++) {
SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
RegSclVT, Value, DAG.getIntPtrConstant(Idx));
SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR,
ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
isVolatile, isNonTemporal, Alignment);
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
DAG.getIntPtrConstant(Stride));
Stores.push_back(Store);
}
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&Stores[0], Stores.size());
AddLegalizedOperand(Op, TF);
return TF;
}
SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
EVT VT = Op.getValueType();
DebugLoc DL = Op.getDebugLoc();
SDValue Mask = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
assert(VT.isVector() && !Mask.getValueType().isVector()
&& Op1.getValueType() == Op2.getValueType() && "Invalid type");
unsigned NumElem = VT.getVectorNumElements();
if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand)
return DAG.UnrollVectorOp(Op.getNode());
EVT MaskTy = TLI.getSetCCResultType(VT);
assert(MaskTy.isVector() && "Invalid CC type");
assert(MaskTy.getSizeInBits() == Op1.getValueType().getSizeInBits()
&& "Invalid mask size");
EVT BitTy = MaskTy.getScalarType();
Mask = DAG.getNode(ISD::SELECT, DL, BitTy, Mask,
DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy),
DAG.getConstant(0, BitTy));
SmallVector<SDValue, 8> Ops(NumElem, Mask);
Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, &Ops[0], Ops.size());
Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
SDValue AllOnes = DAG.getConstant(
APInt::getAllOnesValue(BitTy.getSizeInBits()), MaskTy);
SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
}
SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
EVT VT = Op.getOperand(0).getValueType();
DebugLoc DL = Op.getDebugLoc();
SDValue Mask = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
TLI.getBooleanContents(true) !=
TargetLowering::ZeroOrNegativeOneBooleanContent)
return DAG.UnrollVectorOp(Op.getNode());
assert(VT.getSizeInBits() == Op1.getValueType().getSizeInBits()
&& "Invalid mask size");
Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
SDValue AllOnes = DAG.getConstant(
APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), VT);
SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
}
SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
EVT VT = Op.getOperand(0).getValueType();
DebugLoc DL = Op.getDebugLoc();
if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
return DAG.UnrollVectorOp(Op.getNode());
EVT SVT = VT.getScalarType();
assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) &&
"Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
unsigned BW = SVT.getSizeInBits();
SDValue HalfWord = DAG.getConstant(BW/2, VT);
uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF;
SDValue HalfWordMask = DAG.getConstant(HWMask, VT);
SDValue TWOHW = DAG.getConstantFP((1<<(BW/2)), Op.getValueType());
SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
}
SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType());
return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
Zero, Op.getOperand(0));
}
return DAG.UnrollVectorOp(Op.getNode());
}
SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
EVT VT = Op.getValueType();
unsigned NumElems = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
EVT TmpEltVT = LHS.getValueType().getVectorElementType();
DebugLoc dl = Op.getDebugLoc();
SmallVector<SDValue, 8> Ops(NumElems);
for (unsigned i = 0; i < NumElems; ++i) {
SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
DAG.getIntPtrConstant(i));
SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
DAG.getIntPtrConstant(i));
Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(TmpEltVT),
LHSElem, RHSElem, CC);
Ops[i] = DAG.getNode(ISD::SELECT, dl, EltVT, Ops[i],
DAG.getConstant(APInt::getAllOnesValue
(EltVT.getSizeInBits()), EltVT),
DAG.getConstant(0, EltVT));
}
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems);
}
}
bool SelectionDAG::LegalizeVectors() {
return VectorLegalizer(*this).Run();
}