NVPTXAsmPrinter.cpp revision 263508
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains a printer that converts from our internal representation 11// of machine-dependent LLVM code to NVPTX assembly language. 12// 13//===----------------------------------------------------------------------===// 14 15#include "NVPTXAsmPrinter.h" 16#include "MCTargetDesc/NVPTXMCAsmInfo.h" 17#include "NVPTX.h" 18#include "NVPTXInstrInfo.h" 19#include "NVPTXMCExpr.h" 20#include "NVPTXRegisterInfo.h" 21#include "NVPTXTargetMachine.h" 22#include "NVPTXUtilities.h" 23#include "InstPrinter/NVPTXInstPrinter.h" 24#include "cl_common_defines.h" 25#include "llvm/ADT/StringExtras.h" 26#include "llvm/Analysis/ConstantFolding.h" 27#include "llvm/Assembly/Writer.h" 28#include "llvm/CodeGen/Analysis.h" 29#include "llvm/CodeGen/MachineFrameInfo.h" 30#include "llvm/CodeGen/MachineModuleInfo.h" 31#include "llvm/CodeGen/MachineRegisterInfo.h" 32#include "llvm/DebugInfo.h" 33#include "llvm/IR/DerivedTypes.h" 34#include "llvm/IR/Function.h" 35#include "llvm/IR/GlobalVariable.h" 36#include "llvm/IR/Module.h" 37#include "llvm/IR/Operator.h" 38#include "llvm/MC/MCStreamer.h" 39#include "llvm/MC/MCSymbol.h" 40#include "llvm/Support/CommandLine.h" 41#include "llvm/Support/ErrorHandling.h" 42#include "llvm/Support/FormattedStream.h" 43#include "llvm/Support/Path.h" 44#include "llvm/Support/TargetRegistry.h" 45#include "llvm/Support/TimeValue.h" 46#include "llvm/Target/Mangler.h" 47#include "llvm/Target/TargetLoweringObjectFile.h" 48#include <sstream> 49using namespace llvm; 50 51#define DEPOTNAME "__local_depot" 52 53static cl::opt<bool> 54EmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden, 55 cl::desc("NVPTX Specific: Emit Line numbers even without -G"), 56 cl::init(true)); 57 58static cl::opt<bool> 59InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, cl::Hidden, 60 cl::desc("NVPTX Specific: Emit source line in ptx file"), 61 cl::init(false)); 62 63namespace { 64/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V 65/// depends. 66void DiscoverDependentGlobals(const Value *V, 67 DenseSet<const GlobalVariable *> &Globals) { 68 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) 69 Globals.insert(GV); 70 else { 71 if (const User *U = dyn_cast<User>(V)) { 72 for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) { 73 DiscoverDependentGlobals(U->getOperand(i), Globals); 74 } 75 } 76 } 77} 78 79/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable 80/// instances to be emitted, but only after any dependents have been added 81/// first. 82void VisitGlobalVariableForEmission( 83 const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order, 84 DenseSet<const GlobalVariable *> &Visited, 85 DenseSet<const GlobalVariable *> &Visiting) { 86 // Have we already visited this one? 87 if (Visited.count(GV)) 88 return; 89 90 // Do we have a circular dependency? 91 if (Visiting.count(GV)) 92 report_fatal_error("Circular dependency found in global variable set"); 93 94 // Start visiting this global 95 Visiting.insert(GV); 96 97 // Make sure we visit all dependents first 98 DenseSet<const GlobalVariable *> Others; 99 for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) 100 DiscoverDependentGlobals(GV->getOperand(i), Others); 101 102 for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(), 103 E = Others.end(); 104 I != E; ++I) 105 VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); 106 107 // Now we can visit ourself 108 Order.push_back(GV); 109 Visited.insert(GV); 110 Visiting.erase(GV); 111} 112} 113 114// @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we 115// cannot just link to the existing version. 116/// LowerConstant - Lower the specified LLVM Constant to an MCExpr. 117/// 118using namespace nvptx; 119const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { 120 MCContext &Ctx = AP.OutContext; 121 122 if (CV->isNullValue() || isa<UndefValue>(CV)) 123 return MCConstantExpr::Create(0, Ctx); 124 125 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) 126 return MCConstantExpr::Create(CI->getZExtValue(), Ctx); 127 128 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) 129 return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx); 130 131 if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) 132 return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); 133 134 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); 135 if (CE == 0) 136 llvm_unreachable("Unknown constant value to lower!"); 137 138 switch (CE->getOpcode()) { 139 default: 140 // If the code isn't optimized, there may be outstanding folding 141 // opportunities. Attempt to fold the expression using DataLayout as a 142 // last resort before giving up. 143 if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) 144 if (C != CE) 145 return LowerConstant(C, AP); 146 147 // Otherwise report the problem to the user. 148 { 149 std::string S; 150 raw_string_ostream OS(S); 151 OS << "Unsupported expression in static initializer: "; 152 WriteAsOperand(OS, CE, /*PrintType=*/ false, 153 !AP.MF ? 0 : AP.MF->getFunction()->getParent()); 154 report_fatal_error(OS.str()); 155 } 156 case Instruction::GetElementPtr: { 157 const DataLayout &TD = *AP.TM.getDataLayout(); 158 // Generate a symbolic expression for the byte address 159 APInt OffsetAI(TD.getPointerSizeInBits(), 0); 160 cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI); 161 162 const MCExpr *Base = LowerConstant(CE->getOperand(0), AP); 163 if (!OffsetAI) 164 return Base; 165 166 int64_t Offset = OffsetAI.getSExtValue(); 167 return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), 168 Ctx); 169 } 170 171 case Instruction::Trunc: 172 // We emit the value and depend on the assembler to truncate the generated 173 // expression properly. This is important for differences between 174 // blockaddress labels. Since the two labels are in the same function, it 175 // is reasonable to treat their delta as a 32-bit value. 176 // FALL THROUGH. 177 case Instruction::BitCast: 178 return LowerConstant(CE->getOperand(0), AP); 179 180 case Instruction::IntToPtr: { 181 const DataLayout &TD = *AP.TM.getDataLayout(); 182 // Handle casts to pointers by changing them into casts to the appropriate 183 // integer type. This promotes constant folding and simplifies this code. 184 Constant *Op = CE->getOperand(0); 185 Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), 186 false /*ZExt*/); 187 return LowerConstant(Op, AP); 188 } 189 190 case Instruction::PtrToInt: { 191 const DataLayout &TD = *AP.TM.getDataLayout(); 192 // Support only foldable casts to/from pointers that can be eliminated by 193 // changing the pointer to the appropriately sized integer type. 194 Constant *Op = CE->getOperand(0); 195 Type *Ty = CE->getType(); 196 197 const MCExpr *OpExpr = LowerConstant(Op, AP); 198 199 // We can emit the pointer value into this slot if the slot is an 200 // integer slot equal to the size of the pointer. 201 if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType())) 202 return OpExpr; 203 204 // Otherwise the pointer is smaller than the resultant integer, mask off 205 // the high bits so we are sure to get a proper truncation if the input is 206 // a constant expr. 207 unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType()); 208 const MCExpr *MaskExpr = 209 MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx); 210 return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); 211 } 212 213 // The MC library also has a right-shift operator, but it isn't consistently 214 // signed or unsigned between different targets. 215 case Instruction::Add: 216 case Instruction::Sub: 217 case Instruction::Mul: 218 case Instruction::SDiv: 219 case Instruction::SRem: 220 case Instruction::Shl: 221 case Instruction::And: 222 case Instruction::Or: 223 case Instruction::Xor: { 224 const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP); 225 const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP); 226 switch (CE->getOpcode()) { 227 default: 228 llvm_unreachable("Unknown binary operator constant cast expr"); 229 case Instruction::Add: 230 return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); 231 case Instruction::Sub: 232 return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); 233 case Instruction::Mul: 234 return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); 235 case Instruction::SDiv: 236 return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); 237 case Instruction::SRem: 238 return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); 239 case Instruction::Shl: 240 return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); 241 case Instruction::And: 242 return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); 243 case Instruction::Or: 244 return MCBinaryExpr::CreateOr(LHS, RHS, Ctx); 245 case Instruction::Xor: 246 return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); 247 } 248 } 249 } 250} 251 252void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { 253 if (!EmitLineNumbers) 254 return; 255 if (ignoreLoc(MI)) 256 return; 257 258 DebugLoc curLoc = MI.getDebugLoc(); 259 260 if (prevDebugLoc.isUnknown() && curLoc.isUnknown()) 261 return; 262 263 if (prevDebugLoc == curLoc) 264 return; 265 266 prevDebugLoc = curLoc; 267 268 if (curLoc.isUnknown()) 269 return; 270 271 const MachineFunction *MF = MI.getParent()->getParent(); 272 //const TargetMachine &TM = MF->getTarget(); 273 274 const LLVMContext &ctx = MF->getFunction()->getContext(); 275 DIScope Scope(curLoc.getScope(ctx)); 276 277 assert((!Scope || Scope.isScope()) && 278 "Scope of a DebugLoc should be null or a DIScope."); 279 if (!Scope) 280 return; 281 282 StringRef fileName(Scope.getFilename()); 283 StringRef dirName(Scope.getDirectory()); 284 SmallString<128> FullPathName = dirName; 285 if (!dirName.empty() && !sys::path::is_absolute(fileName)) { 286 sys::path::append(FullPathName, fileName); 287 fileName = FullPathName.str(); 288 } 289 290 if (filenameMap.find(fileName.str()) == filenameMap.end()) 291 return; 292 293 // Emit the line from the source file. 294 if (InterleaveSrc) 295 this->emitSrcInText(fileName.str(), curLoc.getLine()); 296 297 std::stringstream temp; 298 temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine() 299 << " " << curLoc.getCol(); 300 OutStreamer.EmitRawText(Twine(temp.str().c_str())); 301} 302 303void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { 304 SmallString<128> Str; 305 raw_svector_ostream OS(Str); 306 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) 307 emitLineNumberAsDotLoc(*MI); 308 309 MCInst Inst; 310 lowerToMCInst(MI, Inst); 311 OutStreamer.EmitInstruction(Inst); 312} 313 314void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { 315 OutMI.setOpcode(MI->getOpcode()); 316 317 // Special: Do not mangle symbol operand of CALL_PROTOTYPE 318 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) { 319 const MachineOperand &MO = MI->getOperand(0); 320 OutMI.addOperand(GetSymbolRef(MO, 321 OutContext.GetOrCreateSymbol(Twine(MO.getSymbolName())))); 322 return; 323 } 324 325 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 326 const MachineOperand &MO = MI->getOperand(i); 327 328 MCOperand MCOp; 329 if (lowerOperand(MO, MCOp)) 330 OutMI.addOperand(MCOp); 331 } 332} 333 334bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, 335 MCOperand &MCOp) { 336 switch (MO.getType()) { 337 default: llvm_unreachable("unknown operand type"); 338 case MachineOperand::MO_Register: 339 MCOp = MCOperand::CreateReg(encodeVirtualRegister(MO.getReg())); 340 break; 341 case MachineOperand::MO_Immediate: 342 MCOp = MCOperand::CreateImm(MO.getImm()); 343 break; 344 case MachineOperand::MO_MachineBasicBlock: 345 MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( 346 MO.getMBB()->getSymbol(), OutContext)); 347 break; 348 case MachineOperand::MO_ExternalSymbol: 349 MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName())); 350 break; 351 case MachineOperand::MO_GlobalAddress: 352 MCOp = GetSymbolRef(MO, getSymbol(MO.getGlobal())); 353 break; 354 case MachineOperand::MO_FPImmediate: { 355 const ConstantFP *Cnt = MO.getFPImm(); 356 APFloat Val = Cnt->getValueAPF(); 357 358 switch (Cnt->getType()->getTypeID()) { 359 default: report_fatal_error("Unsupported FP type"); break; 360 case Type::FloatTyID: 361 MCOp = MCOperand::CreateExpr( 362 NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext)); 363 break; 364 case Type::DoubleTyID: 365 MCOp = MCOperand::CreateExpr( 366 NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext)); 367 break; 368 } 369 break; 370 } 371 } 372 return true; 373} 374 375unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { 376 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 377 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 378 379 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC]; 380 unsigned RegNum = RegMap[Reg]; 381 382 // Encode the register class in the upper 4 bits 383 // Must be kept in sync with NVPTXInstPrinter::printRegName 384 unsigned Ret = 0; 385 if (RC == &NVPTX::Int1RegsRegClass) { 386 Ret = (1 << 28); 387 } else if (RC == &NVPTX::Int16RegsRegClass) { 388 Ret = (2 << 28); 389 } else if (RC == &NVPTX::Int32RegsRegClass) { 390 Ret = (3 << 28); 391 } else if (RC == &NVPTX::Int64RegsRegClass) { 392 Ret = (4 << 28); 393 } else if (RC == &NVPTX::Float32RegsRegClass) { 394 Ret = (5 << 28); 395 } else if (RC == &NVPTX::Float64RegsRegClass) { 396 Ret = (6 << 28); 397 } else { 398 report_fatal_error("Bad register class"); 399 } 400 401 // Insert the vreg number 402 Ret |= (RegNum & 0x0FFFFFFF); 403 return Ret; 404 } else { 405 // Some special-use registers are actually physical registers. 406 // Encode this as the register class ID of 0 and the real register ID. 407 return Reg & 0x0FFFFFFF; 408 } 409} 410 411MCOperand NVPTXAsmPrinter::GetSymbolRef(const MachineOperand &MO, 412 const MCSymbol *Symbol) { 413 const MCExpr *Expr; 414 Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, 415 OutContext); 416 return MCOperand::CreateExpr(Expr); 417} 418 419void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { 420 const DataLayout *TD = TM.getDataLayout(); 421 const TargetLowering *TLI = TM.getTargetLowering(); 422 423 Type *Ty = F->getReturnType(); 424 425 bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 426 427 if (Ty->getTypeID() == Type::VoidTyID) 428 return; 429 430 O << " ("; 431 432 if (isABI) { 433 if (Ty->isPrimitiveType() || Ty->isIntegerTy()) { 434 unsigned size = 0; 435 if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) { 436 size = ITy->getBitWidth(); 437 if (size < 32) 438 size = 32; 439 } else { 440 assert(Ty->isFloatingPointTy() && "Floating point type expected here"); 441 size = Ty->getPrimitiveSizeInBits(); 442 } 443 444 O << ".param .b" << size << " func_retval0"; 445 } else if (isa<PointerType>(Ty)) { 446 O << ".param .b" << TLI->getPointerTy().getSizeInBits() 447 << " func_retval0"; 448 } else { 449 if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) { 450 SmallVector<EVT, 16> vtparts; 451 ComputeValueVTs(*TLI, Ty, vtparts); 452 unsigned totalsz = 0; 453 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 454 unsigned elems = 1; 455 EVT elemtype = vtparts[i]; 456 if (vtparts[i].isVector()) { 457 elems = vtparts[i].getVectorNumElements(); 458 elemtype = vtparts[i].getVectorElementType(); 459 } 460 for (unsigned j = 0, je = elems; j != je; ++j) { 461 unsigned sz = elemtype.getSizeInBits(); 462 if (elemtype.isInteger() && (sz < 8)) 463 sz = 8; 464 totalsz += sz / 8; 465 } 466 } 467 unsigned retAlignment = 0; 468 if (!llvm::getAlign(*F, 0, retAlignment)) 469 retAlignment = TD->getABITypeAlignment(Ty); 470 O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz 471 << "]"; 472 } else 473 assert(false && "Unknown return type"); 474 } 475 } else { 476 SmallVector<EVT, 16> vtparts; 477 ComputeValueVTs(*TLI, Ty, vtparts); 478 unsigned idx = 0; 479 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 480 unsigned elems = 1; 481 EVT elemtype = vtparts[i]; 482 if (vtparts[i].isVector()) { 483 elems = vtparts[i].getVectorNumElements(); 484 elemtype = vtparts[i].getVectorElementType(); 485 } 486 487 for (unsigned j = 0, je = elems; j != je; ++j) { 488 unsigned sz = elemtype.getSizeInBits(); 489 if (elemtype.isInteger() && (sz < 32)) 490 sz = 32; 491 O << ".reg .b" << sz << " func_retval" << idx; 492 if (j < je - 1) 493 O << ", "; 494 ++idx; 495 } 496 if (i < e - 1) 497 O << ", "; 498 } 499 } 500 O << ") "; 501 return; 502} 503 504void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF, 505 raw_ostream &O) { 506 const Function *F = MF.getFunction(); 507 printReturnValStr(F, O); 508} 509 510void NVPTXAsmPrinter::EmitFunctionEntryLabel() { 511 SmallString<128> Str; 512 raw_svector_ostream O(Str); 513 514 if (!GlobalsEmitted) { 515 emitGlobals(*MF->getFunction()->getParent()); 516 GlobalsEmitted = true; 517 } 518 519 // Set up 520 MRI = &MF->getRegInfo(); 521 F = MF->getFunction(); 522 emitLinkageDirective(F, O); 523 if (llvm::isKernelFunction(*F)) 524 O << ".entry "; 525 else { 526 O << ".func "; 527 printReturnValStr(*MF, O); 528 } 529 530 O << *CurrentFnSym; 531 532 emitFunctionParamList(*MF, O); 533 534 if (llvm::isKernelFunction(*F)) 535 emitKernelFunctionDirectives(*F, O); 536 537 OutStreamer.EmitRawText(O.str()); 538 539 prevDebugLoc = DebugLoc(); 540} 541 542void NVPTXAsmPrinter::EmitFunctionBodyStart() { 543 VRegMapping.clear(); 544 OutStreamer.EmitRawText(StringRef("{\n")); 545 setAndEmitFunctionVirtualRegisters(*MF); 546 547 SmallString<128> Str; 548 raw_svector_ostream O(Str); 549 emitDemotedVars(MF->getFunction(), O); 550 OutStreamer.EmitRawText(O.str()); 551} 552 553void NVPTXAsmPrinter::EmitFunctionBodyEnd() { 554 OutStreamer.EmitRawText(StringRef("}\n")); 555 VRegMapping.clear(); 556} 557 558void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const { 559 unsigned RegNo = MI->getOperand(0).getReg(); 560 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 561 if (TRI->isVirtualRegister(RegNo)) { 562 OutStreamer.AddComment(Twine("implicit-def: ") + 563 getVirtualRegisterName(RegNo)); 564 } else { 565 OutStreamer.AddComment(Twine("implicit-def: ") + 566 TM.getRegisterInfo()->getName(RegNo)); 567 } 568 OutStreamer.AddBlankLine(); 569} 570 571void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, 572 raw_ostream &O) const { 573 // If the NVVM IR has some of reqntid* specified, then output 574 // the reqntid directive, and set the unspecified ones to 1. 575 // If none of reqntid* is specified, don't output reqntid directive. 576 unsigned reqntidx, reqntidy, reqntidz; 577 bool specified = false; 578 if (llvm::getReqNTIDx(F, reqntidx) == false) 579 reqntidx = 1; 580 else 581 specified = true; 582 if (llvm::getReqNTIDy(F, reqntidy) == false) 583 reqntidy = 1; 584 else 585 specified = true; 586 if (llvm::getReqNTIDz(F, reqntidz) == false) 587 reqntidz = 1; 588 else 589 specified = true; 590 591 if (specified) 592 O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz 593 << "\n"; 594 595 // If the NVVM IR has some of maxntid* specified, then output 596 // the maxntid directive, and set the unspecified ones to 1. 597 // If none of maxntid* is specified, don't output maxntid directive. 598 unsigned maxntidx, maxntidy, maxntidz; 599 specified = false; 600 if (llvm::getMaxNTIDx(F, maxntidx) == false) 601 maxntidx = 1; 602 else 603 specified = true; 604 if (llvm::getMaxNTIDy(F, maxntidy) == false) 605 maxntidy = 1; 606 else 607 specified = true; 608 if (llvm::getMaxNTIDz(F, maxntidz) == false) 609 maxntidz = 1; 610 else 611 specified = true; 612 613 if (specified) 614 O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz 615 << "\n"; 616 617 unsigned mincta; 618 if (llvm::getMinCTASm(F, mincta)) 619 O << ".minnctapersm " << mincta << "\n"; 620} 621 622std::string 623NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const { 624 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 625 626 std::string Name; 627 raw_string_ostream NameStr(Name); 628 629 VRegRCMap::const_iterator I = VRegMapping.find(RC); 630 assert(I != VRegMapping.end() && "Bad register class"); 631 const DenseMap<unsigned, unsigned> &RegMap = I->second; 632 633 VRegMap::const_iterator VI = RegMap.find(Reg); 634 assert(VI != RegMap.end() && "Bad virtual register"); 635 unsigned MappedVR = VI->second; 636 637 NameStr << getNVPTXRegClassStr(RC) << MappedVR; 638 639 NameStr.flush(); 640 return Name; 641} 642 643void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, 644 raw_ostream &O) { 645 O << getVirtualRegisterName(vr); 646} 647 648void NVPTXAsmPrinter::printVecModifiedImmediate( 649 const MachineOperand &MO, const char *Modifier, raw_ostream &O) { 650 static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' }; 651 int Imm = (int) MO.getImm(); 652 if (0 == strcmp(Modifier, "vecelem")) 653 O << "_" << vecelem[Imm]; 654 else if (0 == strcmp(Modifier, "vecv4comm1")) { 655 if ((Imm < 0) || (Imm > 3)) 656 O << "//"; 657 } else if (0 == strcmp(Modifier, "vecv4comm2")) { 658 if ((Imm < 4) || (Imm > 7)) 659 O << "//"; 660 } else if (0 == strcmp(Modifier, "vecv4pos")) { 661 if (Imm < 0) 662 Imm = 0; 663 O << "_" << vecelem[Imm % 4]; 664 } else if (0 == strcmp(Modifier, "vecv2comm1")) { 665 if ((Imm < 0) || (Imm > 1)) 666 O << "//"; 667 } else if (0 == strcmp(Modifier, "vecv2comm2")) { 668 if ((Imm < 2) || (Imm > 3)) 669 O << "//"; 670 } else if (0 == strcmp(Modifier, "vecv2pos")) { 671 if (Imm < 0) 672 Imm = 0; 673 O << "_" << vecelem[Imm % 2]; 674 } else 675 llvm_unreachable("Unknown Modifier on immediate operand"); 676} 677 678 679 680void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { 681 682 emitLinkageDirective(F, O); 683 if (llvm::isKernelFunction(*F)) 684 O << ".entry "; 685 else 686 O << ".func "; 687 printReturnValStr(F, O); 688 O << *getSymbol(F) << "\n"; 689 emitFunctionParamList(F, O); 690 O << ";\n"; 691} 692 693static bool usedInGlobalVarDef(const Constant *C) { 694 if (!C) 695 return false; 696 697 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { 698 if (GV->getName().str() == "llvm.used") 699 return false; 700 return true; 701 } 702 703 for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end(); 704 ui != ue; ++ui) { 705 const Constant *C = dyn_cast<Constant>(*ui); 706 if (usedInGlobalVarDef(C)) 707 return true; 708 } 709 return false; 710} 711 712static bool usedInOneFunc(const User *U, Function const *&oneFunc) { 713 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { 714 if (othergv->getName().str() == "llvm.used") 715 return true; 716 } 717 718 if (const Instruction *instr = dyn_cast<Instruction>(U)) { 719 if (instr->getParent() && instr->getParent()->getParent()) { 720 const Function *curFunc = instr->getParent()->getParent(); 721 if (oneFunc && (curFunc != oneFunc)) 722 return false; 723 oneFunc = curFunc; 724 return true; 725 } else 726 return false; 727 } 728 729 if (const MDNode *md = dyn_cast<MDNode>(U)) 730 if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") || 731 (md->getName().str() == "llvm.dbg.sp"))) 732 return true; 733 734 for (User::const_use_iterator ui = U->use_begin(), ue = U->use_end(); 735 ui != ue; ++ui) { 736 if (usedInOneFunc(*ui, oneFunc) == false) 737 return false; 738 } 739 return true; 740} 741 742/* Find out if a global variable can be demoted to local scope. 743 * Currently, this is valid for CUDA shared variables, which have local 744 * scope and global lifetime. So the conditions to check are : 745 * 1. Is the global variable in shared address space? 746 * 2. Does it have internal linkage? 747 * 3. Is the global variable referenced only in one function? 748 */ 749static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { 750 if (gv->hasInternalLinkage() == false) 751 return false; 752 const PointerType *Pty = gv->getType(); 753 if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED) 754 return false; 755 756 const Function *oneFunc = 0; 757 758 bool flag = usedInOneFunc(gv, oneFunc); 759 if (flag == false) 760 return false; 761 if (!oneFunc) 762 return false; 763 f = oneFunc; 764 return true; 765} 766 767static bool useFuncSeen(const Constant *C, 768 llvm::DenseMap<const Function *, bool> &seenMap) { 769 for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end(); 770 ui != ue; ++ui) { 771 if (const Constant *cu = dyn_cast<Constant>(*ui)) { 772 if (useFuncSeen(cu, seenMap)) 773 return true; 774 } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) { 775 const BasicBlock *bb = I->getParent(); 776 if (!bb) 777 continue; 778 const Function *caller = bb->getParent(); 779 if (!caller) 780 continue; 781 if (seenMap.find(caller) != seenMap.end()) 782 return true; 783 } 784 } 785 return false; 786} 787 788void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) { 789 llvm::DenseMap<const Function *, bool> seenMap; 790 for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { 791 const Function *F = FI; 792 793 if (F->isDeclaration()) { 794 if (F->use_empty()) 795 continue; 796 if (F->getIntrinsicID()) 797 continue; 798 emitDeclaration(F, O); 799 continue; 800 } 801 for (Value::const_use_iterator iter = F->use_begin(), 802 iterEnd = F->use_end(); 803 iter != iterEnd; ++iter) { 804 if (const Constant *C = dyn_cast<Constant>(*iter)) { 805 if (usedInGlobalVarDef(C)) { 806 // The use is in the initialization of a global variable 807 // that is a function pointer, so print a declaration 808 // for the original function 809 emitDeclaration(F, O); 810 break; 811 } 812 // Emit a declaration of this function if the function that 813 // uses this constant expr has already been seen. 814 if (useFuncSeen(C, seenMap)) { 815 emitDeclaration(F, O); 816 break; 817 } 818 } 819 820 if (!isa<Instruction>(*iter)) 821 continue; 822 const Instruction *instr = cast<Instruction>(*iter); 823 const BasicBlock *bb = instr->getParent(); 824 if (!bb) 825 continue; 826 const Function *caller = bb->getParent(); 827 if (!caller) 828 continue; 829 830 // If a caller has already been seen, then the caller is 831 // appearing in the module before the callee. so print out 832 // a declaration for the callee. 833 if (seenMap.find(caller) != seenMap.end()) { 834 emitDeclaration(F, O); 835 break; 836 } 837 } 838 seenMap[F] = true; 839 } 840} 841 842void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { 843 DebugInfoFinder DbgFinder; 844 DbgFinder.processModule(M); 845 846 unsigned i = 1; 847 for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), 848 E = DbgFinder.compile_unit_end(); 849 I != E; ++I) { 850 DICompileUnit DIUnit(*I); 851 StringRef Filename(DIUnit.getFilename()); 852 StringRef Dirname(DIUnit.getDirectory()); 853 SmallString<128> FullPathName = Dirname; 854 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 855 sys::path::append(FullPathName, Filename); 856 Filename = FullPathName.str(); 857 } 858 if (filenameMap.find(Filename.str()) != filenameMap.end()) 859 continue; 860 filenameMap[Filename.str()] = i; 861 OutStreamer.EmitDwarfFileDirective(i, "", Filename.str()); 862 ++i; 863 } 864 865 for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), 866 E = DbgFinder.subprogram_end(); 867 I != E; ++I) { 868 DISubprogram SP(*I); 869 StringRef Filename(SP.getFilename()); 870 StringRef Dirname(SP.getDirectory()); 871 SmallString<128> FullPathName = Dirname; 872 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 873 sys::path::append(FullPathName, Filename); 874 Filename = FullPathName.str(); 875 } 876 if (filenameMap.find(Filename.str()) != filenameMap.end()) 877 continue; 878 filenameMap[Filename.str()] = i; 879 ++i; 880 } 881} 882 883bool NVPTXAsmPrinter::doInitialization(Module &M) { 884 885 SmallString<128> Str1; 886 raw_svector_ostream OS1(Str1); 887 888 MMI = getAnalysisIfAvailable<MachineModuleInfo>(); 889 MMI->AnalyzeModule(M); 890 891 // We need to call the parent's one explicitly. 892 //bool Result = AsmPrinter::doInitialization(M); 893 894 // Initialize TargetLoweringObjectFile. 895 const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) 896 .Initialize(OutContext, TM); 897 898 Mang = new Mangler(&TM); 899 900 // Emit header before any dwarf directives are emitted below. 901 emitHeader(M, OS1); 902 OutStreamer.EmitRawText(OS1.str()); 903 904 // Already commented out 905 //bool Result = AsmPrinter::doInitialization(M); 906 907 // Emit module-level inline asm if it exists. 908 if (!M.getModuleInlineAsm().empty()) { 909 OutStreamer.AddComment("Start of file scope inline assembly"); 910 OutStreamer.AddBlankLine(); 911 OutStreamer.EmitRawText(StringRef(M.getModuleInlineAsm())); 912 OutStreamer.AddBlankLine(); 913 OutStreamer.AddComment("End of file scope inline assembly"); 914 OutStreamer.AddBlankLine(); 915 } 916 917 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) 918 recordAndEmitFilenames(M); 919 920 GlobalsEmitted = false; 921 922 return false; // success 923} 924 925void NVPTXAsmPrinter::emitGlobals(const Module &M) { 926 SmallString<128> Str2; 927 raw_svector_ostream OS2(Str2); 928 929 emitDeclarations(M, OS2); 930 931 // As ptxas does not support forward references of globals, we need to first 932 // sort the list of module-level globals in def-use order. We visit each 933 // global variable in order, and ensure that we emit it *after* its dependent 934 // globals. We use a little extra memory maintaining both a set and a list to 935 // have fast searches while maintaining a strict ordering. 936 SmallVector<const GlobalVariable *, 8> Globals; 937 DenseSet<const GlobalVariable *> GVVisited; 938 DenseSet<const GlobalVariable *> GVVisiting; 939 940 // Visit each global variable, in order 941 for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); 942 I != E; ++I) 943 VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); 944 945 assert(GVVisited.size() == M.getGlobalList().size() && 946 "Missed a global variable"); 947 assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); 948 949 // Print out module-level global variables in proper order 950 for (unsigned i = 0, e = Globals.size(); i != e; ++i) 951 printModuleLevelGV(Globals[i], OS2); 952 953 OS2 << '\n'; 954 955 OutStreamer.EmitRawText(OS2.str()); 956} 957 958void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) { 959 O << "//\n"; 960 O << "// Generated by LLVM NVPTX Back-End\n"; 961 O << "//\n"; 962 O << "\n"; 963 964 unsigned PTXVersion = nvptxSubtarget.getPTXVersion(); 965 O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"; 966 967 O << ".target "; 968 O << nvptxSubtarget.getTargetName(); 969 970 if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) 971 O << ", texmode_independent"; 972 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { 973 if (!nvptxSubtarget.hasDouble()) 974 O << ", map_f64_to_f32"; 975 } 976 977 if (MAI->doesSupportDebugInformation()) 978 O << ", debug"; 979 980 O << "\n"; 981 982 O << ".address_size "; 983 if (nvptxSubtarget.is64Bit()) 984 O << "64"; 985 else 986 O << "32"; 987 O << "\n"; 988 989 O << "\n"; 990} 991 992bool NVPTXAsmPrinter::doFinalization(Module &M) { 993 994 // If we did not emit any functions, then the global declarations have not 995 // yet been emitted. 996 if (!GlobalsEmitted) { 997 emitGlobals(M); 998 GlobalsEmitted = true; 999 } 1000 1001 // XXX Temproarily remove global variables so that doFinalization() will not 1002 // emit them again (global variables are emitted at beginning). 1003 1004 Module::GlobalListType &global_list = M.getGlobalList(); 1005 int i, n = global_list.size(); 1006 GlobalVariable **gv_array = new GlobalVariable *[n]; 1007 1008 // first, back-up GlobalVariable in gv_array 1009 i = 0; 1010 for (Module::global_iterator I = global_list.begin(), E = global_list.end(); 1011 I != E; ++I) 1012 gv_array[i++] = &*I; 1013 1014 // second, empty global_list 1015 while (!global_list.empty()) 1016 global_list.remove(global_list.begin()); 1017 1018 // call doFinalization 1019 bool ret = AsmPrinter::doFinalization(M); 1020 1021 // now we restore global variables 1022 for (i = 0; i < n; i++) 1023 global_list.insert(global_list.end(), gv_array[i]); 1024 1025 delete[] gv_array; 1026 return ret; 1027 1028 //bool Result = AsmPrinter::doFinalization(M); 1029 // Instead of calling the parents doFinalization, we may 1030 // clone parents doFinalization and customize here. 1031 // Currently, we if NVISA out the EmitGlobals() in 1032 // parent's doFinalization, which is too intrusive. 1033 // 1034 // Same for the doInitialization. 1035 //return Result; 1036} 1037 1038// This function emits appropriate linkage directives for 1039// functions and global variables. 1040// 1041// extern function declaration -> .extern 1042// extern function definition -> .visible 1043// external global variable with init -> .visible 1044// external without init -> .extern 1045// appending -> not allowed, assert. 1046 1047void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, 1048 raw_ostream &O) { 1049 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { 1050 if (V->hasExternalLinkage()) { 1051 if (isa<GlobalVariable>(V)) { 1052 const GlobalVariable *GVar = cast<GlobalVariable>(V); 1053 if (GVar) { 1054 if (GVar->hasInitializer()) 1055 O << ".visible "; 1056 else 1057 O << ".extern "; 1058 } 1059 } else if (V->isDeclaration()) 1060 O << ".extern "; 1061 else 1062 O << ".visible "; 1063 } else if (V->hasAppendingLinkage()) { 1064 std::string msg; 1065 msg.append("Error: "); 1066 msg.append("Symbol "); 1067 if (V->hasName()) 1068 msg.append(V->getName().str()); 1069 msg.append("has unsupported appending linkage type"); 1070 llvm_unreachable(msg.c_str()); 1071 } 1072 } 1073} 1074 1075void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, 1076 raw_ostream &O, 1077 bool processDemoted) { 1078 1079 // Skip meta data 1080 if (GVar->hasSection()) { 1081 if (GVar->getSection() == "llvm.metadata") 1082 return; 1083 } 1084 1085 const DataLayout *TD = TM.getDataLayout(); 1086 1087 // GlobalVariables are always constant pointers themselves. 1088 const PointerType *PTy = GVar->getType(); 1089 Type *ETy = PTy->getElementType(); 1090 1091 if (GVar->hasExternalLinkage()) { 1092 if (GVar->hasInitializer()) 1093 O << ".visible "; 1094 else 1095 O << ".extern "; 1096 } 1097 1098 if (llvm::isTexture(*GVar)) { 1099 O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n"; 1100 return; 1101 } 1102 1103 if (llvm::isSurface(*GVar)) { 1104 O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n"; 1105 return; 1106 } 1107 1108 if (GVar->isDeclaration()) { 1109 // (extern) declarations, no definition or initializer 1110 // Currently the only known declaration is for an automatic __local 1111 // (.shared) promoted to global. 1112 emitPTXGlobalVariable(GVar, O); 1113 O << ";\n"; 1114 return; 1115 } 1116 1117 if (llvm::isSampler(*GVar)) { 1118 O << ".global .samplerref " << llvm::getSamplerName(*GVar); 1119 1120 const Constant *Initializer = NULL; 1121 if (GVar->hasInitializer()) 1122 Initializer = GVar->getInitializer(); 1123 const ConstantInt *CI = NULL; 1124 if (Initializer) 1125 CI = dyn_cast<ConstantInt>(Initializer); 1126 if (CI) { 1127 unsigned sample = CI->getZExtValue(); 1128 1129 O << " = { "; 1130 1131 for (int i = 0, 1132 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE); 1133 i < 3; i++) { 1134 O << "addr_mode_" << i << " = "; 1135 switch (addr) { 1136 case 0: 1137 O << "wrap"; 1138 break; 1139 case 1: 1140 O << "clamp_to_border"; 1141 break; 1142 case 2: 1143 O << "clamp_to_edge"; 1144 break; 1145 case 3: 1146 O << "wrap"; 1147 break; 1148 case 4: 1149 O << "mirror"; 1150 break; 1151 } 1152 O << ", "; 1153 } 1154 O << "filter_mode = "; 1155 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) { 1156 case 0: 1157 O << "nearest"; 1158 break; 1159 case 1: 1160 O << "linear"; 1161 break; 1162 case 2: 1163 assert(0 && "Anisotropic filtering is not supported"); 1164 default: 1165 O << "nearest"; 1166 break; 1167 } 1168 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) { 1169 O << ", force_unnormalized_coords = 1"; 1170 } 1171 O << " }"; 1172 } 1173 1174 O << ";\n"; 1175 return; 1176 } 1177 1178 if (GVar->hasPrivateLinkage()) { 1179 1180 if (!strncmp(GVar->getName().data(), "unrollpragma", 12)) 1181 return; 1182 1183 // FIXME - need better way (e.g. Metadata) to avoid generating this global 1184 if (!strncmp(GVar->getName().data(), "filename", 8)) 1185 return; 1186 if (GVar->use_empty()) 1187 return; 1188 } 1189 1190 const Function *demotedFunc = 0; 1191 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { 1192 O << "// " << GVar->getName().str() << " has been demoted\n"; 1193 if (localDecls.find(demotedFunc) != localDecls.end()) 1194 localDecls[demotedFunc].push_back(GVar); 1195 else { 1196 std::vector<const GlobalVariable *> temp; 1197 temp.push_back(GVar); 1198 localDecls[demotedFunc] = temp; 1199 } 1200 return; 1201 } 1202 1203 O << "."; 1204 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1205 if (GVar->getAlignment() == 0) 1206 O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 1207 else 1208 O << " .align " << GVar->getAlignment(); 1209 1210 if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { 1211 O << " ."; 1212 // Special case: ABI requires that we use .u8 for predicates 1213 if (ETy->isIntegerTy(1)) 1214 O << "u8"; 1215 else 1216 O << getPTXFundamentalTypeStr(ETy, false); 1217 O << " "; 1218 O << *getSymbol(GVar); 1219 1220 // Ptx allows variable initilization only for constant and global state 1221 // spaces. 1222 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1223 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && 1224 GVar->hasInitializer()) { 1225 const Constant *Initializer = GVar->getInitializer(); 1226 if (!Initializer->isNullValue()) { 1227 O << " = "; 1228 printScalarConstant(Initializer, O); 1229 } 1230 } 1231 } else { 1232 unsigned int ElementSize = 0; 1233 1234 // Although PTX has direct support for struct type and array type and 1235 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for 1236 // targets that support these high level field accesses. Structs, arrays 1237 // and vectors are lowered into arrays of bytes. 1238 switch (ETy->getTypeID()) { 1239 case Type::StructTyID: 1240 case Type::ArrayTyID: 1241 case Type::VectorTyID: 1242 ElementSize = TD->getTypeStoreSize(ETy); 1243 // Ptx allows variable initilization only for constant and 1244 // global state spaces. 1245 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1246 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && 1247 GVar->hasInitializer()) { 1248 const Constant *Initializer = GVar->getInitializer(); 1249 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { 1250 AggBuffer aggBuffer(ElementSize, O, *this); 1251 bufferAggregateConstant(Initializer, &aggBuffer); 1252 if (aggBuffer.numSymbols) { 1253 if (nvptxSubtarget.is64Bit()) { 1254 O << " .u64 " << *getSymbol(GVar) << "["; 1255 O << ElementSize / 8; 1256 } else { 1257 O << " .u32 " << *getSymbol(GVar) << "["; 1258 O << ElementSize / 4; 1259 } 1260 O << "]"; 1261 } else { 1262 O << " .b8 " << *getSymbol(GVar) << "["; 1263 O << ElementSize; 1264 O << "]"; 1265 } 1266 O << " = {"; 1267 aggBuffer.print(); 1268 O << "}"; 1269 } else { 1270 O << " .b8 " << *getSymbol(GVar); 1271 if (ElementSize) { 1272 O << "["; 1273 O << ElementSize; 1274 O << "]"; 1275 } 1276 } 1277 } else { 1278 O << " .b8 " << *getSymbol(GVar); 1279 if (ElementSize) { 1280 O << "["; 1281 O << ElementSize; 1282 O << "]"; 1283 } 1284 } 1285 break; 1286 default: 1287 assert(0 && "type not supported yet"); 1288 } 1289 1290 } 1291 O << ";\n"; 1292} 1293 1294void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { 1295 if (localDecls.find(f) == localDecls.end()) 1296 return; 1297 1298 std::vector<const GlobalVariable *> &gvars = localDecls[f]; 1299 1300 for (unsigned i = 0, e = gvars.size(); i != e; ++i) { 1301 O << "\t// demoted variable\n\t"; 1302 printModuleLevelGV(gvars[i], O, true); 1303 } 1304} 1305 1306void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, 1307 raw_ostream &O) const { 1308 switch (AddressSpace) { 1309 case llvm::ADDRESS_SPACE_LOCAL: 1310 O << "local"; 1311 break; 1312 case llvm::ADDRESS_SPACE_GLOBAL: 1313 O << "global"; 1314 break; 1315 case llvm::ADDRESS_SPACE_CONST: 1316 O << "const"; 1317 break; 1318 case llvm::ADDRESS_SPACE_SHARED: 1319 O << "shared"; 1320 break; 1321 default: 1322 report_fatal_error("Bad address space found while emitting PTX"); 1323 break; 1324 } 1325} 1326 1327std::string 1328NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { 1329 switch (Ty->getTypeID()) { 1330 default: 1331 llvm_unreachable("unexpected type"); 1332 break; 1333 case Type::IntegerTyID: { 1334 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); 1335 if (NumBits == 1) 1336 return "pred"; 1337 else if (NumBits <= 64) { 1338 std::string name = "u"; 1339 return name + utostr(NumBits); 1340 } else { 1341 llvm_unreachable("Integer too large"); 1342 break; 1343 } 1344 break; 1345 } 1346 case Type::FloatTyID: 1347 return "f32"; 1348 case Type::DoubleTyID: 1349 return "f64"; 1350 case Type::PointerTyID: 1351 if (nvptxSubtarget.is64Bit()) 1352 if (useB4PTR) 1353 return "b64"; 1354 else 1355 return "u64"; 1356 else if (useB4PTR) 1357 return "b32"; 1358 else 1359 return "u32"; 1360 } 1361 llvm_unreachable("unexpected type"); 1362 return NULL; 1363} 1364 1365void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, 1366 raw_ostream &O) { 1367 1368 const DataLayout *TD = TM.getDataLayout(); 1369 1370 // GlobalVariables are always constant pointers themselves. 1371 const PointerType *PTy = GVar->getType(); 1372 Type *ETy = PTy->getElementType(); 1373 1374 O << "."; 1375 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1376 if (GVar->getAlignment() == 0) 1377 O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 1378 else 1379 O << " .align " << GVar->getAlignment(); 1380 1381 if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { 1382 O << " ."; 1383 O << getPTXFundamentalTypeStr(ETy); 1384 O << " "; 1385 O << *getSymbol(GVar); 1386 return; 1387 } 1388 1389 int64_t ElementSize = 0; 1390 1391 // Although PTX has direct support for struct type and array type and LLVM IR 1392 // is very similar to PTX, the LLVM CodeGen does not support for targets that 1393 // support these high level field accesses. Structs and arrays are lowered 1394 // into arrays of bytes. 1395 switch (ETy->getTypeID()) { 1396 case Type::StructTyID: 1397 case Type::ArrayTyID: 1398 case Type::VectorTyID: 1399 ElementSize = TD->getTypeStoreSize(ETy); 1400 O << " .b8 " << *getSymbol(GVar) << "["; 1401 if (ElementSize) { 1402 O << itostr(ElementSize); 1403 } 1404 O << "]"; 1405 break; 1406 default: 1407 assert(0 && "type not supported yet"); 1408 } 1409 return; 1410} 1411 1412static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { 1413 if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty)) 1414 return TD->getPrefTypeAlignment(Ty); 1415 1416 const ArrayType *ATy = dyn_cast<ArrayType>(Ty); 1417 if (ATy) 1418 return getOpenCLAlignment(TD, ATy->getElementType()); 1419 1420 const VectorType *VTy = dyn_cast<VectorType>(Ty); 1421 if (VTy) { 1422 Type *ETy = VTy->getElementType(); 1423 unsigned int numE = VTy->getNumElements(); 1424 unsigned int alignE = TD->getPrefTypeAlignment(ETy); 1425 if (numE == 3) 1426 return 4 * alignE; 1427 else 1428 return numE * alignE; 1429 } 1430 1431 const StructType *STy = dyn_cast<StructType>(Ty); 1432 if (STy) { 1433 unsigned int alignStruct = 1; 1434 // Go through each element of the struct and find the 1435 // largest alignment. 1436 for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { 1437 Type *ETy = STy->getElementType(i); 1438 unsigned int align = getOpenCLAlignment(TD, ETy); 1439 if (align > alignStruct) 1440 alignStruct = align; 1441 } 1442 return alignStruct; 1443 } 1444 1445 const FunctionType *FTy = dyn_cast<FunctionType>(Ty); 1446 if (FTy) 1447 return TD->getPointerPrefAlignment(); 1448 return TD->getPrefTypeAlignment(Ty); 1449} 1450 1451void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, 1452 int paramIndex, raw_ostream &O) { 1453 if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || 1454 (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) 1455 O << *getSymbol(I->getParent()) << "_param_" << paramIndex; 1456 else { 1457 std::string argName = I->getName(); 1458 const char *p = argName.c_str(); 1459 while (*p) { 1460 if (*p == '.') 1461 O << "_"; 1462 else 1463 O << *p; 1464 p++; 1465 } 1466 } 1467} 1468 1469void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { 1470 Function::const_arg_iterator I, E; 1471 int i = 0; 1472 1473 if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || 1474 (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) { 1475 O << *CurrentFnSym << "_param_" << paramIndex; 1476 return; 1477 } 1478 1479 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) { 1480 if (i == paramIndex) { 1481 printParamName(I, paramIndex, O); 1482 return; 1483 } 1484 } 1485 llvm_unreachable("paramIndex out of bound"); 1486} 1487 1488void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { 1489 const DataLayout *TD = TM.getDataLayout(); 1490 const AttributeSet &PAL = F->getAttributes(); 1491 const TargetLowering *TLI = TM.getTargetLowering(); 1492 Function::const_arg_iterator I, E; 1493 unsigned paramIndex = 0; 1494 bool first = true; 1495 bool isKernelFunc = llvm::isKernelFunction(*F); 1496 bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 1497 MVT thePointerTy = TLI->getPointerTy(); 1498 1499 O << "(\n"; 1500 1501 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { 1502 Type *Ty = I->getType(); 1503 1504 if (!first) 1505 O << ",\n"; 1506 1507 first = false; 1508 1509 // Handle image/sampler parameters 1510 if (llvm::isSampler(*I) || llvm::isImage(*I)) { 1511 if (llvm::isImage(*I)) { 1512 std::string sname = I->getName(); 1513 if (llvm::isImageWriteOnly(*I)) 1514 O << "\t.param .surfref " << *getSymbol(F) << "_param_" 1515 << paramIndex; 1516 else // Default image is read_only 1517 O << "\t.param .texref " << *getSymbol(F) << "_param_" 1518 << paramIndex; 1519 } else // Should be llvm::isSampler(*I) 1520 O << "\t.param .samplerref " << *getSymbol(F) << "_param_" 1521 << paramIndex; 1522 continue; 1523 } 1524 1525 if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) { 1526 if (Ty->isVectorTy()) { 1527 // Just print .param .b8 .align <a> .param[size]; 1528 // <a> = PAL.getparamalignment 1529 // size = typeallocsize of element type 1530 unsigned align = PAL.getParamAlignment(paramIndex + 1); 1531 if (align == 0) 1532 align = TD->getABITypeAlignment(Ty); 1533 1534 unsigned sz = TD->getTypeAllocSize(Ty); 1535 O << "\t.param .align " << align << " .b8 "; 1536 printParamName(I, paramIndex, O); 1537 O << "[" << sz << "]"; 1538 1539 continue; 1540 } 1541 // Just a scalar 1542 const PointerType *PTy = dyn_cast<PointerType>(Ty); 1543 if (isKernelFunc) { 1544 if (PTy) { 1545 // Special handling for pointer arguments to kernel 1546 O << "\t.param .u" << thePointerTy.getSizeInBits() << " "; 1547 1548 if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) { 1549 Type *ETy = PTy->getElementType(); 1550 int addrSpace = PTy->getAddressSpace(); 1551 switch (addrSpace) { 1552 default: 1553 O << ".ptr "; 1554 break; 1555 case llvm::ADDRESS_SPACE_CONST: 1556 O << ".ptr .const "; 1557 break; 1558 case llvm::ADDRESS_SPACE_SHARED: 1559 O << ".ptr .shared "; 1560 break; 1561 case llvm::ADDRESS_SPACE_GLOBAL: 1562 O << ".ptr .global "; 1563 break; 1564 } 1565 O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " "; 1566 } 1567 printParamName(I, paramIndex, O); 1568 continue; 1569 } 1570 1571 // non-pointer scalar to kernel func 1572 O << "\t.param ."; 1573 // Special case: predicate operands become .u8 types 1574 if (Ty->isIntegerTy(1)) 1575 O << "u8"; 1576 else 1577 O << getPTXFundamentalTypeStr(Ty); 1578 O << " "; 1579 printParamName(I, paramIndex, O); 1580 continue; 1581 } 1582 // Non-kernel function, just print .param .b<size> for ABI 1583 // and .reg .b<size> for non ABY 1584 unsigned sz = 0; 1585 if (isa<IntegerType>(Ty)) { 1586 sz = cast<IntegerType>(Ty)->getBitWidth(); 1587 if (sz < 32) 1588 sz = 32; 1589 } else if (isa<PointerType>(Ty)) 1590 sz = thePointerTy.getSizeInBits(); 1591 else 1592 sz = Ty->getPrimitiveSizeInBits(); 1593 if (isABI) 1594 O << "\t.param .b" << sz << " "; 1595 else 1596 O << "\t.reg .b" << sz << " "; 1597 printParamName(I, paramIndex, O); 1598 continue; 1599 } 1600 1601 // param has byVal attribute. So should be a pointer 1602 const PointerType *PTy = dyn_cast<PointerType>(Ty); 1603 assert(PTy && "Param with byval attribute should be a pointer type"); 1604 Type *ETy = PTy->getElementType(); 1605 1606 if (isABI || isKernelFunc) { 1607 // Just print .param .b8 .align <a> .param[size]; 1608 // <a> = PAL.getparamalignment 1609 // size = typeallocsize of element type 1610 unsigned align = PAL.getParamAlignment(paramIndex + 1); 1611 if (align == 0) 1612 align = TD->getABITypeAlignment(ETy); 1613 1614 unsigned sz = TD->getTypeAllocSize(ETy); 1615 O << "\t.param .align " << align << " .b8 "; 1616 printParamName(I, paramIndex, O); 1617 O << "[" << sz << "]"; 1618 continue; 1619 } else { 1620 // Split the ETy into constituent parts and 1621 // print .param .b<size> <name> for each part. 1622 // Further, if a part is vector, print the above for 1623 // each vector element. 1624 SmallVector<EVT, 16> vtparts; 1625 ComputeValueVTs(*TLI, ETy, vtparts); 1626 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 1627 unsigned elems = 1; 1628 EVT elemtype = vtparts[i]; 1629 if (vtparts[i].isVector()) { 1630 elems = vtparts[i].getVectorNumElements(); 1631 elemtype = vtparts[i].getVectorElementType(); 1632 } 1633 1634 for (unsigned j = 0, je = elems; j != je; ++j) { 1635 unsigned sz = elemtype.getSizeInBits(); 1636 if (elemtype.isInteger() && (sz < 32)) 1637 sz = 32; 1638 O << "\t.reg .b" << sz << " "; 1639 printParamName(I, paramIndex, O); 1640 if (j < je - 1) 1641 O << ",\n"; 1642 ++paramIndex; 1643 } 1644 if (i < e - 1) 1645 O << ",\n"; 1646 } 1647 --paramIndex; 1648 continue; 1649 } 1650 } 1651 1652 O << "\n)\n"; 1653} 1654 1655void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF, 1656 raw_ostream &O) { 1657 const Function *F = MF.getFunction(); 1658 emitFunctionParamList(F, O); 1659} 1660 1661void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( 1662 const MachineFunction &MF) { 1663 SmallString<128> Str; 1664 raw_svector_ostream O(Str); 1665 1666 // Map the global virtual register number to a register class specific 1667 // virtual register number starting from 1 with that class. 1668 const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); 1669 //unsigned numRegClasses = TRI->getNumRegClasses(); 1670 1671 // Emit the Fake Stack Object 1672 const MachineFrameInfo *MFI = MF.getFrameInfo(); 1673 int NumBytes = (int) MFI->getStackSize(); 1674 if (NumBytes) { 1675 O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME 1676 << getFunctionNumber() << "[" << NumBytes << "];\n"; 1677 if (nvptxSubtarget.is64Bit()) { 1678 O << "\t.reg .b64 \t%SP;\n"; 1679 O << "\t.reg .b64 \t%SPL;\n"; 1680 } else { 1681 O << "\t.reg .b32 \t%SP;\n"; 1682 O << "\t.reg .b32 \t%SPL;\n"; 1683 } 1684 } 1685 1686 // Go through all virtual registers to establish the mapping between the 1687 // global virtual 1688 // register number and the per class virtual register number. 1689 // We use the per class virtual register number in the ptx output. 1690 unsigned int numVRs = MRI->getNumVirtRegs(); 1691 for (unsigned i = 0; i < numVRs; i++) { 1692 unsigned int vr = TRI->index2VirtReg(i); 1693 const TargetRegisterClass *RC = MRI->getRegClass(vr); 1694 DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 1695 int n = regmap.size(); 1696 regmap.insert(std::make_pair(vr, n + 1)); 1697 } 1698 1699 // Emit register declarations 1700 // @TODO: Extract out the real register usage 1701 // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n"; 1702 // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n"; 1703 // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n"; 1704 // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n"; 1705 // O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n"; 1706 // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n"; 1707 // O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n"; 1708 1709 // Emit declaration of the virtual registers or 'physical' registers for 1710 // each register class 1711 for (unsigned i=0; i< TRI->getNumRegClasses(); i++) { 1712 const TargetRegisterClass *RC = TRI->getRegClass(i); 1713 DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 1714 std::string rcname = getNVPTXRegClassName(RC); 1715 std::string rcStr = getNVPTXRegClassStr(RC); 1716 int n = regmap.size(); 1717 1718 // Only declare those registers that may be used. 1719 if (n) { 1720 O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1) 1721 << ">;\n"; 1722 } 1723 } 1724 1725 OutStreamer.EmitRawText(O.str()); 1726} 1727 1728void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { 1729 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy 1730 bool ignored; 1731 unsigned int numHex; 1732 const char *lead; 1733 1734 if (Fp->getType()->getTypeID() == Type::FloatTyID) { 1735 numHex = 8; 1736 lead = "0f"; 1737 APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored); 1738 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) { 1739 numHex = 16; 1740 lead = "0d"; 1741 APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); 1742 } else 1743 llvm_unreachable("unsupported fp type"); 1744 1745 APInt API = APF.bitcastToAPInt(); 1746 std::string hexstr(utohexstr(API.getZExtValue())); 1747 O << lead; 1748 if (hexstr.length() < numHex) 1749 O << std::string(numHex - hexstr.length(), '0'); 1750 O << utohexstr(API.getZExtValue()); 1751} 1752 1753void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { 1754 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { 1755 O << CI->getValue(); 1756 return; 1757 } 1758 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) { 1759 printFPConstant(CFP, O); 1760 return; 1761 } 1762 if (isa<ConstantPointerNull>(CPV)) { 1763 O << "0"; 1764 return; 1765 } 1766 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1767 O << *getSymbol(GVar); 1768 return; 1769 } 1770 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1771 const Value *v = Cexpr->stripPointerCasts(); 1772 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { 1773 O << *getSymbol(GVar); 1774 return; 1775 } else { 1776 O << *LowerConstant(CPV, *this); 1777 return; 1778 } 1779 } 1780 llvm_unreachable("Not scalar type found in printScalarConstant()"); 1781} 1782 1783void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, 1784 AggBuffer *aggBuffer) { 1785 1786 const DataLayout *TD = TM.getDataLayout(); 1787 1788 if (isa<UndefValue>(CPV) || CPV->isNullValue()) { 1789 int s = TD->getTypeAllocSize(CPV->getType()); 1790 if (s < Bytes) 1791 s = Bytes; 1792 aggBuffer->addZeros(s); 1793 return; 1794 } 1795 1796 unsigned char *ptr; 1797 switch (CPV->getType()->getTypeID()) { 1798 1799 case Type::IntegerTyID: { 1800 const Type *ETy = CPV->getType(); 1801 if (ETy == Type::getInt8Ty(CPV->getContext())) { 1802 unsigned char c = 1803 (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); 1804 ptr = &c; 1805 aggBuffer->addBytes(ptr, 1, Bytes); 1806 } else if (ETy == Type::getInt16Ty(CPV->getContext())) { 1807 short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); 1808 ptr = (unsigned char *)&int16; 1809 aggBuffer->addBytes(ptr, 2, Bytes); 1810 } else if (ETy == Type::getInt32Ty(CPV->getContext())) { 1811 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1812 int int32 = (int)(constInt->getZExtValue()); 1813 ptr = (unsigned char *)&int32; 1814 aggBuffer->addBytes(ptr, 4, Bytes); 1815 break; 1816 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1817 if (const ConstantInt *constInt = dyn_cast<ConstantInt>( 1818 ConstantFoldConstantExpression(Cexpr, TD))) { 1819 int int32 = (int)(constInt->getZExtValue()); 1820 ptr = (unsigned char *)&int32; 1821 aggBuffer->addBytes(ptr, 4, Bytes); 1822 break; 1823 } 1824 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1825 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1826 aggBuffer->addSymbol(v); 1827 aggBuffer->addZeros(4); 1828 break; 1829 } 1830 } 1831 llvm_unreachable("unsupported integer const type"); 1832 } else if (ETy == Type::getInt64Ty(CPV->getContext())) { 1833 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1834 long long int64 = (long long)(constInt->getZExtValue()); 1835 ptr = (unsigned char *)&int64; 1836 aggBuffer->addBytes(ptr, 8, Bytes); 1837 break; 1838 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1839 if (const ConstantInt *constInt = dyn_cast<ConstantInt>( 1840 ConstantFoldConstantExpression(Cexpr, TD))) { 1841 long long int64 = (long long)(constInt->getZExtValue()); 1842 ptr = (unsigned char *)&int64; 1843 aggBuffer->addBytes(ptr, 8, Bytes); 1844 break; 1845 } 1846 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1847 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1848 aggBuffer->addSymbol(v); 1849 aggBuffer->addZeros(8); 1850 break; 1851 } 1852 } 1853 llvm_unreachable("unsupported integer const type"); 1854 } else 1855 llvm_unreachable("unsupported integer const type"); 1856 break; 1857 } 1858 case Type::FloatTyID: 1859 case Type::DoubleTyID: { 1860 const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); 1861 const Type *Ty = CFP->getType(); 1862 if (Ty == Type::getFloatTy(CPV->getContext())) { 1863 float float32 = (float) CFP->getValueAPF().convertToFloat(); 1864 ptr = (unsigned char *)&float32; 1865 aggBuffer->addBytes(ptr, 4, Bytes); 1866 } else if (Ty == Type::getDoubleTy(CPV->getContext())) { 1867 double float64 = CFP->getValueAPF().convertToDouble(); 1868 ptr = (unsigned char *)&float64; 1869 aggBuffer->addBytes(ptr, 8, Bytes); 1870 } else { 1871 llvm_unreachable("unsupported fp const type"); 1872 } 1873 break; 1874 } 1875 case Type::PointerTyID: { 1876 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1877 aggBuffer->addSymbol(GVar); 1878 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1879 const Value *v = Cexpr->stripPointerCasts(); 1880 aggBuffer->addSymbol(v); 1881 } 1882 unsigned int s = TD->getTypeAllocSize(CPV->getType()); 1883 aggBuffer->addZeros(s); 1884 break; 1885 } 1886 1887 case Type::ArrayTyID: 1888 case Type::VectorTyID: 1889 case Type::StructTyID: { 1890 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) || 1891 isa<ConstantStruct>(CPV) || isa<ConstantDataSequential>(CPV)) { 1892 int ElementSize = TD->getTypeAllocSize(CPV->getType()); 1893 bufferAggregateConstant(CPV, aggBuffer); 1894 if (Bytes > ElementSize) 1895 aggBuffer->addZeros(Bytes - ElementSize); 1896 } else if (isa<ConstantAggregateZero>(CPV)) 1897 aggBuffer->addZeros(Bytes); 1898 else 1899 llvm_unreachable("Unexpected Constant type"); 1900 break; 1901 } 1902 1903 default: 1904 llvm_unreachable("unsupported type"); 1905 } 1906} 1907 1908void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, 1909 AggBuffer *aggBuffer) { 1910 const DataLayout *TD = TM.getDataLayout(); 1911 int Bytes; 1912 1913 // Old constants 1914 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) { 1915 if (CPV->getNumOperands()) 1916 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) 1917 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer); 1918 return; 1919 } 1920 1921 if (const ConstantDataSequential *CDS = 1922 dyn_cast<ConstantDataSequential>(CPV)) { 1923 if (CDS->getNumElements()) 1924 for (unsigned i = 0; i < CDS->getNumElements(); ++i) 1925 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0, 1926 aggBuffer); 1927 return; 1928 } 1929 1930 if (isa<ConstantStruct>(CPV)) { 1931 if (CPV->getNumOperands()) { 1932 StructType *ST = cast<StructType>(CPV->getType()); 1933 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { 1934 if (i == (e - 1)) 1935 Bytes = TD->getStructLayout(ST)->getElementOffset(0) + 1936 TD->getTypeAllocSize(ST) - 1937 TD->getStructLayout(ST)->getElementOffset(i); 1938 else 1939 Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) - 1940 TD->getStructLayout(ST)->getElementOffset(i); 1941 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer); 1942 } 1943 } 1944 return; 1945 } 1946 llvm_unreachable("unsupported constant type in printAggregateConstant()"); 1947} 1948 1949// buildTypeNameMap - Run through symbol table looking for type names. 1950// 1951 1952bool NVPTXAsmPrinter::isImageType(const Type *Ty) { 1953 1954 std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty); 1955 1956 if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") || 1957 !PI->second.compare("struct._image2d_t") || 1958 !PI->second.compare("struct._image3d_t"))) 1959 return true; 1960 1961 return false; 1962} 1963 1964 1965bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { 1966 switch (MI.getOpcode()) { 1967 default: 1968 return false; 1969 case NVPTX::CallArgBeginInst: 1970 case NVPTX::CallArgEndInst0: 1971 case NVPTX::CallArgEndInst1: 1972 case NVPTX::CallArgF32: 1973 case NVPTX::CallArgF64: 1974 case NVPTX::CallArgI16: 1975 case NVPTX::CallArgI32: 1976 case NVPTX::CallArgI32imm: 1977 case NVPTX::CallArgI64: 1978 case NVPTX::CallArgParam: 1979 case NVPTX::CallVoidInst: 1980 case NVPTX::CallVoidInstReg: 1981 case NVPTX::Callseq_End: 1982 case NVPTX::CallVoidInstReg64: 1983 case NVPTX::DeclareParamInst: 1984 case NVPTX::DeclareRetMemInst: 1985 case NVPTX::DeclareRetRegInst: 1986 case NVPTX::DeclareRetScalarInst: 1987 case NVPTX::DeclareScalarParamInst: 1988 case NVPTX::DeclareScalarRegInst: 1989 case NVPTX::StoreParamF32: 1990 case NVPTX::StoreParamF64: 1991 case NVPTX::StoreParamI16: 1992 case NVPTX::StoreParamI32: 1993 case NVPTX::StoreParamI64: 1994 case NVPTX::StoreParamI8: 1995 case NVPTX::StoreRetvalF32: 1996 case NVPTX::StoreRetvalF64: 1997 case NVPTX::StoreRetvalI16: 1998 case NVPTX::StoreRetvalI32: 1999 case NVPTX::StoreRetvalI64: 2000 case NVPTX::StoreRetvalI8: 2001 case NVPTX::LastCallArgF32: 2002 case NVPTX::LastCallArgF64: 2003 case NVPTX::LastCallArgI16: 2004 case NVPTX::LastCallArgI32: 2005 case NVPTX::LastCallArgI32imm: 2006 case NVPTX::LastCallArgI64: 2007 case NVPTX::LastCallArgParam: 2008 case NVPTX::LoadParamMemF32: 2009 case NVPTX::LoadParamMemF64: 2010 case NVPTX::LoadParamMemI16: 2011 case NVPTX::LoadParamMemI32: 2012 case NVPTX::LoadParamMemI64: 2013 case NVPTX::LoadParamMemI8: 2014 case NVPTX::PrototypeInst: 2015 case NVPTX::DBG_VALUE: 2016 return true; 2017 } 2018 return false; 2019} 2020 2021/// PrintAsmOperand - Print out an operand for an inline asm expression. 2022/// 2023bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 2024 unsigned AsmVariant, 2025 const char *ExtraCode, raw_ostream &O) { 2026 if (ExtraCode && ExtraCode[0]) { 2027 if (ExtraCode[1] != 0) 2028 return true; // Unknown modifier. 2029 2030 switch (ExtraCode[0]) { 2031 default: 2032 // See if this is a generic print operand 2033 return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); 2034 case 'r': 2035 break; 2036 } 2037 } 2038 2039 printOperand(MI, OpNo, O); 2040 2041 return false; 2042} 2043 2044bool NVPTXAsmPrinter::PrintAsmMemoryOperand( 2045 const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, 2046 const char *ExtraCode, raw_ostream &O) { 2047 if (ExtraCode && ExtraCode[0]) 2048 return true; // Unknown modifier 2049 2050 O << '['; 2051 printMemOperand(MI, OpNo, O); 2052 O << ']'; 2053 2054 return false; 2055} 2056 2057void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, 2058 raw_ostream &O, const char *Modifier) { 2059 const MachineOperand &MO = MI->getOperand(opNum); 2060 switch (MO.getType()) { 2061 case MachineOperand::MO_Register: 2062 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 2063 if (MO.getReg() == NVPTX::VRDepot) 2064 O << DEPOTNAME << getFunctionNumber(); 2065 else 2066 O << NVPTXInstPrinter::getRegisterName(MO.getReg()); 2067 } else { 2068 emitVirtualRegister(MO.getReg(), O); 2069 } 2070 return; 2071 2072 case MachineOperand::MO_Immediate: 2073 if (!Modifier) 2074 O << MO.getImm(); 2075 else if (strstr(Modifier, "vec") == Modifier) 2076 printVecModifiedImmediate(MO, Modifier, O); 2077 else 2078 llvm_unreachable( 2079 "Don't know how to handle modifier on immediate operand"); 2080 return; 2081 2082 case MachineOperand::MO_FPImmediate: 2083 printFPConstant(MO.getFPImm(), O); 2084 break; 2085 2086 case MachineOperand::MO_GlobalAddress: 2087 O << *getSymbol(MO.getGlobal()); 2088 break; 2089 2090 case MachineOperand::MO_ExternalSymbol: { 2091 const char *symbname = MO.getSymbolName(); 2092 if (strstr(symbname, ".PARAM") == symbname) { 2093 unsigned index; 2094 sscanf(symbname + 6, "%u[];", &index); 2095 printParamName(index, O); 2096 } else if (strstr(symbname, ".HLPPARAM") == symbname) { 2097 unsigned index; 2098 sscanf(symbname + 9, "%u[];", &index); 2099 O << *CurrentFnSym << "_param_" << index << "_offset"; 2100 } else 2101 O << symbname; 2102 break; 2103 } 2104 2105 case MachineOperand::MO_MachineBasicBlock: 2106 O << *MO.getMBB()->getSymbol(); 2107 return; 2108 2109 default: 2110 llvm_unreachable("Operand type not supported."); 2111 } 2112} 2113 2114void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, 2115 raw_ostream &O, const char *Modifier) { 2116 printOperand(MI, opNum, O); 2117 2118 if (Modifier && !strcmp(Modifier, "add")) { 2119 O << ", "; 2120 printOperand(MI, opNum + 1, O); 2121 } else { 2122 if (MI->getOperand(opNum + 1).isImm() && 2123 MI->getOperand(opNum + 1).getImm() == 0) 2124 return; // don't print ',0' or '+0' 2125 O << "+"; 2126 printOperand(MI, opNum + 1, O); 2127 } 2128} 2129 2130 2131// Force static initialization. 2132extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() { 2133 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 2134 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 2135} 2136 2137void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { 2138 std::stringstream temp; 2139 LineReader *reader = this->getReader(filename.str()); 2140 temp << "\n//"; 2141 temp << filename.str(); 2142 temp << ":"; 2143 temp << line; 2144 temp << " "; 2145 temp << reader->readLine(line); 2146 temp << "\n"; 2147 this->OutStreamer.EmitRawText(Twine(temp.str())); 2148} 2149 2150LineReader *NVPTXAsmPrinter::getReader(std::string filename) { 2151 if (reader == NULL) { 2152 reader = new LineReader(filename); 2153 } 2154 2155 if (reader->fileName() != filename) { 2156 delete reader; 2157 reader = new LineReader(filename); 2158 } 2159 2160 return reader; 2161} 2162 2163std::string LineReader::readLine(unsigned lineNum) { 2164 if (lineNum < theCurLine) { 2165 theCurLine = 0; 2166 fstr.seekg(0, std::ios::beg); 2167 } 2168 while (theCurLine < lineNum) { 2169 fstr.getline(buff, 500); 2170 theCurLine++; 2171 } 2172 return buff; 2173} 2174 2175// Force static initialization. 2176extern "C" void LLVMInitializeNVPTXAsmPrinter() { 2177 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 2178 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 2179} 2180