TypeBasedAliasAnalysis.cpp revision 263508
//===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the TypeBasedAliasAnalysis pass, which implements
// metadata-based TBAA.
//
// In LLVM IR, memory does not have types, so LLVM's own type system is not
// suitable for doing TBAA. Instead, metadata is added to the IR to describe
// a type system of a higher level language. This can be used to implement
// typical C/C++ TBAA, but it can also be used to implement custom alias
// analysis behavior for other languages.
//
// We now support two types of metadata format: scalar TBAA and struct-path
// aware TBAA. After all test cases are upgraded to use struct-path aware
// TBAA and we can auto-upgrade existing bc files, the support for scalar TBAA
// can be dropped.
//
// The scalar TBAA metadata format is very simple. TBAA MDNodes have up to
// three fields, e.g.:
//   !0 = metadata !{ metadata !"an example type tree" }
//   !1 = metadata !{ metadata !"int", metadata !0 }
//   !2 = metadata !{ metadata !"float", metadata !0 }
//   !3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
//
// The first field is an identity field. It can be any value, usually
// an MDString, which uniquely identifies the type. The most important
// name in the tree is the name of the root node. Two trees with
// different root node names are entirely disjoint, even if they
// have leaves with common names.
//
// The second field identifies the type's parent node in the tree, or
// is null or omitted for a root node. A type is considered to alias
// all of its descendants and all of its ancestors in the tree.
// Also, a type is considered to alias all types in other trees, so that
// bitcode produced from multiple front-ends is handled conservatively.
//
// If the third field is present, it's an integer which if equal to 1
// indicates that the type is "constant" (meaning pointsToConstantMemory
// should return true; see
// http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
//
// With struct-path aware TBAA, the MDNodes attached to an instruction using
// "!tbaa" are called path tag nodes.
//
// The path tag node has 4 fields with the last field being optional.
//
// The first field is the base type node, it can be a struct type node
// or a scalar type node. The second field is the access type node, it
// must be a scalar type node. The third field is the offset into the base type.
// The last field has the same meaning as the last field of our scalar TBAA:
// it's an integer which if equal to 1 indicates that the access is "constant".
//
// The struct type node has a name and a list of pairs, one pair for each member
// of the struct. The first element of each pair is a type node (a struct type
// node or a scalar type node), specifying the type of the member, the second
// element of each pair is the offset of the member.
//
// Given an example
// typedef struct {
//   short s;
// } A;
// typedef struct {
//   uint16_t s;
//   A a;
// } B;
//
// For an access to B.a.s, we attach !5 (a path tag node) to the load/store
// instruction. The base type is !4 (struct B), the access type is !2 (scalar
// type short) and the offset is 4.
//
// !0 = metadata !{metadata !"Simple C/C++ TBAA"}
// !1 = metadata !{metadata !"omnipotent char", metadata !0} // Scalar type node
// !2 = metadata !{metadata !"short", metadata !1}           // Scalar type node
// !3 = metadata !{metadata !"A", metadata !2, i64 0}        // Struct type node
// !4 = metadata !{metadata !"B", metadata !2, i64 0, metadata !3, i64 4}
//                                                           // Struct type node
// !5 = metadata !{metadata !4, metadata !2, i64 4}          // Path tag node
//
// The struct type nodes and the scalar type nodes form a type DAG.
//         Root (!0)
//         char (!1)  -- edge to Root
//         short (!2) -- edge to char
//         A (!3) -- edge with offset 0 to short
//         B (!4) -- edge with offset 0 to short and edge with offset 4 to A
//
// To check if two tags (tagX and tagY) can alias, we start from the base type
// of tagX, follow the edge with the correct offset in the type DAG and adjust
// the offset until we reach the base type of tagY or until we reach the Root
// node.
// If we reach the base type of tagY, compare the adjusted offset with
// offset of tagY, return Alias if the offsets are the same, return NoAlias
// otherwise.
// If we reach the Root node, perform the above starting from base type of tagY
// to see if we reach base type of tagX.
//
// If they have different roots, they're part of different potentially
// unrelated type systems, so we return Alias to be conservative.
// If neither node is an ancestor of the other and they have the same root,
// then we say NoAlias.
//
// TODO: The current metadata format doesn't support struct
// fields. For example:
//   struct X {
//     double d;
//     int i;
//   };
//   void foo(struct X *x, struct X *y, double *p) {
//     *x = *y;
//     *p = 0.0;
//   }
// Struct X has a double member, so the store to *x can alias the store to *p.
118// Currently it's not possible to precisely describe all the things struct X 119// aliases, so struct assignments must use conservative TBAA nodes. There's 120// no scheme for attaching metadata to @llvm.memcpy yet either. 121// 122//===----------------------------------------------------------------------===// 123 124#include "llvm/Analysis/Passes.h" 125#include "llvm/Analysis/AliasAnalysis.h" 126#include "llvm/IR/Constants.h" 127#include "llvm/IR/LLVMContext.h" 128#include "llvm/IR/Metadata.h" 129#include "llvm/IR/Module.h" 130#include "llvm/Pass.h" 131#include "llvm/Support/CommandLine.h" 132using namespace llvm; 133 134// A handy option for disabling TBAA functionality. The same effect can also be 135// achieved by stripping the !tbaa tags from IR, but this option is sometimes 136// more convenient. 137static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true)); 138 139namespace { 140 /// TBAANode - This is a simple wrapper around an MDNode which provides a 141 /// higher-level interface by hiding the details of how alias analysis 142 /// information is encoded in its operands. 143 class TBAANode { 144 const MDNode *Node; 145 146 public: 147 TBAANode() : Node(0) {} 148 explicit TBAANode(const MDNode *N) : Node(N) {} 149 150 /// getNode - Get the MDNode for this TBAANode. 151 const MDNode *getNode() const { return Node; } 152 153 /// getParent - Get this TBAANode's Alias tree parent. 154 TBAANode getParent() const { 155 if (Node->getNumOperands() < 2) 156 return TBAANode(); 157 MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); 158 if (!P) 159 return TBAANode(); 160 // Ok, this node has a valid parent. Return it. 161 return TBAANode(P); 162 } 163 164 /// TypeIsImmutable - Test if this TBAANode represents a type for objects 165 /// which are not modified (by any means) in the context where this 166 /// AliasAnalysis is relevant. 
167 bool TypeIsImmutable() const { 168 if (Node->getNumOperands() < 3) 169 return false; 170 ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2)); 171 if (!CI) 172 return false; 173 return CI->getValue()[0]; 174 } 175 }; 176 177 /// This is a simple wrapper around an MDNode which provides a 178 /// higher-level interface by hiding the details of how alias analysis 179 /// information is encoded in its operands. 180 class TBAAStructTagNode { 181 /// This node should be created with createTBAAStructTagNode. 182 const MDNode *Node; 183 184 public: 185 TBAAStructTagNode() : Node(0) {} 186 explicit TBAAStructTagNode(const MDNode *N) : Node(N) {} 187 188 /// Get the MDNode for this TBAAStructTagNode. 189 const MDNode *getNode() const { return Node; } 190 191 const MDNode *getBaseType() const { 192 return dyn_cast_or_null<MDNode>(Node->getOperand(0)); 193 } 194 const MDNode *getAccessType() const { 195 return dyn_cast_or_null<MDNode>(Node->getOperand(1)); 196 } 197 uint64_t getOffset() const { 198 return cast<ConstantInt>(Node->getOperand(2))->getZExtValue(); 199 } 200 /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for 201 /// objects which are not modified (by any means) in the context where this 202 /// AliasAnalysis is relevant. 203 bool TypeIsImmutable() const { 204 if (Node->getNumOperands() < 4) 205 return false; 206 ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(3)); 207 if (!CI) 208 return false; 209 return CI->getValue()[0]; 210 } 211 }; 212 213 /// This is a simple wrapper around an MDNode which provides a 214 /// higher-level interface by hiding the details of how alias analysis 215 /// information is encoded in its operands. 216 class TBAAStructTypeNode { 217 /// This node should be created with createTBAAStructTypeNode. 218 const MDNode *Node; 219 220 public: 221 TBAAStructTypeNode() : Node(0) {} 222 explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {} 223 224 /// Get the MDNode for this TBAAStructTypeNode. 
225 const MDNode *getNode() const { return Node; } 226 227 /// Get this TBAAStructTypeNode's field in the type DAG with 228 /// given offset. Update the offset to be relative to the field type. 229 TBAAStructTypeNode getParent(uint64_t &Offset) const { 230 // Parent can be omitted for the root node. 231 if (Node->getNumOperands() < 2) 232 return TBAAStructTypeNode(); 233 234 // Fast path for a scalar type node and a struct type node with a single 235 // field. 236 if (Node->getNumOperands() <= 3) { 237 uint64_t Cur = Node->getNumOperands() == 2 ? 0 : 238 cast<ConstantInt>(Node->getOperand(2))->getZExtValue(); 239 Offset -= Cur; 240 MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); 241 if (!P) 242 return TBAAStructTypeNode(); 243 return TBAAStructTypeNode(P); 244 } 245 246 // Assume the offsets are in order. We return the previous field if 247 // the current offset is bigger than the given offset. 248 unsigned TheIdx = 0; 249 for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) { 250 uint64_t Cur = cast<ConstantInt>(Node->getOperand(Idx + 1))-> 251 getZExtValue(); 252 if (Cur > Offset) { 253 assert(Idx >= 3 && 254 "TBAAStructTypeNode::getParent should have an offset match!"); 255 TheIdx = Idx - 2; 256 break; 257 } 258 } 259 // Move along the last field. 260 if (TheIdx == 0) 261 TheIdx = Node->getNumOperands() - 2; 262 uint64_t Cur = cast<ConstantInt>(Node->getOperand(TheIdx + 1))-> 263 getZExtValue(); 264 Offset -= Cur; 265 MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx)); 266 if (!P) 267 return TBAAStructTypeNode(); 268 return TBAAStructTypeNode(P); 269 } 270 }; 271} 272 273namespace { 274 /// TypeBasedAliasAnalysis - This is a simple alias analysis 275 /// implementation that uses TypeBased to answer queries. 
276 class TypeBasedAliasAnalysis : public ImmutablePass, 277 public AliasAnalysis { 278 public: 279 static char ID; // Class identification, replacement for typeinfo 280 TypeBasedAliasAnalysis() : ImmutablePass(ID) { 281 initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry()); 282 } 283 284 virtual void initializePass() { 285 InitializeAliasAnalysis(this); 286 } 287 288 /// getAdjustedAnalysisPointer - This method is used when a pass implements 289 /// an analysis interface through multiple inheritance. If needed, it 290 /// should override this to adjust the this pointer as needed for the 291 /// specified pass info. 292 virtual void *getAdjustedAnalysisPointer(const void *PI) { 293 if (PI == &AliasAnalysis::ID) 294 return (AliasAnalysis*)this; 295 return this; 296 } 297 298 bool Aliases(const MDNode *A, const MDNode *B) const; 299 bool PathAliases(const MDNode *A, const MDNode *B) const; 300 301 private: 302 virtual void getAnalysisUsage(AnalysisUsage &AU) const; 303 virtual AliasResult alias(const Location &LocA, const Location &LocB); 304 virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal); 305 virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); 306 virtual ModRefBehavior getModRefBehavior(const Function *F); 307 virtual ModRefResult getModRefInfo(ImmutableCallSite CS, 308 const Location &Loc); 309 virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, 310 ImmutableCallSite CS2); 311 }; 312} // End of anonymous namespace 313 314// Register this pass... 
315char TypeBasedAliasAnalysis::ID = 0; 316INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa", 317 "Type-Based Alias Analysis", false, true, false) 318 319ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() { 320 return new TypeBasedAliasAnalysis(); 321} 322 323void 324TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 325 AU.setPreservesAll(); 326 AliasAnalysis::getAnalysisUsage(AU); 327} 328 329/// Check the first operand of the tbaa tag node, if it is a MDNode, we treat 330/// it as struct-path aware TBAA format, otherwise, we treat it as scalar TBAA 331/// format. 332static bool isStructPathTBAA(const MDNode *MD) { 333 // Anonymous TBAA root starts with a MDNode and dragonegg uses it as 334 // a TBAA tag. 335 return isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3; 336} 337 338/// Aliases - Test whether the type represented by A may alias the 339/// type represented by B. 340bool 341TypeBasedAliasAnalysis::Aliases(const MDNode *A, 342 const MDNode *B) const { 343 if (isStructPathTBAA(A)) 344 return PathAliases(A, B); 345 346 // Keep track of the root node for A and B. 347 TBAANode RootA, RootB; 348 349 // Climb the tree from A to see if we reach B. 350 for (TBAANode T(A); ; ) { 351 if (T.getNode() == B) 352 // B is an ancestor of A. 353 return true; 354 355 RootA = T; 356 T = T.getParent(); 357 if (!T.getNode()) 358 break; 359 } 360 361 // Climb the tree from B to see if we reach A. 362 for (TBAANode T(B); ; ) { 363 if (T.getNode() == A) 364 // A is an ancestor of B. 365 return true; 366 367 RootB = T; 368 T = T.getParent(); 369 if (!T.getNode()) 370 break; 371 } 372 373 // Neither node is an ancestor of the other. 374 375 // If they have different roots, they're part of different potentially 376 // unrelated type systems, so we must be conservative. 377 if (RootA.getNode() != RootB.getNode()) 378 return true; 379 380 // If they have the same root, then we've proved there's no alias. 
381 return false; 382} 383 384/// Test whether the struct-path tag represented by A may alias the 385/// struct-path tag represented by B. 386bool 387TypeBasedAliasAnalysis::PathAliases(const MDNode *A, 388 const MDNode *B) const { 389 // Keep track of the root node for A and B. 390 TBAAStructTypeNode RootA, RootB; 391 TBAAStructTagNode TagA(A), TagB(B); 392 393 // TODO: We need to check if AccessType of TagA encloses AccessType of 394 // TagB to support aggregate AccessType. If yes, return true. 395 396 // Start from the base type of A, follow the edge with the correct offset in 397 // the type DAG and adjust the offset until we reach the base type of B or 398 // until we reach the Root node. 399 // Compare the adjusted offset once we have the same base. 400 401 // Climb the type DAG from base type of A to see if we reach base type of B. 402 const MDNode *BaseA = TagA.getBaseType(); 403 const MDNode *BaseB = TagB.getBaseType(); 404 uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset(); 405 for (TBAAStructTypeNode T(BaseA); ; ) { 406 if (T.getNode() == BaseB) 407 // Base type of A encloses base type of B, check if the offsets match. 408 return OffsetA == OffsetB; 409 410 RootA = T; 411 // Follow the edge with the correct offset, OffsetA will be adjusted to 412 // be relative to the field type. 413 T = T.getParent(OffsetA); 414 if (!T.getNode()) 415 break; 416 } 417 418 // Reset OffsetA and climb the type DAG from base type of B to see if we reach 419 // base type of A. 420 OffsetA = TagA.getOffset(); 421 for (TBAAStructTypeNode T(BaseB); ; ) { 422 if (T.getNode() == BaseA) 423 // Base type of B encloses base type of A, check if the offsets match. 424 return OffsetA == OffsetB; 425 426 RootB = T; 427 // Follow the edge with the correct offset, OffsetB will be adjusted to 428 // be relative to the field type. 429 T = T.getParent(OffsetB); 430 if (!T.getNode()) 431 break; 432 } 433 434 // Neither node is an ancestor of the other. 
435 436 // If they have different roots, they're part of different potentially 437 // unrelated type systems, so we must be conservative. 438 if (RootA.getNode() != RootB.getNode()) 439 return true; 440 441 // If they have the same root, then we've proved there's no alias. 442 return false; 443} 444 445AliasAnalysis::AliasResult 446TypeBasedAliasAnalysis::alias(const Location &LocA, 447 const Location &LocB) { 448 if (!EnableTBAA) 449 return AliasAnalysis::alias(LocA, LocB); 450 451 // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must 452 // be conservative. 453 const MDNode *AM = LocA.TBAATag; 454 if (!AM) return AliasAnalysis::alias(LocA, LocB); 455 const MDNode *BM = LocB.TBAATag; 456 if (!BM) return AliasAnalysis::alias(LocA, LocB); 457 458 // If they may alias, chain to the next AliasAnalysis. 459 if (Aliases(AM, BM)) 460 return AliasAnalysis::alias(LocA, LocB); 461 462 // Otherwise return a definitive result. 463 return NoAlias; 464} 465 466bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc, 467 bool OrLocal) { 468 if (!EnableTBAA) 469 return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); 470 471 const MDNode *M = Loc.TBAATag; 472 if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); 473 474 // If this is an "immutable" type, we can assume the pointer is pointing 475 // to constant memory. 476 if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) || 477 (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable())) 478 return true; 479 480 return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); 481} 482 483AliasAnalysis::ModRefBehavior 484TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { 485 if (!EnableTBAA) 486 return AliasAnalysis::getModRefBehavior(CS); 487 488 ModRefBehavior Min = UnknownModRefBehavior; 489 490 // If this is an "immutable" type, we can assume the call doesn't write 491 // to memory. 
492 if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) 493 if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) || 494 (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable())) 495 Min = OnlyReadsMemory; 496 497 return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); 498} 499 500AliasAnalysis::ModRefBehavior 501TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) { 502 // Functions don't have metadata. Just chain to the next implementation. 503 return AliasAnalysis::getModRefBehavior(F); 504} 505 506AliasAnalysis::ModRefResult 507TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS, 508 const Location &Loc) { 509 if (!EnableTBAA) 510 return AliasAnalysis::getModRefInfo(CS, Loc); 511 512 if (const MDNode *L = Loc.TBAATag) 513 if (const MDNode *M = 514 CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) 515 if (!Aliases(L, M)) 516 return NoModRef; 517 518 return AliasAnalysis::getModRefInfo(CS, Loc); 519} 520 521AliasAnalysis::ModRefResult 522TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, 523 ImmutableCallSite CS2) { 524 if (!EnableTBAA) 525 return AliasAnalysis::getModRefInfo(CS1, CS2); 526 527 if (const MDNode *M1 = 528 CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) 529 if (const MDNode *M2 = 530 CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) 531 if (!Aliases(M1, M2)) 532 return NoModRef; 533 534 return AliasAnalysis::getModRefInfo(CS1, CS2); 535} 536 537bool MDNode::isTBAAVtableAccess() const { 538 if (!isStructPathTBAA(this)) { 539 if (getNumOperands() < 1) return false; 540 if (MDString *Tag1 = dyn_cast<MDString>(getOperand(0))) { 541 if (Tag1->getString() == "vtable pointer") return true; 542 } 543 return false; 544 } 545 546 // For struct-path aware TBAA, we use the access type of the tag. 
547 if (getNumOperands() < 2) return false; 548 MDNode *Tag = cast_or_null<MDNode>(getOperand(1)); 549 if (!Tag) return false; 550 if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) { 551 if (Tag1->getString() == "vtable pointer") return true; 552 } 553 return false; 554} 555 556MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { 557 if (!A || !B) 558 return NULL; 559 560 if (A == B) 561 return A; 562 563 // For struct-path aware TBAA, we use the access type of the tag. 564 bool StructPath = isStructPathTBAA(A); 565 if (StructPath) { 566 A = cast_or_null<MDNode>(A->getOperand(1)); 567 if (!A) return 0; 568 B = cast_or_null<MDNode>(B->getOperand(1)); 569 if (!B) return 0; 570 } 571 572 SmallVector<MDNode *, 4> PathA; 573 MDNode *T = A; 574 while (T) { 575 PathA.push_back(T); 576 T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0; 577 } 578 579 SmallVector<MDNode *, 4> PathB; 580 T = B; 581 while (T) { 582 PathB.push_back(T); 583 T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0; 584 } 585 586 int IA = PathA.size() - 1; 587 int IB = PathB.size() - 1; 588 589 MDNode *Ret = 0; 590 while (IA >= 0 && IB >=0) { 591 if (PathA[IA] == PathB[IB]) 592 Ret = PathA[IA]; 593 else 594 break; 595 --IA; 596 --IB; 597 } 598 if (!StructPath) 599 return Ret; 600 601 if (!Ret) 602 return 0; 603 // We need to convert from a type node to a tag node. 604 Type *Int64 = IntegerType::get(A->getContext(), 64); 605 Value *Ops[3] = { Ret, Ret, ConstantInt::get(Int64, 0) }; 606 return MDNode::get(A->getContext(), Ops); 607} 608