ArchiveWriter.cpp revision 207618
1193323Sed//===-- ArchiveWriter.cpp - Write LLVM archive files ----------------------===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// Builds up an LLVM archive file (.a) containing LLVM bitcode. 11193323Sed// 12193323Sed//===----------------------------------------------------------------------===// 13193323Sed 14193323Sed#include "ArchiveInternals.h" 15203954Srdivacky#include "llvm/Module.h" 16203954Srdivacky#include "llvm/ADT/OwningPtr.h" 17193323Sed#include "llvm/Bitcode/ReaderWriter.h" 18193323Sed#include "llvm/Support/MemoryBuffer.h" 19203954Srdivacky#include "llvm/System/Process.h" 20193323Sed#include "llvm/System/Signals.h" 21193323Sed#include <fstream> 22193323Sed#include <ostream> 23193323Sed#include <iomanip> 24193323Sedusing namespace llvm; 25193323Sed 26193323Sed// Write an integer using variable bit rate encoding. This saves a few bytes 27193323Sed// per entry in the symbol table. 28193323Sedstatic inline void writeInteger(unsigned num, std::ofstream& ARFile) { 29193323Sed while (1) { 30193323Sed if (num < 0x80) { // done? 31193323Sed ARFile << (unsigned char)num; 32193323Sed return; 33193323Sed } 34193323Sed 35193323Sed // Nope, we are bigger than a character, output the next 7 bits and set the 36193323Sed // high bit to say that there is more coming... 37193323Sed ARFile << (unsigned char)(0x80 | ((unsigned char)num & 0x7F)); 38193323Sed num >>= 7; // Shift out 7 bits now... 39193323Sed } 40193323Sed} 41193323Sed 42193323Sed// Compute how many bytes are taken by a given VBR encoded value. This is needed 43193323Sed// to pre-compute the size of the symbol table. 44193323Sedstatic inline unsigned numVbrBytes(unsigned num) { 45193323Sed 46193323Sed // Note that the following nested ifs are somewhat equivalent to a binary 47193323Sed // search. We split it in half by comparing against 2^14 first. This allows 48193323Sed // most reasonable values to be done in 2 comparisons instead of 1 for 49193323Sed // small ones and four for large ones. We expect this to access file offsets 50193323Sed // in the 2^10 to 2^24 range and symbol lengths in the 2^0 to 2^8 range, 51193323Sed // so this approach is reasonable. 52193323Sed if (num < 1<<14) { 53193323Sed if (num < 1<<7) 54193323Sed return 1; 55193323Sed else 56193323Sed return 2; 57193323Sed } 58193323Sed if (num < 1<<21) 59193323Sed return 3; 60193323Sed 61193323Sed if (num < 1<<28) 62193323Sed return 4; 63193323Sed return 5; // anything >= 2^28 takes 5 bytes 64193323Sed} 65193323Sed 66193323Sed// Create an empty archive. 67195340SedArchive* Archive::CreateEmpty(const sys::Path& FilePath, LLVMContext& C) { 68195340Sed Archive* result = new Archive(FilePath, C); 69193323Sed return result; 70193323Sed} 71193323Sed 72193323Sed// Fill the ArchiveMemberHeader with the information from a member. If 73193323Sed// TruncateNames is true, names are flattened to 15 chars or less. The sz field 74193323Sed// is provided here instead of coming from the mbr because the member might be 75193323Sed// stored compressed and the compressed size is not the ArchiveMember's size. 76193323Sed// Furthermore compressed files have negative size fields to identify them as 77193323Sed// compressed. 78193323Sedbool 79193323SedArchive::fillHeader(const ArchiveMember &mbr, ArchiveMemberHeader& hdr, 80193323Sed int sz, bool TruncateNames) const { 81193323Sed 82193323Sed // Set the permissions mode, uid and gid 83193323Sed hdr.init(); 84193323Sed char buffer[32]; 85193323Sed sprintf(buffer, "%-8o", mbr.getMode()); 86193323Sed memcpy(hdr.mode,buffer,8); 87193323Sed sprintf(buffer, "%-6u", mbr.getUser()); 88193323Sed memcpy(hdr.uid,buffer,6); 89193323Sed sprintf(buffer, "%-6u", mbr.getGroup()); 90193323Sed memcpy(hdr.gid,buffer,6); 91193323Sed 92193323Sed // Set the last modification date 93193323Sed uint64_t secondsSinceEpoch = mbr.getModTime().toEpochTime(); 94193323Sed sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch)); 95193323Sed memcpy(hdr.date,buffer,12); 96193323Sed 97193323Sed // Get rid of trailing blanks in the name 98198090Srdivacky std::string mbrPath = mbr.getPath().str(); 99193323Sed size_t mbrLen = mbrPath.length(); 100193323Sed while (mbrLen > 0 && mbrPath[mbrLen-1] == ' ') { 101193323Sed mbrPath.erase(mbrLen-1,1); 102193323Sed mbrLen--; 103193323Sed } 104193323Sed 105193323Sed // Set the name field in one of its various flavors. 106193323Sed bool writeLongName = false; 107193323Sed if (mbr.isStringTable()) { 108193323Sed memcpy(hdr.name,ARFILE_STRTAB_NAME,16); 109193323Sed } else if (mbr.isSVR4SymbolTable()) { 110193323Sed memcpy(hdr.name,ARFILE_SVR4_SYMTAB_NAME,16); 111193323Sed } else if (mbr.isBSD4SymbolTable()) { 112193323Sed memcpy(hdr.name,ARFILE_BSD4_SYMTAB_NAME,16); 113193323Sed } else if (mbr.isLLVMSymbolTable()) { 114193323Sed memcpy(hdr.name,ARFILE_LLVM_SYMTAB_NAME,16); 115193323Sed } else if (TruncateNames) { 116193323Sed const char* nm = mbrPath.c_str(); 117193323Sed unsigned len = mbrPath.length(); 118193323Sed size_t slashpos = mbrPath.rfind('/'); 119193323Sed if (slashpos != std::string::npos) { 120193323Sed nm += slashpos + 1; 121193323Sed len -= slashpos +1; 122193323Sed } 123193323Sed if (len > 15) 124193323Sed len = 15; 125193323Sed memcpy(hdr.name,nm,len); 126193323Sed hdr.name[len] = '/'; 127193323Sed } else if (mbrPath.length() < 16 && mbrPath.find('/') == std::string::npos) { 128193323Sed memcpy(hdr.name,mbrPath.c_str(),mbrPath.length()); 129193323Sed hdr.name[mbrPath.length()] = '/'; 130193323Sed } else { 131193323Sed std::string nm = "#1/"; 132193323Sed nm += utostr(mbrPath.length()); 133193323Sed memcpy(hdr.name,nm.data(),nm.length()); 134193323Sed if (sz < 0) 135193323Sed sz -= mbrPath.length(); 136193323Sed else 137193323Sed sz += mbrPath.length(); 138193323Sed writeLongName = true; 139193323Sed } 140193323Sed 141193323Sed // Set the size field 142193323Sed if (sz < 0) { 143193323Sed buffer[0] = '-'; 144193323Sed sprintf(&buffer[1],"%-9u",(unsigned)-sz); 145193323Sed } else { 146193323Sed sprintf(buffer, "%-10u", (unsigned)sz); 147193323Sed } 148193323Sed memcpy(hdr.size,buffer,10); 149193323Sed 150193323Sed return writeLongName; 151193323Sed} 152193323Sed 153193323Sed// Insert a file into the archive before some other member. This also takes care 154193323Sed// of extracting the necessary flags and information from the file. 155193323Sedbool 156193323SedArchive::addFileBefore(const sys::Path& filePath, iterator where, 157193323Sed std::string* ErrMsg) { 158193323Sed if (!filePath.exists()) { 159193323Sed if (ErrMsg) 160193323Sed *ErrMsg = "Can not add a non-existent file to archive"; 161193323Sed return true; 162193323Sed } 163193323Sed 164193323Sed ArchiveMember* mbr = new ArchiveMember(this); 165193323Sed 166193323Sed mbr->data = 0; 167193323Sed mbr->path = filePath; 168193323Sed const sys::FileStatus *FSInfo = mbr->path.getFileStatus(false, ErrMsg); 169194178Sed if (!FSInfo) { 170194178Sed delete mbr; 171193323Sed return true; 172194178Sed } 173194178Sed mbr->info = *FSInfo; 174193323Sed 175193323Sed unsigned flags = 0; 176198090Srdivacky bool hasSlash = filePath.str().find('/') != std::string::npos; 177193323Sed if (hasSlash) 178193323Sed flags |= ArchiveMember::HasPathFlag; 179198090Srdivacky if (hasSlash || filePath.str().length() > 15) 180193323Sed flags |= ArchiveMember::HasLongFilenameFlag; 181193323Sed std::string magic; 182193323Sed mbr->path.getMagicNumber(magic,4); 183193323Sed switch (sys::IdentifyFileType(magic.c_str(),4)) { 184193323Sed case sys::Bitcode_FileType: 185193323Sed flags |= ArchiveMember::BitcodeFlag; 186193323Sed break; 187193323Sed default: 188193323Sed break; 189193323Sed } 190193323Sed mbr->flags = flags; 191193323Sed members.insert(where,mbr); 192193323Sed return false; 193193323Sed} 194193323Sed 195193323Sed// Write one member out to the file. 196193323Sedbool 197193323SedArchive::writeMember( 198193323Sed const ArchiveMember& member, 199193323Sed std::ofstream& ARFile, 200193323Sed bool CreateSymbolTable, 201193323Sed bool TruncateNames, 202193323Sed bool ShouldCompress, 203193323Sed std::string* ErrMsg 204193323Sed) { 205193323Sed 206193323Sed unsigned filepos = ARFile.tellp(); 207193323Sed filepos -= 8; 208193323Sed 209193323Sed // Get the data and its size either from the 210193323Sed // member's in-memory data or directly from the file. 211193323Sed size_t fSize = member.getSize(); 212193323Sed const char *data = (const char*)member.getData(); 213193323Sed MemoryBuffer *mFile = 0; 214193323Sed if (!data) { 215193323Sed mFile = MemoryBuffer::getFile(member.getPath().c_str(), ErrMsg); 216193323Sed if (mFile == 0) 217193323Sed return true; 218193323Sed data = mFile->getBufferStart(); 219193323Sed fSize = mFile->getBufferSize(); 220193323Sed } 221193323Sed 222193323Sed // Now that we have the data in memory, update the 223206083Srdivacky // symbol table if it's a bitcode file. 224193323Sed if (CreateSymbolTable && member.isBitcode()) { 225193323Sed std::vector<std::string> symbols; 226198090Srdivacky std::string FullMemberName = archPath.str() + "(" + member.getPath().str() 227193323Sed + ")"; 228203954Srdivacky Module* M = 229207618Srdivacky GetBitcodeSymbols(data, fSize, FullMemberName, Context, symbols, ErrMsg); 230193323Sed 231193323Sed // If the bitcode parsed successfully 232203954Srdivacky if ( M ) { 233193323Sed for (std::vector<std::string>::iterator SI = symbols.begin(), 234193323Sed SE = symbols.end(); SI != SE; ++SI) { 235193323Sed 236193323Sed std::pair<SymTabType::iterator,bool> Res = 237193323Sed symTab.insert(std::make_pair(*SI,filepos)); 238193323Sed 239193323Sed if (Res.second) { 240193323Sed symTabSize += SI->length() + 241193323Sed numVbrBytes(SI->length()) + 242193323Sed numVbrBytes(filepos); 243193323Sed } 244193323Sed } 245193323Sed // We don't need this module any more. 246203954Srdivacky delete M; 247193323Sed } else { 248193323Sed delete mFile; 249193323Sed if (ErrMsg) 250198090Srdivacky *ErrMsg = "Can't parse bitcode member: " + member.getPath().str() 251193323Sed + ": " + *ErrMsg; 252193323Sed return true; 253193323Sed } 254193323Sed } 255193323Sed 256193323Sed int hdrSize = fSize; 257193323Sed 258193323Sed // Compute the fields of the header 259193323Sed ArchiveMemberHeader Hdr; 260193323Sed bool writeLongName = fillHeader(member,Hdr,hdrSize,TruncateNames); 261193323Sed 262193323Sed // Write header to archive file 263193323Sed ARFile.write((char*)&Hdr, sizeof(Hdr)); 264193323Sed 265193323Sed // Write the long filename if its long 266193323Sed if (writeLongName) { 267198090Srdivacky ARFile.write(member.getPath().str().data(), 268198090Srdivacky member.getPath().str().length()); 269193323Sed } 270193323Sed 271193323Sed // Write the (possibly compressed) member's content to the file. 272193323Sed ARFile.write(data,fSize); 273193323Sed 274193323Sed // Make sure the member is an even length 275193323Sed if ((ARFile.tellp() & 1) == 1) 276193323Sed ARFile << ARFILE_PAD; 277193323Sed 278193323Sed // Close the mapped file if it was opened 279193323Sed delete mFile; 280193323Sed return false; 281193323Sed} 282193323Sed 283193323Sed// Write out the LLVM symbol table as an archive member to the file. 284193323Sedvoid 285193323SedArchive::writeSymbolTable(std::ofstream& ARFile) { 286193323Sed 287193323Sed // Construct the symbol table's header 288193323Sed ArchiveMemberHeader Hdr; 289193323Sed Hdr.init(); 290193323Sed memcpy(Hdr.name,ARFILE_LLVM_SYMTAB_NAME,16); 291193323Sed uint64_t secondsSinceEpoch = sys::TimeValue::now().toEpochTime(); 292193323Sed char buffer[32]; 293193323Sed sprintf(buffer, "%-8o", 0644); 294193323Sed memcpy(Hdr.mode,buffer,8); 295193323Sed sprintf(buffer, "%-6u", sys::Process::GetCurrentUserId()); 296193323Sed memcpy(Hdr.uid,buffer,6); 297193323Sed sprintf(buffer, "%-6u", sys::Process::GetCurrentGroupId()); 298193323Sed memcpy(Hdr.gid,buffer,6); 299193323Sed sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch)); 300193323Sed memcpy(Hdr.date,buffer,12); 301193323Sed sprintf(buffer,"%-10u",symTabSize); 302193323Sed memcpy(Hdr.size,buffer,10); 303193323Sed 304193323Sed // Write the header 305193323Sed ARFile.write((char*)&Hdr, sizeof(Hdr)); 306193323Sed 307193323Sed#ifndef NDEBUG 308193323Sed // Save the starting position of the symbol tables data content. 309193323Sed unsigned startpos = ARFile.tellp(); 310193323Sed#endif 311193323Sed 312193323Sed // Write out the symbols sequentially 313193323Sed for ( Archive::SymTabType::iterator I = symTab.begin(), E = symTab.end(); 314193323Sed I != E; ++I) 315193323Sed { 316193323Sed // Write out the file index 317193323Sed writeInteger(I->second, ARFile); 318193323Sed // Write out the length of the symbol 319193323Sed writeInteger(I->first.length(), ARFile); 320193323Sed // Write out the symbol 321193323Sed ARFile.write(I->first.data(), I->first.length()); 322193323Sed } 323193323Sed 324193323Sed#ifndef NDEBUG 325193323Sed // Now that we're done with the symbol table, get the ending file position 326193323Sed unsigned endpos = ARFile.tellp(); 327193323Sed#endif 328193323Sed 329193323Sed // Make sure that the amount we wrote is what we pre-computed. This is 330193323Sed // critical for file integrity purposes. 331193323Sed assert(endpos - startpos == symTabSize && "Invalid symTabSize computation"); 332193323Sed 333193323Sed // Make sure the symbol table is even sized 334193323Sed if (symTabSize % 2 != 0 ) 335193323Sed ARFile << ARFILE_PAD; 336193323Sed} 337193323Sed 338193323Sed// Write the entire archive to the file specified when the archive was created. 339193323Sed// This writes to a temporary file first. Options are for creating a symbol 340193323Sed// table, flattening the file names (no directories, 15 chars max) and 341193323Sed// compressing each archive member. 342193323Sedbool 343193323SedArchive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, 344193323Sed std::string* ErrMsg) 345193323Sed{ 346193323Sed // Make sure they haven't opened up the file, not loaded it, 347193323Sed // but are now trying to write it which would wipe out the file. 348193323Sed if (members.empty() && mapfile && mapfile->getBufferSize() > 8) { 349193323Sed if (ErrMsg) 350193323Sed *ErrMsg = "Can't write an archive not opened for writing"; 351193323Sed return true; 352193323Sed } 353193323Sed 354193323Sed // Create a temporary file to store the archive in 355193323Sed sys::Path TmpArchive = archPath; 356193323Sed if (TmpArchive.createTemporaryFileOnDisk(ErrMsg)) 357193323Sed return true; 358193323Sed 359193323Sed // Make sure the temporary gets removed if we crash 360193323Sed sys::RemoveFileOnSignal(TmpArchive); 361193323Sed 362193323Sed // Create archive file for output. 363193323Sed std::ios::openmode io_mode = std::ios::out | std::ios::trunc | 364193323Sed std::ios::binary; 365193323Sed std::ofstream ArchiveFile(TmpArchive.c_str(), io_mode); 366193323Sed 367193323Sed // Check for errors opening or creating archive file. 368193323Sed if (!ArchiveFile.is_open() || ArchiveFile.bad()) { 369193323Sed if (TmpArchive.exists()) 370193323Sed TmpArchive.eraseFromDisk(); 371193323Sed if (ErrMsg) 372198090Srdivacky *ErrMsg = "Error opening archive file: " + archPath.str(); 373193323Sed return true; 374193323Sed } 375193323Sed 376193323Sed // If we're creating a symbol table, reset it now 377193323Sed if (CreateSymbolTable) { 378193323Sed symTabSize = 0; 379193323Sed symTab.clear(); 380193323Sed } 381193323Sed 382193323Sed // Write magic string to archive. 383193323Sed ArchiveFile << ARFILE_MAGIC; 384193323Sed 385193323Sed // Loop over all member files, and write them out. Note that this also 386193323Sed // builds the symbol table, symTab. 387193323Sed for (MembersList::iterator I = begin(), E = end(); I != E; ++I) { 388193323Sed if (writeMember(*I, ArchiveFile, CreateSymbolTable, 389193323Sed TruncateNames, Compress, ErrMsg)) { 390193323Sed if (TmpArchive.exists()) 391193323Sed TmpArchive.eraseFromDisk(); 392193323Sed ArchiveFile.close(); 393193323Sed return true; 394193323Sed } 395193323Sed } 396193323Sed 397193323Sed // Close archive file. 398193323Sed ArchiveFile.close(); 399193323Sed 400193323Sed // Write the symbol table 401193323Sed if (CreateSymbolTable) { 402193323Sed // At this point we have written a file that is a legal archive but it 403193323Sed // doesn't have a symbol table in it. To aid in faster reading and to 404193323Sed // ensure compatibility with other archivers we need to put the symbol 405193323Sed // table first in the file. Unfortunately, this means mapping the file 406193323Sed // we just wrote back in and copying it to the destination file. 407193323Sed sys::Path FinalFilePath = archPath; 408193323Sed 409193323Sed // Map in the archive we just wrote. 410193323Sed { 411193323Sed OwningPtr<MemoryBuffer> arch(MemoryBuffer::getFile(TmpArchive.c_str())); 412193323Sed if (arch == 0) return true; 413193323Sed const char* base = arch->getBufferStart(); 414193323Sed 415193323Sed // Open another temporary file in order to avoid invalidating the 416193323Sed // mmapped data 417193323Sed if (FinalFilePath.createTemporaryFileOnDisk(ErrMsg)) 418193323Sed return true; 419193323Sed sys::RemoveFileOnSignal(FinalFilePath); 420193323Sed 421193323Sed std::ofstream FinalFile(FinalFilePath.c_str(), io_mode); 422193323Sed if (!FinalFile.is_open() || FinalFile.bad()) { 423193323Sed if (TmpArchive.exists()) 424193323Sed TmpArchive.eraseFromDisk(); 425193323Sed if (ErrMsg) 426198090Srdivacky *ErrMsg = "Error opening archive file: " + FinalFilePath.str(); 427193323Sed return true; 428193323Sed } 429193323Sed 430193323Sed // Write the file magic number 431193323Sed FinalFile << ARFILE_MAGIC; 432193323Sed 433193323Sed // If there is a foreign symbol table, put it into the file now. Most 434193323Sed // ar(1) implementations require the symbol table to be first but llvm-ar 435193323Sed // can deal with it being after a foreign symbol table. This ensures 436193323Sed // compatibility with other ar(1) implementations as well as allowing the 437193323Sed // archive to store both native .o and LLVM .bc files, both indexed. 438193323Sed if (foreignST) { 439193323Sed if (writeMember(*foreignST, FinalFile, false, false, false, ErrMsg)) { 440193323Sed FinalFile.close(); 441193323Sed if (TmpArchive.exists()) 442193323Sed TmpArchive.eraseFromDisk(); 443193323Sed return true; 444193323Sed } 445193323Sed } 446193323Sed 447193323Sed // Put out the LLVM symbol table now. 448193323Sed writeSymbolTable(FinalFile); 449193323Sed 450193323Sed // Copy the temporary file contents being sure to skip the file's magic 451193323Sed // number. 452193323Sed FinalFile.write(base + sizeof(ARFILE_MAGIC)-1, 453193323Sed arch->getBufferSize()-sizeof(ARFILE_MAGIC)+1); 454193323Sed 455193323Sed // Close up shop 456193323Sed FinalFile.close(); 457193323Sed } // free arch. 458193323Sed 459193323Sed // Move the final file over top of TmpArchive 460193323Sed if (FinalFilePath.renamePathOnDisk(TmpArchive, ErrMsg)) 461193323Sed return true; 462193323Sed } 463193323Sed 464193323Sed // Before we replace the actual archive, we need to forget all the 465193323Sed // members, since they point to data in that old archive. We need to do 466193323Sed // this because we cannot replace an open file on Windows. 467193323Sed cleanUpMemory(); 468193323Sed 469193323Sed if (TmpArchive.renamePathOnDisk(archPath, ErrMsg)) 470193323Sed return true; 471193323Sed 472193323Sed // Set correct read and write permissions after temporary file is moved 473193323Sed // to final destination path. 474193323Sed if (archPath.makeReadableOnDisk(ErrMsg)) 475193323Sed return true; 476193323Sed if (archPath.makeWriteableOnDisk(ErrMsg)) 477193323Sed return true; 478193323Sed 479193323Sed return false; 480193323Sed} 481