ArchiveWriter.cpp revision 195340
1193323Sed//===-- ArchiveWriter.cpp - Write LLVM archive files ----------------------===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// Builds up an LLVM archive file (.a) containing LLVM bitcode. 11193323Sed// 12193323Sed//===----------------------------------------------------------------------===// 13193323Sed 14193323Sed#include "ArchiveInternals.h" 15193323Sed#include "llvm/Bitcode/ReaderWriter.h" 16193323Sed#include "llvm/ADT/OwningPtr.h" 17193323Sed#include "llvm/Support/MemoryBuffer.h" 18193323Sed#include "llvm/System/Signals.h" 19193323Sed#include "llvm/System/Process.h" 20193323Sed#include "llvm/ModuleProvider.h" 21193323Sed#include <fstream> 22193323Sed#include <ostream> 23193323Sed#include <iomanip> 24193323Sedusing namespace llvm; 25193323Sed 26193323Sed// Write an integer using variable bit rate encoding. This saves a few bytes 27193323Sed// per entry in the symbol table. 28193323Sedstatic inline void writeInteger(unsigned num, std::ofstream& ARFile) { 29193323Sed while (1) { 30193323Sed if (num < 0x80) { // done? 31193323Sed ARFile << (unsigned char)num; 32193323Sed return; 33193323Sed } 34193323Sed 35193323Sed // Nope, we are bigger than a character, output the next 7 bits and set the 36193323Sed // high bit to say that there is more coming... 37193323Sed ARFile << (unsigned char)(0x80 | ((unsigned char)num & 0x7F)); 38193323Sed num >>= 7; // Shift out 7 bits now... 39193323Sed } 40193323Sed} 41193323Sed 42193323Sed// Compute how many bytes are taken by a given VBR encoded value. This is needed 43193323Sed// to pre-compute the size of the symbol table. 44193323Sedstatic inline unsigned numVbrBytes(unsigned num) { 45193323Sed 46193323Sed // Note that the following nested ifs are somewhat equivalent to a binary 47193323Sed // search. We split it in half by comparing against 2^14 first. This allows 48193323Sed // most reasonable values to be done in 2 comparisons instead of 1 for 49193323Sed // small ones and four for large ones. We expect this to access file offsets 50193323Sed // in the 2^10 to 2^24 range and symbol lengths in the 2^0 to 2^8 range, 51193323Sed // so this approach is reasonable. 52193323Sed if (num < 1<<14) { 53193323Sed if (num < 1<<7) 54193323Sed return 1; 55193323Sed else 56193323Sed return 2; 57193323Sed } 58193323Sed if (num < 1<<21) 59193323Sed return 3; 60193323Sed 61193323Sed if (num < 1<<28) 62193323Sed return 4; 63193323Sed return 5; // anything >= 2^28 takes 5 bytes 64193323Sed} 65193323Sed 66193323Sed// Create an empty archive. 67195340SedArchive* Archive::CreateEmpty(const sys::Path& FilePath, LLVMContext& C) { 68195340Sed Archive* result = new Archive(FilePath, C); 69193323Sed return result; 70193323Sed} 71193323Sed 72193323Sed// Fill the ArchiveMemberHeader with the information from a member. If 73193323Sed// TruncateNames is true, names are flattened to 15 chars or less. The sz field 74193323Sed// is provided here instead of coming from the mbr because the member might be 75193323Sed// stored compressed and the compressed size is not the ArchiveMember's size. 76193323Sed// Furthermore compressed files have negative size fields to identify them as 77193323Sed// compressed. 78193323Sedbool 79193323SedArchive::fillHeader(const ArchiveMember &mbr, ArchiveMemberHeader& hdr, 80193323Sed int sz, bool TruncateNames) const { 81193323Sed 82193323Sed // Set the permissions mode, uid and gid 83193323Sed hdr.init(); 84193323Sed char buffer[32]; 85193323Sed sprintf(buffer, "%-8o", mbr.getMode()); 86193323Sed memcpy(hdr.mode,buffer,8); 87193323Sed sprintf(buffer, "%-6u", mbr.getUser()); 88193323Sed memcpy(hdr.uid,buffer,6); 89193323Sed sprintf(buffer, "%-6u", mbr.getGroup()); 90193323Sed memcpy(hdr.gid,buffer,6); 91193323Sed 92193323Sed // Set the last modification date 93193323Sed uint64_t secondsSinceEpoch = mbr.getModTime().toEpochTime(); 94193323Sed sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch)); 95193323Sed memcpy(hdr.date,buffer,12); 96193323Sed 97193323Sed // Get rid of trailing blanks in the name 98193323Sed std::string mbrPath = mbr.getPath().toString(); 99193323Sed size_t mbrLen = mbrPath.length(); 100193323Sed while (mbrLen > 0 && mbrPath[mbrLen-1] == ' ') { 101193323Sed mbrPath.erase(mbrLen-1,1); 102193323Sed mbrLen--; 103193323Sed } 104193323Sed 105193323Sed // Set the name field in one of its various flavors. 106193323Sed bool writeLongName = false; 107193323Sed if (mbr.isStringTable()) { 108193323Sed memcpy(hdr.name,ARFILE_STRTAB_NAME,16); 109193323Sed } else if (mbr.isSVR4SymbolTable()) { 110193323Sed memcpy(hdr.name,ARFILE_SVR4_SYMTAB_NAME,16); 111193323Sed } else if (mbr.isBSD4SymbolTable()) { 112193323Sed memcpy(hdr.name,ARFILE_BSD4_SYMTAB_NAME,16); 113193323Sed } else if (mbr.isLLVMSymbolTable()) { 114193323Sed memcpy(hdr.name,ARFILE_LLVM_SYMTAB_NAME,16); 115193323Sed } else if (TruncateNames) { 116193323Sed const char* nm = mbrPath.c_str(); 117193323Sed unsigned len = mbrPath.length(); 118193323Sed size_t slashpos = mbrPath.rfind('/'); 119193323Sed if (slashpos != std::string::npos) { 120193323Sed nm += slashpos + 1; 121193323Sed len -= slashpos +1; 122193323Sed } 123193323Sed if (len > 15) 124193323Sed len = 15; 125193323Sed memcpy(hdr.name,nm,len); 126193323Sed hdr.name[len] = '/'; 127193323Sed } else if (mbrPath.length() < 16 && mbrPath.find('/') == std::string::npos) { 128193323Sed memcpy(hdr.name,mbrPath.c_str(),mbrPath.length()); 129193323Sed hdr.name[mbrPath.length()] = '/'; 130193323Sed } else { 131193323Sed std::string nm = "#1/"; 132193323Sed nm += utostr(mbrPath.length()); 133193323Sed memcpy(hdr.name,nm.data(),nm.length()); 134193323Sed if (sz < 0) 135193323Sed sz -= mbrPath.length(); 136193323Sed else 137193323Sed sz += mbrPath.length(); 138193323Sed writeLongName = true; 139193323Sed } 140193323Sed 141193323Sed // Set the size field 142193323Sed if (sz < 0) { 143193323Sed buffer[0] = '-'; 144193323Sed sprintf(&buffer[1],"%-9u",(unsigned)-sz); 145193323Sed } else { 146193323Sed sprintf(buffer, "%-10u", (unsigned)sz); 147193323Sed } 148193323Sed memcpy(hdr.size,buffer,10); 149193323Sed 150193323Sed return writeLongName; 151193323Sed} 152193323Sed 153193323Sed// Insert a file into the archive before some other member. This also takes care 154193323Sed// of extracting the necessary flags and information from the file. 155193323Sedbool 156193323SedArchive::addFileBefore(const sys::Path& filePath, iterator where, 157193323Sed std::string* ErrMsg) { 158193323Sed if (!filePath.exists()) { 159193323Sed if (ErrMsg) 160193323Sed *ErrMsg = "Can not add a non-existent file to archive"; 161193323Sed return true; 162193323Sed } 163193323Sed 164193323Sed ArchiveMember* mbr = new ArchiveMember(this); 165193323Sed 166193323Sed mbr->data = 0; 167193323Sed mbr->path = filePath; 168193323Sed const sys::FileStatus *FSInfo = mbr->path.getFileStatus(false, ErrMsg); 169194178Sed if (!FSInfo) { 170194178Sed delete mbr; 171193323Sed return true; 172194178Sed } 173194178Sed mbr->info = *FSInfo; 174193323Sed 175193323Sed unsigned flags = 0; 176193323Sed bool hasSlash = filePath.toString().find('/') != std::string::npos; 177193323Sed if (hasSlash) 178193323Sed flags |= ArchiveMember::HasPathFlag; 179193323Sed if (hasSlash || filePath.toString().length() > 15) 180193323Sed flags |= ArchiveMember::HasLongFilenameFlag; 181193323Sed std::string magic; 182193323Sed mbr->path.getMagicNumber(magic,4); 183193323Sed switch (sys::IdentifyFileType(magic.c_str(),4)) { 184193323Sed case sys::Bitcode_FileType: 185193323Sed flags |= ArchiveMember::BitcodeFlag; 186193323Sed break; 187193323Sed default: 188193323Sed break; 189193323Sed } 190193323Sed mbr->flags = flags; 191193323Sed members.insert(where,mbr); 192193323Sed return false; 193193323Sed} 194193323Sed 195193323Sed// Write one member out to the file. 196193323Sedbool 197193323SedArchive::writeMember( 198193323Sed const ArchiveMember& member, 199193323Sed std::ofstream& ARFile, 200193323Sed bool CreateSymbolTable, 201193323Sed bool TruncateNames, 202193323Sed bool ShouldCompress, 203193323Sed std::string* ErrMsg 204193323Sed) { 205193323Sed 206193323Sed unsigned filepos = ARFile.tellp(); 207193323Sed filepos -= 8; 208193323Sed 209193323Sed // Get the data and its size either from the 210193323Sed // member's in-memory data or directly from the file. 211193323Sed size_t fSize = member.getSize(); 212193323Sed const char *data = (const char*)member.getData(); 213193323Sed MemoryBuffer *mFile = 0; 214193323Sed if (!data) { 215193323Sed mFile = MemoryBuffer::getFile(member.getPath().c_str(), ErrMsg); 216193323Sed if (mFile == 0) 217193323Sed return true; 218193323Sed data = mFile->getBufferStart(); 219193323Sed fSize = mFile->getBufferSize(); 220193323Sed } 221193323Sed 222193323Sed // Now that we have the data in memory, update the 223193323Sed // symbol table if its a bitcode file. 224193323Sed if (CreateSymbolTable && member.isBitcode()) { 225193323Sed std::vector<std::string> symbols; 226193323Sed std::string FullMemberName = archPath.toString() + "(" + 227193323Sed member.getPath().toString() 228193323Sed + ")"; 229193323Sed ModuleProvider* MP = 230193323Sed GetBitcodeSymbols((const unsigned char*)data,fSize, 231195340Sed FullMemberName, Context, symbols, ErrMsg); 232193323Sed 233193323Sed // If the bitcode parsed successfully 234193323Sed if ( MP ) { 235193323Sed for (std::vector<std::string>::iterator SI = symbols.begin(), 236193323Sed SE = symbols.end(); SI != SE; ++SI) { 237193323Sed 238193323Sed std::pair<SymTabType::iterator,bool> Res = 239193323Sed symTab.insert(std::make_pair(*SI,filepos)); 240193323Sed 241193323Sed if (Res.second) { 242193323Sed symTabSize += SI->length() + 243193323Sed numVbrBytes(SI->length()) + 244193323Sed numVbrBytes(filepos); 245193323Sed } 246193323Sed } 247193323Sed // We don't need this module any more. 248193323Sed delete MP; 249193323Sed } else { 250193323Sed delete mFile; 251193323Sed if (ErrMsg) 252193323Sed *ErrMsg = "Can't parse bitcode member: " + member.getPath().toString() 253193323Sed + ": " + *ErrMsg; 254193323Sed return true; 255193323Sed } 256193323Sed } 257193323Sed 258193323Sed int hdrSize = fSize; 259193323Sed 260193323Sed // Compute the fields of the header 261193323Sed ArchiveMemberHeader Hdr; 262193323Sed bool writeLongName = fillHeader(member,Hdr,hdrSize,TruncateNames); 263193323Sed 264193323Sed // Write header to archive file 265193323Sed ARFile.write((char*)&Hdr, sizeof(Hdr)); 266193323Sed 267193323Sed // Write the long filename if its long 268193323Sed if (writeLongName) { 269193323Sed ARFile.write(member.getPath().toString().data(), 270193323Sed member.getPath().toString().length()); 271193323Sed } 272193323Sed 273193323Sed // Write the (possibly compressed) member's content to the file. 274193323Sed ARFile.write(data,fSize); 275193323Sed 276193323Sed // Make sure the member is an even length 277193323Sed if ((ARFile.tellp() & 1) == 1) 278193323Sed ARFile << ARFILE_PAD; 279193323Sed 280193323Sed // Close the mapped file if it was opened 281193323Sed delete mFile; 282193323Sed return false; 283193323Sed} 284193323Sed 285193323Sed// Write out the LLVM symbol table as an archive member to the file. 286193323Sedvoid 287193323SedArchive::writeSymbolTable(std::ofstream& ARFile) { 288193323Sed 289193323Sed // Construct the symbol table's header 290193323Sed ArchiveMemberHeader Hdr; 291193323Sed Hdr.init(); 292193323Sed memcpy(Hdr.name,ARFILE_LLVM_SYMTAB_NAME,16); 293193323Sed uint64_t secondsSinceEpoch = sys::TimeValue::now().toEpochTime(); 294193323Sed char buffer[32]; 295193323Sed sprintf(buffer, "%-8o", 0644); 296193323Sed memcpy(Hdr.mode,buffer,8); 297193323Sed sprintf(buffer, "%-6u", sys::Process::GetCurrentUserId()); 298193323Sed memcpy(Hdr.uid,buffer,6); 299193323Sed sprintf(buffer, "%-6u", sys::Process::GetCurrentGroupId()); 300193323Sed memcpy(Hdr.gid,buffer,6); 301193323Sed sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch)); 302193323Sed memcpy(Hdr.date,buffer,12); 303193323Sed sprintf(buffer,"%-10u",symTabSize); 304193323Sed memcpy(Hdr.size,buffer,10); 305193323Sed 306193323Sed // Write the header 307193323Sed ARFile.write((char*)&Hdr, sizeof(Hdr)); 308193323Sed 309193323Sed#ifndef NDEBUG 310193323Sed // Save the starting position of the symbol tables data content. 311193323Sed unsigned startpos = ARFile.tellp(); 312193323Sed#endif 313193323Sed 314193323Sed // Write out the symbols sequentially 315193323Sed for ( Archive::SymTabType::iterator I = symTab.begin(), E = symTab.end(); 316193323Sed I != E; ++I) 317193323Sed { 318193323Sed // Write out the file index 319193323Sed writeInteger(I->second, ARFile); 320193323Sed // Write out the length of the symbol 321193323Sed writeInteger(I->first.length(), ARFile); 322193323Sed // Write out the symbol 323193323Sed ARFile.write(I->first.data(), I->first.length()); 324193323Sed } 325193323Sed 326193323Sed#ifndef NDEBUG 327193323Sed // Now that we're done with the symbol table, get the ending file position 328193323Sed unsigned endpos = ARFile.tellp(); 329193323Sed#endif 330193323Sed 331193323Sed // Make sure that the amount we wrote is what we pre-computed. This is 332193323Sed // critical for file integrity purposes. 333193323Sed assert(endpos - startpos == symTabSize && "Invalid symTabSize computation"); 334193323Sed 335193323Sed // Make sure the symbol table is even sized 336193323Sed if (symTabSize % 2 != 0 ) 337193323Sed ARFile << ARFILE_PAD; 338193323Sed} 339193323Sed 340193323Sed// Write the entire archive to the file specified when the archive was created. 341193323Sed// This writes to a temporary file first. Options are for creating a symbol 342193323Sed// table, flattening the file names (no directories, 15 chars max) and 343193323Sed// compressing each archive member. 344193323Sedbool 345193323SedArchive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, 346193323Sed std::string* ErrMsg) 347193323Sed{ 348193323Sed // Make sure they haven't opened up the file, not loaded it, 349193323Sed // but are now trying to write it which would wipe out the file. 350193323Sed if (members.empty() && mapfile && mapfile->getBufferSize() > 8) { 351193323Sed if (ErrMsg) 352193323Sed *ErrMsg = "Can't write an archive not opened for writing"; 353193323Sed return true; 354193323Sed } 355193323Sed 356193323Sed // Create a temporary file to store the archive in 357193323Sed sys::Path TmpArchive = archPath; 358193323Sed if (TmpArchive.createTemporaryFileOnDisk(ErrMsg)) 359193323Sed return true; 360193323Sed 361193323Sed // Make sure the temporary gets removed if we crash 362193323Sed sys::RemoveFileOnSignal(TmpArchive); 363193323Sed 364193323Sed // Create archive file for output. 365193323Sed std::ios::openmode io_mode = std::ios::out | std::ios::trunc | 366193323Sed std::ios::binary; 367193323Sed std::ofstream ArchiveFile(TmpArchive.c_str(), io_mode); 368193323Sed 369193323Sed // Check for errors opening or creating archive file. 370193323Sed if (!ArchiveFile.is_open() || ArchiveFile.bad()) { 371193323Sed if (TmpArchive.exists()) 372193323Sed TmpArchive.eraseFromDisk(); 373193323Sed if (ErrMsg) 374193323Sed *ErrMsg = "Error opening archive file: " + archPath.toString(); 375193323Sed return true; 376193323Sed } 377193323Sed 378193323Sed // If we're creating a symbol table, reset it now 379193323Sed if (CreateSymbolTable) { 380193323Sed symTabSize = 0; 381193323Sed symTab.clear(); 382193323Sed } 383193323Sed 384193323Sed // Write magic string to archive. 385193323Sed ArchiveFile << ARFILE_MAGIC; 386193323Sed 387193323Sed // Loop over all member files, and write them out. Note that this also 388193323Sed // builds the symbol table, symTab. 389193323Sed for (MembersList::iterator I = begin(), E = end(); I != E; ++I) { 390193323Sed if (writeMember(*I, ArchiveFile, CreateSymbolTable, 391193323Sed TruncateNames, Compress, ErrMsg)) { 392193323Sed if (TmpArchive.exists()) 393193323Sed TmpArchive.eraseFromDisk(); 394193323Sed ArchiveFile.close(); 395193323Sed return true; 396193323Sed } 397193323Sed } 398193323Sed 399193323Sed // Close archive file. 400193323Sed ArchiveFile.close(); 401193323Sed 402193323Sed // Write the symbol table 403193323Sed if (CreateSymbolTable) { 404193323Sed // At this point we have written a file that is a legal archive but it 405193323Sed // doesn't have a symbol table in it. To aid in faster reading and to 406193323Sed // ensure compatibility with other archivers we need to put the symbol 407193323Sed // table first in the file. Unfortunately, this means mapping the file 408193323Sed // we just wrote back in and copying it to the destination file. 409193323Sed sys::Path FinalFilePath = archPath; 410193323Sed 411193323Sed // Map in the archive we just wrote. 412193323Sed { 413193323Sed OwningPtr<MemoryBuffer> arch(MemoryBuffer::getFile(TmpArchive.c_str())); 414193323Sed if (arch == 0) return true; 415193323Sed const char* base = arch->getBufferStart(); 416193323Sed 417193323Sed // Open another temporary file in order to avoid invalidating the 418193323Sed // mmapped data 419193323Sed if (FinalFilePath.createTemporaryFileOnDisk(ErrMsg)) 420193323Sed return true; 421193323Sed sys::RemoveFileOnSignal(FinalFilePath); 422193323Sed 423193323Sed std::ofstream FinalFile(FinalFilePath.c_str(), io_mode); 424193323Sed if (!FinalFile.is_open() || FinalFile.bad()) { 425193323Sed if (TmpArchive.exists()) 426193323Sed TmpArchive.eraseFromDisk(); 427193323Sed if (ErrMsg) 428193323Sed *ErrMsg = "Error opening archive file: " + FinalFilePath.toString(); 429193323Sed return true; 430193323Sed } 431193323Sed 432193323Sed // Write the file magic number 433193323Sed FinalFile << ARFILE_MAGIC; 434193323Sed 435193323Sed // If there is a foreign symbol table, put it into the file now. Most 436193323Sed // ar(1) implementations require the symbol table to be first but llvm-ar 437193323Sed // can deal with it being after a foreign symbol table. This ensures 438193323Sed // compatibility with other ar(1) implementations as well as allowing the 439193323Sed // archive to store both native .o and LLVM .bc files, both indexed. 440193323Sed if (foreignST) { 441193323Sed if (writeMember(*foreignST, FinalFile, false, false, false, ErrMsg)) { 442193323Sed FinalFile.close(); 443193323Sed if (TmpArchive.exists()) 444193323Sed TmpArchive.eraseFromDisk(); 445193323Sed return true; 446193323Sed } 447193323Sed } 448193323Sed 449193323Sed // Put out the LLVM symbol table now. 450193323Sed writeSymbolTable(FinalFile); 451193323Sed 452193323Sed // Copy the temporary file contents being sure to skip the file's magic 453193323Sed // number. 454193323Sed FinalFile.write(base + sizeof(ARFILE_MAGIC)-1, 455193323Sed arch->getBufferSize()-sizeof(ARFILE_MAGIC)+1); 456193323Sed 457193323Sed // Close up shop 458193323Sed FinalFile.close(); 459193323Sed } // free arch. 460193323Sed 461193323Sed // Move the final file over top of TmpArchive 462193323Sed if (FinalFilePath.renamePathOnDisk(TmpArchive, ErrMsg)) 463193323Sed return true; 464193323Sed } 465193323Sed 466193323Sed // Before we replace the actual archive, we need to forget all the 467193323Sed // members, since they point to data in that old archive. We need to do 468193323Sed // this because we cannot replace an open file on Windows. 469193323Sed cleanUpMemory(); 470193323Sed 471193323Sed if (TmpArchive.renamePathOnDisk(archPath, ErrMsg)) 472193323Sed return true; 473193323Sed 474193323Sed // Set correct read and write permissions after temporary file is moved 475193323Sed // to final destination path. 476193323Sed if (archPath.makeReadableOnDisk(ErrMsg)) 477193323Sed return true; 478193323Sed if (archPath.makeWriteableOnDisk(ErrMsg)) 479193323Sed return true; 480193323Sed 481193323Sed return false; 482193323Sed} 483