// ArchiveWriter.cpp revision 239462
1193323Sed//===-- ArchiveWriter.cpp - Write LLVM archive files ----------------------===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// Builds up an LLVM archive file (.a) containing LLVM bitcode. 11193323Sed// 12193323Sed//===----------------------------------------------------------------------===// 13193323Sed 14193323Sed#include "ArchiveInternals.h" 15203954Srdivacky#include "llvm/Module.h" 16203954Srdivacky#include "llvm/ADT/OwningPtr.h" 17193323Sed#include "llvm/Bitcode/ReaderWriter.h" 18218893Sdim#include "llvm/Support/FileSystem.h" 19193323Sed#include "llvm/Support/MemoryBuffer.h" 20218893Sdim#include "llvm/Support/Process.h" 21218893Sdim#include "llvm/Support/Signals.h" 22218893Sdim#include "llvm/Support/system_error.h" 23193323Sed#include <fstream> 24193323Sed#include <ostream> 25193323Sed#include <iomanip> 26193323Sedusing namespace llvm; 27193323Sed 28193323Sed// Write an integer using variable bit rate encoding. This saves a few bytes 29193323Sed// per entry in the symbol table. 30221345Sdimstatic inline void writeInteger(unsigned num, std::ofstream& ARFile) { 31193323Sed while (1) { 32193323Sed if (num < 0x80) { // done? 33193323Sed ARFile << (unsigned char)num; 34193323Sed return; 35193323Sed } 36193323Sed 37193323Sed // Nope, we are bigger than a character, output the next 7 bits and set the 38193323Sed // high bit to say that there is more coming... 39193323Sed ARFile << (unsigned char)(0x80 | ((unsigned char)num & 0x7F)); 40193323Sed num >>= 7; // Shift out 7 bits now... 41193323Sed } 42193323Sed} 43193323Sed 44193323Sed// Compute how many bytes are taken by a given VBR encoded value. This is needed 45193323Sed// to pre-compute the size of the symbol table. 
46193323Sedstatic inline unsigned numVbrBytes(unsigned num) { 47193323Sed 48193323Sed // Note that the following nested ifs are somewhat equivalent to a binary 49193323Sed // search. We split it in half by comparing against 2^14 first. This allows 50193323Sed // most reasonable values to be done in 2 comparisons instead of 1 for 51193323Sed // small ones and four for large ones. We expect this to access file offsets 52193323Sed // in the 2^10 to 2^24 range and symbol lengths in the 2^0 to 2^8 range, 53193323Sed // so this approach is reasonable. 54193323Sed if (num < 1<<14) { 55193323Sed if (num < 1<<7) 56193323Sed return 1; 57193323Sed else 58193323Sed return 2; 59193323Sed } 60193323Sed if (num < 1<<21) 61193323Sed return 3; 62193323Sed 63193323Sed if (num < 1<<28) 64193323Sed return 4; 65193323Sed return 5; // anything >= 2^28 takes 5 bytes 66193323Sed} 67193323Sed 68193323Sed// Create an empty archive. 69195340SedArchive* Archive::CreateEmpty(const sys::Path& FilePath, LLVMContext& C) { 70195340Sed Archive* result = new Archive(FilePath, C); 71193323Sed return result; 72193323Sed} 73193323Sed 74193323Sed// Fill the ArchiveMemberHeader with the information from a member. If 75193323Sed// TruncateNames is true, names are flattened to 15 chars or less. The sz field 76193323Sed// is provided here instead of coming from the mbr because the member might be 77193323Sed// stored compressed and the compressed size is not the ArchiveMember's size. 78193323Sed// Furthermore compressed files have negative size fields to identify them as 79193323Sed// compressed. 
80193323Sedbool 81193323SedArchive::fillHeader(const ArchiveMember &mbr, ArchiveMemberHeader& hdr, 82193323Sed int sz, bool TruncateNames) const { 83193323Sed 84193323Sed // Set the permissions mode, uid and gid 85193323Sed hdr.init(); 86193323Sed char buffer[32]; 87193323Sed sprintf(buffer, "%-8o", mbr.getMode()); 88193323Sed memcpy(hdr.mode,buffer,8); 89193323Sed sprintf(buffer, "%-6u", mbr.getUser()); 90193323Sed memcpy(hdr.uid,buffer,6); 91193323Sed sprintf(buffer, "%-6u", mbr.getGroup()); 92193323Sed memcpy(hdr.gid,buffer,6); 93193323Sed 94193323Sed // Set the last modification date 95193323Sed uint64_t secondsSinceEpoch = mbr.getModTime().toEpochTime(); 96193323Sed sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch)); 97193323Sed memcpy(hdr.date,buffer,12); 98193323Sed 99193323Sed // Get rid of trailing blanks in the name 100198090Srdivacky std::string mbrPath = mbr.getPath().str(); 101193323Sed size_t mbrLen = mbrPath.length(); 102193323Sed while (mbrLen > 0 && mbrPath[mbrLen-1] == ' ') { 103193323Sed mbrPath.erase(mbrLen-1,1); 104193323Sed mbrLen--; 105193323Sed } 106193323Sed 107193323Sed // Set the name field in one of its various flavors. 
108193323Sed bool writeLongName = false; 109193323Sed if (mbr.isStringTable()) { 110193323Sed memcpy(hdr.name,ARFILE_STRTAB_NAME,16); 111193323Sed } else if (mbr.isSVR4SymbolTable()) { 112193323Sed memcpy(hdr.name,ARFILE_SVR4_SYMTAB_NAME,16); 113193323Sed } else if (mbr.isBSD4SymbolTable()) { 114193323Sed memcpy(hdr.name,ARFILE_BSD4_SYMTAB_NAME,16); 115193323Sed } else if (mbr.isLLVMSymbolTable()) { 116193323Sed memcpy(hdr.name,ARFILE_LLVM_SYMTAB_NAME,16); 117193323Sed } else if (TruncateNames) { 118193323Sed const char* nm = mbrPath.c_str(); 119193323Sed unsigned len = mbrPath.length(); 120193323Sed size_t slashpos = mbrPath.rfind('/'); 121193323Sed if (slashpos != std::string::npos) { 122193323Sed nm += slashpos + 1; 123193323Sed len -= slashpos +1; 124193323Sed } 125193323Sed if (len > 15) 126193323Sed len = 15; 127193323Sed memcpy(hdr.name,nm,len); 128193323Sed hdr.name[len] = '/'; 129193323Sed } else if (mbrPath.length() < 16 && mbrPath.find('/') == std::string::npos) { 130193323Sed memcpy(hdr.name,mbrPath.c_str(),mbrPath.length()); 131193323Sed hdr.name[mbrPath.length()] = '/'; 132193323Sed } else { 133193323Sed std::string nm = "#1/"; 134193323Sed nm += utostr(mbrPath.length()); 135193323Sed memcpy(hdr.name,nm.data(),nm.length()); 136193323Sed if (sz < 0) 137193323Sed sz -= mbrPath.length(); 138193323Sed else 139193323Sed sz += mbrPath.length(); 140193323Sed writeLongName = true; 141193323Sed } 142193323Sed 143193323Sed // Set the size field 144193323Sed if (sz < 0) { 145193323Sed buffer[0] = '-'; 146193323Sed sprintf(&buffer[1],"%-9u",(unsigned)-sz); 147193323Sed } else { 148193323Sed sprintf(buffer, "%-10u", (unsigned)sz); 149193323Sed } 150193323Sed memcpy(hdr.size,buffer,10); 151193323Sed 152193323Sed return writeLongName; 153193323Sed} 154193323Sed 155193323Sed// Insert a file into the archive before some other member. This also takes care 156193323Sed// of extracting the necessary flags and information from the file. 
157193323Sedbool 158218893SdimArchive::addFileBefore(const sys::Path& filePath, iterator where, 159193323Sed std::string* ErrMsg) { 160218893Sdim bool Exists; 161218893Sdim if (sys::fs::exists(filePath.str(), Exists) || !Exists) { 162193323Sed if (ErrMsg) 163193323Sed *ErrMsg = "Can not add a non-existent file to archive"; 164193323Sed return true; 165193323Sed } 166193323Sed 167193323Sed ArchiveMember* mbr = new ArchiveMember(this); 168193323Sed 169193323Sed mbr->data = 0; 170193323Sed mbr->path = filePath; 171193323Sed const sys::FileStatus *FSInfo = mbr->path.getFileStatus(false, ErrMsg); 172194178Sed if (!FSInfo) { 173194178Sed delete mbr; 174193323Sed return true; 175194178Sed } 176194178Sed mbr->info = *FSInfo; 177193323Sed 178193323Sed unsigned flags = 0; 179198090Srdivacky bool hasSlash = filePath.str().find('/') != std::string::npos; 180193323Sed if (hasSlash) 181193323Sed flags |= ArchiveMember::HasPathFlag; 182198090Srdivacky if (hasSlash || filePath.str().length() > 15) 183193323Sed flags |= ArchiveMember::HasLongFilenameFlag; 184218893Sdim 185234353Sdim sys::fs::file_magic type; 186218893Sdim if (sys::fs::identify_magic(mbr->path.str(), type)) 187234353Sdim type = sys::fs::file_magic::unknown; 188218893Sdim switch (type) { 189234353Sdim case sys::fs::file_magic::bitcode: 190193323Sed flags |= ArchiveMember::BitcodeFlag; 191193323Sed break; 192193323Sed default: 193193323Sed break; 194193323Sed } 195193323Sed mbr->flags = flags; 196193323Sed members.insert(where,mbr); 197193323Sed return false; 198193323Sed} 199193323Sed 200193323Sed// Write one member out to the file. 
201193323Sedbool 202193323SedArchive::writeMember( 203193323Sed const ArchiveMember& member, 204221345Sdim std::ofstream& ARFile, 205193323Sed bool CreateSymbolTable, 206193323Sed bool TruncateNames, 207193323Sed std::string* ErrMsg 208193323Sed) { 209193323Sed 210221345Sdim unsigned filepos = ARFile.tellp(); 211193323Sed filepos -= 8; 212193323Sed 213193323Sed // Get the data and its size either from the 214193323Sed // member's in-memory data or directly from the file. 215193323Sed size_t fSize = member.getSize(); 216193323Sed const char *data = (const char*)member.getData(); 217193323Sed MemoryBuffer *mFile = 0; 218193323Sed if (!data) { 219218893Sdim OwningPtr<MemoryBuffer> File; 220218893Sdim if (error_code ec = MemoryBuffer::getFile(member.getPath().c_str(), File)) { 221218893Sdim if (ErrMsg) 222218893Sdim *ErrMsg = ec.message(); 223193323Sed return true; 224218893Sdim } 225218893Sdim mFile = File.take(); 226193323Sed data = mFile->getBufferStart(); 227193323Sed fSize = mFile->getBufferSize(); 228193323Sed } 229193323Sed 230193323Sed // Now that we have the data in memory, update the 231206083Srdivacky // symbol table if it's a bitcode file. 
232193323Sed if (CreateSymbolTable && member.isBitcode()) { 233193323Sed std::vector<std::string> symbols; 234198090Srdivacky std::string FullMemberName = archPath.str() + "(" + member.getPath().str() 235193323Sed + ")"; 236218893Sdim Module* M = 237207618Srdivacky GetBitcodeSymbols(data, fSize, FullMemberName, Context, symbols, ErrMsg); 238193323Sed 239193323Sed // If the bitcode parsed successfully 240203954Srdivacky if ( M ) { 241193323Sed for (std::vector<std::string>::iterator SI = symbols.begin(), 242193323Sed SE = symbols.end(); SI != SE; ++SI) { 243193323Sed 244193323Sed std::pair<SymTabType::iterator,bool> Res = 245193323Sed symTab.insert(std::make_pair(*SI,filepos)); 246193323Sed 247193323Sed if (Res.second) { 248193323Sed symTabSize += SI->length() + 249193323Sed numVbrBytes(SI->length()) + 250193323Sed numVbrBytes(filepos); 251193323Sed } 252193323Sed } 253193323Sed // We don't need this module any more. 254203954Srdivacky delete M; 255193323Sed } else { 256193323Sed delete mFile; 257193323Sed if (ErrMsg) 258198090Srdivacky *ErrMsg = "Can't parse bitcode member: " + member.getPath().str() 259193323Sed + ": " + *ErrMsg; 260193323Sed return true; 261193323Sed } 262193323Sed } 263193323Sed 264193323Sed int hdrSize = fSize; 265193323Sed 266193323Sed // Compute the fields of the header 267193323Sed ArchiveMemberHeader Hdr; 268193323Sed bool writeLongName = fillHeader(member,Hdr,hdrSize,TruncateNames); 269193323Sed 270193323Sed // Write header to archive file 271193323Sed ARFile.write((char*)&Hdr, sizeof(Hdr)); 272193323Sed 273193323Sed // Write the long filename if its long 274193323Sed if (writeLongName) { 275198090Srdivacky ARFile.write(member.getPath().str().data(), 276198090Srdivacky member.getPath().str().length()); 277193323Sed } 278193323Sed 279193323Sed // Write the (possibly compressed) member's content to the file. 
280193323Sed ARFile.write(data,fSize); 281193323Sed 282193323Sed // Make sure the member is an even length 283221345Sdim if ((ARFile.tellp() & 1) == 1) 284193323Sed ARFile << ARFILE_PAD; 285193323Sed 286193323Sed // Close the mapped file if it was opened 287193323Sed delete mFile; 288193323Sed return false; 289193323Sed} 290193323Sed 291193323Sed// Write out the LLVM symbol table as an archive member to the file. 292193323Sedvoid 293221345SdimArchive::writeSymbolTable(std::ofstream& ARFile) { 294193323Sed 295193323Sed // Construct the symbol table's header 296193323Sed ArchiveMemberHeader Hdr; 297193323Sed Hdr.init(); 298193323Sed memcpy(Hdr.name,ARFILE_LLVM_SYMTAB_NAME,16); 299193323Sed uint64_t secondsSinceEpoch = sys::TimeValue::now().toEpochTime(); 300193323Sed char buffer[32]; 301193323Sed sprintf(buffer, "%-8o", 0644); 302193323Sed memcpy(Hdr.mode,buffer,8); 303193323Sed sprintf(buffer, "%-6u", sys::Process::GetCurrentUserId()); 304193323Sed memcpy(Hdr.uid,buffer,6); 305193323Sed sprintf(buffer, "%-6u", sys::Process::GetCurrentGroupId()); 306193323Sed memcpy(Hdr.gid,buffer,6); 307193323Sed sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch)); 308193323Sed memcpy(Hdr.date,buffer,12); 309193323Sed sprintf(buffer,"%-10u",symTabSize); 310193323Sed memcpy(Hdr.size,buffer,10); 311193323Sed 312193323Sed // Write the header 313193323Sed ARFile.write((char*)&Hdr, sizeof(Hdr)); 314193323Sed 315193323Sed#ifndef NDEBUG 316193323Sed // Save the starting position of the symbol tables data content. 
317221345Sdim unsigned startpos = ARFile.tellp(); 318193323Sed#endif 319193323Sed 320193323Sed // Write out the symbols sequentially 321193323Sed for ( Archive::SymTabType::iterator I = symTab.begin(), E = symTab.end(); 322193323Sed I != E; ++I) 323193323Sed { 324193323Sed // Write out the file index 325193323Sed writeInteger(I->second, ARFile); 326193323Sed // Write out the length of the symbol 327193323Sed writeInteger(I->first.length(), ARFile); 328193323Sed // Write out the symbol 329193323Sed ARFile.write(I->first.data(), I->first.length()); 330193323Sed } 331193323Sed 332193323Sed#ifndef NDEBUG 333193323Sed // Now that we're done with the symbol table, get the ending file position 334221345Sdim unsigned endpos = ARFile.tellp(); 335193323Sed#endif 336193323Sed 337193323Sed // Make sure that the amount we wrote is what we pre-computed. This is 338193323Sed // critical for file integrity purposes. 339193323Sed assert(endpos - startpos == symTabSize && "Invalid symTabSize computation"); 340193323Sed 341193323Sed // Make sure the symbol table is even sized 342193323Sed if (symTabSize % 2 != 0 ) 343193323Sed ARFile << ARFILE_PAD; 344193323Sed} 345193323Sed 346193323Sed// Write the entire archive to the file specified when the archive was created. 347193323Sed// This writes to a temporary file first. Options are for creating a symbol 348193323Sed// table, flattening the file names (no directories, 15 chars max) and 349193323Sed// compressing each archive member. 350193323Sedbool 351239462SdimArchive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, 352193323Sed std::string* ErrMsg) 353193323Sed{ 354193323Sed // Make sure they haven't opened up the file, not loaded it, 355193323Sed // but are now trying to write it which would wipe out the file. 
356193323Sed if (members.empty() && mapfile && mapfile->getBufferSize() > 8) { 357193323Sed if (ErrMsg) 358193323Sed *ErrMsg = "Can't write an archive not opened for writing"; 359193323Sed return true; 360193323Sed } 361193323Sed 362193323Sed // Create a temporary file to store the archive in 363221345Sdim sys::Path TmpArchive = archPath; 364221345Sdim if (TmpArchive.createTemporaryFileOnDisk(ErrMsg)) 365193323Sed return true; 366193323Sed 367193323Sed // Make sure the temporary gets removed if we crash 368221345Sdim sys::RemoveFileOnSignal(TmpArchive); 369193323Sed 370193323Sed // Create archive file for output. 371221345Sdim std::ios::openmode io_mode = std::ios::out | std::ios::trunc | 372221345Sdim std::ios::binary; 373221345Sdim std::ofstream ArchiveFile(TmpArchive.c_str(), io_mode); 374193323Sed 375221345Sdim // Check for errors opening or creating archive file. 376221345Sdim if (!ArchiveFile.is_open() || ArchiveFile.bad()) { 377221345Sdim TmpArchive.eraseFromDisk(); 378221345Sdim if (ErrMsg) 379221345Sdim *ErrMsg = "Error opening archive file: " + archPath.str(); 380221345Sdim return true; 381221345Sdim } 382221345Sdim 383193323Sed // If we're creating a symbol table, reset it now 384193323Sed if (CreateSymbolTable) { 385193323Sed symTabSize = 0; 386193323Sed symTab.clear(); 387193323Sed } 388193323Sed 389193323Sed // Write magic string to archive. 390193323Sed ArchiveFile << ARFILE_MAGIC; 391193323Sed 392193323Sed // Loop over all member files, and write them out. Note that this also 393193323Sed // builds the symbol table, symTab. 394193323Sed for (MembersList::iterator I = begin(), E = end(); I != E; ++I) { 395193323Sed if (writeMember(*I, ArchiveFile, CreateSymbolTable, 396239462Sdim TruncateNames, ErrMsg)) { 397221345Sdim TmpArchive.eraseFromDisk(); 398193323Sed ArchiveFile.close(); 399193323Sed return true; 400193323Sed } 401193323Sed } 402193323Sed 403193323Sed // Close archive file. 
404193323Sed ArchiveFile.close(); 405193323Sed 406193323Sed // Write the symbol table 407193323Sed if (CreateSymbolTable) { 408193323Sed // At this point we have written a file that is a legal archive but it 409193323Sed // doesn't have a symbol table in it. To aid in faster reading and to 410193323Sed // ensure compatibility with other archivers we need to put the symbol 411193323Sed // table first in the file. Unfortunately, this means mapping the file 412193323Sed // we just wrote back in and copying it to the destination file. 413221345Sdim sys::Path FinalFilePath = archPath; 414193323Sed 415193323Sed // Map in the archive we just wrote. 416193323Sed { 417218893Sdim OwningPtr<MemoryBuffer> arch; 418221345Sdim if (error_code ec = MemoryBuffer::getFile(TmpArchive.c_str(), arch)) { 419218893Sdim if (ErrMsg) 420218893Sdim *ErrMsg = ec.message(); 421218893Sdim return true; 422218893Sdim } 423193323Sed const char* base = arch->getBufferStart(); 424193323Sed 425218893Sdim // Open another temporary file in order to avoid invalidating the 426193323Sed // mmapped data 427221345Sdim if (FinalFilePath.createTemporaryFileOnDisk(ErrMsg)) 428193323Sed return true; 429221345Sdim sys::RemoveFileOnSignal(FinalFilePath); 430221345Sdim 431221345Sdim std::ofstream FinalFile(FinalFilePath.c_str(), io_mode); 432221345Sdim if (!FinalFile.is_open() || FinalFile.bad()) { 433221345Sdim TmpArchive.eraseFromDisk(); 434221345Sdim if (ErrMsg) 435221345Sdim *ErrMsg = "Error opening archive file: " + FinalFilePath.str(); 436221345Sdim return true; 437193323Sed } 438193323Sed 439193323Sed // Write the file magic number 440193323Sed FinalFile << ARFILE_MAGIC; 441193323Sed 442193323Sed // If there is a foreign symbol table, put it into the file now. Most 443193323Sed // ar(1) implementations require the symbol table to be first but llvm-ar 444193323Sed // can deal with it being after a foreign symbol table. 
This ensures 445193323Sed // compatibility with other ar(1) implementations as well as allowing the 446193323Sed // archive to store both native .o and LLVM .bc files, both indexed. 447193323Sed if (foreignST) { 448239462Sdim if (writeMember(*foreignST, FinalFile, false, false, ErrMsg)) { 449193323Sed FinalFile.close(); 450221345Sdim TmpArchive.eraseFromDisk(); 451193323Sed return true; 452193323Sed } 453193323Sed } 454193323Sed 455193323Sed // Put out the LLVM symbol table now. 456193323Sed writeSymbolTable(FinalFile); 457193323Sed 458193323Sed // Copy the temporary file contents being sure to skip the file's magic 459193323Sed // number. 460193323Sed FinalFile.write(base + sizeof(ARFILE_MAGIC)-1, 461193323Sed arch->getBufferSize()-sizeof(ARFILE_MAGIC)+1); 462193323Sed 463193323Sed // Close up shop 464193323Sed FinalFile.close(); 465193323Sed } // free arch. 466218893Sdim 467193323Sed // Move the final file over top of TmpArchive 468221345Sdim if (FinalFilePath.renamePathOnDisk(TmpArchive, ErrMsg)) 469193323Sed return true; 470193323Sed } 471218893Sdim 472193323Sed // Before we replace the actual archive, we need to forget all the 473193323Sed // members, since they point to data in that old archive. We need to do 474193323Sed // this because we cannot replace an open file on Windows. 475193323Sed cleanUpMemory(); 476218893Sdim 477221345Sdim if (TmpArchive.renamePathOnDisk(archPath, ErrMsg)) 478193323Sed return true; 479193323Sed 480193323Sed // Set correct read and write permissions after temporary file is moved 481193323Sed // to final destination path. 482193323Sed if (archPath.makeReadableOnDisk(ErrMsg)) 483193323Sed return true; 484193323Sed if (archPath.makeWriteableOnDisk(ErrMsg)) 485193323Sed return true; 486193323Sed 487193323Sed return false; 488193323Sed} 489