1193323Sed//===-- ArchiveWriter.cpp - Write LLVM archive files ----------------------===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// Builds up an LLVM archive file (.a) containing LLVM bitcode. 11193323Sed// 12193323Sed//===----------------------------------------------------------------------===// 13193323Sed 14249423Sdim#include "llvm/Bitcode/Archive.h" 15193323Sed#include "ArchiveInternals.h" 16203954Srdivacky#include "llvm/ADT/OwningPtr.h" 17193323Sed#include "llvm/Bitcode/ReaderWriter.h" 18249423Sdim#include "llvm/IR/Module.h" 19218893Sdim#include "llvm/Support/FileSystem.h" 20193323Sed#include "llvm/Support/MemoryBuffer.h" 21218893Sdim#include "llvm/Support/Process.h" 22218893Sdim#include "llvm/Support/Signals.h" 23218893Sdim#include "llvm/Support/system_error.h" 24193323Sed#include <fstream> 25249423Sdim#include <iomanip> 26193323Sed#include <ostream> 27193323Sedusing namespace llvm; 28193323Sed 29193323Sed// Write an integer using variable bit rate encoding. This saves a few bytes 30193323Sed// per entry in the symbol table. 31221345Sdimstatic inline void writeInteger(unsigned num, std::ofstream& ARFile) { 32193323Sed while (1) { 33193323Sed if (num < 0x80) { // done? 34193323Sed ARFile << (unsigned char)num; 35193323Sed return; 36193323Sed } 37193323Sed 38193323Sed // Nope, we are bigger than a character, output the next 7 bits and set the 39193323Sed // high bit to say that there is more coming... 40193323Sed ARFile << (unsigned char)(0x80 | ((unsigned char)num & 0x7F)); 41193323Sed num >>= 7; // Shift out 7 bits now... 42193323Sed } 43193323Sed} 44193323Sed 45193323Sed// Compute how many bytes are taken by a given VBR encoded value. This is needed 46193323Sed// to pre-compute the size of the symbol table. 47193323Sedstatic inline unsigned numVbrBytes(unsigned num) { 48193323Sed 49193323Sed // Note that the following nested ifs are somewhat equivalent to a binary 50193323Sed // search. We split it in half by comparing against 2^14 first. This allows 51193323Sed // most reasonable values to be done in 2 comparisons instead of 1 for 52193323Sed // small ones and four for large ones. We expect this to access file offsets 53193323Sed // in the 2^10 to 2^24 range and symbol lengths in the 2^0 to 2^8 range, 54193323Sed // so this approach is reasonable. 55193323Sed if (num < 1<<14) { 56193323Sed if (num < 1<<7) 57193323Sed return 1; 58193323Sed else 59193323Sed return 2; 60193323Sed } 61193323Sed if (num < 1<<21) 62193323Sed return 3; 63193323Sed 64193323Sed if (num < 1<<28) 65193323Sed return 4; 66193323Sed return 5; // anything >= 2^28 takes 5 bytes 67193323Sed} 68193323Sed 69193323Sed// Create an empty archive. 70195340SedArchive* Archive::CreateEmpty(const sys::Path& FilePath, LLVMContext& C) { 71195340Sed Archive* result = new Archive(FilePath, C); 72193323Sed return result; 73193323Sed} 74193323Sed 75193323Sed// Fill the ArchiveMemberHeader with the information from a member. If 76193323Sed// TruncateNames is true, names are flattened to 15 chars or less. The sz field 77193323Sed// is provided here instead of coming from the mbr because the member might be 78193323Sed// stored compressed and the compressed size is not the ArchiveMember's size. 79193323Sed// Furthermore compressed files have negative size fields to identify them as 80193323Sed// compressed. 81193323Sedbool 82193323SedArchive::fillHeader(const ArchiveMember &mbr, ArchiveMemberHeader& hdr, 83193323Sed int sz, bool TruncateNames) const { 84193323Sed 85193323Sed // Set the permissions mode, uid and gid 86193323Sed hdr.init(); 87193323Sed char buffer[32]; 88193323Sed sprintf(buffer, "%-8o", mbr.getMode()); 89193323Sed memcpy(hdr.mode,buffer,8); 90193323Sed sprintf(buffer, "%-6u", mbr.getUser()); 91193323Sed memcpy(hdr.uid,buffer,6); 92193323Sed sprintf(buffer, "%-6u", mbr.getGroup()); 93193323Sed memcpy(hdr.gid,buffer,6); 94193323Sed 95193323Sed // Set the last modification date 96193323Sed uint64_t secondsSinceEpoch = mbr.getModTime().toEpochTime(); 97193323Sed sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch)); 98193323Sed memcpy(hdr.date,buffer,12); 99193323Sed 100193323Sed // Get rid of trailing blanks in the name 101198090Srdivacky std::string mbrPath = mbr.getPath().str(); 102193323Sed size_t mbrLen = mbrPath.length(); 103193323Sed while (mbrLen > 0 && mbrPath[mbrLen-1] == ' ') { 104193323Sed mbrPath.erase(mbrLen-1,1); 105193323Sed mbrLen--; 106193323Sed } 107193323Sed 108193323Sed // Set the name field in one of its various flavors. 109193323Sed bool writeLongName = false; 110193323Sed if (mbr.isStringTable()) { 111193323Sed memcpy(hdr.name,ARFILE_STRTAB_NAME,16); 112193323Sed } else if (mbr.isSVR4SymbolTable()) { 113193323Sed memcpy(hdr.name,ARFILE_SVR4_SYMTAB_NAME,16); 114193323Sed } else if (mbr.isBSD4SymbolTable()) { 115193323Sed memcpy(hdr.name,ARFILE_BSD4_SYMTAB_NAME,16); 116193323Sed } else if (mbr.isLLVMSymbolTable()) { 117193323Sed memcpy(hdr.name,ARFILE_LLVM_SYMTAB_NAME,16); 118193323Sed } else if (TruncateNames) { 119193323Sed const char* nm = mbrPath.c_str(); 120193323Sed unsigned len = mbrPath.length(); 121193323Sed size_t slashpos = mbrPath.rfind('/'); 122193323Sed if (slashpos != std::string::npos) { 123193323Sed nm += slashpos + 1; 124193323Sed len -= slashpos +1; 125193323Sed } 126193323Sed if (len > 15) 127193323Sed len = 15; 128193323Sed memcpy(hdr.name,nm,len); 129193323Sed hdr.name[len] = '/'; 130193323Sed } else if (mbrPath.length() < 16 && mbrPath.find('/') == std::string::npos) { 131193323Sed memcpy(hdr.name,mbrPath.c_str(),mbrPath.length()); 132193323Sed hdr.name[mbrPath.length()] = '/'; 133193323Sed } else { 134193323Sed std::string nm = "#1/"; 135193323Sed nm += utostr(mbrPath.length()); 136193323Sed memcpy(hdr.name,nm.data(),nm.length()); 137193323Sed if (sz < 0) 138193323Sed sz -= mbrPath.length(); 139193323Sed else 140193323Sed sz += mbrPath.length(); 141193323Sed writeLongName = true; 142193323Sed } 143193323Sed 144193323Sed // Set the size field 145193323Sed if (sz < 0) { 146193323Sed buffer[0] = '-'; 147193323Sed sprintf(&buffer[1],"%-9u",(unsigned)-sz); 148193323Sed } else { 149193323Sed sprintf(buffer, "%-10u", (unsigned)sz); 150193323Sed } 151193323Sed memcpy(hdr.size,buffer,10); 152193323Sed 153193323Sed return writeLongName; 154193323Sed} 155193323Sed 156193323Sed// Insert a file into the archive before some other member. This also takes care 157193323Sed// of extracting the necessary flags and information from the file. 158193323Sedbool 159218893SdimArchive::addFileBefore(const sys::Path& filePath, iterator where, 160193323Sed std::string* ErrMsg) { 161218893Sdim bool Exists; 162218893Sdim if (sys::fs::exists(filePath.str(), Exists) || !Exists) { 163193323Sed if (ErrMsg) 164193323Sed *ErrMsg = "Can not add a non-existent file to archive"; 165193323Sed return true; 166193323Sed } 167193323Sed 168193323Sed ArchiveMember* mbr = new ArchiveMember(this); 169193323Sed 170193323Sed mbr->data = 0; 171193323Sed mbr->path = filePath; 172193323Sed const sys::FileStatus *FSInfo = mbr->path.getFileStatus(false, ErrMsg); 173194178Sed if (!FSInfo) { 174194178Sed delete mbr; 175193323Sed return true; 176194178Sed } 177194178Sed mbr->info = *FSInfo; 178193323Sed 179193323Sed unsigned flags = 0; 180198090Srdivacky bool hasSlash = filePath.str().find('/') != std::string::npos; 181193323Sed if (hasSlash) 182193323Sed flags |= ArchiveMember::HasPathFlag; 183198090Srdivacky if (hasSlash || filePath.str().length() > 15) 184193323Sed flags |= ArchiveMember::HasLongFilenameFlag; 185218893Sdim 186234353Sdim sys::fs::file_magic type; 187218893Sdim if (sys::fs::identify_magic(mbr->path.str(), type)) 188234353Sdim type = sys::fs::file_magic::unknown; 189218893Sdim switch (type) { 190234353Sdim case sys::fs::file_magic::bitcode: 191193323Sed flags |= ArchiveMember::BitcodeFlag; 192193323Sed break; 193193323Sed default: 194193323Sed break; 195193323Sed } 196193323Sed mbr->flags = flags; 197193323Sed members.insert(where,mbr); 198193323Sed return false; 199193323Sed} 200193323Sed 201193323Sed// Write one member out to the file. 202193323Sedbool 203193323SedArchive::writeMember( 204193323Sed const ArchiveMember& member, 205221345Sdim std::ofstream& ARFile, 206193323Sed bool CreateSymbolTable, 207193323Sed bool TruncateNames, 208193323Sed std::string* ErrMsg 209193323Sed) { 210193323Sed 211221345Sdim unsigned filepos = ARFile.tellp(); 212193323Sed filepos -= 8; 213193323Sed 214193323Sed // Get the data and its size either from the 215193323Sed // member's in-memory data or directly from the file. 216193323Sed size_t fSize = member.getSize(); 217193323Sed const char *data = (const char*)member.getData(); 218193323Sed MemoryBuffer *mFile = 0; 219193323Sed if (!data) { 220218893Sdim OwningPtr<MemoryBuffer> File; 221218893Sdim if (error_code ec = MemoryBuffer::getFile(member.getPath().c_str(), File)) { 222218893Sdim if (ErrMsg) 223218893Sdim *ErrMsg = ec.message(); 224193323Sed return true; 225218893Sdim } 226218893Sdim mFile = File.take(); 227193323Sed data = mFile->getBufferStart(); 228193323Sed fSize = mFile->getBufferSize(); 229193323Sed } 230193323Sed 231193323Sed // Now that we have the data in memory, update the 232206083Srdivacky // symbol table if it's a bitcode file. 233193323Sed if (CreateSymbolTable && member.isBitcode()) { 234193323Sed std::vector<std::string> symbols; 235198090Srdivacky std::string FullMemberName = archPath.str() + "(" + member.getPath().str() 236193323Sed + ")"; 237218893Sdim Module* M = 238207618Srdivacky GetBitcodeSymbols(data, fSize, FullMemberName, Context, symbols, ErrMsg); 239193323Sed 240193323Sed // If the bitcode parsed successfully 241203954Srdivacky if ( M ) { 242193323Sed for (std::vector<std::string>::iterator SI = symbols.begin(), 243193323Sed SE = symbols.end(); SI != SE; ++SI) { 244193323Sed 245193323Sed std::pair<SymTabType::iterator,bool> Res = 246193323Sed symTab.insert(std::make_pair(*SI,filepos)); 247193323Sed 248193323Sed if (Res.second) { 249193323Sed symTabSize += SI->length() + 250193323Sed numVbrBytes(SI->length()) + 251193323Sed numVbrBytes(filepos); 252193323Sed } 253193323Sed } 254193323Sed // We don't need this module any more. 255203954Srdivacky delete M; 256193323Sed } else { 257193323Sed delete mFile; 258193323Sed if (ErrMsg) 259198090Srdivacky *ErrMsg = "Can't parse bitcode member: " + member.getPath().str() 260193323Sed + ": " + *ErrMsg; 261193323Sed return true; 262193323Sed } 263193323Sed } 264193323Sed 265193323Sed int hdrSize = fSize; 266193323Sed 267193323Sed // Compute the fields of the header 268193323Sed ArchiveMemberHeader Hdr; 269193323Sed bool writeLongName = fillHeader(member,Hdr,hdrSize,TruncateNames); 270193323Sed 271193323Sed // Write header to archive file 272193323Sed ARFile.write((char*)&Hdr, sizeof(Hdr)); 273193323Sed 274193323Sed // Write the long filename if its long 275193323Sed if (writeLongName) { 276198090Srdivacky ARFile.write(member.getPath().str().data(), 277198090Srdivacky member.getPath().str().length()); 278193323Sed } 279193323Sed 280193323Sed // Write the (possibly compressed) member's content to the file. 281193323Sed ARFile.write(data,fSize); 282193323Sed 283193323Sed // Make sure the member is an even length 284221345Sdim if ((ARFile.tellp() & 1) == 1) 285193323Sed ARFile << ARFILE_PAD; 286193323Sed 287193323Sed // Close the mapped file if it was opened 288193323Sed delete mFile; 289193323Sed return false; 290193323Sed} 291193323Sed 292193323Sed// Write out the LLVM symbol table as an archive member to the file. 293193323Sedvoid 294221345SdimArchive::writeSymbolTable(std::ofstream& ARFile) { 295193323Sed 296193323Sed // Construct the symbol table's header 297193323Sed ArchiveMemberHeader Hdr; 298193323Sed Hdr.init(); 299193323Sed memcpy(Hdr.name,ARFILE_LLVM_SYMTAB_NAME,16); 300193323Sed uint64_t secondsSinceEpoch = sys::TimeValue::now().toEpochTime(); 301193323Sed char buffer[32]; 302193323Sed sprintf(buffer, "%-8o", 0644); 303193323Sed memcpy(Hdr.mode,buffer,8); 304193323Sed sprintf(buffer, "%-6u", sys::Process::GetCurrentUserId()); 305193323Sed memcpy(Hdr.uid,buffer,6); 306193323Sed sprintf(buffer, "%-6u", sys::Process::GetCurrentGroupId()); 307193323Sed memcpy(Hdr.gid,buffer,6); 308193323Sed sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch)); 309193323Sed memcpy(Hdr.date,buffer,12); 310193323Sed sprintf(buffer,"%-10u",symTabSize); 311193323Sed memcpy(Hdr.size,buffer,10); 312193323Sed 313193323Sed // Write the header 314193323Sed ARFile.write((char*)&Hdr, sizeof(Hdr)); 315193323Sed 316193323Sed#ifndef NDEBUG 317193323Sed // Save the starting position of the symbol tables data content. 318221345Sdim unsigned startpos = ARFile.tellp(); 319193323Sed#endif 320193323Sed 321193323Sed // Write out the symbols sequentially 322193323Sed for ( Archive::SymTabType::iterator I = symTab.begin(), E = symTab.end(); 323193323Sed I != E; ++I) 324193323Sed { 325193323Sed // Write out the file index 326193323Sed writeInteger(I->second, ARFile); 327193323Sed // Write out the length of the symbol 328193323Sed writeInteger(I->first.length(), ARFile); 329193323Sed // Write out the symbol 330193323Sed ARFile.write(I->first.data(), I->first.length()); 331193323Sed } 332193323Sed 333193323Sed#ifndef NDEBUG 334193323Sed // Now that we're done with the symbol table, get the ending file position 335221345Sdim unsigned endpos = ARFile.tellp(); 336193323Sed#endif 337193323Sed 338193323Sed // Make sure that the amount we wrote is what we pre-computed. This is 339193323Sed // critical for file integrity purposes. 340193323Sed assert(endpos - startpos == symTabSize && "Invalid symTabSize computation"); 341193323Sed 342193323Sed // Make sure the symbol table is even sized 343193323Sed if (symTabSize % 2 != 0 ) 344193323Sed ARFile << ARFILE_PAD; 345193323Sed} 346193323Sed 347193323Sed// Write the entire archive to the file specified when the archive was created. 348193323Sed// This writes to a temporary file first. Options are for creating a symbol 349193323Sed// table, flattening the file names (no directories, 15 chars max) and 350193323Sed// compressing each archive member. 351193323Sedbool 352239462SdimArchive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, 353193323Sed std::string* ErrMsg) 354193323Sed{ 355193323Sed // Make sure they haven't opened up the file, not loaded it, 356193323Sed // but are now trying to write it which would wipe out the file. 357193323Sed if (members.empty() && mapfile && mapfile->getBufferSize() > 8) { 358193323Sed if (ErrMsg) 359193323Sed *ErrMsg = "Can't write an archive not opened for writing"; 360193323Sed return true; 361193323Sed } 362193323Sed 363193323Sed // Create a temporary file to store the archive in 364221345Sdim sys::Path TmpArchive = archPath; 365221345Sdim if (TmpArchive.createTemporaryFileOnDisk(ErrMsg)) 366193323Sed return true; 367193323Sed 368193323Sed // Make sure the temporary gets removed if we crash 369221345Sdim sys::RemoveFileOnSignal(TmpArchive); 370193323Sed 371193323Sed // Create archive file for output. 372221345Sdim std::ios::openmode io_mode = std::ios::out | std::ios::trunc | 373221345Sdim std::ios::binary; 374221345Sdim std::ofstream ArchiveFile(TmpArchive.c_str(), io_mode); 375193323Sed 376221345Sdim // Check for errors opening or creating archive file. 377221345Sdim if (!ArchiveFile.is_open() || ArchiveFile.bad()) { 378221345Sdim TmpArchive.eraseFromDisk(); 379221345Sdim if (ErrMsg) 380221345Sdim *ErrMsg = "Error opening archive file: " + archPath.str(); 381221345Sdim return true; 382221345Sdim } 383221345Sdim 384193323Sed // If we're creating a symbol table, reset it now 385193323Sed if (CreateSymbolTable) { 386193323Sed symTabSize = 0; 387193323Sed symTab.clear(); 388193323Sed } 389193323Sed 390193323Sed // Write magic string to archive. 391193323Sed ArchiveFile << ARFILE_MAGIC; 392193323Sed 393193323Sed // Loop over all member files, and write them out. Note that this also 394193323Sed // builds the symbol table, symTab. 395193323Sed for (MembersList::iterator I = begin(), E = end(); I != E; ++I) { 396193323Sed if (writeMember(*I, ArchiveFile, CreateSymbolTable, 397239462Sdim TruncateNames, ErrMsg)) { 398221345Sdim TmpArchive.eraseFromDisk(); 399193323Sed ArchiveFile.close(); 400193323Sed return true; 401193323Sed } 402193323Sed } 403193323Sed 404193323Sed // Close archive file. 405193323Sed ArchiveFile.close(); 406193323Sed 407193323Sed // Write the symbol table 408193323Sed if (CreateSymbolTable) { 409193323Sed // At this point we have written a file that is a legal archive but it 410193323Sed // doesn't have a symbol table in it. To aid in faster reading and to 411193323Sed // ensure compatibility with other archivers we need to put the symbol 412193323Sed // table first in the file. Unfortunately, this means mapping the file 413193323Sed // we just wrote back in and copying it to the destination file. 414221345Sdim sys::Path FinalFilePath = archPath; 415193323Sed 416193323Sed // Map in the archive we just wrote. 417193323Sed { 418218893Sdim OwningPtr<MemoryBuffer> arch; 419221345Sdim if (error_code ec = MemoryBuffer::getFile(TmpArchive.c_str(), arch)) { 420218893Sdim if (ErrMsg) 421218893Sdim *ErrMsg = ec.message(); 422218893Sdim return true; 423218893Sdim } 424193323Sed const char* base = arch->getBufferStart(); 425193323Sed 426218893Sdim // Open another temporary file in order to avoid invalidating the 427193323Sed // mmapped data 428221345Sdim if (FinalFilePath.createTemporaryFileOnDisk(ErrMsg)) 429193323Sed return true; 430221345Sdim sys::RemoveFileOnSignal(FinalFilePath); 431221345Sdim 432221345Sdim std::ofstream FinalFile(FinalFilePath.c_str(), io_mode); 433221345Sdim if (!FinalFile.is_open() || FinalFile.bad()) { 434221345Sdim TmpArchive.eraseFromDisk(); 435221345Sdim if (ErrMsg) 436221345Sdim *ErrMsg = "Error opening archive file: " + FinalFilePath.str(); 437221345Sdim return true; 438193323Sed } 439193323Sed 440193323Sed // Write the file magic number 441193323Sed FinalFile << ARFILE_MAGIC; 442193323Sed 443193323Sed // If there is a foreign symbol table, put it into the file now. Most 444193323Sed // ar(1) implementations require the symbol table to be first but llvm-ar 445193323Sed // can deal with it being after a foreign symbol table. This ensures 446193323Sed // compatibility with other ar(1) implementations as well as allowing the 447193323Sed // archive to store both native .o and LLVM .bc files, both indexed. 448193323Sed if (foreignST) { 449239462Sdim if (writeMember(*foreignST, FinalFile, false, false, ErrMsg)) { 450193323Sed FinalFile.close(); 451221345Sdim TmpArchive.eraseFromDisk(); 452193323Sed return true; 453193323Sed } 454193323Sed } 455193323Sed 456193323Sed // Put out the LLVM symbol table now. 457193323Sed writeSymbolTable(FinalFile); 458193323Sed 459193323Sed // Copy the temporary file contents being sure to skip the file's magic 460193323Sed // number. 461193323Sed FinalFile.write(base + sizeof(ARFILE_MAGIC)-1, 462193323Sed arch->getBufferSize()-sizeof(ARFILE_MAGIC)+1); 463193323Sed 464193323Sed // Close up shop 465193323Sed FinalFile.close(); 466193323Sed } // free arch. 467218893Sdim 468193323Sed // Move the final file over top of TmpArchive 469221345Sdim if (FinalFilePath.renamePathOnDisk(TmpArchive, ErrMsg)) 470193323Sed return true; 471193323Sed } 472218893Sdim 473193323Sed // Before we replace the actual archive, we need to forget all the 474193323Sed // members, since they point to data in that old archive. We need to do 475193323Sed // this because we cannot replace an open file on Windows. 476193323Sed cleanUpMemory(); 477218893Sdim 478221345Sdim if (TmpArchive.renamePathOnDisk(archPath, ErrMsg)) 479193323Sed return true; 480193323Sed 481193323Sed // Set correct read and write permissions after temporary file is moved 482193323Sed // to final destination path. 483193323Sed if (archPath.makeReadableOnDisk(ErrMsg)) 484193323Sed return true; 485193323Sed if (archPath.makeWriteableOnDisk(ErrMsg)) 486193323Sed return true; 487193323Sed 488193323Sed return false; 489193323Sed} 490