1/* 2 * Copyright 2008, Axel D��rfler, axeld@pinc-software.de. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7#include <algorithm> 8#include <string> 9#include <vector> 10 11#include <dirent.h> 12#include <errno.h> 13#include <stdio.h> 14#include <stdlib.h> 15#include <string.h> 16#include <unistd.h> 17 18#include <OS.h> 19#include <Path.h> 20 21#include <SHA256.h> 22 23#include "AdaptiveBuffering.h" 24 25 26//#define TRACE(x...) printf(x) 27#define TRACE(x...) ; 28 29 30extern const char *__progname; 31static const char *kProgramName = __progname; 32 33const size_t kInitialBufferSize = 1 * 1024 * 1024; 34const size_t kMaxBufferSize = 10 * 1024 * 1024; 35 36 37class SHAProcessor : public AdaptiveBuffering { 38public: 39 SHAProcessor() 40 : AdaptiveBuffering(kInitialBufferSize, kMaxBufferSize, 3), 41 fFile(-1) 42 { 43 } 44 45 virtual ~SHAProcessor() 46 { 47 Unset(); 48 } 49 50 void Unset() 51 { 52 if (fFile >= 0) 53 close(fFile); 54 } 55 56 status_t Process(int file) 57 { 58 Unset(); 59 fSHA.Init(); 60 fFile = file; 61 62 return Run(); 63 } 64 65 virtual status_t Read(uint8* buffer, size_t* _length) 66 { 67 ssize_t bytes = read(fFile, buffer, *_length); 68 if (bytes < B_OK) 69 return errno; 70 71 *_length = bytes; 72 return B_OK; 73 } 74 75 virtual status_t Write(uint8* buffer, size_t length) 76 { 77 fSHA.Update(buffer, length); 78 return B_OK; 79 } 80 81 const uint8* Digest() { return fSHA.Digest(); } 82 size_t DigestLength() const { return fSHA.DigestLength(); } 83 84private: 85 SHA256 fSHA; 86 int fFile; 87}; 88 89struct file_entry { 90 uint8 hash[SHA_DIGEST_LENGTH]; 91 ino_t node; 92 std::string path; 93 94 bool operator<(const struct file_entry& other) const 95 { 96 return path < other.path; 97 } 98 99 std::string HashString() const 100 { 101 char buffer[128]; 102 for (int i = 0; i < SHA_DIGEST_LENGTH; i++) { 103 sprintf(buffer + i * 2, "%02x", hash[i]); 104 } 105 106 return buffer; 107 } 108}; 109 110typedef std::vector<file_entry> FileList; 111 112void process_file(const char* path); 113 114 115SHAProcessor gSHA; 116FileList gFiles; 117 118 119void 120process_file(const file_entry& entry, int number) 121{ 122 struct stat stat; 123 if (::stat(entry.path.c_str(), &stat) != 0) { 124 fprintf(stderr, "Could not stat file \"%s\": %s\n", entry.path.c_str(), 125 strerror(errno)); 126 return; 127 } 128 129 if (stat.st_ino != entry.node) { 130 fprintf(stderr, "\"%s\": inode changed from %lld to %lld\n", 131 entry.path.c_str(), entry.node, stat.st_ino); 132 } 133 134 int file = open(entry.path.c_str(), O_RDONLY); 135 if (file < 0) { 136 fprintf(stderr, "Could not open file \"%s\": %s\n", entry.path.c_str(), 137 strerror(errno)); 138 return; 139 } 140 141 status_t status = gSHA.Process(file); 142 if (status != B_OK) { 143 fprintf(stderr, "Computing SHA failed \"%s\": %s\n", entry.path.c_str(), 144 strerror(status)); 145 return; 146 } 147 148 if (memcmp(entry.hash, gSHA.Digest(), SHA_DIGEST_LENGTH)) 149 fprintf(stderr, "\"%s\": Contents differ!\n", entry.path.c_str()); 150 151 static bigtime_t sLastUpdate = -1; 152 if (system_time() - sLastUpdate > 500000) { 153 printf("%ld files scanned\33[1A\n", number); 154 sLastUpdate = system_time(); 155 } 156} 157 158 159int 160main(int argc, char** argv) 161{ 162 if (argc != 2) { 163 fprintf(stderr, "usage: %s <hash-file>\n", kProgramName); 164 return 1; 165 } 166 167 const char* hashFileName = argv[1]; 168 169 status_t status = gSHA.Init(); 170 if (status != B_OK) { 171 fprintf(stderr, "%s: Could not initialize SHA processor: %s\n", 172 kProgramName, strerror(status)); 173 return 1; 174 } 175 176 // read files from hash file 177 178 int file = open(hashFileName, O_RDONLY); 179 if (file < 0) { 180 fprintf(stderr, "%s: Could not open hash file \"%s\": %s\n", 181 kProgramName, hashFileName, strerror(status)); 182 return 1; 183 } 184 185 char buffer[2048]; 186 read(file, buffer, 4); 187 if (memcmp(buffer, "HASH", 4)) { 188 fprintf(stderr, "%s: \"%s\" is not a hash file\n", 189 kProgramName, hashFileName); 190 close(file); 191 return 1; 192 } 193 194 int fileCount; 195 read(file, &fileCount, sizeof(int)); 196 TRACE("Skip %d path(s)\n", fileCount); 197 198 // Skip paths, we don't need it for the consistency check 199 200 for (int i = 0; i < fileCount; i++) { 201 int length; 202 read(file, &length, sizeof(int)); 203 lseek(file, length + 1, SEEK_CUR); 204 } 205 206 // Read file names and their hash 207 208 read(file, &fileCount, sizeof(int)); 209 TRACE("Found %d file(s)\n", fileCount); 210 211 for (int i = 0; i < fileCount; i++) { 212 file_entry entry; 213 read(file, entry.hash, SHA_DIGEST_LENGTH); 214 read(file, &entry.node, sizeof(ino_t)); 215 216 int length; 217 read(file, &length, sizeof(int)); 218 read(file, buffer, length + 1); 219 220 entry.path = buffer; 221 222 gFiles.push_back(entry); 223 } 224 225 close(file); 226 227 bigtime_t start = system_time(); 228 229 for (int i = 0; i < fileCount; i++) { 230 process_file(gFiles[i], i); 231 } 232 233 bigtime_t runtime = system_time() - start; 234 235 if (gFiles.size() > 0) { 236 printf("Consistency check for %ld files in %g seconds, %g msec per " 237 "file.\n", gFiles.size(), runtime / 1000000.0, 238 runtime / 1000.0 / gFiles.size()); 239 } 240 241 return 0; 242} 243