1/*
 * Copyright 2008, Axel Dörfler, axeld@pinc-software.de.
3 * Distributed under the terms of the MIT License.
4 */
5
6
7#include <algorithm>
8#include <string>
9#include <vector>
10
11#include <dirent.h>
12#include <errno.h>
13#include <stdio.h>
14#include <stdlib.h>
15#include <string.h>
16#include <unistd.h>
17
18#include <OS.h>
19#include <Path.h>
20
21#include <SHA256.h>
22
23#include "AdaptiveBuffering.h"
24
25
26//#define TRACE(x...) printf(x)
27#define TRACE(x...) ;
28
29
30extern const char *__progname;
31static const char *kProgramName = __progname;
32
33const size_t kInitialBufferSize = 1 * 1024 * 1024;
34const size_t kMaxBufferSize = 10 * 1024 * 1024;
35
36
37class SHAProcessor : public AdaptiveBuffering {
38public:
39	SHAProcessor()
40		: AdaptiveBuffering(kInitialBufferSize, kMaxBufferSize, 3),
41		fFile(-1)
42	{
43	}
44
45	virtual ~SHAProcessor()
46	{
47		Unset();
48	}
49
50	void Unset()
51	{
52		if (fFile >= 0)
53			close(fFile);
54	}
55
56	status_t Process(int file)
57	{
58		Unset();
59		fSHA.Init();
60		fFile = file;
61
62		return Run();
63	}
64
65	virtual status_t Read(uint8* buffer, size_t* _length)
66	{
67		ssize_t bytes = read(fFile, buffer, *_length);
68		if (bytes < B_OK)
69			return errno;
70
71		*_length = bytes;
72		return B_OK;
73	}
74
75	virtual status_t Write(uint8* buffer, size_t length)
76	{
77		fSHA.Update(buffer, length);
78		return B_OK;
79	}
80
81	const uint8* Digest() { return fSHA.Digest(); }
82	size_t DigestLength() const	{ return fSHA.DigestLength(); }
83
84private:
85	SHA256	fSHA;
86	int		fFile;
87};
88
89struct file_entry {
90	uint8			hash[SHA_DIGEST_LENGTH];
91	ino_t			node;
92	std::string		path;
93
94	bool operator<(const struct file_entry& other) const
95	{
96		return path < other.path;
97	}
98
99	std::string HashString() const
100	{
101		char buffer[128];
102		for (int i = 0; i < SHA_DIGEST_LENGTH; i++) {
103			sprintf(buffer + i * 2, "%02x", hash[i]);
104		}
105
106		return buffer;
107	}
108};
109
110typedef std::vector<file_entry> FileList;
111
112void process_file(const char* path);
113
114
115SHAProcessor gSHA;
116FileList gFiles;
117
118
119void
120process_file(const file_entry& entry, int number)
121{
122	struct stat stat;
123	if (::stat(entry.path.c_str(), &stat) != 0) {
124		fprintf(stderr, "Could not stat file \"%s\": %s\n", entry.path.c_str(),
125			strerror(errno));
126		return;
127	}
128
129	if (stat.st_ino != entry.node) {
130		fprintf(stderr, "\"%s\": inode changed from %lld to %lld\n",
131			entry.path.c_str(), entry.node, stat.st_ino);
132	}
133
134	int file = open(entry.path.c_str(), O_RDONLY);
135	if (file < 0) {
136		fprintf(stderr, "Could not open file \"%s\": %s\n", entry.path.c_str(),
137			strerror(errno));
138		return;
139	}
140
141	status_t status = gSHA.Process(file);
142	if (status != B_OK) {
143		fprintf(stderr, "Computing SHA failed \"%s\": %s\n", entry.path.c_str(),
144			strerror(status));
145		return;
146	}
147
148	if (memcmp(entry.hash, gSHA.Digest(), SHA_DIGEST_LENGTH))
149		fprintf(stderr, "\"%s\": Contents differ!\n", entry.path.c_str());
150
151	static bigtime_t sLastUpdate = -1;
152	if (system_time() - sLastUpdate > 500000) {
153		printf("%ld files scanned\33[1A\n", number);
154		sLastUpdate = system_time();
155	}
156}
157
158
159int
160main(int argc, char** argv)
161{
162	if (argc != 2) {
163		fprintf(stderr, "usage: %s <hash-file>\n", kProgramName);
164		return 1;
165	}
166
167	const char* hashFileName = argv[1];
168
169	status_t status = gSHA.Init();
170	if (status != B_OK) {
171		fprintf(stderr, "%s: Could not initialize SHA processor: %s\n",
172			kProgramName, strerror(status));
173		return 1;
174	}
175
176	// read files from hash file
177
178	int file = open(hashFileName, O_RDONLY);
179	if (file < 0) {
180		fprintf(stderr, "%s: Could not open hash file \"%s\": %s\n",
181			kProgramName, hashFileName, strerror(status));
182		return 1;
183	}
184
185	char buffer[2048];
186	read(file, buffer, 4);
187	if (memcmp(buffer, "HASH", 4)) {
188		fprintf(stderr, "%s: \"%s\" is not a hash file\n",
189			kProgramName, hashFileName);
190		close(file);
191		return 1;
192	}
193
194	int fileCount;
195	read(file, &fileCount, sizeof(int));
196	TRACE("Skip %d path(s)\n", fileCount);
197
198	// Skip paths, we don't need it for the consistency check
199
200	for (int i = 0; i < fileCount; i++) {
201		int length;
202		read(file, &length, sizeof(int));
203		lseek(file, length + 1, SEEK_CUR);
204	}
205
206	// Read file names and their hash
207
208	read(file, &fileCount, sizeof(int));
209	TRACE("Found %d file(s)\n", fileCount);
210
211	for (int i = 0; i < fileCount; i++) {
212		file_entry entry;
213		read(file, entry.hash, SHA_DIGEST_LENGTH);
214		read(file, &entry.node, sizeof(ino_t));
215
216		int length;
217		read(file, &length, sizeof(int));
218		read(file, buffer, length + 1);
219
220		entry.path = buffer;
221
222		gFiles.push_back(entry);
223	}
224
225	close(file);
226
227	bigtime_t start = system_time();
228
229	for (int i = 0; i < fileCount; i++) {
230		process_file(gFiles[i], i);
231	}
232
233	bigtime_t runtime = system_time() - start;
234
235	if (gFiles.size() > 0) {
236		printf("Consistency check for %ld files in %g seconds, %g msec per "
237			"file.\n", gFiles.size(), runtime / 1000000.0,
238			runtime / 1000.0 / gFiles.size());
239	}
240
241	return 0;
242}
243