1/* recovery.c --- FSX recovery functionality
2*
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 */
22
23#include "recovery.h"
24
25#include "svn_dirent_uri.h"
26#include "svn_hash.h"
27#include "svn_pools.h"
28#include "private/svn_string_private.h"
29
30#include "low_level.h"
31#include "rep-cache.h"
32#include "revprops.h"
33#include "transaction.h"
34#include "util.h"
35#include "cached_data.h"
36#include "index.h"
37
38#include "../libsvn_fs/fs-loader.h"
39
40#include "svn_private_config.h"
41
42/* Set *EXISTS to TRUE, if the revision / pack file for REV exists in FS.
43   Use SCRATCH_POOL for temporary allocations. */
44static svn_error_t *
45revision_file_exists(svn_boolean_t *exists,
46                     svn_fs_t *fs,
47                     svn_revnum_t rev,
48                     apr_pool_t *scratch_pool)
49{
50  svn_node_kind_t kind;
51  const char *path = svn_fs_x__path_rev_absolute(fs, rev, scratch_pool);
52  SVN_ERR(svn_io_check_path(path, &kind, scratch_pool));
53
54  *exists = kind == svn_node_file;
55  return SVN_NO_ERROR;
56}
57
58/* Part of the recovery procedure.  Return the largest revision *REV in
59   filesystem FS.  Use SCRATCH_POOL for temporary allocation. */
60static svn_error_t *
61recover_get_largest_revision(svn_fs_t *fs,
62                             svn_revnum_t *rev,
63                             apr_pool_t *scratch_pool)
64{
65  /* Discovering the largest revision in the filesystem would be an
66     expensive operation if we did a readdir() or searched linearly,
67     so we'll do a form of binary search.  left is a revision that we
68     know exists, right a revision that we know does not exist. */
69  apr_pool_t *iterpool;
70  svn_revnum_t left, right = 1;
71
72  iterpool = svn_pool_create(scratch_pool);
73  /* Keep doubling right, until we find a revision that doesn't exist. */
74  while (1)
75    {
76      svn_boolean_t exists;
77      svn_pool_clear(iterpool);
78
79      SVN_ERR(revision_file_exists(&exists, fs, right, iterpool));
80      if (!exists)
81        break;
82
83      right <<= 1;
84    }
85
86  left = right >> 1;
87
88  /* We know that left exists and right doesn't.  Do a normal bsearch to find
89     the last revision. */
90  while (left + 1 < right)
91    {
92      svn_revnum_t probe = left + ((right - left) / 2);
93      svn_boolean_t exists;
94      svn_pool_clear(iterpool);
95
96      SVN_ERR(revision_file_exists(&exists, fs, probe, iterpool));
97      if (exists)
98        left = probe;
99      else
100        right = probe;
101    }
102
103  svn_pool_destroy(iterpool);
104
105  /* left is now the largest revision that exists. */
106  *rev = left;
107  return SVN_NO_ERROR;
108}
109
110/* Delete all files and sub-directories (recursively) of DIR_PATH but
111   leave DIR_PATH itself in place.  Use SCRATCH_POOL for temporaries. */
112static svn_error_t *
113clear_directory(const char *dir_path,
114                apr_pool_t *scratch_pool)
115{
116  apr_hash_t *dirents;
117  apr_hash_index_t *hi;
118  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
119
120  SVN_ERR(svn_io_get_dirents3(&dirents, dir_path, TRUE, scratch_pool,
121                              scratch_pool));
122
123  for (hi = apr_hash_first(scratch_pool, dirents);
124       hi;
125       hi = apr_hash_next(hi))
126    {
127      const char *path;
128      const char *name;
129      svn_dirent_t *dirent;
130
131      svn_pool_clear(iterpool);
132      apr_hash_this(hi, (const void **)&name, NULL, (void **)&dirent);
133
134      path = svn_dirent_join(dir_path, name, iterpool);
135      if (dirent->kind == svn_node_dir)
136        SVN_ERR(svn_io_remove_dir2(path, TRUE, NULL, NULL, iterpool));
137      else
138        SVN_ERR(svn_io_remove_file2(path, TRUE, iterpool));
139    }
140
141  svn_pool_destroy(iterpool);
142
143  return SVN_NO_ERROR;
144}
145
146/* Delete all uncommitted transaction data from FS.
147   Use SCRATCH_POOL for temporaries. */
148static svn_error_t *
149discard_transactions(svn_fs_t *fs,
150                     apr_pool_t *scratch_pool)
151{
152  svn_fs_x__data_t *ffd = fs->fsap_data;
153  svn_fs_x__shared_data_t *ffsd = ffd->shared;
154
155  /* In case this FS has been opened more than once in this process,
156     we should purge their shared transaction data as well.  We do the
157     same as abort_txn would, except that we don't expect all txn files
158     to be complete on disk. */
159  while (ffsd->txns)
160    {
161      svn_fs_x__shared_txn_data_t *txn = ffsd->txns;
162      ffsd->txns = txn->next;
163
164      svn_pool_destroy(txn->pool);
165    }
166
167  /* Remove anything from the transaction folders. */
168  SVN_ERR(clear_directory(svn_fs_x__path_txns_dir(fs, scratch_pool),
169                          scratch_pool));
170  SVN_ERR(clear_directory(svn_fs_x__path_txn_proto_revs(fs, scratch_pool),
171                          scratch_pool));
172
173  return SVN_NO_ERROR;
174}
175
176/* Reset txn-current in FS.  Use SCRATCH_POOL for temporaries. */
177static svn_error_t *
178reset_txn_number(svn_fs_t *fs,
179                 apr_pool_t *scratch_pool)
180{
181  const char *initial_txn = "0\n";
182  SVN_ERR(svn_io_write_atomic2(svn_fs_x__path_txn_current(fs, scratch_pool),
183                               initial_txn, strlen(initial_txn),
184                               svn_fs_x__path_uuid(fs, scratch_pool),
185                               FALSE, scratch_pool));
186
187  return SVN_NO_ERROR;
188}
189
190/* Baton used for recover_body below. */
191typedef struct recover_baton_t {
192  svn_fs_t *fs;
193  svn_cancel_func_t cancel_func;
194  void *cancel_baton;
195} recover_baton_t;
196
197/* The work-horse for svn_fs_x__recover, called with the FS
198   write lock.  This implements the svn_fs_x__with_write_lock()
199   'body' callback type.  BATON is a 'recover_baton_t *'. */
200static svn_error_t *
201recover_body(void *baton,
202             apr_pool_t *scratch_pool)
203{
204  recover_baton_t *b = baton;
205  svn_fs_t *fs = b->fs;
206  svn_fs_x__data_t *ffd = fs->fsap_data;
207  svn_revnum_t max_rev;
208  svn_revnum_t youngest_rev;
209  svn_boolean_t revprop_missing = TRUE;
210  svn_boolean_t revprop_accessible = FALSE;
211
212  /* Lose potentially corrupted data in temp files */
213  SVN_ERR(svn_fs_x__reset_revprop_generation_file(fs, scratch_pool));
214
215  /* The admin may have created a plain copy of this repo before attempting
216     to recover it (hotcopy may or may not work with corrupted repos).
217     Bump the instance ID. */
218  SVN_ERR(svn_fs_x__set_uuid(fs, fs->uuid, NULL, TRUE, scratch_pool));
219
220  /* Because transactions are not resilient against system crashes,
221     any existing transaction is suspect (and would probably not be
222     reopened anyway).  Get rid of those. */
223  SVN_ERR(discard_transactions(fs, scratch_pool));
224  SVN_ERR(reset_txn_number(fs, scratch_pool));
225
226  /* We need to know the largest revision in the filesystem. */
227  SVN_ERR(recover_get_largest_revision(fs, &max_rev, scratch_pool));
228
229  /* Get the expected youngest revision */
230  SVN_ERR(svn_fs_x__youngest_rev(&youngest_rev, fs, scratch_pool));
231
232  /* Policy note:
233
234     Since the revprops file is written after the revs file, the true
235     maximum available revision is the youngest one for which both are
236     present.  That's probably the same as the max_rev we just found,
237     but if it's not, we could, in theory, repeatedly decrement
238     max_rev until we find a revision that has both a revs and
239     revprops file, then write db/current with that.
240
241     But we choose not to.  If a repository is so corrupt that it's
242     missing at least one revprops file, we shouldn't assume that the
243     youngest revision for which both the revs and revprops files are
244     present is healthy.  In other words, we're willing to recover
245     from a missing or out-of-date db/current file, because db/current
246     is truly redundant -- it's basically a cache so we don't have to
247     find max_rev each time, albeit a cache with unusual semantics,
248     since it also officially defines when a revision goes live.  But
249     if we're missing more than the cache, it's time to back out and
250     let the admin reconstruct things by hand: correctness at that
251     point may depend on external things like checking a commit email
252     list, looking in particular working copies, etc.
253
254     This policy matches well with a typical naive backup scenario.
255     Say you're rsyncing your FSX repository nightly to the same
256     location.  Once revs and revprops are written, you've got the
257     maximum rev; if the backup should bomb before db/current is
258     written, then db/current could stay arbitrarily out-of-date, but
259     we can still recover.  It's a small window, but we might as well
260     do what we can. */
261
262  /* Even if db/current were missing, it would be created with 0 by
263     get_youngest(), so this conditional remains valid. */
264  if (youngest_rev > max_rev)
265    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
266                             _("Expected current rev to be <= %ld "
267                               "but found %ld"), max_rev, youngest_rev);
268
269  /* Before setting current, verify that there is a revprops file
270     for the youngest revision.  (Issue #2992) */
271  if (svn_fs_x__is_packed_revprop(fs, max_rev))
272    {
273      revprop_accessible
274        = svn_fs_x__packed_revprop_available(&revprop_missing, fs, max_rev,
275                                             scratch_pool);
276    }
277  else
278    {
279      svn_node_kind_t youngest_revprops_kind;
280      SVN_ERR(svn_io_check_path(svn_fs_x__path_revprops(fs, max_rev,
281                                                        scratch_pool),
282                                &youngest_revprops_kind, scratch_pool));
283
284      if (youngest_revprops_kind == svn_node_file)
285        {
286          revprop_missing = FALSE;
287          revprop_accessible = TRUE;
288        }
289      else if (youngest_revprops_kind != svn_node_none)
290        {
291          return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
292                                  _("Revision %ld has a non-file where its "
293                                    "revprops file should be"),
294                                  max_rev);
295        }
296    }
297
298  if (!revprop_accessible)
299    {
300      if (revprop_missing)
301        {
302          return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
303                                  _("Revision %ld has a revs file but no "
304                                    "revprops file"),
305                                  max_rev);
306        }
307      else
308        {
309          return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
310                                  _("Revision %ld has a revs file but the "
311                                    "revprops file is inaccessible"),
312                                  max_rev);
313        }
314    }
315
316  /* Prune younger-than-(newfound-youngest) revisions from the rep
317     cache if sharing is enabled taking care not to create the cache
318     if it does not exist. */
319  if (ffd->rep_sharing_allowed)
320    {
321      svn_boolean_t rep_cache_exists;
322
323      SVN_ERR(svn_fs_x__exists_rep_cache(&rep_cache_exists, fs,
324                                         scratch_pool));
325      if (rep_cache_exists)
326        SVN_ERR(svn_fs_x__del_rep_reference(fs, max_rev, scratch_pool));
327    }
328
329  /* Now store the discovered youngest revision, and the next IDs if
330     relevant, in a new 'current' file. */
331  return svn_fs_x__write_current(fs, max_rev, scratch_pool);
332}
333
334/* This implements the fs_library_vtable_t.recover() API. */
335svn_error_t *
336svn_fs_x__recover(svn_fs_t *fs,
337                  svn_cancel_func_t cancel_func,
338                  void *cancel_baton,
339                  apr_pool_t *scratch_pool)
340{
341  recover_baton_t b;
342
343  /* We have no way to take out an exclusive lock in FSX, so we're
344     restricted as to the types of recovery we can do.  Luckily,
345     we just want to recreate the 'current' file, and we can do that just
346     by blocking other writers. */
347  b.fs = fs;
348  b.cancel_func = cancel_func;
349  b.cancel_baton = cancel_baton;
350  return svn_fs_x__with_all_locks(fs, recover_body, &b, scratch_pool);
351}
352