1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1980, 1986, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include <sys/param.h>
33#include <sys/time.h>
34#include <sys/types.h>
35#include <sys/sysctl.h>
36#include <sys/disk.h>
37#include <sys/disklabel.h>
38#include <sys/ioctl.h>
39#include <sys/stat.h>
40
41#include <ufs/ufs/dinode.h>
42#include <ufs/ufs/dir.h>
43#include <ufs/ffs/fs.h>
44
45#include <err.h>
46#include <errno.h>
47#include <string.h>
48#include <ctype.h>
49#include <fstab.h>
50#include <stdint.h>
51#include <stdio.h>
52#include <stdlib.h>
53#include <time.h>
54#include <unistd.h>
55
56#include "fsck.h"
57
/* Nonzero while journaled soft updates (SUJ) recovery is in progress;
 * consulted below to avoid writes/allocations SUJ cannot roll back. */
int		sujrecovery = 0;

static struct bufarea *allocbuf(const char *);
static void cg_write(struct bufarea *);
static void slowio_start(void);
static void slowio_end(void);
static void printIOstats(void);

static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */
static struct timespec startpass, finishpass; /* pass timing (debug stats) */
struct timeval slowio_starttime;
int slowio_delay_usec = 10000;	/* Initial IO delay for background fsck */
int slowio_pollcnt;
static struct bufarea cgblk;	/* backup buffer for cylinder group blocks */
static struct bufarea failedbuf; /* returned by failed getdatablk() */
static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */
static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */
static struct bufhash freebufs;	/* unused buffers */
static int numbufs;		/* size of buffer cache */
static int cachelookups;	/* number of cache lookups */
static int cachereads;		/* number of cache reads */
static int flushtries;		/* number of tries to reclaim memory */

/* Human-readable names for each buffer type, indexed by BT_* constants. */
char *buftype[BT_NUMBUFTYPES] = BT_NAMES;
82
83void
84fsutilinit(void)
85{
86	diskreads = totaldiskreads = totalreads = 0;
87	bzero(&startpass, sizeof(struct timespec));
88	bzero(&finishpass, sizeof(struct timespec));
89	bzero(&slowio_starttime, sizeof(struct timeval));
90	slowio_delay_usec = 10000;
91	slowio_pollcnt = 0;
92	flushtries = 0;
93}
94
95int
96ftypeok(union dinode *dp)
97{
98	switch (DIP(dp, di_mode) & IFMT) {
99
100	case IFDIR:
101	case IFREG:
102	case IFBLK:
103	case IFCHR:
104	case IFLNK:
105	case IFSOCK:
106	case IFIFO:
107		return (1);
108
109	default:
110		if (debug)
111			printf("bad file type 0%o\n", DIP(dp, di_mode));
112		return (0);
113	}
114}
115
116int
117reply(const char *question)
118{
119	int persevere;
120	char c;
121
122	if (preen)
123		pfatal("INTERNAL ERROR: GOT TO reply()");
124	persevere = strcmp(question, "CONTINUE") == 0 ||
125		strcmp(question, "LOOK FOR ALTERNATE SUPERBLOCKS") == 0;
126	printf("\n");
127	if (!persevere && (nflag || (fswritefd < 0 && bkgrdflag == 0))) {
128		printf("%s? no\n\n", question);
129		resolved = 0;
130		return (0);
131	}
132	if (yflag || (persevere && nflag)) {
133		printf("%s? yes\n\n", question);
134		return (1);
135	}
136	do	{
137		printf("%s? [yn] ", question);
138		(void) fflush(stdout);
139		c = getc(stdin);
140		while (c != '\n' && getc(stdin) != '\n') {
141			if (feof(stdin)) {
142				resolved = 0;
143				return (0);
144			}
145		}
146	} while (c != 'y' && c != 'Y' && c != 'n' && c != 'N');
147	printf("\n");
148	if (c == 'y' || c == 'Y')
149		return (1);
150	resolved = 0;
151	return (0);
152}
153
154/*
155 * Look up state information for an inode.
156 */
157struct inostat *
158inoinfo(ino_t inum)
159{
160	static struct inostat unallocated = { USTATE, 0, 0, 0 };
161	struct inostatlist *ilp;
162	int iloff;
163
164	if (inum >= maxino)
165		errx(EEXIT, "inoinfo: inumber %ju out of range",
166		    (uintmax_t)inum);
167	ilp = &inostathead[inum / sblock.fs_ipg];
168	iloff = inum % sblock.fs_ipg;
169	if (iloff >= ilp->il_numalloced)
170		return (&unallocated);
171	return (&ilp->il_stat[iloff]);
172}
173
174/*
175 * Malloc buffers and set up cache.
176 */
177void
178bufinit(void)
179{
180	int i;
181
182	initbarea(&failedbuf, BT_UNKNOWN);
183	failedbuf.b_errs = -1;
184	failedbuf.b_un.b_buf = NULL;
185	if ((cgblk.b_un.b_buf = Balloc((unsigned int)sblock.fs_bsize)) == NULL)
186		errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize);
187	initbarea(&cgblk, BT_CYLGRP);
188	numbufs = cachelookups = cachereads = 0;
189	TAILQ_INIT(&bufqueuehd);
190	LIST_INIT(&freebufs);
191	for (i = 0; i < HASHSIZE; i++)
192		LIST_INIT(&bufhashhd[i]);
193	for (i = 0; i < BT_NUMBUFTYPES; i++) {
194		readtime[i].tv_sec = totalreadtime[i].tv_sec = 0;
195		readtime[i].tv_nsec = totalreadtime[i].tv_nsec = 0;
196		readcnt[i] = totalreadcnt[i] = 0;
197	}
198}
199
200static struct bufarea *
201allocbuf(const char *failreason)
202{
203	struct bufarea *bp;
204	char *bufp;
205
206	bp = (struct bufarea *)Malloc(sizeof(struct bufarea));
207	bufp = Balloc((unsigned int)sblock.fs_bsize);
208	if (bp == NULL || bufp == NULL) {
209		errx(EEXIT, "%s", failreason);
210		/* NOTREACHED */
211	}
212	numbufs++;
213	bp->b_un.b_buf = bufp;
214	TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
215	initbarea(bp, BT_UNKNOWN);
216	return (bp);
217}
218
219/*
220 * Manage cylinder group buffers.
221 *
222 * Use getblk() here rather than cgget() because the cylinder group
223 * may be corrupted but we want it anyway so we can fix it.
224 */
static struct bufarea *cgbufs;	/* header for cylinder group cache */
/* (flushtries is declared with the other cache statics near the top.) */
227
struct bufarea *
cglookup(int cg)
{
	struct bufarea *cgbp;
	struct cg *cgp;

	/* A cg number outside the filesystem is an internal error. */
	if ((unsigned) cg >= sblock.fs_ncg)
		errx(EEXIT, "cglookup: out of range cylinder group %d", cg);
	/* Lazily create the per-cg buffer header array on first use. */
	if (cgbufs == NULL) {
		cgbufs = Calloc(sblock.fs_ncg, sizeof(struct bufarea));
		if (cgbufs == NULL)
			errx(EEXIT, "Cannot allocate cylinder group buffers");
	}
	cgbp = &cgbufs[cg];
	/* Cache hit: this cylinder group has already been read in. */
	if (cgbp->b_un.b_cg != NULL)
		return (cgbp);
	cgp = NULL;
	/*
	 * Only try to allocate fresh memory if flushentry() has not yet
	 * been forced to start reclaiming cached cylinder groups.
	 */
	if (flushtries == 0)
		cgp = Balloc((unsigned int)sblock.fs_cgsize);
	if (cgp == NULL) {
		/*
		 * Out of memory: fall back to the single shared backup
		 * buffer, flushing whatever it currently holds first.
		 * Not permitted during SUJ recovery, which must not write.
		 */
		if (sujrecovery)
			errx(EEXIT,"Ran out of memory during journal recovery");
		flush(fswritefd, &cgblk);
		getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize);
		return (&cgblk);
	}
	cgbp->b_un.b_cg = cgp;
	initbarea(cgbp, BT_CYLGRP);
	getblk(cgbp, cgtod(&sblock, cg), sblock.fs_cgsize);
	return (cgbp);
}
259
260/*
261 * Mark a cylinder group buffer as dirty.
262 * Update its check-hash if they are enabled.
263 */
264void
265cgdirty(struct bufarea *cgbp)
266{
267	struct cg *cg;
268
269	cg = cgbp->b_un.b_cg;
270	if ((sblock.fs_metackhash & CK_CYLGRP) != 0) {
271		cg->cg_ckhash = 0;
272		cg->cg_ckhash =
273		    calculate_crc32c(~0L, (void *)cg, sblock.fs_cgsize);
274	}
275	dirty(cgbp);
276}
277
278/*
279 * Attempt to flush a cylinder group cache entry.
280 * Return whether the flush was successful.
281 */
282int
283flushentry(void)
284{
285	struct bufarea *cgbp;
286
287	if (sujrecovery || flushtries == sblock.fs_ncg || cgbufs == NULL)
288		return (0);
289	cgbp = &cgbufs[flushtries++];
290	if (cgbp->b_un.b_cg == NULL)
291		return (0);
292	flush(fswritefd, cgbp);
293	free(cgbp->b_un.b_buf);
294	cgbp->b_un.b_buf = NULL;
295	return (1);
296}
297
298/*
299 * Manage a cache of filesystem disk blocks.
300 */
/*
 * Look up (or read in) the filesystem block at "blkno" of length "size"
 * and buffer type "type".  Returns a referenced bufarea; on a range
 * failure returns the shared "failedbuf" sentinel (b_errs == -1).
 */
struct bufarea *
getdatablk(ufs2_daddr_t blkno, long size, int type)
{
	struct bufarea *bp;
	struct bufhash *bhdp;

	cachelookups++;
	/*
	 * If out of range, return empty buffer with b_err == -1
	 *
	 * Skip check for inodes because chkrange() considers
	 * metadata areas invalid to write data.
	 */
	if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) {
		failedbuf.b_refcnt++;
		return (&failedbuf);
	}
	/* Check the hash chain for a cached copy of this block. */
	bhdp = &bufhashhd[HASH(blkno)];
	LIST_FOREACH(bp, bhdp, b_hash)
		if (bp->b_bno == fsbtodb(&sblock, blkno)) {
			if (debug && bp->b_size != size) {
				prtbuf(bp, "getdatablk: size mismatch");
				pfatal("getdatablk: b_size %d != size %ld\n",
				    bp->b_size, size);
			}
			TAILQ_REMOVE(&bufqueuehd, bp, b_list);
			goto foundit;
		}
	/*
	 * Move long-term busy buffer back to the front of the LRU so we
	 * do not endless inspect them for recycling.
	 */
	bp = TAILQ_LAST(&bufqueuehd, bufqueue);
	if (bp != NULL && bp->b_refcnt != 0) {
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	}
	/*
	 * Allocate up to the minimum number of buffers before
	 * considering recycling any of them.
	 */
	if (size > sblock.fs_bsize)
		errx(EEXIT, "Excessive buffer size %ld > %d\n", size,
		    sblock.fs_bsize);
	if ((bp = LIST_FIRST(&freebufs)) != NULL) {
		/* Reuse a previously invalidated buffer. */
		LIST_REMOVE(bp, b_hash);
	} else if (numbufs < MINBUFS) {
		bp = allocbuf("cannot create minimal buffer pool");
	} else if (sujrecovery) {
		/*
		 * SUJ recovery does not want anything written until it
		 * has successfully completed (so it can fail back to
		 * full fsck). Thus, we can only recycle clean buffers.
		 */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if ((bp->b_flags & B_DIRTY) == 0 && bp->b_refcnt == 0)
				break;
		if (bp == NULL)
			bp = allocbuf("Ran out of memory during "
			    "journal recovery");
		else
			LIST_REMOVE(bp, b_hash);
	} else {
		/*
		 * Recycle oldest non-busy buffer.
		 */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if (bp->b_refcnt == 0)
				break;
		if (bp == NULL)
			bp = allocbuf("Ran out of memory for buffers");
		else
			LIST_REMOVE(bp, b_hash);
	}
	/* Detach from the LRU, write back if dirty, then read the block. */
	TAILQ_REMOVE(&bufqueuehd, bp, b_list);
	flush(fswritefd, bp);
	bp->b_type = type;
	LIST_INSERT_HEAD(bhdp, bp, b_hash);
	getblk(bp, blkno, size);
	cachereads++;
	/* fall through */
foundit:
	/* Most-recently-used: reinsert at the head of the LRU. */
	TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	if (debug && bp->b_type != type) {
		printf("getdatablk: buffer type changed to %s",
		    BT_BUFTYPE(type));
		prtbuf(bp, "");
	}
	/* Only hand out a reference if the read succeeded. */
	if (bp->b_errs == 0)
		bp->b_refcnt++;
	return (bp);
}
393
394void
395getblk(struct bufarea *bp, ufs2_daddr_t blk, long size)
396{
397	ufs2_daddr_t dblk;
398	struct timespec start, finish;
399
400	dblk = fsbtodb(&sblock, blk);
401	if (bp->b_bno == dblk) {
402		totalreads++;
403	} else {
404		if (debug) {
405			readcnt[bp->b_type]++;
406			clock_gettime(CLOCK_REALTIME_PRECISE, &start);
407		}
408		bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size);
409		if (debug) {
410			clock_gettime(CLOCK_REALTIME_PRECISE, &finish);
411			timespecsub(&finish, &start, &finish);
412			timespecadd(&readtime[bp->b_type], &finish,
413			    &readtime[bp->b_type]);
414		}
415		bp->b_bno = dblk;
416		bp->b_size = size;
417	}
418}
419
420void
421brelse(struct bufarea *bp)
422{
423
424	if (bp->b_refcnt <= 0)
425		prtbuf(bp, "brelse: buffer with negative reference count");
426	bp->b_refcnt--;
427}
428
429void
430binval(struct bufarea *bp)
431{
432
433	bp->b_flags &= ~B_DIRTY;
434	LIST_REMOVE(bp, b_hash);
435	LIST_INSERT_HEAD(&freebufs, bp, b_hash);
436}
437
/*
 * Write buffer "bp" to descriptor "fd" if it is dirty, clearing the
 * dirty flag.  Dispatches to the type-specific writer (superblock,
 * cylinder group) or the generic block writer.  No-op in read-only
 * mode (complains via pfatal).
 */
void
flush(int fd, struct bufarea *bp)
{
	struct inode ip;

	if ((bp->b_flags & B_DIRTY) == 0)
		return;
	bp->b_flags &= ~B_DIRTY;
	if (fswritefd < 0) {
		pfatal("WRITING IN READ_ONLY MODE.\n");
		return;
	}
	/* Writing a block that failed to read: warn that it is zeroed. */
	if (bp->b_errs != 0)
		pfatal("WRITING %sZERO'ED BLOCK %lld TO DISK\n",
		    (bp->b_errs == bp->b_size / dev_bsize) ? "" : "PARTIALLY ",
		    (long long)bp->b_bno);
	bp->b_errs = 0;
	/*
	 * Write using the appropriate function.
	 */
	switch (bp->b_type) {
	case BT_SUPERBLK:
		if (bp != &sblk)
			pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n",
			    bp, &sblk);
		/*
		 * Superblocks are always pre-copied so we do not need
		 * to check them for copy-on-write.
		 */
		if (sbput(fd, bp->b_un.b_fs, 0) == 0)
			fsmodified = 1;
		break;
	case BT_CYLGRP:
		/*
		 * Cylinder groups are always pre-copied so we do not
		 * need to check them for copy-on-write.
		 */
		if (sujrecovery)
			cg_write(bp);
		if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0)
			fsmodified = 1;
		break;
	case BT_INODES:
		/* Debug-only sanity pass: verify/repair inode check-hashes
		 * before the block goes to disk (UFS2 only). */
		if (debug && sblock.fs_magic == FS_UFS2_MAGIC) {
			struct ufs2_dinode *dp = bp->b_un.b_dinode2;
			int i;

			for (i = 0; i < bp->b_size; dp++, i += sizeof(*dp)) {
				if (ffs_verify_dinode_ckhash(&sblock, dp) == 0)
					continue;
				pwarn("flush: INODE CHECK-HASH FAILED");
				ip.i_bp = bp;
				ip.i_dp = (union dinode *)dp;
				ip.i_number = bp->b_index + (i / sizeof(*dp));
				prtinode(&ip);
				if (preen || reply("FIX") != 0) {
					if (preen)
						printf(" (FIXED)\n");
					ffs_update_dinode_ckhash(&sblock, dp);
					inodirty(&ip);
				}
			}
		}
		/* FALLTHROUGH */
	default:
		copyonwrite(&sblock, bp, std_checkblkavail);
		blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size);
		break;
	}
}
508
509/*
510 * If there are any snapshots, ensure that all the blocks that they
511 * care about have been copied, then release the snapshot inodes.
512 * These operations need to be done before we rebuild the cylinder
513 * groups so that any block allocations are properly recorded.
514 * Since all the cylinder group maps have already been copied in
515 * the snapshots, no further snapshot copies will need to be done.
516 */
517void
518snapflush(ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long))
519{
520	struct bufarea *bp;
521	int cnt;
522
523	if (snapcnt > 0) {
524		if (debug)
525			printf("Check for snapshot copies\n");
526		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
527			if ((bp->b_flags & B_DIRTY) != 0)
528				copyonwrite(&sblock, bp, checkblkavail);
529		for (cnt = 0; cnt < snapcnt; cnt++)
530			irelse(&snaplist[cnt]);
531		snapcnt = 0;
532	}
533}
534
535/*
536 * Journaled soft updates does not maintain cylinder group summary
537 * information during cleanup, so this routine recalculates the summary
538 * information and updates the superblock summary in preparation for
539 * writing out the cylinder group.
540 */
static void
cg_write(struct bufarea *bp)
{
	ufs1_daddr_t fragno, cgbno, maxbno;
	u_int8_t *blksfree;
	struct csum *csp;
	struct cg *cgp;
	int blk;
	int i;

	/*
	 * Fix the frag and cluster summary.
	 */
	cgp = bp->b_un.b_cg;
	cgp->cg_cs.cs_nbfree = 0;
	cgp->cg_cs.cs_nffree = 0;
	bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum));
	maxbno = fragstoblks(&sblock, sblock.fs_fpg);
	if (sblock.fs_contigsumsize > 0) {
		/* Clear the cluster summary and cluster free map too. */
		for (i = 1; i <= sblock.fs_contigsumsize; i++)
			cg_clustersum(cgp)[i] = 0;
		bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT));
	}
	/* Recount free full blocks and free fragments from the free map. */
	blksfree = cg_blksfree(cgp);
	for (cgbno = 0; cgbno < maxbno; cgbno++) {
		if (ffs_isfreeblock(&sblock, blksfree, cgbno))
			continue;
		if (ffs_isblock(&sblock, blksfree, cgbno)) {
			/* Entirely free block: update cluster accounting. */
			ffs_clusteracct(&sblock, cgp, cgbno, 1);
			cgp->cg_cs.cs_nbfree++;
			continue;
		}
		/* Partially allocated block: count its free fragments. */
		fragno = blkstofrags(&sblock, cgbno);
		blk = blkmap(&sblock, blksfree, fragno);
		ffs_fragacct(&sblock, blk, cgp->cg_frsum, 1);
		for (i = 0; i < sblock.fs_frag; i++)
			if (isset(blksfree, fragno + i))
				cgp->cg_cs.cs_nffree++;
	}
	/*
	 * Update the superblock cg summary from our now correct values
	 * before writing the block.
	 */
	csp = &sblock.fs_cs(&sblock, cgp->cg_cgx);
	sblock.fs_cstotal.cs_ndir += cgp->cg_cs.cs_ndir - csp->cs_ndir;
	sblock.fs_cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree - csp->cs_nbfree;
	sblock.fs_cstotal.cs_nifree += cgp->cg_cs.cs_nifree - csp->cs_nifree;
	sblock.fs_cstotal.cs_nffree += cgp->cg_cs.cs_nffree - csp->cs_nffree;
	sblock.fs_cs(&sblock, cgp->cg_cgx) = cgp->cg_cs;
}
591
592void
593rwerror(const char *mesg, ufs2_daddr_t blk)
594{
595
596	if (bkgrdcheck)
597		exit(EEXIT);
598	if (preen == 0)
599		printf("\n");
600	pfatal("CANNOT %s: %ld", mesg, (long)blk);
601	if (reply("CONTINUE") == 0)
602		exit(EEXIT);
603}
604
/*
 * Finish up a check: flush every cached structure in a safe order,
 * update the clean flag in the superblock, and free all tracking
 * structures.  "markclean" is nonzero when the filesystem may be
 * marked clean.
 */
void
ckfini(int markclean)
{
	struct bufarea *bp, *nbp;
	int ofsmodified, cnt, cg;

	if (bkgrdflag) {
		/* Background check: push the clean/dirty state into the
		 * mounted filesystem via sysctl. */
		if ((!(sblock.fs_flags & FS_UNCLEAN)) != markclean) {
			cmd.value = FS_UNCLEAN;
			cmd.size = markclean ? -1 : 1;
			if (sysctlbyname("vfs.ffs.setflags", 0, 0,
			    &cmd, sizeof cmd) == -1)
				pwarn("CANNOT SET FILE SYSTEM DIRTY FLAG\n");
			if (!preen) {
				printf("\n***** FILE SYSTEM MARKED %s *****\n",
				    markclean ? "CLEAN" : "DIRTY");
				if (!markclean)
					rerun = 1;
			}
		} else if (!preen && !markclean) {
			printf("\n***** FILE SYSTEM STILL DIRTY *****\n");
			rerun = 1;
		}
		bkgrdflag = 0;
	}
	if (debug && cachelookups > 0)
		printf("cache with %d buffers missed %d of %d (%d%%)\n",
		    numbufs, cachereads, cachelookups,
		    (int)(cachereads * 100 / cachelookups));
	if (fswritefd < 0) {
		(void)close(fsreadfd);
		return;
	}

	/*
	 * To remain idempotent with partial truncations the buffers
	 * must be flushed in this order:
	 *  1) cylinder groups (bitmaps)
	 *  2) indirect, directory, external attribute, and data blocks
	 *  3) inode blocks
	 *  4) superblock
	 * This ordering preserves access to the modified pointers
	 * until they are freed.
	 */
	/* Step 1: cylinder groups */
	if (debug)
		printf("Flush Cylinder groups\n");
	if (cgbufs != NULL) {
		for (cnt = 0; cnt < sblock.fs_ncg; cnt++) {
			if (cgbufs[cnt].b_un.b_cg == NULL)
				continue;
			flush(fswritefd, &cgbufs[cnt]);
			free(cgbufs[cnt].b_un.b_cg);
		}
		free(cgbufs);
		cgbufs = NULL;
	}
	flush(fswritefd, &cgblk);
	free(cgblk.b_un.b_buf);
	cgblk.b_un.b_buf = NULL;
	cnt = 0;
	/* Step 2: indirect, directory, external attribute, and data blocks */
	if (debug)
		printf("Flush indirect, directory, external attribute, "
		    "and data blocks\n");
	if (pdirbp != NULL) {
		brelse(pdirbp);
		pdirbp = NULL;
	}
	TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) {
		switch (bp->b_type) {
		/* These should not be in the buffer cache list */
		case BT_UNKNOWN:
		case BT_SUPERBLK:
		case BT_CYLGRP:
		default:
			prtbuf(bp,"ckfini: improper buffer type on cache list");
			continue;
		/* These are the ones to flush in this step */
		case BT_LEVEL1:
		case BT_LEVEL2:
		case BT_LEVEL3:
		case BT_EXTATTR:
		case BT_DIRDATA:
		case BT_DATA:
			break;
		/* These are the ones to flush in the next step */
		case BT_INODES:
			continue;
		}
		if (debug && bp->b_refcnt != 0)
			prtbuf(bp, "ckfini: clearing in-use buffer");
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		LIST_REMOVE(bp, b_hash);
		cnt++;
		flush(fswritefd, bp);
		free(bp->b_un.b_buf);
		free((char *)bp);
	}
	/* Step 3: inode blocks */
	if (debug)
		printf("Flush inode blocks\n");
	if (icachebp != NULL) {
		brelse(icachebp);
		icachebp = NULL;
	}
	TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) {
		if (debug && bp->b_refcnt != 0)
			prtbuf(bp, "ckfini: clearing in-use buffer");
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		LIST_REMOVE(bp, b_hash);
		cnt++;
		flush(fswritefd, bp);
		free(bp->b_un.b_buf);
		free((char *)bp);
	}
	/* Every allocated buffer must have been seen in steps 2 and 3. */
	if (numbufs != cnt)
		errx(EEXIT, "panic: lost %d buffers", numbufs - cnt);
	/* Step 4: superblock */
	if (debug)
		printf("Flush the superblock\n");
	flush(fswritefd, &sblk);
	/* If we used an alternate superblock, offer to restore the
	 * standard one. */
	if (havesb && cursnapshot == 0 &&
	    sblk.b_bno != sblock.fs_sblockloc / dev_bsize) {
		if (preen || reply("UPDATE STANDARD SUPERBLOCK")) {
			/* Change write destination to standard superblock */
			sblock.fs_sblockactualloc = sblock.fs_sblockloc;
			sblk.b_bno = sblock.fs_sblockloc / dev_bsize;
			sbdirty();
			flush(fswritefd, &sblk);
		} else {
			markclean = 0;
		}
	}
	if (cursnapshot == 0 && sblock.fs_clean != markclean) {
		if ((sblock.fs_clean = markclean) != 0) {
			sblock.fs_flags &= ~(FS_UNCLEAN | FS_NEEDSFSCK);
			sblock.fs_pendingblocks = 0;
			sblock.fs_pendinginodes = 0;
		}
		sbdirty();
		/* Writing the clean flag alone does not count as a
		 * modification of the filesystem proper. */
		ofsmodified = fsmodified;
		flush(fswritefd, &sblk);
		fsmodified = ofsmodified;
		if (!preen) {
			printf("\n***** FILE SYSTEM MARKED %s *****\n",
			    markclean ? "CLEAN" : "DIRTY");
			if (!markclean)
				rerun = 1;
		}
	} else if (!preen) {
		if (markclean) {
			printf("\n***** FILE SYSTEM IS CLEAN *****\n");
		} else {
			printf("\n***** FILE SYSTEM STILL DIRTY *****\n");
			rerun = 1;
		}
	}
	/*
	 * Free allocated tracking structures.
	 */
	if (blockmap != NULL)
		free(blockmap);
	blockmap = NULL;
	if (inostathead != NULL) {
		for (cg = 0; cg < sblock.fs_ncg; cg++)
			if (inostathead[cg].il_stat != NULL)
				free((char *)inostathead[cg].il_stat);
		free(inostathead);
	}
	inostathead = NULL;
	inocleanup();
	finalIOstats();
	(void)close(fsreadfd);
	(void)close(fswritefd);
}
781
782/*
783 * Print out I/O statistics.
784 */
785void
786IOstats(char *what)
787{
788	int i;
789
790	if (debug == 0)
791		return;
792	if (diskreads == 0) {
793		printf("%s: no I/O\n\n", what);
794		return;
795	}
796	if (startpass.tv_sec == 0)
797		startpass = startprog;
798	printf("%s: I/O statistics\n", what);
799	printIOstats();
800	totaldiskreads += diskreads;
801	diskreads = 0;
802	for (i = 0; i < BT_NUMBUFTYPES; i++) {
803		timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]);
804		totalreadcnt[i] += readcnt[i];
805		readtime[i].tv_sec = readtime[i].tv_nsec = 0;
806		readcnt[i] = 0;
807	}
808	clock_gettime(CLOCK_REALTIME_PRECISE, &startpass);
809}
810
811void
812finalIOstats(void)
813{
814	int i;
815
816	if (debug == 0)
817		return;
818	printf("Final I/O statistics\n");
819	totaldiskreads += diskreads;
820	diskreads = totaldiskreads;
821	startpass = startprog;
822	for (i = 0; i < BT_NUMBUFTYPES; i++) {
823		timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]);
824		totalreadcnt[i] += readcnt[i];
825		readtime[i] = totalreadtime[i];
826		readcnt[i] = totalreadcnt[i];
827	}
828	printIOstats();
829}
830
/*
 * Print the elapsed time for the current pass and a per-buffer-type
 * breakdown of read counts and read time, each with a fixed-point
 * percentage (one decimal digit) of the pass totals.
 */
static void printIOstats(void)
{
	long long msec, totalmsec;
	int i;

	clock_gettime(CLOCK_REALTIME_PRECISE, &finishpass);
	timespecsub(&finishpass, &startpass, &finishpass);
	printf("Running time: %jd.%03ld sec\n",
		(intmax_t)finishpass.tv_sec, finishpass.tv_nsec / 1000000);
	printf("buffer reads by type:\n");
	/* Total read time in milliseconds across all buffer types. */
	for (totalmsec = 0, i = 0; i < BT_NUMBUFTYPES; i++)
		totalmsec += readtime[i].tv_sec * 1000 +
		    readtime[i].tv_nsec / 1000000;
	/* Avoid division by zero below. */
	if (totalmsec == 0)
		totalmsec = 1;
	for (i = 0; i < BT_NUMBUFTYPES; i++) {
		if (readcnt[i] == 0)
			continue;
		msec =
		    readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000;
		/* x*100/total gives whole percent; (x*1000/total)%10 the
		 * tenths digit. */
		printf("%21s:%8ld %2ld.%ld%% %4jd.%03ld sec %2lld.%lld%%\n",
		    buftype[i], readcnt[i], readcnt[i] * 100 / diskreads,
		    (readcnt[i] * 1000 / diskreads) % 10,
		    (intmax_t)readtime[i].tv_sec, readtime[i].tv_nsec / 1000000,
		    msec * 100 / totalmsec, (msec * 1000 / totalmsec) % 10);
	}
	printf("\n");
}
859
/*
 * Read "size" bytes at filesystem block "blk" into "buf".  Returns 0
 * on success.  On failure, retries sector by sector, zero-filling the
 * unreadable sectors, reports them, and returns the number of bad
 * sectors.  Paces itself with slowio when running in the background.
 */
int
blread(int fd, char *buf, ufs2_daddr_t blk, long size)
{
	char *cp;
	int i, errs;
	off_t offset;

	offset = blk;
	offset *= dev_bsize;
	if (bkgrdflag)
		slowio_start();
	totalreads++;
	diskreads++;
	if (pread(fd, buf, (int)size, offset) == size) {
		if (bkgrdflag)
			slowio_end();
		return (0);
	}

	/*
	 * This is handled specially here instead of in rwerror because
	 * rwerror is used for all sorts of errors, not just true read/write
	 * errors.  It should be refactored and fixed.
	 */
	if (surrender) {
		pfatal("CANNOT READ_BLK: %ld", (long)blk);
		errx(EEXIT, "ABORTING DUE TO READ ERRORS");
	} else
		rwerror("READ BLK", blk);

	/* Retry one sector at a time, zeroing whatever cannot be read. */
	errs = 0;
	memset(buf, 0, (size_t)size);
	printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:");
	for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) {
		if (pread(fd, cp, (int)secsize, offset + i) != secsize) {
			/* Print in sector units, with dev_bsize units in
			 * parentheses when the two sizes differ. */
			if (secsize != dev_bsize && dev_bsize != 1)
				printf(" %jd (%jd),",
				    (intmax_t)(blk * dev_bsize + i) / secsize,
				    (intmax_t)blk + i / dev_bsize);
			else
				printf(" %jd,", (intmax_t)blk + i / dev_bsize);
			errs++;
		}
	}
	printf("\n");
	if (errs)
		resolved = 0;
	return (errs);
}
909
910void
911blwrite(int fd, char *buf, ufs2_daddr_t blk, ssize_t size)
912{
913	int i;
914	char *cp;
915	off_t offset;
916
917	if (fd < 0)
918		return;
919	offset = blk;
920	offset *= dev_bsize;
921	if (pwrite(fd, buf, size, offset) == size) {
922		fsmodified = 1;
923		return;
924	}
925	resolved = 0;
926	rwerror("WRITE BLK", blk);
927	printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:");
928	for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize)
929		if (pwrite(fd, cp, dev_bsize, offset + i) != dev_bsize)
930			printf(" %jd,", (intmax_t)blk + i / dev_bsize);
931	printf("\n");
932	return;
933}
934
935void
936blerase(int fd, ufs2_daddr_t blk, long size)
937{
938	off_t ioarg[2];
939
940	if (fd < 0)
941		return;
942	ioarg[0] = blk * dev_bsize;
943	ioarg[1] = size;
944	ioctl(fd, DIOCGDELETE, ioarg);
945	/* we don't really care if we succeed or not */
946	return;
947}
948
949/*
950 * Fill a contiguous region with all-zeroes.  Note ZEROBUFSIZE is by
951 * definition a multiple of dev_bsize.
952 */
953void
954blzero(int fd, ufs2_daddr_t blk, long size)
955{
956	static char *zero;
957	off_t offset, len;
958
959	if (fd < 0)
960		return;
961	if (zero == NULL) {
962		zero = Balloc(ZEROBUFSIZE);
963		if (zero == NULL)
964			errx(EEXIT, "cannot allocate buffer pool");
965	}
966	offset = blk * dev_bsize;
967	if (lseek(fd, offset, 0) < 0)
968		rwerror("SEEK BLK", blk);
969	while (size > 0) {
970		len = MIN(ZEROBUFSIZE, size);
971		if (write(fd, zero, len) != len)
972			rwerror("WRITE BLK", blk);
973		blk += len / dev_bsize;
974		size -= len;
975	}
976}
977
978/*
979 * Verify cylinder group's magic number and other parameters.  If the
980 * test fails, offer an option to rebuild the whole cylinder group.
981 *
982 * Return 1 if the cylinder group is good or return 0 if it is bad.
983 */
/*
 * CHK(lhs, op, rhs, fmt): if "lhs op rhs" holds, print a warning that
 * identifies the failed cylinder-group invariant (with both values)
 * and set the local "error" flag.  Relies on a local "error" and the
 * cylinder group number "cg" being in scope at the expansion site.
 */
#undef CHK
#define CHK(lhs, op, rhs, fmt)						\
	if (lhs op rhs) {						\
		pwarn("UFS%d cylinder group %d failed: "		\
		    "%s (" #fmt ") %s %s (" #fmt ")\n",			\
		    sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2, cg,	\
		    #lhs, (intmax_t)lhs, #op, #rhs, (intmax_t)rhs);	\
		error = 1;						\
	}
/*
 * Validate cylinder group "cg" held in "cgbp": check-hash, magic
 * number, and the expected layout offsets.  Returns 1 if the group is
 * consistent, 0 otherwise (reporting the failure only once per group).
 */
int
check_cgmagic(int cg, struct bufarea *cgbp)
{
	struct cg *cgp = cgbp->b_un.b_cg;
	uint32_t cghash, calchash;
	static int prevfailcg = -1;
	long start;
	int error;

	/*
	 * Extended cylinder group checks.
	 */
	calchash = cgp->cg_ckhash;
	if ((sblock.fs_metackhash & CK_CYLGRP) != 0 &&
	    (ckhashadd & CK_CYLGRP) == 0) {
		/* Recompute the CRC with the stored hash field zeroed. */
		cghash = cgp->cg_ckhash;
		cgp->cg_ckhash = 0;
		calchash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize);
		cgp->cg_ckhash = cghash;
	}
	error = 0;
	CHK(cgp->cg_ckhash, !=, calchash, "%jd");
	CHK(cg_chkmagic(cgp), ==, 0, "%jd");
	CHK(cgp->cg_cgx, !=, cg, "%jd");
	CHK(cgp->cg_ndblk, >, sblock.fs_fpg, "%jd");
	if (sblock.fs_magic == FS_UFS1_MAGIC) {
		CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd");
	} else if (sblock.fs_magic == FS_UFS2_MAGIC) {
		CHK(cgp->cg_niblk, !=, sblock.fs_ipg, "%jd");
		CHK(cgp->cg_initediblk, >, sblock.fs_ipg, "%jd");
	}
	/* The last cylinder group may be shorter than fs_fpg. */
	if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size) {
		CHK(cgp->cg_ndblk, !=, sblock.fs_fpg, "%jd");
	} else {
		CHK(cgp->cg_ndblk, !=, sblock.fs_size - cgbase(&sblock, cg),
		    "%jd");
	}
	/* Verify the layout offsets match what rebuild_cg() would compute. */
	start = sizeof(*cgp);
	if (sblock.fs_magic == FS_UFS2_MAGIC) {
		CHK(cgp->cg_iusedoff, !=, start, "%jd");
	} else if (sblock.fs_magic == FS_UFS1_MAGIC) {
		CHK(cgp->cg_niblk, !=, 0, "%jd");
		CHK(cgp->cg_initediblk, !=, 0, "%jd");
		CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd");
		CHK(cgp->cg_old_btotoff, !=, start, "%jd");
		CHK(cgp->cg_old_boff, !=, cgp->cg_old_btotoff +
		    sblock.fs_old_cpg * sizeof(int32_t), "%jd");
		CHK(cgp->cg_iusedoff, !=, cgp->cg_old_boff +
		    sblock.fs_old_cpg * sizeof(u_int16_t), "%jd");
	}
	CHK(cgp->cg_freeoff, !=,
	    cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT), "%jd");
	if (sblock.fs_contigsumsize == 0) {
		CHK(cgp->cg_nextfreeoff, !=,
		    cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT), "%jd");
	} else {
		CHK(cgp->cg_nclusterblks, !=, cgp->cg_ndblk / sblock.fs_frag,
		    "%jd");
		CHK(cgp->cg_clustersumoff, !=,
		    roundup(cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT),
		    sizeof(u_int32_t)) - sizeof(u_int32_t), "%jd");
		CHK(cgp->cg_clusteroff, !=, cgp->cg_clustersumoff +
		    (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t), "%jd");
		CHK(cgp->cg_nextfreeoff, !=, cgp->cg_clusteroff +
		    howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT),
		    "%jd");
	}
	if (error == 0)
		return (1);
	/* Report a given bad cylinder group at most once. */
	if (prevfailcg == cg)
		return (0);
	prevfailcg = cg;
	pfatal("CYLINDER GROUP %d: INTEGRITY CHECK FAILED", cg);
	printf("\n");
	return (0);
}
1069
/*
 * Rebuild cylinder group "cg" from scratch in "cgbp": zero it and
 * reinitialize the magic number, sizes, and layout offsets.  The
 * bitmaps and summaries are left zeroed for later passes to
 * recalculate.  Marks the buffer dirty.
 */
void
rebuild_cg(int cg, struct bufarea *cgbp)
{
	struct cg *cgp = cgbp->b_un.b_cg;
	long start;

	/*
	 * Zero out the cylinder group and then initialize critical fields.
	 * Bit maps and summaries will be recalculated by later passes.
	 */
	memset(cgp, 0, (size_t)sblock.fs_cgsize);
	cgp->cg_magic = CG_MAGIC;
	cgp->cg_cgx = cg;
	cgp->cg_niblk = sblock.fs_ipg;
	cgp->cg_initediblk = MIN(sblock.fs_ipg, 2 * INOPB(&sblock));
	/* The last cylinder group may be shorter than fs_fpg. */
	if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size)
		cgp->cg_ndblk = sblock.fs_fpg;
	else
		cgp->cg_ndblk = sblock.fs_size - cgbase(&sblock, cg);
	/* Lay out the variable-length maps that follow the cg header. */
	start = sizeof(*cgp);
	if (sblock.fs_magic == FS_UFS2_MAGIC) {
		cgp->cg_iusedoff = start;
	} else if (sblock.fs_magic == FS_UFS1_MAGIC) {
		/* UFS1 keeps inode counts in the old fields and carries
		 * the legacy block-total and rotational tables. */
		cgp->cg_niblk = 0;
		cgp->cg_initediblk = 0;
		cgp->cg_old_ncyl = sblock.fs_old_cpg;
		cgp->cg_old_niblk = sblock.fs_ipg;
		cgp->cg_old_btotoff = start;
		cgp->cg_old_boff = cgp->cg_old_btotoff +
		    sblock.fs_old_cpg * sizeof(int32_t);
		cgp->cg_iusedoff = cgp->cg_old_boff +
		    sblock.fs_old_cpg * sizeof(u_int16_t);
	}
	cgp->cg_freeoff = cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT);
	cgp->cg_nextfreeoff = cgp->cg_freeoff + howmany(sblock.fs_fpg,CHAR_BIT);
	if (sblock.fs_contigsumsize > 0) {
		cgp->cg_nclusterblks = cgp->cg_ndblk / sblock.fs_frag;
		cgp->cg_clustersumoff =
		    roundup(cgp->cg_nextfreeoff, sizeof(u_int32_t));
		cgp->cg_clustersumoff -= sizeof(u_int32_t);
		cgp->cg_clusteroff = cgp->cg_clustersumoff +
		    (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t);
		cgp->cg_nextfreeoff = cgp->cg_clusteroff +
		    howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT);
	}
	/* Seal the rebuilt group with a fresh check-hash. */
	cgp->cg_ckhash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize);
	cgdirty(cgbp);
}
1118
1119/*
1120 * allocate a data block with the specified number of fragments
1121 */
1122ufs2_daddr_t
1123allocblk(long startcg, long frags,
1124    ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
1125{
1126	ufs2_daddr_t blkno, newblk;
1127
1128	if (sujrecovery && checkblkavail == std_checkblkavail) {
1129		pfatal("allocblk: std_checkblkavail used for SUJ recovery\n");
1130		return (0);
1131	}
1132	if (frags <= 0 || frags > sblock.fs_frag)
1133		return (0);
1134	for (blkno = MAX(cgdata(&sblock, startcg), 0);
1135	     blkno < maxfsblock - sblock.fs_frag;
1136	     blkno += sblock.fs_frag) {
1137		if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
1138			continue;
1139		if (newblk > 0)
1140			return (newblk);
1141		if (newblk < 0)
1142			blkno = -newblk;
1143	}
1144	for (blkno = MAX(cgdata(&sblock, 0), 0);
1145	     blkno < cgbase(&sblock, startcg) - sblock.fs_frag;
1146	     blkno += sblock.fs_frag) {
1147		if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
1148			continue;
1149		if (newblk > 0)
1150			return (newblk);
1151		if (newblk < 0)
1152			blkno = -newblk;
1153	}
1154	return (0);
1155}
1156
/*
 * Try to claim `frags` contiguous free fragments starting within the
 * fs_frag-sized block at `blkno`.  On success the fragments are marked
 * used in fsck's block map and cleared in the cylinder group's free
 * map, the summary counts are updated, and the first claimed fragment
 * is returned.  Returns 0 when no suitable free run exists here, or a
 * negative value (the negated last candidate block of the cylinder
 * group) when the group fails its integrity check, so the caller can
 * skip the rest of that group.
 */
ufs2_daddr_t
std_checkblkavail(ufs2_daddr_t blkno, long frags)
{
	struct bufarea *cgbp;
	struct cg *cgp;
	ufs2_daddr_t j, k, baseblk;
	long cg;

	if ((u_int64_t)blkno > sblock.fs_size)
		return (0);
	/* Try each possible starting offset within this block. */
	for (j = 0; j <= sblock.fs_frag - frags; j++) {
		if (testbmap(blkno + j))
			continue;
		/* The remaining frags - 1 fragments must be free too. */
		for (k = 1; k < frags; k++)
			if (testbmap(blkno + j + k))
				break;
		if (k < frags) {
			/* Run too short; resume past the in-use fragment. */
			j += k;
			continue;
		}
		cg = dtog(&sblock, blkno + j);
		cgbp = cglookup(cg);
		cgp = cgbp->b_un.b_cg;
		if (!check_cgmagic(cg, cgbp))
			return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag));
		baseblk = dtogd(&sblock, blkno + j);
		/* Claim the run in fsck's map and the cg free map. */
		for (k = 0; k < frags; k++) {
			setbmap(blkno + j + k);
			clrbit(cg_blksfree(cgp), baseblk + k);
		}
		n_blks += frags;
		/* Full block vs. partial-fragment accounting. */
		if (frags == sblock.fs_frag)
			cgp->cg_cs.cs_nbfree--;
		else
			cgp->cg_cs.cs_nffree -= frags;
		cgdirty(cgbp);
		return (blkno + j);
	}
	return (0);
}
1197
1198/*
1199 * Check whether a file size is within the limits for the filesystem.
1200 * Return 1 when valid and 0 when too big.
1201 *
1202 * This should match the file size limit in ffs_mountfs().
1203 */
1204int
1205chkfilesize(mode_t mode, u_int64_t filesize)
1206{
1207	u_int64_t kernmaxfilesize;
1208
1209	if (sblock.fs_magic == FS_UFS1_MAGIC)
1210		kernmaxfilesize = (off_t)0x40000000 * sblock.fs_bsize - 1;
1211	else
1212		kernmaxfilesize = sblock.fs_maxfilesize;
1213	if (filesize > kernmaxfilesize ||
1214	    filesize > sblock.fs_maxfilesize ||
1215	    (mode == IFDIR && filesize > MAXDIRSIZE)) {
1216		if (debug)
1217			printf("bad file size %ju:", (uintmax_t)filesize);
1218		return (0);
1219	}
1220	return (1);
1221}
1222
1223/*
1224 * Slow down IO so as to leave some disk bandwidth for other processes
1225 */
1226void
1227slowio_start()
1228{
1229
1230	/* Delay one in every 8 operations */
1231	slowio_pollcnt = (slowio_pollcnt + 1) & 7;
1232	if (slowio_pollcnt == 0) {
1233		gettimeofday(&slowio_starttime, NULL);
1234	}
1235}
1236
1237void
1238slowio_end()
1239{
1240	struct timeval tv;
1241	int delay_usec;
1242
1243	if (slowio_pollcnt != 0)
1244		return;
1245
1246	/* Update the slowdown interval. */
1247	gettimeofday(&tv, NULL);
1248	delay_usec = (tv.tv_sec - slowio_starttime.tv_sec) * 1000000 +
1249	    (tv.tv_usec - slowio_starttime.tv_usec);
1250	if (delay_usec < 64)
1251		delay_usec = 64;
1252	if (delay_usec > 2500000)
1253		delay_usec = 2500000;
1254	slowio_delay_usec = (slowio_delay_usec * 63 + delay_usec) >> 6;
1255	/* delay by 8 times the average IO delay */
1256	if (slowio_delay_usec > 64)
1257		usleep(slowio_delay_usec * 8);
1258}
1259
1260/*
1261 * Find a pathname
1262 */
1263void
1264getpathname(char *namebuf, ino_t curdir, ino_t ino)
1265{
1266	int len;
1267	char *cp;
1268	struct inode ip;
1269	struct inodesc idesc;
1270	static int busy = 0;
1271
1272	if (curdir == ino && ino == UFS_ROOTINO) {
1273		(void)strcpy(namebuf, "/");
1274		return;
1275	}
1276	if (busy || !INO_IS_DVALID(curdir)) {
1277		(void)strcpy(namebuf, "?");
1278		return;
1279	}
1280	busy = 1;
1281	memset(&idesc, 0, sizeof(struct inodesc));
1282	idesc.id_type = DATA;
1283	idesc.id_fix = IGNORE;
1284	cp = &namebuf[MAXPATHLEN - 1];
1285	*cp = '\0';
1286	if (curdir != ino) {
1287		idesc.id_parent = curdir;
1288		goto namelookup;
1289	}
1290	while (ino != UFS_ROOTINO) {
1291		idesc.id_number = ino;
1292		idesc.id_func = findino;
1293		idesc.id_name = strdup("..");
1294		ginode(ino, &ip);
1295		if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) {
1296			irelse(&ip);
1297			free(idesc.id_name);
1298			break;
1299		}
1300		irelse(&ip);
1301		free(idesc.id_name);
1302	namelookup:
1303		idesc.id_number = idesc.id_parent;
1304		idesc.id_parent = ino;
1305		idesc.id_func = findname;
1306		idesc.id_name = namebuf;
1307		ginode(idesc.id_number, &ip);
1308		if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) {
1309			irelse(&ip);
1310			break;
1311		}
1312		irelse(&ip);
1313		len = strlen(namebuf);
1314		cp -= len;
1315		memmove(cp, namebuf, (size_t)len);
1316		*--cp = '/';
1317		if (cp < &namebuf[UFS_MAXNAMLEN])
1318			break;
1319		ino = idesc.id_number;
1320	}
1321	busy = 0;
1322	if (ino != UFS_ROOTINO)
1323		*--cp = '?';
1324	memmove(namebuf, cp, (size_t)(&namebuf[MAXPATHLEN] - cp));
1325}
1326
/*
 * Signal handler for fatal interrupts: clean up via ckfini() without
 * completing the check, then exit.
 * NOTE(review): exit status 12 appears to be the fsck convention for
 * "terminated by signal" — confirm against fsck(8).
 */
void
catch(int sig __unused)
{

	ckfini(0);
	exit(12);
}
1334
1335/*
1336 * When preening, allow a single quit to signal
1337 * a special exit after file system checks complete
1338 * so that reboot sequence may be interrupted.
1339 */
1340void
1341catchquit(int sig __unused)
1342{
1343	printf("returning to single-user after file system check\n");
1344	returntosingle = 1;
1345	(void)signal(SIGQUIT, SIG_DFL);
1346}
1347
1348/*
1349 * determine whether an inode should be fixed.
1350 */
1351int
1352dofix(struct inodesc *idesc, const char *msg)
1353{
1354
1355	switch (idesc->id_fix) {
1356
1357	case DONTKNOW:
1358		if (idesc->id_type == DATA)
1359			direrror(idesc->id_number, msg);
1360		else
1361			pwarn("%s", msg);
1362		if (preen) {
1363			printf(" (SALVAGED)\n");
1364			idesc->id_fix = FIX;
1365			return (ALTERED);
1366		}
1367		if (reply("SALVAGE") == 0) {
1368			idesc->id_fix = NOFIX;
1369			return (0);
1370		}
1371		idesc->id_fix = FIX;
1372		return (ALTERED);
1373
1374	case FIX:
1375		return (ALTERED);
1376
1377	case NOFIX:
1378	case IGNORE:
1379		return (0);
1380
1381	default:
1382		errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix);
1383	}
1384	/* NOTREACHED */
1385	return (0);
1386}
1387
1388#include <stdarg.h>
1389
1390/*
1391 * Print details about a buffer.
1392 */
1393void
1394prtbuf(struct bufarea *bp, const char *fmt, ...)
1395{
1396	va_list ap;
1397	va_start(ap, fmt);
1398	if (preen)
1399		(void)fprintf(stdout, "%s: ", cdevname);
1400	(void)vfprintf(stdout, fmt, ap);
1401	va_end(ap);
1402	printf(": bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, "
1403	    "index %jd\n", bp, BT_BUFTYPE(bp->b_type), (intmax_t) bp->b_bno,
1404	    bp->b_size, bp->b_refcnt, bp->b_flags & B_DIRTY ? "dirty" : "clean",
1405	    (intmax_t) bp->b_index);
1406}
1407
1408/*
1409 * An unexpected inconsistency occurred.
1410 * Die if preening or file system is running with soft dependency protocol,
1411 * otherwise just print message and continue.
1412 */
1413void
1414pfatal(const char *fmt, ...)
1415{
1416	va_list ap;
1417	va_start(ap, fmt);
1418	if (!preen) {
1419		(void)vfprintf(stdout, fmt, ap);
1420		va_end(ap);
1421		if (usedsoftdep)
1422			(void)fprintf(stdout,
1423			    "\nUNEXPECTED SOFT UPDATE INCONSISTENCY\n");
1424		/*
1425		 * Force foreground fsck to clean up inconsistency.
1426		 */
1427		if (bkgrdflag) {
1428			cmd.value = FS_NEEDSFSCK;
1429			cmd.size = 1;
1430			if (sysctlbyname("vfs.ffs.setflags", 0, 0,
1431			    &cmd, sizeof cmd) == -1)
1432				pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n");
1433			fprintf(stdout, "CANNOT RUN IN BACKGROUND\n");
1434			ckfini(0);
1435			exit(EEXIT);
1436		}
1437		return;
1438	}
1439	if (cdevname == NULL)
1440		cdevname = strdup("fsck");
1441	(void)fprintf(stdout, "%s: ", cdevname);
1442	(void)vfprintf(stdout, fmt, ap);
1443	(void)fprintf(stdout,
1444	    "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n",
1445	    cdevname, usedsoftdep ? " SOFT UPDATE " : " ");
1446	/*
1447	 * Force foreground fsck to clean up inconsistency.
1448	 */
1449	if (bkgrdflag) {
1450		cmd.value = FS_NEEDSFSCK;
1451		cmd.size = 1;
1452		if (sysctlbyname("vfs.ffs.setflags", 0, 0,
1453		    &cmd, sizeof cmd) == -1)
1454			pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n");
1455	}
1456	ckfini(0);
1457	exit(EEXIT);
1458}
1459
1460/*
1461 * Pwarn just prints a message when not preening or running soft dependency
1462 * protocol, or a warning (preceded by filename) when preening.
1463 */
1464void
1465pwarn(const char *fmt, ...)
1466{
1467	va_list ap;
1468	va_start(ap, fmt);
1469	if (preen)
1470		(void)fprintf(stdout, "%s: ", cdevname);
1471	(void)vfprintf(stdout, fmt, ap);
1472	va_end(ap);
1473}
1474
1475/*
1476 * Stub for routines from kernel.
1477 */
1478void
1479panic(const char *fmt, ...)
1480{
1481	va_list ap;
1482	va_start(ap, fmt);
1483	pfatal("INTERNAL INCONSISTENCY:");
1484	(void)vfprintf(stdout, fmt, ap);
1485	va_end(ap);
1486	exit(EEXIT);
1487}
1488