// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_ag.h"
#include "xfs_error.h"
#include "xfs_bit.h"
#include "xfs_icache.h"
#include "scrub/scrub.h"
#include "scrub/iscan.h"
#include "scrub/common.h"
#include "scrub/trace.h"

/*
 * Live File Scan
 * ==============
 *
 * Live file scans walk every inode in a live filesystem.  This is more or
 * less like a regular iwalk, except that when we're advancing the scan cursor,
 * we must ensure that inodes cannot be added or deleted anywhere between the
 * old cursor value and the new cursor value.  If we're advancing the cursor
 * by one inode, the caller must hold that inode; if we're finding the next
 * inode to scan, we must grab the AGI and hold it until we've updated the
 * scan cursor.
 *
 * Callers are expected to use this code to scan all files in the filesystem to
 * construct a new metadata index of some kind.  The scan races against other
 * live updates, which means there must be a provision to update the new index
 * when updates are made to inodes that have already been scanned.  The iscan
 * lock can be used in live update hook code to stop the scan and protect this
 * data structure.
 *
 * To keep the new index up to date with other metadata updates being made to
 * the live filesystem, it is assumed that the caller will add hooks as needed
 * to be notified when a metadata update occurs.  The inode scanner must tell
 * the hook code when an inode has been visited with xchk_iscan_mark_visited.
 * Hook functions can use xchk_iscan_want_live_update to decide if the
 * scanner's observations must be updated.
 */
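
/*
 * Illustrative caller pattern (a sketch only; the index-recording helper and
 * the timeout values shown here are hypothetical, not part of this file):
 *
 *	xchk_iscan_start(sc, 5000, 500, &iscan);
 *	while ((error = xchk_iscan_iter(&iscan, &ip)) == 1) {
 *		xfs_ilock(ip, XFS_ILOCK_EXCL);
 *		error = record_inode_in_new_index(sc, ip);
 *		xchk_iscan_mark_visited(&iscan, ip);
 *		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 *		xchk_irele(sc, ip);
 *		if (error)
 *			break;
 *	}
 *	xchk_iscan_iter_finish(&iscan);
 *	...
 *	xchk_iscan_teardown(&iscan);
 *
 * A caller that can tolerate skipping an inode may treat an -EBUSY return
 * from xchk_iscan_iter as a cue to call it again, and live update hooks
 * should consult xchk_iscan_want_live_update before folding an update into
 * the new index.
 */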

/*
 * If the inobt record @rec covers @iscan->skip_ino, mark the inode free so
 * that the scan ignores that inode.
 */
STATIC void
xchk_iscan_mask_skipino(
	struct xchk_iscan	*iscan,
	struct xfs_perag	*pag,
	struct xfs_inobt_rec_incore	*rec,
	xfs_agino_t		lastrecino)
{
	struct xfs_scrub	*sc = iscan->sc;
	struct xfs_mount	*mp = sc->mp;
	xfs_agnumber_t		skip_agno = XFS_INO_TO_AGNO(mp, iscan->skip_ino);
	xfs_agino_t		skip_agino = XFS_INO_TO_AGINO(mp, iscan->skip_ino);

	if (pag->pag_agno != skip_agno)
		return;
	if (skip_agino < rec->ir_startino)
		return;
	if (skip_agino > lastrecino)
		return;

	rec->ir_free |= xfs_inobt_maskn(skip_agino - rec->ir_startino, 1);
}

/*
 * Set *cursor to the next allocated inode after whatever it's set to now.
 * If there are no more inodes in this AG, cursor is set to NULLAGINO.
 */
STATIC int
xchk_iscan_find_next(
	struct xchk_iscan	*iscan,
	struct xfs_buf		*agi_bp,
	struct xfs_perag	*pag,
	xfs_inofree_t		*allocmaskp,
	xfs_agino_t		*cursor,
	uint8_t			*nr_inodesp)
{
	struct xfs_scrub	*sc = iscan->sc;
	struct xfs_inobt_rec_incore	rec;
	struct xfs_btree_cur	*cur;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_trans	*tp = sc->tp;
	xfs_agnumber_t		agno = pag->pag_agno;
	xfs_agino_t		lastino = NULLAGINO;
	xfs_agino_t		first, last;
	xfs_agino_t		agino = *cursor;
	int			has_rec;
	int			error;

	/* If the cursor is beyond the end of this AG, move to the next one. */
	xfs_agino_range(mp, agno, &first, &last);
	if (agino > last) {
		*cursor = NULLAGINO;
		return 0;
	}

	/*
	 * Look up the inode chunk for the current cursor position.  If there
	 * is no chunk here, we want the next one.
	 */
	cur = xfs_inobt_init_cursor(pag, tp, agi_bp);
	error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has_rec);
	if (!error && !has_rec)
		error = xfs_btree_increment(cur, 0, &has_rec);
	for (; !error; error = xfs_btree_increment(cur, 0, &has_rec)) {
		xfs_inofree_t	allocmask;

		/*
		 * If we've run out of inobt records in this AG, move the
		 * cursor on to the next AG and exit.  The caller can try
		 * again with the next AG.
		 */
		if (!has_rec) {
			*cursor = NULLAGINO;
			break;
		}

		error = xfs_inobt_get_rec(cur, &rec, &has_rec);
		if (error)
			break;
		if (!has_rec) {
			error = -EFSCORRUPTED;
			break;
		}

		/* Make sure that we always move forward. */
		if (lastino != NULLAGINO &&
		    XFS_IS_CORRUPT(mp, lastino >= rec.ir_startino)) {
			error = -EFSCORRUPTED;
			break;
		}
		lastino = rec.ir_startino + XFS_INODES_PER_CHUNK - 1;

		/*
		 * If this record only covers inodes that come before the
		 * cursor, advance to the next record.
		 */
		if (rec.ir_startino + XFS_INODES_PER_CHUNK <= agino)
			continue;

		if (iscan->skip_ino)
			xchk_iscan_mask_skipino(iscan, pag, &rec, lastino);

		/*
		 * If the incoming lookup put us in the middle of an inobt
		 * record, mark it and the previous inodes "free" so that the
		 * search for allocated inodes will start at the cursor.
		 * We don't care about ir_freecount here.
		 */
		if (agino >= rec.ir_startino)
			rec.ir_free |= xfs_inobt_maskn(0,
						agino + 1 - rec.ir_startino);

		/*
		 * If there are allocated inodes in this chunk, find them
		 * and update the scan cursor.
		 */
		allocmask = ~rec.ir_free;
		if (hweight64(allocmask) > 0) {
			int	next = xfs_lowbit64(allocmask);

			ASSERT(next >= 0);
			*cursor = rec.ir_startino + next;
			*allocmaskp = allocmask >> next;
			*nr_inodesp = XFS_INODES_PER_CHUNK - next;
			break;
		}
	}

	xfs_btree_del_cursor(cur, error);
	return error;
}

/*
 * Advance both the scan and the visited cursors.
 *
 * The inumber address space for a given filesystem is sparse, which means that
 * the scan cursor can jump a long ways in a single iter() call.  There are no
 * inodes in these sparse areas, so we must move the visited cursor forward at
 * the same time so that the scan user can receive live updates for inodes that
 * may get created once we release the AGI buffer.
 */
static inline void
xchk_iscan_move_cursor(
	struct xchk_iscan	*iscan,
	xfs_agnumber_t		agno,
	xfs_agino_t		agino)
{
	struct xfs_scrub	*sc = iscan->sc;
	struct xfs_mount	*mp = sc->mp;
	xfs_ino_t		cursor, visited;

	BUILD_BUG_ON(XFS_MAXINUMBER == NULLFSINO);

	/*
	 * Special-case ino == 0 here so that we never set visited_ino to
	 * NULLFSINO when wrapping around EOFS, for that will let through all
	 * live updates.
	 */
	cursor = XFS_AGINO_TO_INO(mp, agno, agino);
	if (cursor == 0)
		visited = XFS_MAXINUMBER;
	else
		visited = cursor - 1;

	mutex_lock(&iscan->lock);
	iscan->cursor_ino = cursor;
	iscan->__visited_ino = visited;
	trace_xchk_iscan_move_cursor(iscan);
	mutex_unlock(&iscan->lock);
}
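
/*
 * For example (numbers purely illustrative): if the scan cursor jumps from
 * inode 200 to inode 5000 because the intervening inumber space is sparse,
 * __visited_ino becomes 4999.  An inode created at, say, 3000 after the AGI
 * is released will never be scanned, but xchk_iscan_want_live_update will
 * now pass updates for it through to the caller, so the new index stays
 * coherent.
 */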

/*
 * Mark the scan as finished by setting both the scan cursor and the visited
 * cursor to NULLFSINO.  From that point on, xchk_iscan_want_live_update
 * returns true for every inode, so all live updates reach the caller.
 */
static inline void
xchk_iscan_finish(
	struct xchk_iscan	*iscan)
{
	mutex_lock(&iscan->lock);
	iscan->cursor_ino = NULLFSINO;

	/* All live updates will be applied from now on */
	iscan->__visited_ino = NULLFSINO;

	mutex_unlock(&iscan->lock);
}

/* Mark an inode scan finished before we actually scan anything. */
void
xchk_iscan_finish_early(
	struct xchk_iscan	*iscan)
{
	ASSERT(iscan->cursor_ino == iscan->scan_start_ino);
	ASSERT(iscan->__visited_ino == iscan->scan_start_ino);

	xchk_iscan_finish(iscan);
}

/*
 * Grab the AGI to advance the inode scan.  Returns 0 if *agi_bpp is now set,
 * -ECANCELED if the live scan aborted, -EBUSY if the AGI could not be grabbed,
 * or the usual negative errno.
 */
STATIC int
xchk_iscan_read_agi(
	struct xchk_iscan	*iscan,
	struct xfs_perag	*pag,
	struct xfs_buf		**agi_bpp)
{
	struct xfs_scrub	*sc = iscan->sc;
	unsigned long		relax;
	int			ret;

	if (!xchk_iscan_agi_needs_trylock(iscan))
		return xfs_ialloc_read_agi(pag, sc->tp, 0, agi_bpp);

	relax = msecs_to_jiffies(iscan->iget_retry_delay);
	do {
		ret = xfs_ialloc_read_agi(pag, sc->tp, XFS_IALLOC_FLAG_TRYLOCK,
				agi_bpp);
		if (ret != -EAGAIN)
			return ret;
		if (!iscan->iget_timeout ||
		    time_is_before_jiffies(iscan->__iget_deadline))
			return -EBUSY;

		trace_xchk_iscan_agi_retry_wait(iscan);
	} while (!schedule_timeout_killable(relax) &&
		 !xchk_iscan_aborted(iscan));
	return -ECANCELED;
}

/*
 * Advance ino to the next inode that the inobt thinks is allocated, being
 * careful to jump to the next AG if we've reached the right end of this AG's
 * inode btree.  Advancing ino effectively means that we've pushed the inode
 * scan forward, so set the visited cursor to (ino - 1) so that our live update
 * predicates will track inode allocations in that part of the inode number
 * key space once we release the AGI buffer.
 *
 * Returns 1 if there's a new inode to examine, 0 if we've run out of inodes,
 * -ECANCELED if the live scan aborted, or the usual negative errno.
 */
STATIC int
xchk_iscan_advance(
	struct xchk_iscan	*iscan,
	struct xfs_perag	**pagp,
	struct xfs_buf		**agi_bpp,
	xfs_inofree_t		*allocmaskp,
	uint8_t			*nr_inodesp)
{
	struct xfs_scrub	*sc = iscan->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_buf		*agi_bp;
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;
	xfs_agino_t		agino;
	int			ret;

	ASSERT(iscan->cursor_ino >= iscan->__visited_ino);

	do {
		if (xchk_iscan_aborted(iscan))
			return -ECANCELED;

		agno = XFS_INO_TO_AGNO(mp, iscan->cursor_ino);
		pag = xfs_perag_get(mp, agno);
		if (!pag)
			return -ECANCELED;

		ret = xchk_iscan_read_agi(iscan, pag, &agi_bp);
		if (ret)
			goto out_pag;

		agino = XFS_INO_TO_AGINO(mp, iscan->cursor_ino);
		ret = xchk_iscan_find_next(iscan, agi_bp, pag, allocmaskp,
				&agino, nr_inodesp);
		if (ret)
			goto out_buf;

		if (agino != NULLAGINO) {
			/*
			 * Found the next inode in this AG, so return it along
			 * with the AGI buffer and the perag structure to
			 * ensure it cannot go away.
			 */
			xchk_iscan_move_cursor(iscan, agno, agino);
			*agi_bpp = agi_bp;
			*pagp = pag;
			return 1;
		}

		/*
		 * Did not find any more inodes in this AG, move on to the next
		 * AG.
		 */
		agno = (agno + 1) % mp->m_sb.sb_agcount;
		xchk_iscan_move_cursor(iscan, agno, 0);
		xfs_trans_brelse(sc->tp, agi_bp);
		xfs_perag_put(pag);

		trace_xchk_iscan_advance_ag(iscan);
	} while (iscan->cursor_ino != iscan->scan_start_ino);

	xchk_iscan_finish(iscan);
	return 0;

out_buf:
	xfs_trans_brelse(sc->tp, agi_bp);
out_pag:
	xfs_perag_put(pag);
	return ret;
}

/*
 * Grabbing the inode failed, so we need to back up the scan and ask the caller
 * to try to _advance the scan again.  Returns -EBUSY if we've run out of retry
 * opportunities, -ECANCELED if the process has a fatal signal pending, or
 * -EAGAIN if we should try again.
 */
STATIC int
xchk_iscan_iget_retry(
	struct xchk_iscan	*iscan,
	bool			wait)
{
	ASSERT(iscan->cursor_ino == iscan->__visited_ino + 1);

	if (!iscan->iget_timeout ||
	    time_is_before_jiffies(iscan->__iget_deadline))
		return -EBUSY;

	if (wait) {
		unsigned long	relax;

		/*
		 * Sleep for a period of time to let the rest of the system
		 * catch up.  If we return early, someone sent a kill signal to
		 * the calling process.
		 */
		relax = msecs_to_jiffies(iscan->iget_retry_delay);
		trace_xchk_iscan_iget_retry_wait(iscan);

		if (schedule_timeout_killable(relax) ||
		    xchk_iscan_aborted(iscan))
			return -ECANCELED;
	}

	iscan->cursor_ino--;
	return -EAGAIN;
}

/*
 * For an inode scan, we hold the AGI and want to try to grab a batch of
 * inodes.  Holding the AGI prevents inodegc from clearing freed inodes, so we
 * must use noretry here.  For every inode after the first one in the batch, we
 * don't want to wait either, so we use noretry there too.  Finally, use
 * dontcache to avoid polluting the cache.
 */
#define ISCAN_IGET_FLAGS	(XFS_IGET_NORETRY | XFS_IGET_DONTCACHE)

/*
 * Grab an inode as part of an inode scan.  While scanning this inode, the
 * caller must ensure that no other threads can modify the inode until a call
 * to xchk_iscan_mark_visited succeeds.
 *
 * Returns the number of incore inodes grabbed; -EAGAIN if the caller should
 * call xchk_iscan_advance again; -EBUSY if we couldn't grab an inode;
 * -ECANCELED if there's a fatal signal pending; or some other negative errno.
 */
STATIC int
xchk_iscan_iget(
	struct xchk_iscan	*iscan,
	struct xfs_perag	*pag,
	struct xfs_buf		*agi_bp,
	xfs_inofree_t		allocmask,
	uint8_t			nr_inodes)
{
	struct xfs_scrub	*sc = iscan->sc;
	struct xfs_mount	*mp = sc->mp;
	xfs_ino_t		ino = iscan->cursor_ino;
	unsigned int		idx = 0;
	unsigned int		i;
	int			error;

	ASSERT(iscan->__inodes[0] == NULL);

	/* Fill the first slot in the inode array. */
	error = xfs_iget(sc->mp, sc->tp, ino, ISCAN_IGET_FLAGS, 0,
			&iscan->__inodes[idx]);

	trace_xchk_iscan_iget(iscan, error);

	if (error == -ENOENT || error == -EAGAIN) {
		xfs_trans_brelse(sc->tp, agi_bp);
		xfs_perag_put(pag);

		/*
		 * It's possible that this inode has lost all of its links but
		 * hasn't yet been inactivated.  If we don't have a transaction
		 * or it's not writable, flush the inodegc workers and wait.
		 * If we have a non-empty transaction, we must not block on
		 * inodegc, which allocates its own transactions.
		 */
		if (sc->tp && !(sc->tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
			xfs_inodegc_push(mp);
		else
			xfs_inodegc_flush(mp);
		return xchk_iscan_iget_retry(iscan, true);
	}

	if (error == -EINVAL) {
		xfs_trans_brelse(sc->tp, agi_bp);
		xfs_perag_put(pag);

		/*
		 * We thought the inode was allocated, but the inode btree
		 * lookup failed, which means that it was freed since the last
		 * time we advanced the cursor.  Back up and try again.  This
		 * should never happen since we still hold the AGI buffer from
		 * the inobt check, but we need to be careful about infinite
		 * loops.
		 */
		return xchk_iscan_iget_retry(iscan, false);
	}

	if (error) {
		xfs_trans_brelse(sc->tp, agi_bp);
		xfs_perag_put(pag);
		return error;
	}
	idx++;
	ino++;
	allocmask >>= 1;

	/*
	 * Now that we've filled the first slot in __inodes, try to fill the
	 * rest of the batch with consecutively ordered inodes to reduce the
	 * number of _iter calls.  Make a bitmap of unallocated inodes from the
	 * zeroes in the inuse bitmap; these inodes will not be scanned, but
	 * the _want_live_update predicate will pass through all live updates.
	 *
	 * If we can't iget an allocated inode, stop and return what we have.
	 */
	mutex_lock(&iscan->lock);
	iscan->__batch_ino = ino - 1;
	iscan->__skipped_inomask = 0;
	mutex_unlock(&iscan->lock);

	for (i = 1; i < nr_inodes; i++, ino++, allocmask >>= 1) {
		if (!(allocmask & 1)) {
			ASSERT(!(iscan->__skipped_inomask & (1ULL << i)));

			mutex_lock(&iscan->lock);
			iscan->cursor_ino = ino;
			iscan->__skipped_inomask |= (1ULL << i);
			mutex_unlock(&iscan->lock);
			continue;
		}

		ASSERT(iscan->__inodes[idx] == NULL);

		error = xfs_iget(sc->mp, sc->tp, ino, ISCAN_IGET_FLAGS, 0,
				&iscan->__inodes[idx]);
		if (error)
			break;

		mutex_lock(&iscan->lock);
		iscan->cursor_ino = ino;
		mutex_unlock(&iscan->lock);
		idx++;
	}

	trace_xchk_iscan_iget_batch(sc->mp, iscan, nr_inodes, idx);
	xfs_trans_brelse(sc->tp, agi_bp);
	xfs_perag_put(pag);
	return idx;
}

/*
 * Advance the visit cursor to reflect skipped inodes beyond whatever we
 * scanned.
 */
STATIC void
xchk_iscan_finish_batch(
	struct xchk_iscan	*iscan)
{
	xfs_ino_t		highest_skipped;

	mutex_lock(&iscan->lock);

	if (iscan->__batch_ino != NULLFSINO) {
		highest_skipped = iscan->__batch_ino +
					xfs_highbit64(iscan->__skipped_inomask);
		iscan->__visited_ino = max(iscan->__visited_ino,
					   highest_skipped);

		trace_xchk_iscan_skip(iscan);
	}

	iscan->__batch_ino = NULLFSINO;
	iscan->__skipped_inomask = 0;

	mutex_unlock(&iscan->lock);
}

/*
 * Advance the inode scan cursor to the next allocated inode and return up to
 * 64 consecutive allocated inodes starting with the cursor position.
 */
STATIC int
xchk_iscan_iter_batch(
	struct xchk_iscan	*iscan)
{
	struct xfs_scrub	*sc = iscan->sc;
	int			ret;

	xchk_iscan_finish_batch(iscan);

	if (iscan->iget_timeout)
		iscan->__iget_deadline = jiffies +
					 msecs_to_jiffies(iscan->iget_timeout);

	do {
		struct xfs_buf	*agi_bp = NULL;
		struct xfs_perag *pag = NULL;
		xfs_inofree_t	allocmask = 0;
		uint8_t		nr_inodes = 0;

		ret = xchk_iscan_advance(iscan, &pag, &agi_bp, &allocmask,
				&nr_inodes);
		if (ret != 1)
			return ret;

		if (xchk_iscan_aborted(iscan)) {
			xfs_trans_brelse(sc->tp, agi_bp);
			xfs_perag_put(pag);
			ret = -ECANCELED;
			break;
		}

		ret = xchk_iscan_iget(iscan, pag, agi_bp, allocmask, nr_inodes);
	} while (ret == -EAGAIN);

	return ret;
}

/*
 * Advance the inode scan cursor to the next allocated inode and return the
 * incore inode structure associated with it.
 *
 * Returns 1 if there's a new inode to examine, 0 if we've run out of inodes,
 * -ECANCELED if the live scan aborted, -EBUSY if the incore inode could not be
 * grabbed, or the usual negative errno.
 *
 * If the function returns -EBUSY and the caller can handle skipping an inode,
 * it may call this function again to continue the scan with the next allocated
 * inode.
 */
int
xchk_iscan_iter(
	struct xchk_iscan	*iscan,
	struct xfs_inode	**ipp)
{
	unsigned int		i;
	int			error;

	/* Find a cached inode, or go get another batch. */
	for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
		if (iscan->__inodes[i])
			goto foundit;
	}

	error = xchk_iscan_iter_batch(iscan);
	if (error <= 0)
		return error;

	ASSERT(iscan->__inodes[0] != NULL);
	i = 0;

foundit:
	/* Give the caller our reference. */
	*ipp = iscan->__inodes[i];
	iscan->__inodes[i] = NULL;
	return 1;
}

/* Clean up an xchk_iscan_iter call by dropping any inodes that we still hold. */
void
xchk_iscan_iter_finish(
	struct xchk_iscan	*iscan)
{
	struct xfs_scrub	*sc = iscan->sc;
	unsigned int		i;

	for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
		if (iscan->__inodes[i]) {
			xchk_irele(sc, iscan->__inodes[i]);
			iscan->__inodes[i] = NULL;
		}
	}
}

/* Mark this inode scan finished and release resources. */
void
xchk_iscan_teardown(
	struct xchk_iscan	*iscan)
{
	xchk_iscan_iter_finish(iscan);
	xchk_iscan_finish(iscan);
	mutex_destroy(&iscan->lock);
}

/* Pick an AG from which to start a scan. */
static inline xfs_ino_t
xchk_iscan_rotor(
	struct xfs_mount	*mp)
{
	static atomic_t		agi_rotor;
	unsigned int		r = atomic_inc_return(&agi_rotor) - 1;

	/*
	 * Rotoring *backwards* through the AGs, so we add one here before
	 * subtracting from the agcount to arrive at an AG number.
	 */
	r = (r % mp->m_sb.sb_agcount) + 1;

	return XFS_AGINO_TO_INO(mp, mp->m_sb.sb_agcount - r, 0);
}
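
/*
 * For example, with sb_agcount == 4, successive calls see r == 1, 2, 3, 4,
 * 1, ... after the modulo-and-increment, so successive scans start in AG 3,
 * 2, 1, 0, 3, ..., spreading new scans across the filesystem.
 */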

/*
 * Set ourselves up to start an inode scan.  If the @iget_timeout and
 * @iget_retry_delay parameters are set, the scan will try to iget each inode
 * for @iget_timeout milliseconds.  If an iget call indicates that the inode is
 * waiting to be inactivated, the CPU will relax for @iget_retry_delay
 * milliseconds after pushing the inactivation workers.
 */
void
xchk_iscan_start(
	struct xfs_scrub	*sc,
	unsigned int		iget_timeout,
	unsigned int		iget_retry_delay,
	struct xchk_iscan	*iscan)
{
	xfs_ino_t		start_ino;

	start_ino = xchk_iscan_rotor(sc->mp);

	iscan->__batch_ino = NULLFSINO;
	iscan->__skipped_inomask = 0;

	iscan->sc = sc;
	clear_bit(XCHK_ISCAN_OPSTATE_ABORTED, &iscan->__opstate);
	iscan->iget_timeout = iget_timeout;
	iscan->iget_retry_delay = iget_retry_delay;
	iscan->__visited_ino = start_ino;
	iscan->cursor_ino = start_ino;
	iscan->scan_start_ino = start_ino;
	mutex_init(&iscan->lock);
	memset(iscan->__inodes, 0, sizeof(iscan->__inodes));

	trace_xchk_iscan_start(iscan, start_ino);
}

/*
 * Mark this inode as having been visited.  Callers must hold a sufficiently
 * exclusive lock on the inode to prevent concurrent modifications.
 */
void
xchk_iscan_mark_visited(
	struct xchk_iscan	*iscan,
	struct xfs_inode	*ip)
{
	mutex_lock(&iscan->lock);
	iscan->__visited_ino = ip->i_ino;
	trace_xchk_iscan_visit(iscan);
	mutex_unlock(&iscan->lock);
}

/*
 * Did we skip this inode because it wasn't allocated when we loaded the batch?
 * If so, it is newly allocated and will not be scanned.  All live updates to
 * this inode must be passed to the caller to maintain scan correctness.
 */
static inline bool
xchk_iscan_skipped(
	const struct xchk_iscan	*iscan,
	xfs_ino_t		ino)
{
	if (iscan->__batch_ino == NULLFSINO)
		return false;
	if (ino < iscan->__batch_ino)
		return false;
	if (ino >= iscan->__batch_ino + XFS_INODES_PER_CHUNK)
		return false;

	return iscan->__skipped_inomask & (1ULL << (ino - iscan->__batch_ino));
}
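
/*
 * For example (numbers illustrative): if __batch_ino is 128 and inode 131 was
 * not yet allocated when the batch was loaded, bit 3 of __skipped_inomask is
 * set.  A later update to inode 131 is therefore passed to the caller, since
 * the scanner itself will never visit that inode.
 */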

/*
 * Do we need a live update for this inode?  This is true if the scanner thread
 * has visited this inode and the scan hasn't been aborted due to errors.
 * Callers must hold a sufficiently exclusive lock on the inode to prevent
 * scanners from reading any inode metadata.
 */
bool
xchk_iscan_want_live_update(
	struct xchk_iscan	*iscan,
	xfs_ino_t		ino)
{
	bool			ret = false;

	if (xchk_iscan_aborted(iscan))
		return false;

	mutex_lock(&iscan->lock);

	trace_xchk_iscan_want_live_update(iscan, ino);

	/* Scan is finished, caller should receive all updates. */
	if (iscan->__visited_ino == NULLFSINO) {
		ret = true;
		goto unlock;
	}

	/*
	 * No inodes have been visited yet, so the visited cursor points at the
	 * start of the scan range.  The caller should not receive any updates.
	 */
	if (iscan->scan_start_ino == iscan->__visited_ino) {
		ret = false;
		goto unlock;
	}

	/*
	 * This inode was not allocated at the time of the iscan batch.
	 * The caller should receive all updates.
	 */
	if (xchk_iscan_skipped(iscan, ino)) {
		ret = true;
		goto unlock;
	}

	/*
	 * The visited cursor hasn't yet wrapped around the end of the FS.  If
	 * @ino is inside the starred range, the caller should receive updates:
	 *
	 * 0 ------------ S ************ V ------------ EOFS
	 */
	if (iscan->scan_start_ino <= iscan->__visited_ino) {
		if (ino >= iscan->scan_start_ino &&
		    ino <= iscan->__visited_ino)
			ret = true;

		goto unlock;
	}

	/*
	 * The visited cursor wrapped around the end of the FS.  If @ino is
	 * inside the starred range, the caller should receive updates:
	 *
	 * 0 ************ V ------------ S ************ EOFS
	 */
	if (ino >= iscan->scan_start_ino || ino <= iscan->__visited_ino)
		ret = true;

unlock:
	mutex_unlock(&iscan->lock);
	return ret;
}
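
/*
 * Worked example of the non-wrapped case (values illustrative): if the scan
 * started at inode 1000 and __visited_ino is currently 5000, an update to
 * inode 3000 must be applied by the hook because the scanner has already
 * passed it, while an update to inode 9000 is ignored because the scanner
 * will observe that inode itself when it gets there.
 */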