// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#ifndef __XFS_SCRUB_SCRUB_H__
#define __XFS_SCRUB_SCRUB_H__

struct xfs_scrub;

struct xchk_relax {
	unsigned long	next_resched;
	unsigned int	resched_nr;
	bool		interruptible;
};

/* Yield to the scheduler at most 10x per second. */
#define XCHK_RELAX_NEXT		(jiffies + (HZ / 10))

#define INIT_XCHK_RELAX	\
	(struct xchk_relax){ \
		.next_resched	= XCHK_RELAX_NEXT, \
		.resched_nr	= 0, \
		.interruptible	= true, \
	}

/*
 * Relax during a scrub operation and exit if there's a fatal signal pending.
 *
 * If preemption is disabled, we need to yield to the scheduler every now and
 * then so that we don't run afoul of the soft lockup watchdog or RCU stall
 * detector.  cond_resched calls are somewhat expensive (~5ns) so we want to
 * ratelimit this to 10x per second.  Amortize the cost of the other checks by
 * only doing it once every 100 calls.
 */
static inline int xchk_maybe_relax(struct xchk_relax *widget)
{
	/* Amortize the cost of scheduling and checking signals. */
	if (likely(++widget->resched_nr < 100))
		return 0;
	widget->resched_nr = 0;

	if (unlikely(widget->next_resched <= jiffies)) {
		cond_resched();
		widget->next_resched = XCHK_RELAX_NEXT;
	}

	if (widget->interruptible && fatal_signal_pending(current))
		return -EINTR;

	return 0;
}
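
/*
 * Example (sketch, not part of the API): a hypothetical long-running scan
 * loop initializes the widget once and pokes it per record; the
 * has_more_records()/check_one_record() helpers are placeholders.
 *
 *	struct xchk_relax	relax = INIT_XCHK_RELAX;
 *	int			error;
 *
 *	while (has_more_records(sc)) {
 *		error = xchk_maybe_relax(&relax);
 *		if (error)
 *			return error;
 *		check_one_record(sc);
 *	}
 */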

/*
 * Standard flags for allocating memory within scrub.  NOFS context is
 * configured by the process allocation scope.  Scrub and repair must be able
 * to back out gracefully if there isn't enough memory.  Force-cast to avoid
 * complaints from static checkers.
 */
#define XCHK_GFP_FLAGS	((__force gfp_t)(GFP_KERNEL | __GFP_NOWARN | \
					 __GFP_RETRY_MAYFAIL))
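
/*
 * Example (sketch): because __GFP_RETRY_MAYFAIL allocations can fail,
 * callers are expected to bail out cleanly; the buffer size here is
 * illustrative only.
 *
 *	sc->buf = kvzalloc(bufsize, XCHK_GFP_FLAGS);
 *	if (!sc->buf)
 *		return -ENOMEM;
 */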

/*
 * For opening files by handle for fsck operations, we don't trust the inumber
 * or the allocation state; therefore, perform an untrusted lookup.  We don't
 * want these inodes to pollute the cache, so mark them for immediate removal.
 */
#define XCHK_IGET_FLAGS	(XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE)
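
/*
 * Example (sketch): a handle-based lookup might pass these flags to
 * xfs_iget(); @ino is the caller-supplied (and hence untrusted) inode
 * number, and the transaction and lock flags shown are illustrative.
 *
 *	error = xfs_iget(mp, NULL, ino, XCHK_IGET_FLAGS, 0, &ip);
 *	if (error)
 *		return error;
 */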

/* Type info and names for the scrub types. */
enum xchk_type {
	ST_NONE = 1,	/* disabled */
	ST_PERAG,	/* per-AG metadata */
	ST_FS,		/* per-FS metadata */
	ST_INODE,	/* per-inode metadata */
};

struct xchk_meta_ops {
	/* Acquire whatever resources are needed for the operation. */
	int		(*setup)(struct xfs_scrub *sc);

	/* Examine metadata for errors. */
	int		(*scrub)(struct xfs_scrub *sc);

	/* Repair or optimize the metadata. */
	int		(*repair)(struct xfs_scrub *sc);

	/*
	 * Re-scrub the metadata we repaired, in case there's extra work that
	 * we need to do to check our repair work.  If this is NULL, we'll use
	 * the ->scrub function pointer, assuming that the regular scrub is
	 * sufficient.
	 */
	int		(*repair_eval)(struct xfs_scrub *sc);

	/* Decide if we even have this piece of metadata. */
	bool		(*has)(struct xfs_mount *mp);

	/* Type describing required/allowed inputs. */
	enum xchk_type	type;
};
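
/*
 * Sketch of the dispatch implied by the ->repair_eval comment above: a
 * hypothetical caller re-checks a completed repair, falling back to the
 * regular scrub method when no dedicated evaluator is provided.
 *
 *	if (sc->ops->repair_eval)
 *		error = sc->ops->repair_eval(sc);
 *	else
 *		error = sc->ops->scrub(sc);
 */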

/* Buffer pointers and btree cursors for an entire AG. */
struct xchk_ag {
	struct xfs_perag	*pag;

	/* AG btree roots */
	struct xfs_buf		*agf_bp;
	struct xfs_buf		*agi_bp;

	/* AG btrees */
	struct xfs_btree_cur	*bno_cur;
	struct xfs_btree_cur	*cnt_cur;
	struct xfs_btree_cur	*ino_cur;
	struct xfs_btree_cur	*fino_cur;
	struct xfs_btree_cur	*rmap_cur;
	struct xfs_btree_cur	*refc_cur;
};

struct xfs_scrub {
	/* General scrub state. */
	struct xfs_mount		*mp;
	struct xfs_scrub_metadata	*sm;
	const struct xchk_meta_ops	*ops;
	struct xfs_trans		*tp;

	/* File that scrub was called with. */
	struct file			*file;

	/*
	 * File that is undergoing the scrub operation.  This can differ from
	 * the file that scrub was called with if we're checking file-based fs
	 * metadata (e.g. rt bitmaps) or if we're doing a scrub-by-handle for
	 * something that can't be opened directly (e.g. symlinks).
	 */
	struct xfs_inode		*ip;

	/* Kernel memory buffer used by scrubbers; freed at teardown. */
	void				*buf;

	/*
	 * Clean up resources owned by whatever is in the buffer.  Cleanup can
	 * be deferred with this hook as a means for scrub functions to pass
	 * data to repair functions.  This function must not free the buffer
	 * itself.
	 */
	void				(*buf_cleanup)(void *buf);

	/* xfile used by the scrubbers; freed at teardown. */
	struct xfile			*xfile;

	/* Buffer target for in-memory btrees; also freed at teardown. */
	struct xfs_buftarg		*xmbtp;

	/* Lock flags for @ip. */
	uint				ilock_flags;

	/* The orphanage, for stashing files that have lost their parent. */
	uint				orphanage_ilock_flags;
	struct xfs_inode		*orphanage;

	/* A temporary file on this filesystem, for staging new metadata. */
	struct xfs_inode		*tempip;
	uint				temp_ilock_flags;

	/* See the XCHK/XREP state flags below. */
	unsigned int			flags;

	/*
	 * The XFS_SICK_* flags that correspond to the metadata being scrubbed
	 * or repaired.  We will use this mask to update the in-core fs health
	 * status with whatever we find.
	 */
	unsigned int			sick_mask;

	/* Next time we want to cond_resched(). */
	struct xchk_relax		relax;

	/* State tracking for single-AG operations. */
	struct xchk_ag			sa;
};
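
/*
 * Example (sketch): a scrubber that stashes findings in @buf for a later
 * repair pass registers a cleanup hook rather than freeing eagerly;
 * xchk_example_cleanup is a placeholder name.
 *
 *	sc->buf = kvzalloc(bufsize, XCHK_GFP_FLAGS);
 *	if (!sc->buf)
 *		return -ENOMEM;
 *	sc->buf_cleanup = xchk_example_cleanup;
 */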

/* XCHK state flags grow up from zero, XREP state flags grow down from 2^31 */
#define XCHK_TRY_HARDER		(1U << 0)  /* can't get resources, try again */
#define XCHK_HAVE_FREEZE_PROT	(1U << 1)  /* do we have freeze protection? */
#define XCHK_FSGATES_DRAIN	(1U << 2)  /* defer ops draining enabled */
#define XCHK_NEED_DRAIN		(1U << 3)  /* scrub needs to drain defer ops */
#define XCHK_FSGATES_QUOTA	(1U << 4)  /* quota live update enabled */
#define XCHK_FSGATES_DIRENTS	(1U << 5)  /* directory live update enabled */
#define XCHK_FSGATES_RMAP	(1U << 6)  /* rmapbt live update enabled */
#define XREP_RESET_PERAG_RESV	(1U << 30) /* must reset AG space reservation */
#define XREP_ALREADY_FIXED	(1U << 31) /* checking our repair work */

/*
 * The XCHK_FSGATES* flags reflect functionality in the main filesystem that
 * is only enabled for this particular online fsck.  When not in use, the
 * features are gated off via dynamic code patching, which is why the state
 * must be enabled during scrub setup and can only be torn down afterwards.
 */
#define XCHK_FSGATES_ALL	(XCHK_FSGATES_DRAIN | \
				 XCHK_FSGATES_QUOTA | \
				 XCHK_FSGATES_DIRENTS | \
				 XCHK_FSGATES_RMAP)
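
/*
 * Sketch of how teardown might consume these flags, assuming each gate
 * has a matching disable helper (xfs_drain_wait_disable() is shown as an
 * assumed example for the drain gate, not a confirmed name):
 *
 *	if (sc->flags & XCHK_FSGATES_DRAIN)
 *		xfs_drain_wait_disable();
 */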

struct xfs_scrub_subord {
	struct xfs_scrub	sc;
	struct xfs_scrub	*parent_sc;
	unsigned int		old_smtype;
	unsigned int		old_smflags;
};

struct xfs_scrub_subord *xchk_scrub_create_subord(struct xfs_scrub *sc,
		unsigned int subtype);
void xchk_scrub_free_subord(struct xfs_scrub_subord *sub);
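
/*
 * Example (sketch): run a subordinate scrub of a related metadata type
 * and tear it down afterwards; error handling is elided and the subtype
 * shown is illustrative.
 *
 *	sub = xchk_scrub_create_subord(sc, XFS_SCRUB_TYPE_RTSUM);
 *	error = sub->sc.ops->scrub(&sub->sc);
 *	xchk_scrub_free_subord(sub);
 */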

/*
 * We /could/ terminate a scrub/repair operation early.  If we're not
 * in a good place to continue (fatal signal, etc.) then bail out.
 * Note that we're careful not to make any judgements about *error.
 */
static inline bool
xchk_should_terminate(
	struct xfs_scrub	*sc,
	int			*error)
{
	if (xchk_maybe_relax(&sc->relax)) {
		if (*error == 0)
			*error = -EINTR;
		return true;
	}
	return false;
}
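
/*
 * Typical calling pattern (sketch): long-running loops bail out without
 * clobbering an error that was recorded earlier in the iteration.
 *
 *	while (!done) {
 *		if (xchk_should_terminate(sc, &error))
 *			break;
 *		...
 *	}
 *	return error;
 */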

/* Metadata scrubbers */
int xchk_tester(struct xfs_scrub *sc);
int xchk_superblock(struct xfs_scrub *sc);
int xchk_agf(struct xfs_scrub *sc);
int xchk_agfl(struct xfs_scrub *sc);
int xchk_agi(struct xfs_scrub *sc);
int xchk_allocbt(struct xfs_scrub *sc);
int xchk_iallocbt(struct xfs_scrub *sc);
int xchk_rmapbt(struct xfs_scrub *sc);
int xchk_refcountbt(struct xfs_scrub *sc);
int xchk_inode(struct xfs_scrub *sc);
int xchk_bmap_data(struct xfs_scrub *sc);
int xchk_bmap_attr(struct xfs_scrub *sc);
int xchk_bmap_cow(struct xfs_scrub *sc);
int xchk_directory(struct xfs_scrub *sc);
int xchk_xattr(struct xfs_scrub *sc);
int xchk_symlink(struct xfs_scrub *sc);
int xchk_parent(struct xfs_scrub *sc);
int xchk_dirtree(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_RT
int xchk_rtbitmap(struct xfs_scrub *sc);
int xchk_rtsummary(struct xfs_scrub *sc);
#else
static inline int
xchk_rtbitmap(struct xfs_scrub *sc)
{
	return -ENOENT;
}
static inline int
xchk_rtsummary(struct xfs_scrub *sc)
{
	return -ENOENT;
}
#endif
#ifdef CONFIG_XFS_QUOTA
int xchk_quota(struct xfs_scrub *sc);
int xchk_quotacheck(struct xfs_scrub *sc);
#else
static inline int
xchk_quota(struct xfs_scrub *sc)
{
	return -ENOENT;
}
static inline int
xchk_quotacheck(struct xfs_scrub *sc)
{
	return -ENOENT;
}
#endif
int xchk_fscounters(struct xfs_scrub *sc);
int xchk_nlinks(struct xfs_scrub *sc);

/* cross-referencing helpers */
void xchk_xref_is_used_space(struct xfs_scrub *sc, xfs_agblock_t agbno,
		xfs_extlen_t len);
void xchk_xref_is_not_inode_chunk(struct xfs_scrub *sc, xfs_agblock_t agbno,
		xfs_extlen_t len);
void xchk_xref_is_inode_chunk(struct xfs_scrub *sc, xfs_agblock_t agbno,
		xfs_extlen_t len);
void xchk_xref_is_only_owned_by(struct xfs_scrub *sc, xfs_agblock_t agbno,
		xfs_extlen_t len, const struct xfs_owner_info *oinfo);
void xchk_xref_is_not_owned_by(struct xfs_scrub *sc, xfs_agblock_t agbno,
		xfs_extlen_t len, const struct xfs_owner_info *oinfo);
void xchk_xref_has_no_owner(struct xfs_scrub *sc, xfs_agblock_t agbno,
		xfs_extlen_t len);
void xchk_xref_is_cow_staging(struct xfs_scrub *sc, xfs_agblock_t bno,
		xfs_extlen_t len);
void xchk_xref_is_not_shared(struct xfs_scrub *sc, xfs_agblock_t bno,
		xfs_extlen_t len);
void xchk_xref_is_not_cow_staging(struct xfs_scrub *sc, xfs_agblock_t bno,
		xfs_extlen_t len);
#ifdef CONFIG_XFS_RT
void xchk_xref_is_used_rt_space(struct xfs_scrub *sc, xfs_rtblock_t rtbno,
		xfs_extlen_t len);
#else
# define xchk_xref_is_used_rt_space(sc, rtbno, len) do { } while (0)
#endif

#endif	/* __XFS_SCRUB_SCRUB_H__ */