1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Copyright (C) 2022-2023 Oracle.  All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6#include "xfs.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h"
12#include "xfs_mount.h"
13#include "xfs_inode.h"
14#include "xfs_dir2.h"
15#include "xfs_dir2_priv.h"
16#include "xfs_trace.h"
17#include "xfs_bmap.h"
18#include "xfs_trans.h"
19#include "xfs_error.h"
20#include "scrub/scrub.h"
21#include "scrub/common.h"
22#include "scrub/readdir.h"
23
24/* Call a function for every entry in a shortform directory. */
25STATIC int
26xchk_dir_walk_sf(
27	struct xfs_scrub	*sc,
28	struct xfs_inode	*dp,
29	xchk_dirent_fn		dirent_fn,
30	void			*priv)
31{
32	struct xfs_name		name = {
33		.name		= ".",
34		.len		= 1,
35		.type		= XFS_DIR3_FT_DIR,
36	};
37	struct xfs_mount	*mp = dp->i_mount;
38	struct xfs_da_geometry	*geo = mp->m_dir_geo;
39	struct xfs_dir2_sf_entry *sfep;
40	struct xfs_dir2_sf_hdr	*sfp = dp->i_df.if_data;
41	xfs_ino_t		ino;
42	xfs_dir2_dataptr_t	dapos;
43	unsigned int		i;
44	int			error;
45
46	ASSERT(dp->i_df.if_bytes == dp->i_disk_size);
47	ASSERT(sfp != NULL);
48
49	/* dot entry */
50	dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
51			geo->data_entry_offset);
52
53	error = dirent_fn(sc, dp, dapos, &name, dp->i_ino, priv);
54	if (error)
55		return error;
56
57	/* dotdot entry */
58	dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
59			geo->data_entry_offset +
60			xfs_dir2_data_entsize(mp, sizeof(".") - 1));
61	ino = xfs_dir2_sf_get_parent_ino(sfp);
62	name.name = "..";
63	name.len = 2;
64
65	error = dirent_fn(sc, dp, dapos, &name, ino, priv);
66	if (error)
67		return error;
68
69	/* iterate everything else */
70	sfep = xfs_dir2_sf_firstentry(sfp);
71	for (i = 0; i < sfp->count; i++) {
72		dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
73				xfs_dir2_sf_get_offset(sfep));
74		ino = xfs_dir2_sf_get_ino(mp, sfp, sfep);
75		name.name = sfep->name;
76		name.len = sfep->namelen;
77		name.type = xfs_dir2_sf_get_ftype(mp, sfep);
78
79		error = dirent_fn(sc, dp, dapos, &name, ino, priv);
80		if (error)
81			return error;
82
83		sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
84	}
85
86	return 0;
87}
88
89/* Call a function for every entry in a block directory. */
90STATIC int
91xchk_dir_walk_block(
92	struct xfs_scrub	*sc,
93	struct xfs_inode	*dp,
94	xchk_dirent_fn		dirent_fn,
95	void			*priv)
96{
97	struct xfs_mount	*mp = dp->i_mount;
98	struct xfs_da_geometry	*geo = mp->m_dir_geo;
99	struct xfs_buf		*bp;
100	unsigned int		off, next_off, end;
101	int			error;
102
103	error = xfs_dir3_block_read(sc->tp, dp, dp->i_ino, &bp);
104	if (error)
105		return error;
106
107	/* Walk each directory entry. */
108	end = xfs_dir3_data_end_offset(geo, bp->b_addr);
109	for (off = geo->data_entry_offset; off < end; off = next_off) {
110		struct xfs_name			name = { };
111		struct xfs_dir2_data_unused	*dup = bp->b_addr + off;
112		struct xfs_dir2_data_entry	*dep = bp->b_addr + off;
113		xfs_ino_t			ino;
114		xfs_dir2_dataptr_t		dapos;
115
116		/* Skip an empty entry. */
117		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
118			next_off = off + be16_to_cpu(dup->length);
119			continue;
120		}
121
122		/* Otherwise, find the next entry and report it. */
123		next_off = off + xfs_dir2_data_entsize(mp, dep->namelen);
124		if (next_off > end)
125			break;
126
127		dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, off);
128		ino = be64_to_cpu(dep->inumber);
129		name.name = dep->name;
130		name.len = dep->namelen;
131		name.type = xfs_dir2_data_get_ftype(mp, dep);
132
133		error = dirent_fn(sc, dp, dapos, &name, ino, priv);
134		if (error)
135			break;
136	}
137
138	xfs_trans_brelse(sc->tp, bp);
139	return error;
140}
141
142/* Read a leaf-format directory buffer. */
143STATIC int
144xchk_read_leaf_dir_buf(
145	struct xfs_trans	*tp,
146	struct xfs_inode	*dp,
147	struct xfs_da_geometry	*geo,
148	xfs_dir2_off_t		*curoff,
149	struct xfs_buf		**bpp)
150{
151	struct xfs_iext_cursor	icur;
152	struct xfs_bmbt_irec	map;
153	struct xfs_ifork	*ifp = xfs_ifork_ptr(dp, XFS_DATA_FORK);
154	xfs_dablk_t		last_da;
155	xfs_dablk_t		map_off;
156	xfs_dir2_off_t		new_off;
157
158	*bpp = NULL;
159
160	/*
161	 * Look for mapped directory blocks at or above the current offset.
162	 * Truncate down to the nearest directory block to start the scanning
163	 * operation.
164	 */
165	last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET);
166	map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *curoff));
167
168	if (!xfs_iext_lookup_extent(dp, ifp, map_off, &icur, &map))
169		return 0;
170	if (map.br_startoff >= last_da)
171		return 0;
172	xfs_trim_extent(&map, map_off, last_da - map_off);
173
174	/* Read the directory block of that first mapping. */
175	new_off = xfs_dir2_da_to_byte(geo, map.br_startoff);
176	if (new_off > *curoff)
177		*curoff = new_off;
178
179	return xfs_dir3_data_read(tp, dp, dp->i_ino, map.br_startoff, 0, bpp);
180}
181
182/* Call a function for every entry in a leaf directory. */
183STATIC int
184xchk_dir_walk_leaf(
185	struct xfs_scrub	*sc,
186	struct xfs_inode	*dp,
187	xchk_dirent_fn		dirent_fn,
188	void			*priv)
189{
190	struct xfs_mount	*mp = dp->i_mount;
191	struct xfs_da_geometry	*geo = mp->m_dir_geo;
192	struct xfs_buf		*bp = NULL;
193	xfs_dir2_off_t		curoff = 0;
194	unsigned int		offset = 0;
195	int			error;
196
197	/* Iterate every directory offset in this directory. */
198	while (curoff < XFS_DIR2_LEAF_OFFSET) {
199		struct xfs_name			name = { };
200		struct xfs_dir2_data_unused	*dup;
201		struct xfs_dir2_data_entry	*dep;
202		xfs_ino_t			ino;
203		unsigned int			length;
204		xfs_dir2_dataptr_t		dapos;
205
206		/*
207		 * If we have no buffer, or we're off the end of the
208		 * current buffer, need to get another one.
209		 */
210		if (!bp || offset >= geo->blksize) {
211			if (bp) {
212				xfs_trans_brelse(sc->tp, bp);
213				bp = NULL;
214			}
215
216			error = xchk_read_leaf_dir_buf(sc->tp, dp, geo, &curoff,
217					&bp);
218			if (error || !bp)
219				break;
220
221			/*
222			 * Find our position in the block.
223			 */
224			offset = geo->data_entry_offset;
225			curoff += geo->data_entry_offset;
226		}
227
228		/* Skip an empty entry. */
229		dup = bp->b_addr + offset;
230		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
231			length = be16_to_cpu(dup->length);
232			offset += length;
233			curoff += length;
234			continue;
235		}
236
237		/* Otherwise, find the next entry and report it. */
238		dep = bp->b_addr + offset;
239		length = xfs_dir2_data_entsize(mp, dep->namelen);
240
241		dapos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
242		ino = be64_to_cpu(dep->inumber);
243		name.name = dep->name;
244		name.len = dep->namelen;
245		name.type = xfs_dir2_data_get_ftype(mp, dep);
246
247		error = dirent_fn(sc, dp, dapos, &name, ino, priv);
248		if (error)
249			break;
250
251		/* Advance to the next entry. */
252		offset += length;
253		curoff += length;
254	}
255
256	if (bp)
257		xfs_trans_brelse(sc->tp, bp);
258	return error;
259}
260
261/*
262 * Call a function for every entry in a directory.
263 *
264 * Callers must hold the ILOCK.  File types are XFS_DIR3_FT_*.
265 */
266int
267xchk_dir_walk(
268	struct xfs_scrub	*sc,
269	struct xfs_inode	*dp,
270	xchk_dirent_fn		dirent_fn,
271	void			*priv)
272{
273	struct xfs_da_args	args = {
274		.dp		= dp,
275		.geo		= dp->i_mount->m_dir_geo,
276		.trans		= sc->tp,
277		.owner		= dp->i_ino,
278	};
279	int			error;
280
281	if (xfs_is_shutdown(dp->i_mount))
282		return -EIO;
283
284	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
285	xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
286
287	switch (xfs_dir2_format(&args, &error)) {
288	case XFS_DIR2_FMT_SF:
289		return xchk_dir_walk_sf(sc, dp, dirent_fn, priv);
290	case XFS_DIR2_FMT_BLOCK:
291		return xchk_dir_walk_block(sc, dp, dirent_fn, priv);
292	case XFS_DIR2_FMT_LEAF:
293	case XFS_DIR2_FMT_NODE:
294		return xchk_dir_walk_leaf(sc, dp, dirent_fn, priv);
295	default:
296		return error;
297	}
298}
299
300/*
301 * Look up the inode number for an exact name in a directory.
302 *
303 * Callers must hold the ILOCK.  File types are XFS_DIR3_FT_*.  Names are not
304 * checked for correctness.
305 */
306int
307xchk_dir_lookup(
308	struct xfs_scrub	*sc,
309	struct xfs_inode	*dp,
310	const struct xfs_name	*name,
311	xfs_ino_t		*ino)
312{
313	struct xfs_da_args	args = {
314		.dp		= dp,
315		.geo		= dp->i_mount->m_dir_geo,
316		.trans		= sc->tp,
317		.name		= name->name,
318		.namelen	= name->len,
319		.filetype	= name->type,
320		.hashval	= xfs_dir2_hashname(dp->i_mount, name),
321		.whichfork	= XFS_DATA_FORK,
322		.op_flags	= XFS_DA_OP_OKNOENT,
323		.owner		= dp->i_ino,
324	};
325	int			error;
326
327	if (xfs_is_shutdown(dp->i_mount))
328		return -EIO;
329
330	/*
331	 * A temporary directory's block headers are written with the owner
332	 * set to sc->ip, so we must switch the owner here for the lookup.
333	 */
334	if (dp == sc->tempip)
335		args.owner = sc->ip->i_ino;
336
337	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
338	xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
339
340	error = xfs_dir_lookup_args(&args);
341	if (!error)
342		*ino = args.inumber;
343	return error;
344}
345
346/*
347 * Try to grab the IOLOCK and ILOCK of sc->ip and ip, returning @ip's lock
348 * state.  The caller may have a transaction, so we must use trylock for both
349 * IOLOCKs.
350 */
351static inline unsigned int
352xchk_dir_trylock_both(
353	struct xfs_scrub	*sc,
354	struct xfs_inode	*ip)
355{
356	if (!xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL))
357		return 0;
358
359	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
360		goto parent_iolock;
361
362	xchk_ilock(sc, XFS_ILOCK_EXCL);
363	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
364		goto parent_ilock;
365
366	return XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL;
367
368parent_ilock:
369	xchk_iunlock(sc, XFS_ILOCK_EXCL);
370	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
371parent_iolock:
372	xchk_iunlock(sc, XFS_IOLOCK_EXCL);
373	return 0;
374}
375
376/*
377 * Try for a limited time to grab the IOLOCK and ILOCK of both the scrub target
378 * (@sc->ip) and the inode at the other end (@ip) of a directory or parent
379 * pointer link so that we can check that link.
380 *
381 * We do not know ahead of time that the directory tree is /not/ corrupt, so we
382 * cannot use the "lock two inode" functions because we do not know that there
383 * is not a racing thread trying to take the locks in opposite order.  First
384 * take IOLOCK_EXCL of the scrub target, and then try to take IOLOCK_SHARED
385 * of @ip to synchronize with the VFS.  Next, take ILOCK_EXCL of the scrub
386 * target and @ip to synchronize with XFS.
387 *
388 * If the trylocks succeed, *lockmode will be set to the locks held for @ip;
389 * @sc->ilock_flags will be set for the locks held for @sc->ip; and zero will
390 * be returned.  If not, returns -EDEADLOCK to try again; or -ETIMEDOUT if
391 * XCHK_TRY_HARDER was set.  Returns -EINTR if the process has been killed.
392 */
393int
394xchk_dir_trylock_for_pptrs(
395	struct xfs_scrub	*sc,
396	struct xfs_inode	*ip,
397	unsigned int		*lockmode)
398{
399	unsigned int		nr;
400	int			error = 0;
401
402	ASSERT(sc->ilock_flags == 0);
403
404	for (nr = 0; nr < HZ; nr++) {
405		*lockmode = xchk_dir_trylock_both(sc, ip);
406		if (*lockmode)
407			return 0;
408
409		if (xchk_should_terminate(sc, &error))
410			return error;
411
412		delay(1);
413	}
414
415	if (sc->flags & XCHK_TRY_HARDER) {
416		xchk_set_incomplete(sc);
417		return -ETIMEDOUT;
418	}
419
420	return -EDEADLOCK;
421}
422