1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2004 Poul-Henning Kamp
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/param.h>
30#include <sys/systm.h>
31#include <sys/bio.h>
32#include <sys/kernel.h>
33#include <sys/lock.h>
34#include <sys/malloc.h>
35#include <sys/mutex.h>
36#include <sys/sbuf.h>
37#include <sys/vnode.h>
38#include <sys/mount.h>
39
40#include <geom/geom.h>
41#include <geom/geom_vfs.h>
42
/*
 * Subroutines for use by filesystems.
 *
 * XXX: should maybe live somewhere else?
 */
48#include <sys/buf.h>
49
50struct g_vfs_softc {
51	struct mtx	 sc_mtx;
52	struct bufobj	*sc_bo;
53	struct g_event	*sc_event;
54	int		 sc_active;
55	bool		 sc_orphaned;
56	int		 sc_enxio_active;
57	int		 sc_enxio_reported;
58};
59
60static struct buf_ops __g_vfs_bufops = {
61	.bop_name =	"GEOM_VFS",
62	.bop_write =	bufwrite,
63	.bop_strategy =	g_vfs_strategy,
64	.bop_sync =	bufsync,
65	.bop_bdflush =	bufbdflush
66};
67
68struct buf_ops *g_vfs_bufops = &__g_vfs_bufops;
69
70static g_orphan_t g_vfs_orphan;
71
72static struct g_class g_vfs_class = {
73	.name =		"VFS",
74	.version =	G_VERSION,
75	.orphan =	g_vfs_orphan,
76};
77
78DECLARE_GEOM_CLASS(g_vfs_class, g_vfs);
79
80static void
81g_vfs_destroy(void *arg, int flags __unused)
82{
83	struct g_consumer *cp;
84
85	g_topology_assert();
86	cp = arg;
87	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
88		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
89	g_detach(cp);
90	if (cp->geom->softc == NULL)
91		g_wither_geom(cp->geom, ENXIO);
92}
93
94static void
95g_vfs_done(struct bio *bip)
96{
97	struct g_consumer *cp;
98	struct g_event *event;
99	struct g_vfs_softc *sc;
100	struct buf *bp;
101	int destroy;
102	struct mount *mp;
103	struct vnode *vp;
104	struct cdev *cdevp;
105
106	/*
107	 * Collect statistics on synchronous and asynchronous read
108	 * and write counts for disks that have associated filesystems.
109	 */
110	bp = bip->bio_caller2;
111	vp = bp->b_vp;
112	if (vp != NULL) {
113		/*
114		 * If not a disk vnode, use its associated mount point
115		 * otherwise use the mountpoint associated with the disk.
116		 */
117		VI_LOCK(vp);
118		if (vp->v_type != VCHR ||
119		    (cdevp = vp->v_rdev) == NULL ||
120		    cdevp->si_devsw == NULL ||
121		    (cdevp->si_devsw->d_flags & D_DISK) == 0)
122			mp = vp->v_mount;
123		else
124			mp = cdevp->si_mountpt;
125		if (mp != NULL) {
126			if (bp->b_iocmd == BIO_READ) {
127				if (BUF_DISOWNED(bp))
128					mp->mnt_stat.f_asyncreads++;
129				else
130					mp->mnt_stat.f_syncreads++;
131			} else if (bp->b_iocmd == BIO_WRITE) {
132				if (BUF_DISOWNED(bp))
133					mp->mnt_stat.f_asyncwrites++;
134				else
135					mp->mnt_stat.f_syncwrites++;
136			}
137		}
138		VI_UNLOCK(vp);
139	}
140
141	cp = bip->bio_from;
142	sc = cp->geom->softc;
143	if (bip->bio_error != 0 && bip->bio_error != EOPNOTSUPP) {
144		if ((bp->b_xflags & BX_CVTENXIO) != 0) {
145			if (atomic_cmpset_int(&sc->sc_enxio_active, 0, 1))
146				printf("g_vfs_done(): %s converting all errors to ENXIO\n",
147				    bip->bio_to->name);
148		}
149		if (sc->sc_enxio_active)
150			bip->bio_error = ENXIO;
151		if (bip->bio_error != ENXIO ||
152		    atomic_cmpset_int(&sc->sc_enxio_reported, 0, 1)) {
153			g_print_bio("g_vfs_done():", bip, "error = %d%s",
154			    bip->bio_error,
155			    bip->bio_error != ENXIO ? "" :
156			    " supressing further ENXIO");
157		}
158	}
159	bp->b_error = bip->bio_error;
160	bp->b_ioflags = bip->bio_flags;
161	if (bip->bio_error)
162		bp->b_ioflags |= BIO_ERROR;
163	bp->b_resid = bp->b_bcount - bip->bio_completed;
164	g_destroy_bio(bip);
165
166	mtx_lock(&sc->sc_mtx);
167	destroy = ((--sc->sc_active) == 0 && sc->sc_orphaned);
168	if (destroy) {
169		event = sc->sc_event;
170		sc->sc_event = NULL;
171	} else
172		event = NULL;
173	mtx_unlock(&sc->sc_mtx);
174	if (destroy)
175		g_post_event_ep(g_vfs_destroy, cp, event, NULL);
176
177	bufdone(bp);
178}
179
180void
181g_vfs_strategy(struct bufobj *bo, struct buf *bp)
182{
183	struct g_vfs_softc *sc;
184	struct g_consumer *cp;
185	struct bio *bip;
186
187	cp = bo->bo_private;
188	sc = cp->geom->softc;
189
190	/*
191	 * If the provider has orphaned us, just return ENXIO.
192	 */
193	mtx_lock(&sc->sc_mtx);
194	if (sc->sc_orphaned || sc->sc_enxio_active) {
195		mtx_unlock(&sc->sc_mtx);
196		bp->b_error = ENXIO;
197		bp->b_ioflags |= BIO_ERROR;
198		bufdone(bp);
199		return;
200	}
201	sc->sc_active++;
202	mtx_unlock(&sc->sc_mtx);
203
204	bip = g_alloc_bio();
205	bip->bio_cmd = bp->b_iocmd;
206	bip->bio_offset = bp->b_iooffset;
207	bip->bio_length = bp->b_bcount;
208	bdata2bio(bp, bip);
209	if ((bp->b_flags & B_BARRIER) != 0) {
210		bip->bio_flags |= BIO_ORDERED;
211		bp->b_flags &= ~B_BARRIER;
212	}
213	if (bp->b_iocmd == BIO_SPEEDUP)
214		bip->bio_flags |= bp->b_ioflags;
215	bip->bio_done = g_vfs_done;
216	bip->bio_caller2 = bp;
217#if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING)
218	buf_track(bp, __func__);
219	bip->bio_track_bp = bp;
220#endif
221	g_io_request(bip, cp);
222}
223
224static void
225g_vfs_orphan(struct g_consumer *cp)
226{
227	struct g_geom *gp;
228	struct g_event *event;
229	struct g_vfs_softc *sc;
230	int destroy;
231
232	g_topology_assert();
233
234	gp = cp->geom;
235	g_trace(G_T_TOPOLOGY, "g_vfs_orphan(%p(%s))", cp, gp->name);
236	sc = gp->softc;
237	if (sc == NULL)
238		return;
239	event = g_alloc_event(M_WAITOK);
240	mtx_lock(&sc->sc_mtx);
241	KASSERT(sc->sc_event == NULL, ("g_vfs %p already has an event", sc));
242	sc->sc_orphaned = true;
243	destroy = (sc->sc_active == 0);
244	if (!destroy) {
245		sc->sc_event = event;
246		event = NULL;
247	}
248	mtx_unlock(&sc->sc_mtx);
249	if (destroy) {
250		g_free(event);
251		g_vfs_destroy(cp, 0);
252	}
253
254	/*
255	 * Do not destroy the geom.  Filesystem will do that during unmount.
256	 */
257}
258
259int
260g_vfs_open(struct vnode *vp, struct g_consumer **cpp, const char *fsname, int wr)
261{
262	struct g_geom *gp;
263	struct g_provider *pp;
264	struct g_consumer *cp;
265	struct g_vfs_softc *sc;
266	struct bufobj *bo;
267	int error;
268
269	g_topology_assert();
270
271	*cpp = NULL;
272	bo = &vp->v_bufobj;
273	if (bo->bo_private != vp)
274		return (EBUSY);
275
276	pp = g_dev_getprovider(vp->v_rdev);
277	if (pp == NULL)
278		return (ENOENT);
279	gp = g_new_geomf(&g_vfs_class, "%s.%s", fsname, pp->name);
280	sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
281	mtx_init(&sc->sc_mtx, "g_vfs", NULL, MTX_DEF);
282	sc->sc_bo = bo;
283	gp->softc = sc;
284	cp = g_new_consumer(gp);
285	error = g_attach(cp, pp);
286	if (error) {
287		g_wither_geom(gp, ENXIO);
288		return (error);
289	}
290	error = g_access(cp, 1, wr, wr);
291	if (error) {
292		g_wither_geom(gp, ENXIO);
293		return (error);
294	}
295	/*
296	 * Mediasize might not be set until first access (see g_disk_access()),
297	 * That's why we check it here and not earlier.
298	 */
299	if (pp->mediasize == 0) {
300		(void)g_access(cp, -1, -wr, -wr);
301		g_wither_geom(gp, ENXIO);
302		return (ENXIO);
303	}
304	vnode_create_disk_vobject(vp, pp->mediasize, curthread);
305	*cpp = cp;
306	cp->private = vp;
307	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
308	bo->bo_ops = g_vfs_bufops;
309	bo->bo_private = cp;
310	bo->bo_bsize = pp->sectorsize;
311
312	return (error);
313}
314
315void
316g_vfs_close(struct g_consumer *cp)
317{
318	struct g_geom *gp;
319	struct g_vfs_softc *sc;
320
321	g_topology_assert();
322
323	gp = cp->geom;
324	sc = gp->softc;
325	bufobj_invalbuf(sc->sc_bo, V_SAVE, 0, 0);
326	sc->sc_bo->bo_private = cp->private;
327	gp->softc = NULL;
328	mtx_destroy(&sc->sc_mtx);
329	if (!sc->sc_orphaned || cp->provider == NULL)
330		g_wither_geom_close(gp, ENXIO);
331	KASSERT(sc->sc_event == NULL, ("g_vfs %p event is non-NULL", sc));
332	g_free(sc);
333}
334