1/*-
2 * Copyright (c) 2004 Poul-Henning Kamp
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD$");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/bio.h>
33#include <sys/kernel.h>
34#include <sys/lock.h>
35#include <sys/malloc.h>
36#include <sys/mutex.h>
37#include <sys/vnode.h>
38#include <sys/mount.h>	/* XXX Temporary for VFS_LOCK_GIANT */
39
40#include <geom/geom.h>
41#include <geom/geom_vfs.h>
42
/*
 * Subroutines for use by filesystems.
 *
 * XXX: should maybe live somewhere else?
 */
48#include <sys/buf.h>
49
/*
 * Per-geom state, allocated by g_vfs_open() and freed by g_vfs_close().
 */
struct g_vfs_softc {
	/* Held around the updates of sc_active and sc_orphaned. */
	struct mtx	 sc_mtx;
	/* Bufobj of the vnode that was passed to g_vfs_open(). */
	struct bufobj	*sc_bo;
	/* Incremented by g_vfs_strategy(), decremented by g_vfs_done(). */
	int		 sc_active;
	/* Set to 1 by g_vfs_orphan(); never cleared in this file. */
	int		 sc_orphaned;
};
56
/*
 * Buffer operations vector that g_vfs_open() installs on a vnode's bufobj.
 * Only the strategy entry names a routine from this file; bufwrite, bufsync
 * and bufbdflush are defined elsewhere.
 */
static struct buf_ops __g_vfs_bufops = {
	.bop_name =	"GEOM_VFS",
	.bop_write =	bufwrite,
	.bop_strategy =	g_vfs_strategy,
	.bop_sync =	bufsync,
	.bop_bdflush =	bufbdflush
};

/* Non-static pointer to the vector above; this is what g_vfs_open() stores. */
struct buf_ops *g_vfs_bufops = &__g_vfs_bufops;
66
/* Defined further down; declared here so the class descriptor can name it. */
static g_orphan_t g_vfs_orphan;

/*
 * GEOM class descriptor used for the geoms created by g_vfs_open().  The
 * orphan handler is the only method this class supplies.
 */
static struct g_class g_vfs_class = {
	.name =		"VFS",
	.version =	G_VERSION,
	.orphan =	g_vfs_orphan,
};

/* Declare the class to GEOM (second argument is presumably the module name). */
DECLARE_GEOM_CLASS(g_vfs_class, g_vfs);
76
77static void
78g_vfs_destroy(void *arg, int flags __unused)
79{
80	struct g_consumer *cp;
81
82	g_topology_assert();
83	cp = arg;
84	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
85		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
86	g_detach(cp);
87	if (cp->geom->softc == NULL)
88		g_wither_geom(cp->geom, ENXIO);
89}
90
91static void
92g_vfs_done(struct bio *bip)
93{
94	struct g_consumer *cp;
95	struct g_vfs_softc *sc;
96	struct buf *bp;
97	int vfslocked, destroy;
98	struct mount *mp;
99	struct vnode *vp;
100	struct cdev *cdevp;
101
102	cp = bip->bio_from;
103	sc = cp->geom->softc;
104	/*
105	 * Collect statistics on synchronous and asynchronous read
106	 * and write counts for disks that have associated filesystems.
107	 * Since this is run by the g_up thread it is single threaded and
108	 * we do not need to use atomic increments on the counters.
109	 */
110	bp = bip->bio_caller2;
111	vp = bp->b_vp;
112	if (vp == NULL) {
113		mp = NULL;
114	} else {
115		/*
116		 * If not a disk vnode, use its associated mount point
117		 * otherwise use the mountpoint associated with the disk.
118		 */
119		VI_LOCK(vp);
120		if (vp->v_type != VCHR ||
121		    (cdevp = vp->v_rdev) == NULL ||
122		    cdevp->si_devsw == NULL ||
123		    (cdevp->si_devsw->d_flags & D_DISK) == 0)
124			mp = vp->v_mount;
125		else
126			mp = cdevp->si_mountpt;
127		VI_UNLOCK(vp);
128	}
129	if (mp != NULL) {
130		if (bp->b_iocmd == BIO_WRITE) {
131			if (LK_HOLDER(bp->b_lock.lk_lock) == LK_KERNPROC)
132				mp->mnt_stat.f_asyncwrites++;
133			else
134				mp->mnt_stat.f_syncwrites++;
135		} else {
136			if (LK_HOLDER(bp->b_lock.lk_lock) == LK_KERNPROC)
137				mp->mnt_stat.f_asyncreads++;
138			else
139				mp->mnt_stat.f_syncreads++;
140		}
141	}
142
143	if (bip->bio_error) {
144		printf("g_vfs_done():");
145		g_print_bio(bip);
146		printf("error = %d\n", bip->bio_error);
147	}
148	bp->b_error = bip->bio_error;
149	bp->b_ioflags = bip->bio_flags;
150	if (bip->bio_error)
151		bp->b_ioflags |= BIO_ERROR;
152	bp->b_resid = bp->b_bcount - bip->bio_completed;
153	g_destroy_bio(bip);
154
155	mtx_lock(&sc->sc_mtx);
156	destroy = ((--sc->sc_active) == 0 && sc->sc_orphaned);
157	mtx_unlock(&sc->sc_mtx);
158	if (destroy)
159		g_post_event(g_vfs_destroy, cp, M_WAITOK, NULL);
160
161	vfslocked = VFS_LOCK_GIANT(((struct mount *)NULL));
162	bufdone(bp);
163	VFS_UNLOCK_GIANT(vfslocked);
164}
165
166void
167g_vfs_strategy(struct bufobj *bo, struct buf *bp)
168{
169	struct g_vfs_softc *sc;
170	struct g_consumer *cp;
171	struct bio *bip;
172	int vfslocked;
173
174	cp = bo->bo_private;
175	sc = cp->geom->softc;
176
177	/*
178	 * If the provider has orphaned us, just return EXIO.
179	 */
180	mtx_lock(&sc->sc_mtx);
181	if (sc->sc_orphaned) {
182		mtx_unlock(&sc->sc_mtx);
183		bp->b_error = ENXIO;
184		bp->b_ioflags |= BIO_ERROR;
185		vfslocked = VFS_LOCK_GIANT(((struct mount *)NULL));
186		bufdone(bp);
187		VFS_UNLOCK_GIANT(vfslocked);
188		return;
189	}
190	sc->sc_active++;
191	mtx_unlock(&sc->sc_mtx);
192
193	bip = g_alloc_bio();
194	bip->bio_cmd = bp->b_iocmd;
195	bip->bio_offset = bp->b_iooffset;
196	bip->bio_length = bp->b_bcount;
197	bdata2bio(bp, bip);
198	if ((bp->b_flags & B_BARRIER) != 0) {
199		bip->bio_flags |= BIO_ORDERED;
200		bp->b_flags &= ~B_BARRIER;
201	}
202	bip->bio_done = g_vfs_done;
203	bip->bio_caller2 = bp;
204	g_io_request(bip, cp);
205}
206
207static void
208g_vfs_orphan(struct g_consumer *cp)
209{
210	struct g_geom *gp;
211	struct g_vfs_softc *sc;
212	int destroy;
213
214	g_topology_assert();
215
216	gp = cp->geom;
217	g_trace(G_T_TOPOLOGY, "g_vfs_orphan(%p(%s))", cp, gp->name);
218	sc = gp->softc;
219	if (sc == NULL)
220		return;
221	mtx_lock(&sc->sc_mtx);
222	sc->sc_orphaned = 1;
223	destroy = (sc->sc_active == 0);
224	mtx_unlock(&sc->sc_mtx);
225	if (destroy)
226		g_vfs_destroy(cp, 0);
227
228	/*
229	 * Do not destroy the geom.  Filesystem will do that during unmount.
230	 */
231}
232
233int
234g_vfs_open(struct vnode *vp, struct g_consumer **cpp, const char *fsname, int wr)
235{
236	struct g_geom *gp;
237	struct g_provider *pp;
238	struct g_consumer *cp;
239	struct g_vfs_softc *sc;
240	struct bufobj *bo;
241	int vfslocked;
242	int error;
243
244	g_topology_assert();
245
246	*cpp = NULL;
247	bo = &vp->v_bufobj;
248	if (bo->bo_private != vp)
249		return (EBUSY);
250
251	pp = g_dev_getprovider(vp->v_rdev);
252	if (pp == NULL)
253		return (ENOENT);
254	gp = g_new_geomf(&g_vfs_class, "%s.%s", fsname, pp->name);
255	sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
256	mtx_init(&sc->sc_mtx, "g_vfs", NULL, MTX_DEF);
257	sc->sc_bo = bo;
258	gp->softc = sc;
259	cp = g_new_consumer(gp);
260	g_attach(cp, pp);
261	error = g_access(cp, 1, wr, wr);
262	if (error) {
263		g_wither_geom(gp, ENXIO);
264		return (error);
265	}
266	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
267	vnode_create_vobject(vp, pp->mediasize, curthread);
268	VFS_UNLOCK_GIANT(vfslocked);
269	*cpp = cp;
270	cp->private = vp;
271	bo->bo_ops = g_vfs_bufops;
272	bo->bo_private = cp;
273	bo->bo_bsize = pp->sectorsize;
274
275	return (error);
276}
277
278void
279g_vfs_close(struct g_consumer *cp)
280{
281	struct g_geom *gp;
282	struct g_vfs_softc *sc;
283
284	g_topology_assert();
285
286	gp = cp->geom;
287	sc = gp->softc;
288	bufobj_invalbuf(sc->sc_bo, V_SAVE, 0, 0);
289	sc->sc_bo->bo_private = cp->private;
290	gp->softc = NULL;
291	mtx_destroy(&sc->sc_mtx);
292	if (!sc->sc_orphaned || cp->provider == NULL)
293		g_wither_geom_close(gp, ENXIO);
294	g_free(sc);
295}
296