1139825Simp/*-
298542Smckusick * Copyright (c) 2002 Networks Associates Technology, Inc.
398542Smckusick * All rights reserved.
498542Smckusick *
598542Smckusick * This software was developed for the FreeBSD Project by Marshall
698542Smckusick * Kirk McKusick and Network Associates Laboratories, the Security
798542Smckusick * Research Division of Network Associates, Inc. under DARPA/SPAWAR
898542Smckusick * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
998542Smckusick * research program
1098542Smckusick *
11136721Srwatson * Redistribution and use in source and binary forms, with or without
12136721Srwatson * modification, are permitted provided that the following conditions
13136721Srwatson * are met:
14136721Srwatson * 1. Redistributions of source code must retain the above copyright
15136721Srwatson *    notice, this list of conditions and the following disclaimer.
16136721Srwatson * 2. Redistributions in binary form must reproduce the above copyright
17136721Srwatson *    notice, this list of conditions and the following disclaimer in the
18136721Srwatson *    documentation and/or other materials provided with the distribution.
19136721Srwatson *
20136721Srwatson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21136721Srwatson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22136721Srwatson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23136721Srwatson * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24136721Srwatson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25136721Srwatson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26136721Srwatson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27136721Srwatson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28136721Srwatson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29136721Srwatson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30136721Srwatson * SUCH DAMAGE.
31136721Srwatson *
321541Srgrimes * Copyright (c) 1982, 1986, 1989, 1993
331541Srgrimes *	The Regents of the University of California.  All rights reserved.
341541Srgrimes *
351541Srgrimes * Redistribution and use in source and binary forms, with or without
361541Srgrimes * modification, are permitted provided that the following conditions
371541Srgrimes * are met:
381541Srgrimes * 1. Redistributions of source code must retain the above copyright
391541Srgrimes *    notice, this list of conditions and the following disclaimer.
401541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
411541Srgrimes *    notice, this list of conditions and the following disclaimer in the
421541Srgrimes *    documentation and/or other materials provided with the distribution.
431541Srgrimes * 4. Neither the name of the University nor the names of its contributors
441541Srgrimes *    may be used to endorse or promote products derived from this software
451541Srgrimes *    without specific prior written permission.
461541Srgrimes *
471541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
481541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
491541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
501541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
511541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
521541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
531541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
541541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
551541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
561541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
571541Srgrimes * SUCH DAMAGE.
581541Srgrimes *
5922521Sdyson *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
601541Srgrimes */
611541Srgrimes
62116192Sobrien#include <sys/cdefs.h>
63116192Sobrien__FBSDID("$FreeBSD$");
64116192Sobrien
651541Srgrimes#include <sys/param.h>
661541Srgrimes#include <sys/systm.h>
6760041Sphk#include <sys/bio.h>
681541Srgrimes#include <sys/buf.h>
6931561Sbde#include <sys/lock.h>
7034266Sjulian#include <sys/mount.h>
711541Srgrimes#include <sys/vnode.h>
721541Srgrimes
731541Srgrimes#include <ufs/ufs/quota.h>
741541Srgrimes#include <ufs/ufs/inode.h>
751541Srgrimes#include <ufs/ufs/ufs_extern.h>
76118969Sphk#include <ufs/ufs/extattr.h>
77118969Sphk#include <ufs/ufs/ufsmount.h>
781541Srgrimes
791541Srgrimes#include <ufs/ffs/fs.h>
801541Srgrimes#include <ufs/ffs/ffs_extern.h>
811541Srgrimes
821541Srgrimes/*
8396755Strhodes * Balloc defines the structure of filesystem storage
841541Srgrimes * by allocating the physical blocks on a device given
851541Srgrimes * the inode and the logical block number in a file.
8698542Smckusick * This is the allocation strategy for UFS1. Below is
8798542Smckusick * the allocation strategy for UFS2.
881541Srgrimes */
/*
 * ffs_balloc_ufs1: ensure that the logical block of a UFS1 file which
 * contains byte offset `startoffset' has backing storage, allocating the
 * data block (or fragment) and any missing indirect blocks on the way,
 * and hand a buffer for it back through `bpp'.
 *
 * Parameters:
 *	vp		vnode of the file; the inode, on-disk dinode,
 *			superblock and ufsmount are all derived from it.
 *	startoffset	byte offset in the file; converted to a logical
 *			block number with lblkno().
 *	size		byte count of the access.  The offset of
 *			startoffset within its block is added to it, and
 *			the function panics if the sum exceeds fs_bsize.
 *	cred		credentials passed to the allocator and to chkdq().
 *	flags		IO_EXT is rejected with EOPNOTSUPP.  IO_SYNC selects
 *			bwrite() instead of delayed/async writes for the
 *			metadata buffers modified here.  BA_CLRBUF makes a
 *			freshly allocated data buffer go through
 *			vfs_bio_clrbuf() and makes an already allocated
 *			indirect-addressed block be read (bread_gb() or
 *			cluster_read()) rather than only getblk()'d.
 *			BA_METAONLY returns the last-level indirect block
 *			instead of the data block (panics for direct
 *			blocks).  BA_UNMAPPED is translated to GB_UNMAPPED
 *			for data buffers.  The BA_SEQMASK bits provide the
 *			seqcount passed to cluster_read().
 *	bpp		out parameter; set to NULL on entry and to the
 *			resulting buffer on success.
 *
 * Returns 0 on success, EOPNOTSUPP for IO_EXT, EFBIG for a negative
 * logical block number, or the error reported by ufs_getlbns(), bread(),
 * bwrite(), ffs_alloc(), ffs_realloccg(), bread_gb() or cluster_read().
 * When an allocation fails part way through the indirect chain, the
 * blocks allocated by this call are unwound and freed at `fail'.
 */
891549Srgrimesint
9098542Smckusickffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
9198542Smckusick    struct ucred *cred, int flags, struct buf **bpp)
9234266Sjulian{
9355799Smckusick	struct inode *ip;
94100344Smckusick	struct ufs1_dinode *dp;
9598542Smckusick	ufs_lbn_t lbn, lastlbn;
9634266Sjulian	struct fs *fs;
9798542Smckusick	ufs1_daddr_t nb;
981541Srgrimes	struct buf *bp, *nbp;
99140705Sjeff	struct ufsmount *ump;
1001541Srgrimes	struct indir indirs[NIADDR + 2];
10122521Sdyson	int deallocated, osize, nsize, num, i, error;
10298542Smckusick	ufs2_daddr_t newb;
10398542Smckusick	ufs1_daddr_t *bap, pref;
10498542Smckusick	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
105174973Skib	ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
10657446Sdillon	int unwindidx = -1;
107180758Skib	int saved_inbdflush;
108223114Smckusick	static struct timeval lastfail;
109223114Smckusick	static int curfail;
110248521Skib	int gbflags, reclaimed;
1111541Srgrimes
11234266Sjulian	ip = VTOI(vp);
113100344Smckusick	dp = ip->i_din1;
11434266Sjulian	fs = ip->i_fs;
115140705Sjeff	ump = ip->i_ump;
11698542Smckusick	lbn = lblkno(fs, startoffset);
	/*
	 * From here on `size' is measured from the start of block `lbn',
	 * i.e. it is the in-block end offset of the requested range.
	 */
11798542Smckusick	size = blkoff(fs, startoffset) + size;
118222958Sjeff	reclaimed = 0;
11934266Sjulian	if (size > fs->fs_bsize)
12098542Smckusick		panic("ffs_balloc_ufs1: blk too big");
12198542Smckusick	*bpp = NULL;
122100344Smckusick	if (flags & IO_EXT)
123100344Smckusick		return (EOPNOTSUPP);
12422521Sdyson	if (lbn < 0)
1251541Srgrimes		return (EFBIG);
126248521Skib	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
1271541Srgrimes
128207141Sjeff	if (DOINGSOFTDEP(vp))
129207141Sjeff		softdep_prealloc(vp, MNT_WAIT);
1301541Srgrimes	/*
1311541Srgrimes	 * If the next write will extend the file into a new block,
1321541Srgrimes	 * and the file is currently composed of a fragment
1331541Srgrimes	 * this fragment has to be extended to be a full block.
1341541Srgrimes	 */
13598542Smckusick	lastlbn = lblkno(fs, ip->i_size);
13698542Smckusick	if (lastlbn < NDADDR && lastlbn < lbn) {
13798542Smckusick		nb = lastlbn;
1381541Srgrimes		osize = blksize(fs, ip, nb);
1391541Srgrimes		if (osize < fs->fs_bsize && osize > 0) {
			/*
			 * NOTE(review): UFS_LOCK is taken before every
			 * ffs_realloccg()/ffs_alloc() call in this function
			 * and no matching UFS_UNLOCK is visible on these
			 * paths; presumably the allocator drops the lock
			 * before returning -- confirm against ffs_alloc.c.
			 */
140140705Sjeff			UFS_LOCK(ump);
141100344Smckusick			error = ffs_realloccg(ip, nb, dp->di_db[nb],
142100344Smckusick			   ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
143187790Srwatson			   &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
144187790Srwatson			   cred, &bp);
1451541Srgrimes			if (error)
1461541Srgrimes				return (error);
14734266Sjulian			if (DOINGSOFTDEP(vp))
14834266Sjulian				softdep_setup_allocdirect(ip, nb,
149100344Smckusick				    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
15034266Sjulian				    fs->fs_bsize, osize, bp);
			/*
			 * The old last block is now a full block: grow the
			 * in-core and on-disk sizes to its end and record
			 * its (possibly new) disk address.
			 */
15124775Sbde			ip->i_size = smalllblktosize(fs, nb + 1);
152100344Smckusick			dp->di_size = ip->i_size;
153100344Smckusick			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
1541541Srgrimes			ip->i_flag |= IN_CHANGE | IN_UPDATE;
155100344Smckusick			if (flags & IO_SYNC)
1561541Srgrimes				bwrite(bp);
1571541Srgrimes			else
1581541Srgrimes				bawrite(bp);
1591541Srgrimes		}
1601541Srgrimes	}
1611541Srgrimes	/*
1621541Srgrimes	 * The first NDADDR blocks are direct blocks
1631541Srgrimes	 */
16422521Sdyson	if (lbn < NDADDR) {
16598658Sdillon		if (flags & BA_METAONLY)
16698658Sdillon			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
167100344Smckusick		nb = dp->di_db[lbn];
		/*
		 * Case 1: the block exists and the file size already covers
		 * all of it, so nothing needs allocating; just read it.
		 */
16824775Sbde		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
16922521Sdyson			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
1701541Srgrimes			if (error) {
1711541Srgrimes				brelse(bp);
1721541Srgrimes				return (error);
1731541Srgrimes			}
1746864Sdg			bp->b_blkno = fsbtodb(fs, nb);
17598542Smckusick			*bpp = bp;
1761541Srgrimes			return (0);
1771541Srgrimes		}
1781541Srgrimes		if (nb != 0) {
1791541Srgrimes			/*
1801541Srgrimes			 * Consider need to reallocate a fragment.
1811541Srgrimes			 */
1821541Srgrimes			osize = fragroundup(fs, blkoff(fs, ip->i_size));
1831541Srgrimes			nsize = fragroundup(fs, size);
1841541Srgrimes			if (nsize <= osize) {
				/*
				 * The existing fragment is already large
				 * enough for the request; read it as is.
				 */
18522521Sdyson				error = bread(vp, lbn, osize, NOCRED, &bp);
1861541Srgrimes				if (error) {
1871541Srgrimes					brelse(bp);
1881541Srgrimes					return (error);
1891541Srgrimes				}
1906864Sdg				bp->b_blkno = fsbtodb(fs, nb);
1911541Srgrimes			} else {
				/* Grow the fragment from osize to nsize. */
192140705Sjeff				UFS_LOCK(ump);
193100344Smckusick				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
19498542Smckusick				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
195187790Srwatson				    &dp->di_db[0]), osize, nsize, flags,
196187790Srwatson				    cred, &bp);
1971541Srgrimes				if (error)
1981541Srgrimes					return (error);
19934266Sjulian				if (DOINGSOFTDEP(vp))
20034266Sjulian					softdep_setup_allocdirect(ip, lbn,
20134266Sjulian					    dbtofsb(fs, bp->b_blkno), nb,
20234266Sjulian					    nsize, osize, bp);
2031541Srgrimes			}
2041541Srgrimes		} else {
			/*
			 * No block is allocated for lbn yet.  Allocate only
			 * a fragment-rounded size when lbn lies at or past
			 * the current end of file, otherwise a full block.
			 */
20524775Sbde			if (ip->i_size < smalllblktosize(fs, lbn + 1))
2061541Srgrimes				nsize = fragroundup(fs, size);
2071541Srgrimes			else
2081541Srgrimes				nsize = fs->fs_bsize;
209140705Sjeff			UFS_LOCK(ump);
21022521Sdyson			error = ffs_alloc(ip, lbn,
211100344Smckusick			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
212187790Srwatson			    nsize, flags, cred, &newb);
2131541Srgrimes			if (error)
2141541Srgrimes				return (error);
215248521Skib			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
2161541Srgrimes			bp->b_blkno = fsbtodb(fs, newb);
21798658Sdillon			if (flags & BA_CLRBUF)
2187695Sdg				vfs_bio_clrbuf(bp);
21934266Sjulian			if (DOINGSOFTDEP(vp))
22034266Sjulian				softdep_setup_allocdirect(ip, lbn, newb, 0,
22134266Sjulian				    nsize, 0, bp);
2221541Srgrimes		}
		/*
		 * Shared tail of the two allocating cases (and of the
		 * "fragment already big enough" case): store the block's
		 * address in the inode and return the buffer.
		 */
223100344Smckusick		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
2241541Srgrimes		ip->i_flag |= IN_CHANGE | IN_UPDATE;
22598542Smckusick		*bpp = bp;
2261541Srgrimes		return (0);
2271541Srgrimes	}
2281541Srgrimes	/*
2291541Srgrimes	 * Determine the number of levels of indirection.
2301541Srgrimes	 */
2311541Srgrimes	pref = 0;
23243311Sdillon	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
2331541Srgrimes		return(error);
234173464Sobrien#ifdef INVARIANTS
2351541Srgrimes	if (num < 1)
23698542Smckusick		panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
2371541Srgrimes#endif
	/*
	 * TDP_INBDFLUSH is set on the current thread for the remainder of
	 * the function; every return path below restores the saved value
	 * with curthread_pflags_restore().
	 */
238223888Skib	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
2391541Srgrimes	/*
2401541Srgrimes	 * Fetch the first indirect block allocating if necessary.
2411541Srgrimes	 */
	/*
	 * After the decrement, `num' is the indirs[] index at which the
	 * walk below stops (the loop breaks when i == num).
	 */
2421541Srgrimes	--num;
243100344Smckusick	nb = dp->di_ib[indirs[0].in_off];
	/*
	 * Unwind bookkeeping used at `fail': allocblk/lbns_remfree append
	 * the disk address and logical block number of every block this
	 * call allocates; allocib points at the inode's indirect pointer if
	 * this call filled it in.
	 */
24422521Sdyson	allocib = NULL;
24522521Sdyson	allocblk = allociblk;
246174973Skib	lbns_remfree = lbns;
2471541Srgrimes	if (nb == 0) {
248140705Sjeff		UFS_LOCK(ump);
249248623Smckusick		pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
250248623Smckusick		    (ufs1_daddr_t *)0);
251262779Spfg		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
252187790Srwatson		    flags, cred, &newb)) != 0) {
253223888Skib			curthread_pflags_restore(saved_inbdflush);
2541541Srgrimes			return (error);
255180758Skib		}
		/*
		 * Prefer to place the next allocation one full block
		 * (fs_frag fragments) after the block just obtained.
		 */
256242520Smckusick		pref = newb + fs->fs_frag;
2571541Srgrimes		nb = newb;
258304667Skib		MPASS(allocblk < allociblk + nitems(allociblk));
259304667Skib		MPASS(lbns_remfree < lbns + nitems(lbns));
26022521Sdyson		*allocblk++ = nb;
261174973Skib		*lbns_remfree++ = indirs[1].in_lbn;
262248521Skib		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
26322521Sdyson		bp->b_blkno = fsbtodb(fs, nb);
2647695Sdg		vfs_bio_clrbuf(bp);
26534266Sjulian		if (DOINGSOFTDEP(vp)) {
26634266Sjulian			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
26734266Sjulian			    newb, 0, fs->fs_bsize, 0, bp);
26834266Sjulian			bdwrite(bp);
26934266Sjulian		} else {
27034266Sjulian			/*
27134266Sjulian			 * Write synchronously so that indirect blocks
27234266Sjulian			 * never point at garbage.
27334266Sjulian			 */
27448801Smckusick			if (DOINGASYNC(vp))
27548801Smckusick				bdwrite(bp);
27648801Smckusick			else if ((error = bwrite(bp)) != 0)
27734266Sjulian				goto fail;
27834266Sjulian		}
		/*
		 * Only now is the new indirect block linked into the inode;
		 * allocib remembers the slot so `fail' can zero it again.
		 */
279100344Smckusick		allocib = &dp->di_ib[indirs[0].in_off];
28022521Sdyson		*allocib = nb;
2811541Srgrimes		ip->i_flag |= IN_CHANGE | IN_UPDATE;
2821541Srgrimes	}
2831541Srgrimes	/*
2841541Srgrimes	 * Fetch through the indirect blocks, allocating as necessary.
2851541Srgrimes	 */
	/*
	 * `retry' restarts the walk from the first indirect level after a
	 * failed allocation has triggered softdep_request_cleanup(); the
	 * `++reclaimed == 1' tests below allow this at most once per call.
	 */
286222958Sjeffretry:
2871541Srgrimes	for (i = 1;;) {
2881541Srgrimes		error = bread(vp,
2891541Srgrimes		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
2901541Srgrimes		if (error) {
2911541Srgrimes			brelse(bp);
29222521Sdyson			goto fail;
2931541Srgrimes		}
29498542Smckusick		bap = (ufs1_daddr_t *)bp->b_data;
2951541Srgrimes		nb = bap[indirs[i].in_off];
		/*
		 * At the last level, leave the loop with bp/bap still
		 * referring to that indirect block and nb holding the data
		 * block address (0 if unallocated).
		 */
2961541Srgrimes		if (i == num)
2971541Srgrimes			break;
2981541Srgrimes		i += 1;
2991541Srgrimes		if (nb != 0) {
30013490Sdyson			bqrelse(bp);
3011541Srgrimes			continue;
3021541Srgrimes		}
		/*
		 * The next-level indirect block is missing: allocate it.
		 * Note that i has already been advanced, so indirs[i] is
		 * the new block and indirs[i - 1] is its parent (bp).
		 */
303140705Sjeff		UFS_LOCK(ump);
304252527Smckusick		/*
305252527Smckusick		 * If parent indirect has just been allocated, try to cluster
306252527Smckusick		 * immediately following it.
307252527Smckusick		 */
3081541Srgrimes		if (pref == 0)
309248623Smckusick			pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
310248623Smckusick			    (ufs1_daddr_t *)0);
311187790Srwatson		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
312222958Sjeff		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
3131541Srgrimes			brelse(bp);
314304668Skib			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
315222958Sjeff				UFS_LOCK(ump);
316222958Sjeff				softdep_request_cleanup(fs, vp, cred,
317222958Sjeff				    FLUSH_BLOCKS_WAIT);
318222958Sjeff				UFS_UNLOCK(ump);
319222958Sjeff				goto retry;
320222958Sjeff			}
			/*
			 * Rate-limit the "filesystem full" report using the
			 * function-static lastfail/curfail state.
			 */
321223114Smckusick			if (ppsratecheck(&lastfail, &curfail, 1)) {
322223114Smckusick				ffs_fserr(fs, ip->i_number, "filesystem full");
323223114Smckusick				uprintf("\n%s: write failed, filesystem "
324223114Smckusick				    "is full\n", fs->fs_fsmnt);
325223114Smckusick			}
32622521Sdyson			goto fail;
3271541Srgrimes		}
328242520Smckusick		pref = newb + fs->fs_frag;
3291541Srgrimes		nb = newb;
330304667Skib		MPASS(allocblk < allociblk + nitems(allociblk));
331304667Skib		MPASS(lbns_remfree < lbns + nitems(lbns));
33222521Sdyson		*allocblk++ = nb;
333174973Skib		*lbns_remfree++ = indirs[i].in_lbn;
		/*
		 * NOTE(review): getblk() is passed 0 here, whereas the
		 * first-level indirect block above is obtained with
		 * gbflags -- confirm whether the difference is intended.
		 */
334111856Sjeff		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
3351541Srgrimes		nbp->b_blkno = fsbtodb(fs, nb);
3367695Sdg		vfs_bio_clrbuf(nbp);
33734266Sjulian		if (DOINGSOFTDEP(vp)) {
33834266Sjulian			softdep_setup_allocindir_meta(nbp, ip, bp,
33934266Sjulian			    indirs[i - 1].in_off, nb);
34034266Sjulian			bdwrite(nbp);
34134266Sjulian		} else {
34234266Sjulian			/*
34334266Sjulian			 * Write synchronously so that indirect blocks
34434266Sjulian			 * never point at garbage.
34534266Sjulian			 */
			/*
			 * NOTE(review): unlike the first-level indirect
			 * block above, DOINGASYNC(vp) is not consulted
			 * here; the write is always synchronous -- confirm
			 * whether that asymmetry is intended.
			 */
34643311Sdillon			if ((error = bwrite(nbp)) != 0) {
34734266Sjulian				brelse(bp);
34834266Sjulian				goto fail;
34934266Sjulian			}
3501541Srgrimes		}
3511541Srgrimes		bap[indirs[i - 1].in_off] = nb;
		/*
		 * Remember the first pre-existing indirect block that was
		 * made to point at a block allocated by this call, so that
		 * `fail' can clear that pointer again.  Not needed when
		 * allocib is set, because clearing the inode pointer
		 * detaches the whole new chain.
		 */
35257446Sdillon		if (allocib == NULL && unwindidx < 0)
35357446Sdillon			unwindidx = i - 1;
3541541Srgrimes		/*
3551541Srgrimes		 * If required, write synchronously, otherwise use
3561541Srgrimes		 * delayed write.
3571541Srgrimes		 */
358100344Smckusick		if (flags & IO_SYNC) {
3591541Srgrimes			bwrite(bp);
3601541Srgrimes		} else {
36132286Sdyson			if (bp->b_bufsize == fs->fs_bsize)
36232286Sdyson				bp->b_flags |= B_CLUSTEROK;
3631541Srgrimes			bdwrite(bp);
3641541Srgrimes		}
3651541Srgrimes	}
3661541Srgrimes	/*
36762976Smckusick	 * If asked only for the indirect block, then return it.
36862976Smckusick	 */
36998658Sdillon	if (flags & BA_METAONLY) {
370223888Skib		curthread_pflags_restore(saved_inbdflush);
37198542Smckusick		*bpp = bp;
37262976Smckusick		return (0);
37362976Smckusick	}
37462976Smckusick	/*
3751541Srgrimes	 * Get the data block, allocating if necessary.
3761541Srgrimes	 */
3771541Srgrimes	if (nb == 0) {
378140705Sjeff		UFS_LOCK(ump);
379252527Smckusick		/*
380252527Smckusick		 * If allocating metadata at the front of the cylinder
381252527Smckusick		 * group and parent indirect block has just been allocated,
382252527Smckusick		 * then cluster next to it if it is the first indirect in
383252527Smckusick		 * the file. Otherwise it has been allocated in the metadata
384252527Smckusick		 * area, so we want to find our own place out in the data area.
385252527Smckusick		 */
386252527Smckusick		if (pref == 0 || (lbn > NDADDR && fs->fs_metaspace != 0))
387242520Smckusick			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
388242520Smckusick			    &bap[0]);
389222958Sjeff		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
390222958Sjeff		    flags | IO_BUFLOCKED, cred, &newb);
3913487Sphk		if (error) {
3921541Srgrimes			brelse(bp);
393304668Skib			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
394222958Sjeff				UFS_LOCK(ump);
395222958Sjeff				softdep_request_cleanup(fs, vp, cred,
396222958Sjeff				    FLUSH_BLOCKS_WAIT);
397222958Sjeff				UFS_UNLOCK(ump);
398222958Sjeff				goto retry;
399222958Sjeff			}
400223114Smckusick			if (ppsratecheck(&lastfail, &curfail, 1)) {
401223114Smckusick				ffs_fserr(fs, ip->i_number, "filesystem full");
402223114Smckusick				uprintf("\n%s: write failed, filesystem "
403223114Smckusick				    "is full\n", fs->fs_fsmnt);
404223114Smckusick			}
40522521Sdyson			goto fail;
4061541Srgrimes		}
4071541Srgrimes		nb = newb;
408304667Skib		MPASS(allocblk < allociblk + nitems(allociblk));
409304667Skib		MPASS(lbns_remfree < lbns + nitems(lbns));
41022521Sdyson		*allocblk++ = nb;
411174973Skib		*lbns_remfree++ = lbn;
412248521Skib		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
4131541Srgrimes		nbp->b_blkno = fsbtodb(fs, nb);
41498658Sdillon		if (flags & BA_CLRBUF)
4157695Sdg			vfs_bio_clrbuf(nbp);
41634266Sjulian		if (DOINGSOFTDEP(vp))
41734266Sjulian			softdep_setup_allocindir_page(ip, lbn, bp,
41834266Sjulian			    indirs[i].in_off, nb, 0, nbp);
		/* Link the new data block into the last-level indirect. */
4191541Srgrimes		bap[indirs[i].in_off] = nb;
4201541Srgrimes		/*
4211541Srgrimes		 * If required, write synchronously, otherwise use
4221541Srgrimes		 * delayed write.
4231541Srgrimes		 */
424100344Smckusick		if (flags & IO_SYNC) {
4251541Srgrimes			bwrite(bp);
4261541Srgrimes		} else {
42732286Sdyson			if (bp->b_bufsize == fs->fs_bsize)
42832286Sdyson				bp->b_flags |= B_CLUSTEROK;
4291541Srgrimes			bdwrite(bp);
4301541Srgrimes		}
431223888Skib		curthread_pflags_restore(saved_inbdflush);
43298542Smckusick		*bpp = nbp;
4331541Srgrimes		return (0);
4341541Srgrimes	}
	/*
	 * The data block already exists (nb != 0).  The indirect block is
	 * no longer needed; with BA_CLRBUF the data is read in (clustered
	 * when a sequential count is given, cluster reads are not disabled
	 * on the mount and neither severe-pressure predicate is true),
	 * otherwise only a buffer with the right disk address is set up.
	 */
4351541Srgrimes	brelse(bp);
43698658Sdillon	if (flags & BA_CLRBUF) {
437105422Sdillon		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
438290638Skib		if (seqcount != 0 &&
439290638Skib		    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
440290638Skib		    !(vm_page_count_severe() || buf_dirty_count_severe())) {
441105422Sdillon			error = cluster_read(vp, ip->i_size, lbn,
442105422Sdillon			    (int)fs->fs_bsize, NOCRED,
443248521Skib			    MAXBSIZE, seqcount, gbflags, &nbp);
444105422Sdillon		} else {
445248521Skib			error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
446248521Skib			    gbflags, &nbp);
447105422Sdillon		}
4481541Srgrimes		if (error) {
4491541Srgrimes			brelse(nbp);
45022521Sdyson			goto fail;
4511541Srgrimes		}
4521541Srgrimes	} else {
453248521Skib		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
4541541Srgrimes		nbp->b_blkno = fsbtodb(fs, nb);
4551541Srgrimes	}
456223888Skib	curthread_pflags_restore(saved_inbdflush);
45798542Smckusick	*bpp = nbp;
4581541Srgrimes	return (0);
45922521Sdysonfail:
460223888Skib	curthread_pflags_restore(saved_inbdflush);
46122521Sdyson	/*
462105669Smckusick	 * If we have failed to allocate any blocks, simply return the error.
463105669Smckusick	 * This is the usual case and avoids the need to fsync the file.
464105669Smckusick	 */
465105669Smckusick	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
466105669Smckusick		return (error);
467105669Smckusick	/*
46822521Sdyson	 * If we have failed part way through block allocation, we
46922521Sdyson	 * have to deallocate any indirect blocks that we have allocated.
47055799Smckusick	 * We have to fsync the file before we start to get rid of all
47155799Smckusick	 * of its dependencies so that we do not leave them dangling.
47255799Smckusick	 * We have to sync it at the end so that the soft updates code
47355799Smckusick	 * does not find any untracked changes. Although this is really
47455799Smckusick	 * slow, running out of disk space is not expected to be a common
475249582Sgabor	 * occurrence. The error return from fsync is ignored as we already
47655799Smckusick	 * have an error to return to the user.
477207141Sjeff	 *
478207141Sjeff	 * XXX Still have to journal the free below
47922521Sdyson	 */
480233438Smckusick	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
	/*
	 * Pass 1: walk the recorded (disk address, lbn) pairs in parallel,
	 * invalidate any buffer still present for each lbn, and total up
	 * the bytes being given back.
	 */
481174973Skib	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
482174973Skib	     blkp < allocblk; blkp++, lbns_remfree++) {
483174973Skib		/*
484174973Skib		 * We shall not leave the freed blocks on the vnode
485174973Skib		 * buffer object lists.
486174973Skib		 */
487304669Skib		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
488304669Skib		    GB_NOCREAT | GB_UNMAPPED);
489174973Skib		if (bp != NULL) {
490304670Skib			KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
491304670Skib			    ("mismatch1 l %jd %jd b %ju %ju",
492304670Skib			    (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
493304670Skib			    (uintmax_t)bp->b_blkno,
494304670Skib			    (uintmax_t)fsbtodb(fs, *blkp)));
495304672Skib			bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
496304672Skib			bp->b_flags &= ~(B_ASYNC | B_CACHE);
497174973Skib			brelse(bp);
498174973Skib		}
49922521Sdyson		deallocated += fs->fs_bsize;
50022521Sdyson	}
	/*
	 * Detach the new chain from the file: either zero the inode's
	 * indirect pointer set by this call, or zero the entry written
	 * into the pre-existing indirect block recorded in unwindidx.
	 */
50157446Sdillon	if (allocib != NULL) {
50222521Sdyson		*allocib = 0;
50357446Sdillon	} else if (unwindidx >= 0) {
50457446Sdillon		int r;
50557446Sdillon
50657446Sdillon		r = bread(vp, indirs[unwindidx].in_lbn,
50757446Sdillon		    (int)fs->fs_bsize, NOCRED, &bp);
50857446Sdillon		if (r) {
			/*
			 * NOTE(review): the brelse() below follows panic()
			 * and looks unreachable, assuming panic() does not
			 * return -- confirm.
			 */
50957446Sdillon			panic("Could not unwind indirect block, error %d", r);
51057446Sdillon			brelse(bp);
51157446Sdillon		} else {
51298542Smckusick			bap = (ufs1_daddr_t *)bp->b_data;
51357446Sdillon			bap[indirs[unwindidx].in_off] = 0;
514100344Smckusick			if (flags & IO_SYNC) {
51557446Sdillon				bwrite(bp);
51657446Sdillon			} else {
51757446Sdillon				if (bp->b_bufsize == fs->fs_bsize)
51857446Sdillon					bp->b_flags |= B_CLUSTEROK;
51957446Sdillon				bdwrite(bp);
52057446Sdillon			}
52157446Sdillon		}
52257446Sdillon	}
52322521Sdyson	if (deallocated) {
52422521Sdyson#ifdef QUOTA
52522521Sdyson		/*
52622521Sdyson		 * Restore user's disk quota because allocation failed.
52722521Sdyson		 */
52898542Smckusick		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
52922521Sdyson#endif
530100344Smckusick		dp->di_blocks -= btodb(deallocated);
53122521Sdyson		ip->i_flag |= IN_CHANGE | IN_UPDATE;
53222521Sdyson	}
533233438Smckusick	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
534175068Skib	/*
535175068Skib	 * After the buffers are invalidated and on-disk pointers are
536175068Skib	 * cleared, free the blocks.
537175068Skib	 */
538175068Skib	for (blkp = allociblk; blkp < allocblk; blkp++) {
539304670Skib#ifdef INVARIANTS
		/*
		 * Debug check: lbns_remfree is rewound on the first
		 * iteration and stepped in parallel with blkp; finding a
		 * buffer for one of these lbns at this point is fatal.
		 */
540304670Skib		if (blkp == allociblk)
541304670Skib			lbns_remfree = lbns;
542304670Skib		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
543304670Skib		    GB_NOCREAT | GB_UNMAPPED);
544304670Skib		if (bp != NULL) {
545304670Skib			panic("zombie1 %jd %ju %ju",
546304670Skib			    (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
547304670Skib			    (uintmax_t)fsbtodb(fs, *blkp));
548304670Skib		}
549304670Skib		lbns_remfree++;
550304670Skib#endif
551175068Skib		ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
552223127Smckusick		    ip->i_number, vp->v_type, NULL);
553175068Skib	}
	/* Return the error that originally sent us to `fail'. */
55422521Sdyson	return (error);
5551541Srgrimes}
55698542Smckusick
55798542Smckusick/*
55898542Smckusick * Balloc defines the structure of file system storage
55998542Smckusick * by allocating the physical blocks on a device given
56098542Smckusick * the inode and the logical block number in a file.
56198542Smckusick * This is the allocation strategy for UFS2. Above is
56298542Smckusick * the allocation strategy for UFS1.
56398542Smckusick */
56498542Smckusickint
56598542Smckusickffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
56698542Smckusick    struct ucred *cred, int flags, struct buf **bpp)
56798542Smckusick{
56898542Smckusick	struct inode *ip;
569100344Smckusick	struct ufs2_dinode *dp;
57098542Smckusick	ufs_lbn_t lbn, lastlbn;
57198542Smckusick	struct fs *fs;
57298542Smckusick	struct buf *bp, *nbp;
573140705Sjeff	struct ufsmount *ump;
57498542Smckusick	struct indir indirs[NIADDR + 2];
57598542Smckusick	ufs2_daddr_t nb, newb, *bap, pref;
57698542Smckusick	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
577174973Skib	ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
57898542Smckusick	int deallocated, osize, nsize, num, i, error;
57998542Smckusick	int unwindidx = -1;
580180758Skib	int saved_inbdflush;
581223114Smckusick	static struct timeval lastfail;
582223114Smckusick	static int curfail;
583248521Skib	int gbflags, reclaimed;
58498542Smckusick
58598542Smckusick	ip = VTOI(vp);
586100344Smckusick	dp = ip->i_din2;
58798542Smckusick	fs = ip->i_fs;
588140705Sjeff	ump = ip->i_ump;
58998542Smckusick	lbn = lblkno(fs, startoffset);
59098542Smckusick	size = blkoff(fs, startoffset) + size;
591222958Sjeff	reclaimed = 0;
59298542Smckusick	if (size > fs->fs_bsize)
59398542Smckusick		panic("ffs_balloc_ufs2: blk too big");
59498542Smckusick	*bpp = NULL;
59598542Smckusick	if (lbn < 0)
59698542Smckusick		return (EFBIG);
597248521Skib	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
59898542Smckusick
599207141Sjeff	if (DOINGSOFTDEP(vp))
600207141Sjeff		softdep_prealloc(vp, MNT_WAIT);
601207141Sjeff
60298542Smckusick	/*
603100344Smckusick	 * Check for allocating external data.
604100344Smckusick	 */
605100344Smckusick	if (flags & IO_EXT) {
606100344Smckusick		if (lbn >= NXADDR)
607100344Smckusick			return (EFBIG);
608100344Smckusick		/*
609100344Smckusick		 * If the next write will extend the data into a new block,
610100344Smckusick		 * and the data is currently composed of a fragment
611100344Smckusick		 * this fragment has to be extended to be a full block.
612100344Smckusick		 */
613100344Smckusick		lastlbn = lblkno(fs, dp->di_extsize);
614100344Smckusick		if (lastlbn < lbn) {
615100344Smckusick			nb = lastlbn;
616100344Smckusick			osize = sblksize(fs, dp->di_extsize, nb);
617100344Smckusick			if (osize < fs->fs_bsize && osize > 0) {
618140705Sjeff				UFS_LOCK(ump);
619100344Smckusick				error = ffs_realloccg(ip, -1 - nb,
620100344Smckusick				    dp->di_extb[nb],
621100344Smckusick				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
622100344Smckusick				    &dp->di_extb[0]), osize,
623187790Srwatson				    (int)fs->fs_bsize, flags, cred, &bp);
624100344Smckusick				if (error)
625100344Smckusick					return (error);
626100344Smckusick				if (DOINGSOFTDEP(vp))
627100344Smckusick					softdep_setup_allocext(ip, nb,
628100344Smckusick					    dbtofsb(fs, bp->b_blkno),
629100344Smckusick					    dp->di_extb[nb],
630100344Smckusick					    fs->fs_bsize, osize, bp);
631100344Smckusick				dp->di_extsize = smalllblktosize(fs, nb + 1);
632100344Smckusick				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
633100344Smckusick				bp->b_xflags |= BX_ALTDATA;
634187790Srwatson				ip->i_flag |= IN_CHANGE;
635100344Smckusick				if (flags & IO_SYNC)
636100344Smckusick					bwrite(bp);
637100344Smckusick				else
638100344Smckusick					bawrite(bp);
639100344Smckusick			}
640100344Smckusick		}
641100344Smckusick		/*
642100344Smckusick		 * All blocks are direct blocks
643100344Smckusick		 */
644100344Smckusick		if (flags & BA_METAONLY)
645100344Smckusick			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
646100344Smckusick		nb = dp->di_extb[lbn];
647100344Smckusick		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
648248521Skib			error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
649248521Skib			    gbflags, &bp);
650100344Smckusick			if (error) {
651100344Smckusick				brelse(bp);
652100344Smckusick				return (error);
653100344Smckusick			}
654100344Smckusick			bp->b_blkno = fsbtodb(fs, nb);
655100344Smckusick			bp->b_xflags |= BX_ALTDATA;
656100344Smckusick			*bpp = bp;
657100344Smckusick			return (0);
658100344Smckusick		}
659100344Smckusick		if (nb != 0) {
660100344Smckusick			/*
661100344Smckusick			 * Consider need to reallocate a fragment.
662100344Smckusick			 */
663100344Smckusick			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
664100344Smckusick			nsize = fragroundup(fs, size);
665100344Smckusick			if (nsize <= osize) {
666248521Skib				error = bread_gb(vp, -1 - lbn, osize, NOCRED,
667248521Skib				    gbflags, &bp);
668100344Smckusick				if (error) {
669100344Smckusick					brelse(bp);
670100344Smckusick					return (error);
671100344Smckusick				}
672100344Smckusick				bp->b_blkno = fsbtodb(fs, nb);
673100344Smckusick				bp->b_xflags |= BX_ALTDATA;
674100344Smckusick			} else {
675140705Sjeff				UFS_LOCK(ump);
676100344Smckusick				error = ffs_realloccg(ip, -1 - lbn,
677100344Smckusick				    dp->di_extb[lbn],
678100344Smckusick				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
679187790Srwatson				    &dp->di_extb[0]), osize, nsize, flags,
680187790Srwatson				    cred, &bp);
681100344Smckusick				if (error)
682100344Smckusick					return (error);
683100344Smckusick				bp->b_xflags |= BX_ALTDATA;
684100344Smckusick				if (DOINGSOFTDEP(vp))
685100344Smckusick					softdep_setup_allocext(ip, lbn,
686100344Smckusick					    dbtofsb(fs, bp->b_blkno), nb,
687100344Smckusick					    nsize, osize, bp);
688100344Smckusick			}
689100344Smckusick		} else {
690100344Smckusick			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
691100344Smckusick				nsize = fragroundup(fs, size);
692100344Smckusick			else
693100344Smckusick				nsize = fs->fs_bsize;
694140705Sjeff			UFS_LOCK(ump);
695100344Smckusick			error = ffs_alloc(ip, lbn,
696100344Smckusick			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
697187790Srwatson			   nsize, flags, cred, &newb);
698100344Smckusick			if (error)
699100344Smckusick				return (error);
700248521Skib			bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
701100344Smckusick			bp->b_blkno = fsbtodb(fs, newb);
702100344Smckusick			bp->b_xflags |= BX_ALTDATA;
703100344Smckusick			if (flags & BA_CLRBUF)
704100344Smckusick				vfs_bio_clrbuf(bp);
705100344Smckusick			if (DOINGSOFTDEP(vp))
706100344Smckusick				softdep_setup_allocext(ip, lbn, newb, 0,
707100344Smckusick				    nsize, 0, bp);
708100344Smckusick		}
709100344Smckusick		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
710187790Srwatson		ip->i_flag |= IN_CHANGE;
711100344Smckusick		*bpp = bp;
712100344Smckusick		return (0);
713100344Smckusick	}
714100344Smckusick	/*
71598542Smckusick	 * If the next write will extend the file into a new block,
71698542Smckusick	 * and the file is currently composed of a fragment
71798542Smckusick	 * this fragment has to be extended to be a full block.
71898542Smckusick	 */
71998542Smckusick	lastlbn = lblkno(fs, ip->i_size);
72098542Smckusick	if (lastlbn < NDADDR && lastlbn < lbn) {
72198542Smckusick		nb = lastlbn;
72298542Smckusick		osize = blksize(fs, ip, nb);
72398542Smckusick		if (osize < fs->fs_bsize && osize > 0) {
724140705Sjeff			UFS_LOCK(ump);
725100344Smckusick			error = ffs_realloccg(ip, nb, dp->di_db[nb],
726248283Skib			    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
727248283Skib			    &dp->di_db[0]), osize, (int)fs->fs_bsize,
728248283Skib			    flags, cred, &bp);
72998542Smckusick			if (error)
73098542Smckusick				return (error);
73198542Smckusick			if (DOINGSOFTDEP(vp))
73298542Smckusick				softdep_setup_allocdirect(ip, nb,
73398542Smckusick				    dbtofsb(fs, bp->b_blkno),
734100344Smckusick				    dp->di_db[nb],
73598542Smckusick				    fs->fs_bsize, osize, bp);
73698542Smckusick			ip->i_size = smalllblktosize(fs, nb + 1);
737100344Smckusick			dp->di_size = ip->i_size;
738100344Smckusick			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
73998542Smckusick			ip->i_flag |= IN_CHANGE | IN_UPDATE;
740100344Smckusick			if (flags & IO_SYNC)
74198542Smckusick				bwrite(bp);
74298542Smckusick			else
74398542Smckusick				bawrite(bp);
74498542Smckusick		}
74598542Smckusick	}
74698542Smckusick	/*
74798542Smckusick	 * The first NDADDR blocks are direct blocks
74898542Smckusick	 */
74998542Smckusick	if (lbn < NDADDR) {
75098658Sdillon		if (flags & BA_METAONLY)
75198658Sdillon			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
752100344Smckusick		nb = dp->di_db[lbn];
75398542Smckusick		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
754248521Skib			error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
755248521Skib			    gbflags, &bp);
75698542Smckusick			if (error) {
75798542Smckusick				brelse(bp);
75898542Smckusick				return (error);
75998542Smckusick			}
76098542Smckusick			bp->b_blkno = fsbtodb(fs, nb);
76198542Smckusick			*bpp = bp;
76298542Smckusick			return (0);
76398542Smckusick		}
76498542Smckusick		if (nb != 0) {
76598542Smckusick			/*
76698542Smckusick			 * Consider need to reallocate a fragment.
76798542Smckusick			 */
76898542Smckusick			osize = fragroundup(fs, blkoff(fs, ip->i_size));
76998542Smckusick			nsize = fragroundup(fs, size);
77098542Smckusick			if (nsize <= osize) {
771248521Skib				error = bread_gb(vp, lbn, osize, NOCRED,
772248521Skib				    gbflags, &bp);
77398542Smckusick				if (error) {
77498542Smckusick					brelse(bp);
77598542Smckusick					return (error);
77698542Smckusick				}
77798542Smckusick				bp->b_blkno = fsbtodb(fs, nb);
77898542Smckusick			} else {
779140705Sjeff				UFS_LOCK(ump);
780100344Smckusick				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
78198542Smckusick				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
782248283Skib				    &dp->di_db[0]), osize, nsize, flags,
783187790Srwatson				    cred, &bp);
78498542Smckusick				if (error)
78598542Smckusick					return (error);
78698542Smckusick				if (DOINGSOFTDEP(vp))
78798542Smckusick					softdep_setup_allocdirect(ip, lbn,
78898542Smckusick					    dbtofsb(fs, bp->b_blkno), nb,
78998542Smckusick					    nsize, osize, bp);
79098542Smckusick			}
79198542Smckusick		} else {
79298542Smckusick			if (ip->i_size < smalllblktosize(fs, lbn + 1))
79398542Smckusick				nsize = fragroundup(fs, size);
79498542Smckusick			else
79598542Smckusick				nsize = fs->fs_bsize;
796140705Sjeff			UFS_LOCK(ump);
79798542Smckusick			error = ffs_alloc(ip, lbn,
79898542Smckusick			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
799187790Srwatson				&dp->di_db[0]), nsize, flags, cred, &newb);
80098542Smckusick			if (error)
80198542Smckusick				return (error);
802248521Skib			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
80398542Smckusick			bp->b_blkno = fsbtodb(fs, newb);
80498658Sdillon			if (flags & BA_CLRBUF)
80598542Smckusick				vfs_bio_clrbuf(bp);
80698542Smckusick			if (DOINGSOFTDEP(vp))
80798542Smckusick				softdep_setup_allocdirect(ip, lbn, newb, 0,
80898542Smckusick				    nsize, 0, bp);
80998542Smckusick		}
810100344Smckusick		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
81198542Smckusick		ip->i_flag |= IN_CHANGE | IN_UPDATE;
81298542Smckusick		*bpp = bp;
81398542Smckusick		return (0);
81498542Smckusick	}
81598542Smckusick	/*
81698542Smckusick	 * Determine the number of levels of indirection.
81798542Smckusick	 */
81898542Smckusick	pref = 0;
81998542Smckusick	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
82098542Smckusick		return(error);
821173464Sobrien#ifdef INVARIANTS
82298542Smckusick	if (num < 1)
82398542Smckusick		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
82498542Smckusick#endif
825223888Skib	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
82698542Smckusick	/*
82798542Smckusick	 * Fetch the first indirect block allocating if necessary.
82898542Smckusick	 */
82998542Smckusick	--num;
830100344Smckusick	nb = dp->di_ib[indirs[0].in_off];
83198542Smckusick	allocib = NULL;
83298542Smckusick	allocblk = allociblk;
833174973Skib	lbns_remfree = lbns;
83498542Smckusick	if (nb == 0) {
835140705Sjeff		UFS_LOCK(ump);
836248623Smckusick		pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
837248623Smckusick		    (ufs2_daddr_t *)0);
838262779Spfg		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
839187790Srwatson		    flags, cred, &newb)) != 0) {
840223888Skib			curthread_pflags_restore(saved_inbdflush);
84198542Smckusick			return (error);
842180758Skib		}
843242520Smckusick		pref = newb + fs->fs_frag;
84498542Smckusick		nb = newb;
845304667Skib		MPASS(allocblk < allociblk + nitems(allociblk));
846304667Skib		MPASS(lbns_remfree < lbns + nitems(lbns));
84798542Smckusick		*allocblk++ = nb;
848174973Skib		*lbns_remfree++ = indirs[1].in_lbn;
849248521Skib		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
850248521Skib		    GB_UNMAPPED);
85198542Smckusick		bp->b_blkno = fsbtodb(fs, nb);
85298542Smckusick		vfs_bio_clrbuf(bp);
85398542Smckusick		if (DOINGSOFTDEP(vp)) {
85498542Smckusick			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
85598542Smckusick			    newb, 0, fs->fs_bsize, 0, bp);
85698542Smckusick			bdwrite(bp);
85798542Smckusick		} else {
85898542Smckusick			/*
85998542Smckusick			 * Write synchronously so that indirect blocks
86098542Smckusick			 * never point at garbage.
86198542Smckusick			 */
86298542Smckusick			if (DOINGASYNC(vp))
86398542Smckusick				bdwrite(bp);
86498542Smckusick			else if ((error = bwrite(bp)) != 0)
86598542Smckusick				goto fail;
86698542Smckusick		}
867100344Smckusick		allocib = &dp->di_ib[indirs[0].in_off];
86898542Smckusick		*allocib = nb;
86998542Smckusick		ip->i_flag |= IN_CHANGE | IN_UPDATE;
87098542Smckusick	}
87198542Smckusick	/*
87298542Smckusick	 * Fetch through the indirect blocks, allocating as necessary.
87398542Smckusick	 */
874222958Sjeffretry:
87598542Smckusick	for (i = 1;;) {
87698542Smckusick		error = bread(vp,
87798542Smckusick		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
87898542Smckusick		if (error) {
87998542Smckusick			brelse(bp);
88098542Smckusick			goto fail;
88198542Smckusick		}
88298542Smckusick		bap = (ufs2_daddr_t *)bp->b_data;
88398542Smckusick		nb = bap[indirs[i].in_off];
88498542Smckusick		if (i == num)
88598542Smckusick			break;
88698542Smckusick		i += 1;
88798542Smckusick		if (nb != 0) {
88898542Smckusick			bqrelse(bp);
88998542Smckusick			continue;
89098542Smckusick		}
891140705Sjeff		UFS_LOCK(ump);
892252527Smckusick		/*
893252527Smckusick		 * If parent indirect has just been allocated, try to cluster
894252527Smckusick		 * immediately following it.
895252527Smckusick		 */
89698542Smckusick		if (pref == 0)
897248623Smckusick			pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
898248623Smckusick			    (ufs2_daddr_t *)0);
899187790Srwatson		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
900222958Sjeff		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
90198542Smckusick			brelse(bp);
902304668Skib			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
903222958Sjeff				UFS_LOCK(ump);
904222958Sjeff				softdep_request_cleanup(fs, vp, cred,
905222958Sjeff				    FLUSH_BLOCKS_WAIT);
906222958Sjeff				UFS_UNLOCK(ump);
907222958Sjeff				goto retry;
908222958Sjeff			}
909223114Smckusick			if (ppsratecheck(&lastfail, &curfail, 1)) {
910223114Smckusick				ffs_fserr(fs, ip->i_number, "filesystem full");
911223114Smckusick				uprintf("\n%s: write failed, filesystem "
912223114Smckusick				    "is full\n", fs->fs_fsmnt);
913223114Smckusick			}
91498542Smckusick			goto fail;
91598542Smckusick		}
916242520Smckusick		pref = newb + fs->fs_frag;
91798542Smckusick		nb = newb;
918304667Skib		MPASS(allocblk < allociblk + nitems(allociblk));
919304667Skib		MPASS(lbns_remfree < lbns + nitems(lbns));
92098542Smckusick		*allocblk++ = nb;
921174973Skib		*lbns_remfree++ = indirs[i].in_lbn;
922248521Skib		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
923248521Skib		    GB_UNMAPPED);
92498542Smckusick		nbp->b_blkno = fsbtodb(fs, nb);
92598542Smckusick		vfs_bio_clrbuf(nbp);
92698542Smckusick		if (DOINGSOFTDEP(vp)) {
92798542Smckusick			softdep_setup_allocindir_meta(nbp, ip, bp,
92898542Smckusick			    indirs[i - 1].in_off, nb);
92998542Smckusick			bdwrite(nbp);
93098542Smckusick		} else {
93198542Smckusick			/*
93298542Smckusick			 * Write synchronously so that indirect blocks
93398542Smckusick			 * never point at garbage.
93498542Smckusick			 */
93598542Smckusick			if ((error = bwrite(nbp)) != 0) {
93698542Smckusick				brelse(bp);
93798542Smckusick				goto fail;
93898542Smckusick			}
93998542Smckusick		}
94098542Smckusick		bap[indirs[i - 1].in_off] = nb;
94198542Smckusick		if (allocib == NULL && unwindidx < 0)
94298542Smckusick			unwindidx = i - 1;
94398542Smckusick		/*
94498542Smckusick		 * If required, write synchronously, otherwise use
94598542Smckusick		 * delayed write.
94698542Smckusick		 */
947100344Smckusick		if (flags & IO_SYNC) {
94898542Smckusick			bwrite(bp);
94998542Smckusick		} else {
95098542Smckusick			if (bp->b_bufsize == fs->fs_bsize)
95198542Smckusick				bp->b_flags |= B_CLUSTEROK;
95298542Smckusick			bdwrite(bp);
95398542Smckusick		}
95498542Smckusick	}
95598542Smckusick	/*
95698542Smckusick	 * If asked only for the indirect block, then return it.
95798542Smckusick	 */
95898658Sdillon	if (flags & BA_METAONLY) {
959223888Skib		curthread_pflags_restore(saved_inbdflush);
96098542Smckusick		*bpp = bp;
96198542Smckusick		return (0);
96298542Smckusick	}
96398542Smckusick	/*
96498542Smckusick	 * Get the data block, allocating if necessary.
96598542Smckusick	 */
96698542Smckusick	if (nb == 0) {
967140705Sjeff		UFS_LOCK(ump);
968252527Smckusick		/*
969252527Smckusick		 * If allocating metadata at the front of the cylinder
970252527Smckusick		 * group and parent indirect block has just been allocated,
971252527Smckusick		 * then cluster next to it if it is the first indirect in
972252527Smckusick		 * the file. Otherwise it has been allocated in the metadata
973252527Smckusick		 * area, so we want to find our own place out in the data area.
974252527Smckusick		 */
975252527Smckusick		if (pref == 0 || (lbn > NDADDR && fs->fs_metaspace != 0))
976242520Smckusick			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
977242520Smckusick			    &bap[0]);
978222958Sjeff		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
979222958Sjeff		    flags | IO_BUFLOCKED, cred, &newb);
98098542Smckusick		if (error) {
98198542Smckusick			brelse(bp);
982304668Skib			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
983222958Sjeff				UFS_LOCK(ump);
984222958Sjeff				softdep_request_cleanup(fs, vp, cred,
985222958Sjeff				    FLUSH_BLOCKS_WAIT);
986222958Sjeff				UFS_UNLOCK(ump);
987222958Sjeff				goto retry;
988222958Sjeff			}
989223114Smckusick			if (ppsratecheck(&lastfail, &curfail, 1)) {
990223114Smckusick				ffs_fserr(fs, ip->i_number, "filesystem full");
991223114Smckusick				uprintf("\n%s: write failed, filesystem "
992223114Smckusick				    "is full\n", fs->fs_fsmnt);
993223114Smckusick			}
99498542Smckusick			goto fail;
99598542Smckusick		}
99698542Smckusick		nb = newb;
997304667Skib		MPASS(allocblk < allociblk + nitems(allociblk));
998304667Skib		MPASS(lbns_remfree < lbns + nitems(lbns));
99998542Smckusick		*allocblk++ = nb;
1000174973Skib		*lbns_remfree++ = lbn;
1001248521Skib		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
100298542Smckusick		nbp->b_blkno = fsbtodb(fs, nb);
100398658Sdillon		if (flags & BA_CLRBUF)
100498542Smckusick			vfs_bio_clrbuf(nbp);
100598542Smckusick		if (DOINGSOFTDEP(vp))
100698542Smckusick			softdep_setup_allocindir_page(ip, lbn, bp,
100798542Smckusick			    indirs[i].in_off, nb, 0, nbp);
100898542Smckusick		bap[indirs[i].in_off] = nb;
100998542Smckusick		/*
101098542Smckusick		 * If required, write synchronously, otherwise use
101198542Smckusick		 * delayed write.
101298542Smckusick		 */
1013100344Smckusick		if (flags & IO_SYNC) {
101498542Smckusick			bwrite(bp);
101598542Smckusick		} else {
101698542Smckusick			if (bp->b_bufsize == fs->fs_bsize)
101798542Smckusick				bp->b_flags |= B_CLUSTEROK;
101898542Smckusick			bdwrite(bp);
101998542Smckusick		}
1020223888Skib		curthread_pflags_restore(saved_inbdflush);
102198542Smckusick		*bpp = nbp;
102298542Smckusick		return (0);
102398542Smckusick	}
102498542Smckusick	brelse(bp);
1025105422Sdillon	/*
1026105422Sdillon	 * If requested clear invalid portions of the buffer.  If we
1027105422Sdillon	 * have to do a read-before-write (typical if BA_CLRBUF is set),
1028105422Sdillon	 * try to do some read-ahead in the sequential case to reduce
1029105422Sdillon	 * the number of I/O transactions.
1030105422Sdillon	 */
103198658Sdillon	if (flags & BA_CLRBUF) {
1032105422Sdillon		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
1033290638Skib		if (seqcount != 0 &&
1034290638Skib		    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
1035290638Skib		    !(vm_page_count_severe() || buf_dirty_count_severe())) {
1036105422Sdillon			error = cluster_read(vp, ip->i_size, lbn,
1037105422Sdillon			    (int)fs->fs_bsize, NOCRED,
1038248521Skib			    MAXBSIZE, seqcount, gbflags, &nbp);
1039105422Sdillon		} else {
1040248521Skib			error = bread_gb(vp, lbn, (int)fs->fs_bsize,
1041248521Skib			    NOCRED, gbflags, &nbp);
1042105422Sdillon		}
104398542Smckusick		if (error) {
104498542Smckusick			brelse(nbp);
104598542Smckusick			goto fail;
104698542Smckusick		}
104798542Smckusick	} else {
1048248521Skib		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
104998542Smckusick		nbp->b_blkno = fsbtodb(fs, nb);
105098542Smckusick	}
1051223888Skib	curthread_pflags_restore(saved_inbdflush);
105298542Smckusick	*bpp = nbp;
105398542Smckusick	return (0);
105498542Smckusickfail:
1055223888Skib	curthread_pflags_restore(saved_inbdflush);
105698542Smckusick	/*
1057105669Smckusick	 * If we have failed to allocate any blocks, simply return the error.
1058105669Smckusick	 * This is the usual case and avoids the need to fsync the file.
1059105669Smckusick	 */
1060105669Smckusick	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
1061105669Smckusick		return (error);
1062105669Smckusick	/*
106398542Smckusick	 * If we have failed part way through block allocation, we
106498542Smckusick	 * have to deallocate any indirect blocks that we have allocated.
106598542Smckusick	 * We have to fsync the file before we start to get rid of all
106698542Smckusick	 * of its dependencies so that we do not leave them dangling.
106798542Smckusick	 * We have to sync it at the end so that the soft updates code
106898542Smckusick	 * does not find any untracked changes. Although this is really
106998542Smckusick	 * slow, running out of disk space is not expected to be a common
1070250576Seadler	 * occurrence. The error return from fsync is ignored as we already
107198542Smckusick	 * have an error to return to the user.
1072207141Sjeff	 *
1073207141Sjeff	 * XXX Still have to journal the free below
107498542Smckusick	 */
1075233438Smckusick	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
1076174973Skib	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
1077174973Skib	     blkp < allocblk; blkp++, lbns_remfree++) {
1078174973Skib		/*
1079174973Skib		 * We shall not leave the freed blocks on the vnode
1080174973Skib		 * buffer object lists.
1081174973Skib		 */
1082304669Skib		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1083304669Skib		    GB_NOCREAT | GB_UNMAPPED);
1084174973Skib		if (bp != NULL) {
1085304670Skib			KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
1086304670Skib			    ("mismatch2 l %jd %jd b %ju %ju",
1087304670Skib			    (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
1088304670Skib			    (uintmax_t)bp->b_blkno,
1089304670Skib			    (uintmax_t)fsbtodb(fs, *blkp)));
1090304672Skib			bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
1091304672Skib			bp->b_flags &= ~(B_ASYNC | B_CACHE);
1092174973Skib			brelse(bp);
1093174973Skib		}
109498542Smckusick		deallocated += fs->fs_bsize;
109598542Smckusick	}
109698542Smckusick	if (allocib != NULL) {
109798542Smckusick		*allocib = 0;
109898542Smckusick	} else if (unwindidx >= 0) {
109998542Smckusick		int r;
110098542Smckusick
110198542Smckusick		r = bread(vp, indirs[unwindidx].in_lbn,
110298542Smckusick		    (int)fs->fs_bsize, NOCRED, &bp);
110398542Smckusick		if (r) {
110498542Smckusick			panic("Could not unwind indirect block, error %d", r);
110598542Smckusick			brelse(bp);
110698542Smckusick		} else {
110798542Smckusick			bap = (ufs2_daddr_t *)bp->b_data;
110898542Smckusick			bap[indirs[unwindidx].in_off] = 0;
1109100344Smckusick			if (flags & IO_SYNC) {
111098542Smckusick				bwrite(bp);
111198542Smckusick			} else {
111298542Smckusick				if (bp->b_bufsize == fs->fs_bsize)
111398542Smckusick					bp->b_flags |= B_CLUSTEROK;
111498542Smckusick				bdwrite(bp);
111598542Smckusick			}
111698542Smckusick		}
111798542Smckusick	}
111898542Smckusick	if (deallocated) {
111998542Smckusick#ifdef QUOTA
112098542Smckusick		/*
112198542Smckusick		 * Restore user's disk quota because allocation failed.
112298542Smckusick		 */
112398542Smckusick		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
112498542Smckusick#endif
1125100344Smckusick		dp->di_blocks -= btodb(deallocated);
112698542Smckusick		ip->i_flag |= IN_CHANGE | IN_UPDATE;
112798542Smckusick	}
1128233438Smckusick	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
1129175068Skib	/*
1130175068Skib	 * After the buffers are invalidated and on-disk pointers are
1131175068Skib	 * cleared, free the blocks.
1132175068Skib	 */
1133175068Skib	for (blkp = allociblk; blkp < allocblk; blkp++) {
1134304670Skib#ifdef INVARIANTS
1135304670Skib		if (blkp == allociblk)
1136304670Skib			lbns_remfree = lbns;
1137304670Skib		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1138304670Skib		    GB_NOCREAT | GB_UNMAPPED);
1139304670Skib		if (bp != NULL) {
1140304670Skib			panic("zombie2 %jd %ju %ju",
1141304670Skib			    (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
1142304670Skib			    (uintmax_t)fsbtodb(fs, *blkp));
1143304670Skib		}
1144304670Skib		lbns_remfree++;
1145304670Skib#endif
1146175068Skib		ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
1147223127Smckusick		    ip->i_number, vp->v_type, NULL);
1148175068Skib	}
114998542Smckusick	return (error);
115098542Smckusick}
1151