ext2_alloc.c revision 325750
11592Srgrimes/*- 250476Speter * modified for Lites 1.1 31592Srgrimes * 4321267Sngie * Aug 1995, Godmar Back (gback@cs.utah.edu) 564567Sgshapiro * University of Utah, Department of Computer Science 638099Speter */ 71592Srgrimes/*- 864567Sgshapiro * Copyright (c) 1982, 1986, 1989, 1993 974814Sru * The Regents of the University of California. All rights reserved. 1090798Sgshapiro * 11201380Sed * Redistribution and use in source and binary forms, with or without 12201380Sed * modification, are permitted provided that the following conditions 1390164Skris * are met: 141592Srgrimes * 1. Redistributions of source code must retain the above copyright 15147225Sdes * notice, this list of conditions and the following disclaimer. 16147225Sdes * 2. Redistributions in binary form must reproduce the above copyright 1764567Sgshapiro * notice, this list of conditions and the following disclaimer in the 1890798Sgshapiro * documentation and/or other materials provided with the distribution. 1990798Sgshapiro * 4. Neither the name of the University nor the names of its contributors 2064567Sgshapiro * may be used to endorse or promote products derived from this software 2190798Sgshapiro * without specific prior written permission. 2290798Sgshapiro * 2390798Sgshapiro * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 2465970Sgshapiro * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2565970Sgshapiro * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2666961Sgshapiro * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 2766961Sgshapiro * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2865970Sgshapiro * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2965970Sgshapiro * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3090798Sgshapiro * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3190798Sgshapiro * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3290798Sgshapiro * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 331592Srgrimes * SUCH DAMAGE. 34 * 35 * @(#)ffs_alloc.c 8.8 (Berkeley) 2/21/94 36 * $FreeBSD: stable/10/sys/fs/ext2fs/ext2_alloc.c 325750 2017-11-13 02:31:25Z pfg $ 37 */ 38 39#include <sys/param.h> 40#include <sys/systm.h> 41#include <sys/conf.h> 42#include <sys/vnode.h> 43#include <sys/stat.h> 44#include <sys/mount.h> 45#include <sys/sysctl.h> 46#include <sys/syslog.h> 47#include <sys/buf.h> 48 49#include <fs/ext2fs/fs.h> 50#include <fs/ext2fs/inode.h> 51#include <fs/ext2fs/ext2_mount.h> 52#include <fs/ext2fs/ext2fs.h> 53#include <fs/ext2fs/ext2_extern.h> 54 55static daddr_t ext2_alloccg(struct inode *, int, daddr_t, int); 56static daddr_t ext2_clusteralloc(struct inode *, int, daddr_t, int); 57static u_long ext2_dirpref(struct inode *); 58static void ext2_fserr(struct m_ext2fs *, uid_t, char *); 59static u_long ext2_hashalloc(struct inode *, int, long, int, 60 daddr_t (*)(struct inode *, int, daddr_t, 61 int)); 62static daddr_t ext2_nodealloccg(struct inode *, int, daddr_t, int); 63static daddr_t ext2_mapsearch(struct m_ext2fs *, char *, daddr_t); 64 65/* 66 * Allocate a block in the filesystem. 67 * 68 * A preference may be optionally specified. If a preference is given 69 * the following hierarchy is used to allocate a block: 70 * 1) allocate the requested block. 71 * 2) allocate a rotationally optimal block in the same cylinder. 72 * 3) allocate a block in the same cylinder group. 73 * 4) quadradically rehash into other cylinder groups, until an 74 * available block is located. 75 * If no block preference is given the following hierarchy is used 76 * to allocate a block: 77 * 1) allocate a block in the cylinder group that contains the 78 * inode for the file. 79 * 2) quadradically rehash into other cylinder groups, until an 80 * available block is located. 81 */ 82int 83ext2_alloc(struct inode *ip, daddr_t lbn, e4fs_daddr_t bpref, int size, 84 struct ucred *cred, e4fs_daddr_t *bnp) 85{ 86 struct m_ext2fs *fs; 87 struct ext2mount *ump; 88 int32_t bno; 89 int cg; 90 91 *bnp = 0; 92 fs = ip->i_e2fs; 93 ump = ip->i_ump; 94 mtx_assert(EXT2_MTX(ump), MA_OWNED); 95#ifdef INVARIANTS 96 if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) { 97 vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n", 98 (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt); 99 panic("ext2_alloc: bad size"); 100 } 101 if (cred == NOCRED) 102 panic("ext2_alloc: missing credential"); 103#endif /* INVARIANTS */ 104 if (size == fs->e2fs_bsize && fs->e2fs->e2fs_fbcount == 0) 105 goto nospace; 106 if (cred->cr_uid != 0 && 107 fs->e2fs->e2fs_fbcount < fs->e2fs->e2fs_rbcount) 108 goto nospace; 109 if (bpref >= fs->e2fs->e2fs_bcount) 110 bpref = 0; 111 if (bpref == 0) 112 cg = ino_to_cg(fs, ip->i_number); 113 else 114 cg = dtog(fs, bpref); 115 bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize, 116 ext2_alloccg); 117 if (bno > 0) { 118 /* set next_alloc fields as done in block_getblk */ 119 ip->i_next_alloc_block = lbn; 120 ip->i_next_alloc_goal = bno; 121 122 ip->i_blocks += btodb(fs->e2fs_bsize); 123 ip->i_flag |= IN_CHANGE | IN_UPDATE; 124 *bnp = bno; 125 return (0); 126 } 127nospace: 128 EXT2_UNLOCK(ump); 129 ext2_fserr(fs, cred->cr_uid, "filesystem full"); 130 uprintf("\n%s: write failed, filesystem is full\n", fs->e2fs_fsmnt); 131 return (ENOSPC); 132} 133 134/* 135 * Reallocate a sequence of blocks into a contiguous sequence of blocks. 136 * 137 * The vnode and an array of buffer pointers for a range of sequential 138 * logical blocks to be made contiguous is given. The allocator attempts 139 * to find a range of sequential blocks starting as close as possible to 140 * an fs_rotdelay offset from the end of the allocation for the logical 141 * block immediately preceding the current range. If successful, the 142 * physical block numbers in the buffer pointers and in the inode are 143 * changed to reflect the new allocation. If unsuccessful, the allocation 144 * is left unchanged. The success in doing the reallocation is returned. 145 * Note that the error return is not reflected back to the user. Rather 146 * the previous block allocation will be used. 147 */ 148 149static SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW, 0, "EXT2FS filesystem"); 150 151static int doasyncfree = 1; 152 153SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, 154 "Use asychronous writes to update block pointers when freeing blocks"); 155 156static int doreallocblks = 0; 157 158SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, ""); 159 160int 161ext2_reallocblks(struct vop_reallocblks_args *ap) 162{ 163 struct m_ext2fs *fs; 164 struct inode *ip; 165 struct vnode *vp; 166 struct buf *sbp, *ebp; 167 uint32_t *bap, *sbap, *ebap; 168 struct ext2mount *ump; 169 struct cluster_save *buflist; 170 struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; 171 e2fs_lbn_t start_lbn, end_lbn; 172 int soff; 173 e2fs_daddr_t newblk, blkno; 174 int i, len, start_lvl, end_lvl, pref, ssize; 175 176 if (doreallocblks == 0) 177 return (ENOSPC); 178 179 vp = ap->a_vp; 180 ip = VTOI(vp); 181 fs = ip->i_e2fs; 182 ump = ip->i_ump; 183 184 if (fs->e2fs_contigsumsize <= 0) 185 return (ENOSPC); 186 187 buflist = ap->a_buflist; 188 len = buflist->bs_nchildren; 189 start_lbn = buflist->bs_children[0]->b_lblkno; 190 end_lbn = start_lbn + len - 1; 191#ifdef INVARIANTS 192 for (i = 1; i < len; i++) 193 if (buflist->bs_children[i]->b_lblkno != start_lbn + i) 194 panic("ext2_reallocblks: non-cluster"); 195#endif 196 /* 197 * If the cluster crosses the boundary for the first indirect 198 * block, leave space for the indirect block. Indirect blocks 199 * are initially laid out in a position after the last direct 200 * block. Block reallocation would usually destroy locality by 201 * moving the indirect block out of the way to make room for 202 * data blocks if we didn't compensate here. We should also do 203 * this for other indirect block boundaries, but it is only 204 * important for the first one. 205 */ 206 if (start_lbn < NDADDR && end_lbn >= NDADDR) 207 return (ENOSPC); 208 /* 209 * If the latest allocation is in a new cylinder group, assume that 210 * the filesystem has decided to move and do not force it back to 211 * the previous cylinder group. 212 */ 213 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != 214 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) 215 return (ENOSPC); 216 if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) || 217 ext2_getlbns(vp, end_lbn, end_ap, &end_lvl)) 218 return (ENOSPC); 219 /* 220 * Get the starting offset and block map for the first block. 221 */ 222 if (start_lvl == 0) { 223 sbap = &ip->i_db[0]; 224 soff = start_lbn; 225 } else { 226 idp = &start_ap[start_lvl - 1]; 227 if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) { 228 brelse(sbp); 229 return (ENOSPC); 230 } 231 sbap = (u_int *)sbp->b_data; 232 soff = idp->in_off; 233 } 234 /* 235 * If the block range spans two block maps, get the second map. 236 */ 237 ebap = NULL; 238 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { 239 ssize = len; 240 } else { 241#ifdef INVARIANTS 242 if (start_ap[start_lvl - 1].in_lbn == idp->in_lbn) 243 panic("ext2_reallocblks: start == end"); 244#endif 245 ssize = len - (idp->in_off + 1); 246 if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp)) 247 goto fail; 248 ebap = (u_int *)ebp->b_data; 249 } 250 /* 251 * Find the preferred location for the cluster. 252 */ 253 EXT2_LOCK(ump); 254 pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0); 255 /* 256 * Search the block map looking for an allocation of the desired size. 257 */ 258 if ((newblk = (e2fs_daddr_t)ext2_hashalloc(ip, dtog(fs, pref), pref, 259 len, ext2_clusteralloc)) == 0) { 260 EXT2_UNLOCK(ump); 261 goto fail; 262 } 263 /* 264 * We have found a new contiguous block. 265 * 266 * First we have to replace the old block pointers with the new 267 * block pointers in the inode and indirect blocks associated 268 * with the file. 269 */ 270#ifdef DEBUG 271 printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number, 272 (intmax_t)start_lbn, (intmax_t)end_lbn); 273#endif /* DEBUG */ 274 blkno = newblk; 275 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) { 276 if (i == ssize) { 277 bap = ebap; 278 soff = -i; 279 } 280#ifdef INVARIANTS 281 if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap)) 282 panic("ext2_reallocblks: alloc mismatch"); 283#endif 284#ifdef DEBUG 285 printf(" %d,", *bap); 286#endif /* DEBUG */ 287 *bap++ = blkno; 288 } 289 /* 290 * Next we must write out the modified inode and indirect blocks. 291 * For strict correctness, the writes should be synchronous since 292 * the old block values may have been written to disk. In practise 293 * they are almost never written, but if we are concerned about 294 * strict correctness, the `doasyncfree' flag should be set to zero. 295 * 296 * The test on `doasyncfree' should be changed to test a flag 297 * that shows whether the associated buffers and inodes have 298 * been written. The flag should be set when the cluster is 299 * started and cleared whenever the buffer or inode is flushed. 300 * We can then check below to see if it is set, and do the 301 * synchronous write only when it has been cleared. 302 */ 303 if (sbap != &ip->i_db[0]) { 304 if (doasyncfree) 305 bdwrite(sbp); 306 else 307 bwrite(sbp); 308 } else { 309 ip->i_flag |= IN_CHANGE | IN_UPDATE; 310 if (!doasyncfree) 311 ext2_update(vp, 1); 312 } 313 if (ssize < len) { 314 if (doasyncfree) 315 bdwrite(ebp); 316 else 317 bwrite(ebp); 318 } 319 /* 320 * Last, free the old blocks and assign the new blocks to the buffers. 321 */ 322#ifdef DEBUG 323 printf("\n\tnew:"); 324#endif /* DEBUG */ 325 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) { 326 ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), 327 fs->e2fs_bsize); 328 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); 329#ifdef DEBUG 330 printf(" %d,", blkno); 331#endif /* DEBUG */ 332 } 333#ifdef DEBUG 334 printf("\n"); 335#endif /* DEBUG */ 336 return (0); 337 338fail: 339 if (ssize < len) 340 brelse(ebp); 341 if (sbap != &ip->i_db[0]) 342 brelse(sbp); 343 return (ENOSPC); 344} 345 346/* 347 * Allocate an inode in the filesystem. 348 * 349 */ 350int 351ext2_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp) 352{ 353 struct timespec ts; 354 struct inode *pip; 355 struct m_ext2fs *fs; 356 struct inode *ip; 357 struct ext2mount *ump; 358 ino_t ino, ipref; 359 int i, error, cg; 360 361 *vpp = NULL; 362 pip = VTOI(pvp); 363 fs = pip->i_e2fs; 364 ump = pip->i_ump; 365 366 EXT2_LOCK(ump); 367 if (fs->e2fs->e2fs_ficount == 0) 368 goto noinodes; 369 /* 370 * If it is a directory then obtain a cylinder group based on 371 * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is 372 * always the next inode. 373 */ 374 if ((mode & IFMT) == IFDIR) { 375 cg = ext2_dirpref(pip); 376 if (fs->e2fs_contigdirs[cg] < 255) 377 fs->e2fs_contigdirs[cg]++; 378 } else { 379 cg = ino_to_cg(fs, pip->i_number); 380 if (fs->e2fs_contigdirs[cg] > 0) 381 fs->e2fs_contigdirs[cg]--; 382 } 383 ipref = cg * fs->e2fs->e2fs_ipg + 1; 384 ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg); 385 386 if (ino == 0) 387 goto noinodes; 388 error = VFS_VGET(pvp->v_mount, ino, LK_EXCLUSIVE, vpp); 389 if (error) { 390 ext2_vfree(pvp, ino, mode); 391 return (error); 392 } 393 ip = VTOI(*vpp); 394 395 /* 396 * The question is whether using VGET was such good idea at all: 397 * Linux doesn't read the old inode in when it is allocating a 398 * new one. I will set at least i_size and i_blocks to zero. 399 */ 400 ip->i_flag = 0; 401 ip->i_size = 0; 402 ip->i_blocks = 0; 403 ip->i_mode = 0; 404 ip->i_flags = 0; 405 /* now we want to make sure that the block pointers are zeroed out */ 406 for (i = 0; i < NDADDR; i++) 407 ip->i_db[i] = 0; 408 for (i = 0; i < NIADDR; i++) 409 ip->i_ib[i] = 0; 410 411 /* 412 * Set up a new generation number for this inode. 413 */ 414 ip->i_gen = arc4random(); 415 416 vfs_timestamp(&ts); 417 ip->i_birthtime = ts.tv_sec; 418 ip->i_birthnsec = ts.tv_nsec; 419 420/* 421printf("ext2_valloc: allocated inode %d\n", ino); 422*/ 423 return (0); 424noinodes: 425 EXT2_UNLOCK(ump); 426 ext2_fserr(fs, cred->cr_uid, "out of inodes"); 427 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt); 428 return (ENOSPC); 429} 430 431/* 432 * Find a cylinder to place a directory. 433 * 434 * The policy implemented by this algorithm is to allocate a 435 * directory inode in the same cylinder group as its parent 436 * directory, but also to reserve space for its files inodes 437 * and data. Restrict the number of directories which may be 438 * allocated one after another in the same cylinder group 439 * without intervening allocation of files. 440 * 441 * If we allocate a first level directory then force allocation 442 * in another cylinder group. 443 * 444 */ 445static u_long 446ext2_dirpref(struct inode *pip) 447{ 448 struct m_ext2fs *fs; 449 int cg, prefcg, cgsize; 450 u_int avgifree, avgbfree, avgndir, curdirsize; 451 u_int minifree, minbfree, maxndir; 452 u_int mincg, minndir; 453 u_int dirsize, maxcontigdirs; 454 455 mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED); 456 fs = pip->i_e2fs; 457 458 avgifree = fs->e2fs->e2fs_ficount / fs->e2fs_gcount; 459 avgbfree = fs->e2fs->e2fs_fbcount / fs->e2fs_gcount; 460 avgndir = fs->e2fs_total_dir / fs->e2fs_gcount; 461 462 /* 463 * Force allocation in another cg if creating a first level dir. 464 */ 465 ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref"); 466 if (ITOV(pip)->v_vflag & VV_ROOT) { 467 prefcg = arc4random() % fs->e2fs_gcount; 468 mincg = prefcg; 469 minndir = fs->e2fs_ipg; 470 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 471 if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir && 472 fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree && 473 fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) { 474 mincg = cg; 475 minndir = fs->e2fs_gd[cg].ext2bgd_ndirs; 476 } 477 for (cg = 0; cg < prefcg; cg++) 478 if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir && 479 fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree && 480 fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) { 481 mincg = cg; 482 minndir = fs->e2fs_gd[cg].ext2bgd_ndirs; 483 } 484 return (mincg); 485 } 486 /* 487 * Count various limits which used for 488 * optimal allocation of a directory inode. 489 */ 490 maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg); 491 minifree = avgifree - avgifree / 4; 492 if (minifree < 1) 493 minifree = 1; 494 minbfree = avgbfree - avgbfree / 4; 495 if (minbfree < 1) 496 minbfree = 1; 497 cgsize = fs->e2fs_fsize * fs->e2fs_fpg; 498 dirsize = AVGDIRSIZE; 499 curdirsize = avgndir ? (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0; 500 if (dirsize < curdirsize) 501 dirsize = curdirsize; 502 maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255); 503 maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR); 504 if (maxcontigdirs == 0) 505 maxcontigdirs = 1; 506 507 /* 508 * Limit number of dirs in one cg and reserve space for 509 * regular files, but only if we have no deficit in 510 * inodes or space. 511 */ 512 prefcg = ino_to_cg(fs, pip->i_number); 513 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 514 if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir && 515 fs->e2fs_gd[cg].ext2bgd_nifree >= minifree && 516 fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) { 517 if (fs->e2fs_contigdirs[cg] < maxcontigdirs) 518 return (cg); 519 } 520 for (cg = 0; cg < prefcg; cg++) 521 if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir && 522 fs->e2fs_gd[cg].ext2bgd_nifree >= minifree && 523 fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) { 524 if (fs->e2fs_contigdirs[cg] < maxcontigdirs) 525 return (cg); 526 } 527 /* 528 * This is a backstop when we have deficit in space. 529 */ 530 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 531 if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree) 532 return (cg); 533 for (cg = 0; cg < prefcg; cg++) 534 if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree) 535 break; 536 return (cg); 537} 538 539/* 540 * Select the desired position for the next block in a file. 541 * 542 * we try to mimic what Remy does in inode_getblk/block_getblk 543 * 544 * we note: blocknr == 0 means that we're about to allocate either 545 * a direct block or a pointer block at the first level of indirection 546 * (In other words, stuff that will go in i_db[] or i_ib[]) 547 * 548 * blocknr != 0 means that we're allocating a block that is none 549 * of the above. Then, blocknr tells us the number of the block 550 * that will hold the pointer 551 */ 552e4fs_daddr_t 553ext2_blkpref(struct inode *ip, e2fs_lbn_t lbn, int indx, e2fs_daddr_t *bap, 554 e2fs_daddr_t blocknr) 555{ 556 int tmp; 557 558 mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED); 559 560 /* 561 * If the next block is actually what we thought it is, then set the 562 * goal to what we thought it should be. 563 */ 564 if (ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0) 565 return ip->i_next_alloc_goal; 566 567 /* 568 * Now check whether we were provided with an array that basically 569 * tells us previous blocks to which we want to stay close. 570 */ 571 if (bap) 572 for (tmp = indx - 1; tmp >= 0; tmp--) 573 if (bap[tmp]) 574 return bap[tmp]; 575 576 /* 577 * Else lets fall back to the blocknr or, if there is none, follow 578 * the rule that a block should be allocated near its inode. 579 */ 580 return blocknr ? blocknr : 581 (e2fs_daddr_t)(ip->i_block_group * 582 EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) + 583 ip->i_e2fs->e2fs->e2fs_first_dblock; 584} 585 586/* 587 * Implement the cylinder overflow algorithm. 588 * 589 * The policy implemented by this algorithm is: 590 * 1) allocate the block in its requested cylinder group. 591 * 2) quadradically rehash on the cylinder group number. 592 * 3) brute force search for a free block. 593 */ 594static u_long 595ext2_hashalloc(struct inode *ip, int cg, long pref, int size, 596 daddr_t (*allocator) (struct inode *, int, daddr_t, int)) 597{ 598 struct m_ext2fs *fs; 599 ino_t result; 600 int i, icg = cg; 601 602 mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED); 603 fs = ip->i_e2fs; 604 /* 605 * 1: preferred cylinder group 606 */ 607 result = (*allocator)(ip, cg, pref, size); 608 if (result) 609 return (result); 610 /* 611 * 2: quadratic rehash 612 */ 613 for (i = 1; i < fs->e2fs_gcount; i *= 2) { 614 cg += i; 615 if (cg >= fs->e2fs_gcount) 616 cg -= fs->e2fs_gcount; 617 result = (*allocator)(ip, cg, 0, size); 618 if (result) 619 return (result); 620 } 621 /* 622 * 3: brute force search 623 * Note that we start at i == 2, since 0 was checked initially, 624 * and 1 is always checked in the quadratic rehash. 625 */ 626 cg = (icg + 2) % fs->e2fs_gcount; 627 for (i = 2; i < fs->e2fs_gcount; i++) { 628 result = (*allocator)(ip, cg, 0, size); 629 if (result) 630 return (result); 631 cg++; 632 if (cg == fs->e2fs_gcount) 633 cg = 0; 634 } 635 return (0); 636} 637 638/* 639 * Determine whether a block can be allocated. 640 * 641 * Check to see if a block of the appropriate size is available, 642 * and if it is, allocate it. 643 */ 644static daddr_t 645ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size) 646{ 647 struct m_ext2fs *fs; 648 struct buf *bp; 649 struct ext2mount *ump; 650 daddr_t bno, runstart, runlen; 651 int bit, loc, end, error, start; 652 char *bbp; 653 /* XXX ondisk32 */ 654 fs = ip->i_e2fs; 655 ump = ip->i_ump; 656 if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0) 657 return (0); 658 EXT2_UNLOCK(ump); 659 error = bread(ip->i_devvp, fsbtodb(fs, 660 fs->e2fs_gd[cg].ext2bgd_b_bitmap), 661 (int)fs->e2fs_bsize, NOCRED, &bp); 662 if (error) { 663 brelse(bp); 664 EXT2_LOCK(ump); 665 return (0); 666 } 667 if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0) { 668 /* 669 * Another thread allocated the last block in this 670 * group while we were waiting for the buffer. 671 */ 672 brelse(bp); 673 EXT2_LOCK(ump); 674 return (0); 675 } 676 bbp = (char *)bp->b_data; 677 678 if (dtog(fs, bpref) != cg) 679 bpref = 0; 680 if (bpref != 0) { 681 bpref = dtogd(fs, bpref); 682 /* 683 * if the requested block is available, use it 684 */ 685 if (isclr(bbp, bpref)) { 686 bno = bpref; 687 goto gotit; 688 } 689 } 690 /* 691 * no blocks in the requested cylinder, so take next 692 * available one in this cylinder group. 693 * first try to get 8 contigous blocks, then fall back to a single 694 * block. 695 */ 696 if (bpref) 697 start = dtogd(fs, bpref) / NBBY; 698 else 699 start = 0; 700 end = howmany(fs->e2fs->e2fs_fpg, NBBY) - start; 701retry: 702 runlen = 0; 703 runstart = 0; 704 for (loc = start; loc < end; loc++) { 705 if (bbp[loc] == (char)0xff) { 706 runlen = 0; 707 continue; 708 } 709 710 /* Start of a run, find the number of high clear bits. */ 711 if (runlen == 0) { 712 bit = fls(bbp[loc]); 713 runlen = NBBY - bit; 714 runstart = loc * NBBY + bit; 715 } else if (bbp[loc] == 0) { 716 /* Continue a run. */ 717 runlen += NBBY; 718 } else { 719 /* 720 * Finish the current run. If it isn't long 721 * enough, start a new one. 722 */ 723 bit = ffs(bbp[loc]) - 1; 724 runlen += bit; 725 if (runlen >= 8) { 726 bno = runstart; 727 goto gotit; 728 } 729 730 /* Run was too short, start a new one. */ 731 bit = fls(bbp[loc]); 732 runlen = NBBY - bit; 733 runstart = loc * NBBY + bit; 734 } 735 736 /* If the current run is long enough, use it. */ 737 if (runlen >= 8) { 738 bno = runstart; 739 goto gotit; 740 } 741 } 742 if (start != 0) { 743 end = start; 744 start = 0; 745 goto retry; 746 } 747 bno = ext2_mapsearch(fs, bbp, bpref); 748 if (bno < 0) { 749 brelse(bp); 750 EXT2_LOCK(ump); 751 return (0); 752 } 753gotit: 754#ifdef INVARIANTS 755 if (isset(bbp, bno)) { 756 printf("ext2fs_alloccgblk: cg=%d bno=%jd fs=%s\n", 757 cg, (intmax_t)bno, fs->e2fs_fsmnt); 758 panic("ext2fs_alloccg: dup alloc"); 759 } 760#endif 761 setbit(bbp, bno); 762 EXT2_LOCK(ump); 763 ext2_clusteracct(fs, bbp, cg, bno, -1); 764 fs->e2fs->e2fs_fbcount--; 765 fs->e2fs_gd[cg].ext2bgd_nbfree--; 766 fs->e2fs_fmod = 1; 767 EXT2_UNLOCK(ump); 768 bdwrite(bp); 769 return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno); 770} 771 772/* 773 * Determine whether a cluster can be allocated. 774 */ 775static daddr_t 776ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len) 777{ 778 struct m_ext2fs *fs; 779 struct ext2mount *ump; 780 struct buf *bp; 781 char *bbp; 782 int bit, error, got, i, loc, run; 783 int32_t *lp; 784 daddr_t bno; 785 786 fs = ip->i_e2fs; 787 ump = ip->i_ump; 788 789 if (fs->e2fs_maxcluster[cg] < len) 790 return (0); 791 792 EXT2_UNLOCK(ump); 793 error = bread(ip->i_devvp, 794 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap), 795 (int)fs->e2fs_bsize, NOCRED, &bp); 796 if (error) 797 goto fail_lock; 798 799 bbp = (char *)bp->b_data; 800 EXT2_LOCK(ump); 801 /* 802 * Check to see if a cluster of the needed size (or bigger) is 803 * available in this cylinder group. 804 */ 805 lp = &fs->e2fs_clustersum[cg].cs_sum[len]; 806 for (i = len; i <= fs->e2fs_contigsumsize; i++) 807 if (*lp++ > 0) 808 break; 809 if (i > fs->e2fs_contigsumsize) { 810 /* 811 * Update the cluster summary information to reflect 812 * the true maximum-sized cluster so that future cluster 813 * allocation requests can avoid reading the bitmap only 814 * to find no cluster. 815 */ 816 lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1]; 817 for (i = len - 1; i > 0; i--) 818 if (*lp-- > 0) 819 break; 820 fs->e2fs_maxcluster[cg] = i; 821 goto fail; 822 } 823 EXT2_UNLOCK(ump); 824 825 /* Search the bitmap to find a big enough cluster like in FFS. */ 826 if (dtog(fs, bpref) != cg) 827 bpref = 0; 828 if (bpref != 0) 829 bpref = dtogd(fs, bpref); 830 loc = bpref / NBBY; 831 bit = 1 << (bpref % NBBY); 832 for (run = 0, got = bpref; got < fs->e2fs->e2fs_fpg; got++) { 833 if ((bbp[loc] & bit) != 0) 834 run = 0; 835 else { 836 run++; 837 if (run == len) 838 break; 839 } 840 if ((got & (NBBY - 1)) != (NBBY - 1)) 841 bit <<= 1; 842 else { 843 loc++; 844 bit = 1; 845 } 846 } 847 848 if (got >= fs->e2fs->e2fs_fpg) 849 goto fail_lock; 850 851 /* Allocate the cluster that we found. */ 852 for (i = 1; i < len; i++) 853 if (!isclr(bbp, got - run + i)) 854 panic("ext2_clusteralloc: map mismatch"); 855 856 bno = got - run + 1; 857 if (bno >= fs->e2fs->e2fs_fpg) 858 panic("ext2_clusteralloc: allocated out of group"); 859 860 EXT2_LOCK(ump); 861 for (i = 0; i < len; i += fs->e2fs_fpb) { 862 setbit(bbp, bno + i); 863 ext2_clusteracct(fs, bbp, cg, bno + i, -1); 864 fs->e2fs->e2fs_fbcount--; 865 fs->e2fs_gd[cg].ext2bgd_nbfree--; 866 } 867 fs->e2fs_fmod = 1; 868 EXT2_UNLOCK(ump); 869 870 bdwrite(bp); 871 return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno); 872 873fail_lock: 874 EXT2_LOCK(ump); 875fail: 876 brelse(bp); 877 return (0); 878} 879 880/* 881 * Determine whether an inode can be allocated. 882 * 883 * Check to see if an inode is available, and if it is, 884 * allocate it using tode in the specified cylinder group. 885 */ 886static daddr_t 887ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode) 888{ 889 struct m_ext2fs *fs; 890 struct buf *bp; 891 struct ext2mount *ump; 892 int error, start, len; 893 char *ibp, *loc; 894 895 ipref--; /* to avoid a lot of (ipref -1) */ 896 if (ipref == -1) 897 ipref = 0; 898 fs = ip->i_e2fs; 899 ump = ip->i_ump; 900 if (fs->e2fs_gd[cg].ext2bgd_nifree == 0) 901 return (0); 902 EXT2_UNLOCK(ump); 903 error = bread(ip->i_devvp, fsbtodb(fs, 904 fs->e2fs_gd[cg].ext2bgd_i_bitmap), 905 (int)fs->e2fs_bsize, NOCRED, &bp); 906 if (error) { 907 brelse(bp); 908 EXT2_LOCK(ump); 909 return (0); 910 } 911 if (fs->e2fs_gd[cg].ext2bgd_nifree == 0) { 912 /* 913 * Another thread allocated the last i-node in this 914 * group while we were waiting for the buffer. 915 */ 916 brelse(bp); 917 EXT2_LOCK(ump); 918 return (0); 919 } 920 ibp = (char *)bp->b_data; 921 if (ipref) { 922 ipref %= fs->e2fs->e2fs_ipg; 923 if (isclr(ibp, ipref)) 924 goto gotit; 925 } 926 start = ipref / NBBY; 927 len = howmany(fs->e2fs->e2fs_ipg - ipref, NBBY); 928 loc = memcchr(&ibp[start], 0xff, len); 929 if (loc == NULL) { 930 len = start + 1; 931 start = 0; 932 loc = memcchr(&ibp[start], 0xff, len); 933 if (loc == NULL) { 934 printf("cg = %d, ipref = %lld, fs = %s\n", 935 cg, (long long)ipref, fs->e2fs_fsmnt); 936 panic("ext2fs_nodealloccg: map corrupted"); 937 /* NOTREACHED */ 938 } 939 } 940 ipref = (loc - ibp) * NBBY + ffs(~*loc) - 1; 941gotit: 942 setbit(ibp, ipref); 943 EXT2_LOCK(ump); 944 fs->e2fs_gd[cg].ext2bgd_nifree--; 945 fs->e2fs->e2fs_ficount--; 946 fs->e2fs_fmod = 1; 947 if ((mode & IFMT) == IFDIR) { 948 fs->e2fs_gd[cg].ext2bgd_ndirs++; 949 fs->e2fs_total_dir++; 950 } 951 EXT2_UNLOCK(ump); 952 bdwrite(bp); 953 return (cg * fs->e2fs->e2fs_ipg + ipref + 1); 954} 955 956/* 957 * Free a block or fragment. 958 * 959 */ 960void 961ext2_blkfree(struct inode *ip, e4fs_daddr_t bno, long size) 962{ 963 struct m_ext2fs *fs; 964 struct buf *bp; 965 struct ext2mount *ump; 966 int cg, error; 967 char *bbp; 968 969 fs = ip->i_e2fs; 970 ump = ip->i_ump; 971 cg = dtog(fs, bno); 972 if ((u_int)bno >= fs->e2fs->e2fs_bcount) { 973 printf("bad block %lld, ino %llu\n", (long long)bno, 974 (unsigned long long)ip->i_number); 975 ext2_fserr(fs, ip->i_uid, "bad block"); 976 return; 977 } 978 error = bread(ip->i_devvp, 979 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap), 980 (int)fs->e2fs_bsize, NOCRED, &bp); 981 if (error) { 982 brelse(bp); 983 return; 984 } 985 bbp = (char *)bp->b_data; 986 bno = dtogd(fs, bno); 987 if (isclr(bbp, bno)) { 988 printf("block = %lld, fs = %s\n", 989 (long long)bno, fs->e2fs_fsmnt); 990 panic("ext2_blkfree: freeing free block"); 991 } 992 clrbit(bbp, bno); 993 EXT2_LOCK(ump); 994 ext2_clusteracct(fs, bbp, cg, bno, 1); 995 fs->e2fs->e2fs_fbcount++; 996 fs->e2fs_gd[cg].ext2bgd_nbfree++; 997 fs->e2fs_fmod = 1; 998 EXT2_UNLOCK(ump); 999 bdwrite(bp); 1000} 1001 1002/* 1003 * Free an inode. 1004 * 1005 */ 1006int 1007ext2_vfree(struct vnode *pvp, ino_t ino, int mode) 1008{ 1009 struct m_ext2fs *fs; 1010 struct inode *pip; 1011 struct buf *bp; 1012 struct ext2mount *ump; 1013 int error, cg; 1014 char *ibp; 1015 1016 pip = VTOI(pvp); 1017 fs = pip->i_e2fs; 1018 ump = pip->i_ump; 1019 if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount) 1020 panic("ext2_vfree: range: devvp = %p, ino = %ju, fs = %s", 1021 pip->i_devvp, (uintmax_t)ino, fs->e2fs_fsmnt); 1022 1023 cg = ino_to_cg(fs, ino); 1024 error = bread(pip->i_devvp, 1025 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap), 1026 (int)fs->e2fs_bsize, NOCRED, &bp); 1027 if (error) { 1028 brelse(bp); 1029 return (0); 1030 } 1031 ibp = (char *)bp->b_data; 1032 ino = (ino - 1) % fs->e2fs->e2fs_ipg; 1033 if (isclr(ibp, ino)) { 1034 printf("ino = %llu, fs = %s\n", 1035 (unsigned long long)ino, fs->e2fs_fsmnt); 1036 if (fs->e2fs_ronly == 0) 1037 panic("ext2_vfree: freeing free inode"); 1038 } 1039 clrbit(ibp, ino); 1040 EXT2_LOCK(ump); 1041 fs->e2fs->e2fs_ficount++; 1042 fs->e2fs_gd[cg].ext2bgd_nifree++; 1043 if ((mode & IFMT) == IFDIR) { 1044 fs->e2fs_gd[cg].ext2bgd_ndirs--; 1045 fs->e2fs_total_dir--; 1046 } 1047 fs->e2fs_fmod = 1; 1048 EXT2_UNLOCK(ump); 1049 bdwrite(bp); 1050 return (0); 1051} 1052 1053/* 1054 * Find a block in the specified cylinder group. 1055 * 1056 * It is a panic if a request is made to find a block if none are 1057 * available. 1058 */ 1059static daddr_t 1060ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref) 1061{ 1062 char *loc; 1063 int start, len; 1064 1065 /* 1066 * find the fragment by searching through the free block 1067 * map for an appropriate bit pattern 1068 */ 1069 if (bpref) 1070 start = dtogd(fs, bpref) / NBBY; 1071 else 1072 start = 0; 1073 len = howmany(fs->e2fs->e2fs_fpg, NBBY) - start; 1074 loc = memcchr(&bbp[start], 0xff, len); 1075 if (loc == NULL) { 1076 len = start + 1; 1077 start = 0; 1078 loc = memcchr(&bbp[start], 0xff, len); 1079 if (loc == NULL) { 1080 printf("start = %d, len = %d, fs = %s\n", 1081 start, len, fs->e2fs_fsmnt); 1082 panic("ext2_mapsearch: map corrupted"); 1083 /* NOTREACHED */ 1084 } 1085 } 1086 return ((loc - bbp) * NBBY + ffs(~*loc) - 1); 1087} 1088 1089/* 1090 * Fserr prints the name of a filesystem with an error diagnostic. 1091 * 1092 * The form of the error message is: 1093 * fs: error message 1094 */ 1095static void 1096ext2_fserr(struct m_ext2fs *fs, uid_t uid, char *cp) 1097{ 1098 1099 log(LOG_ERR, "uid %u on %s: %s\n", uid, fs->e2fs_fsmnt, cp); 1100} 1101 1102int 1103cg_has_sb(int i) 1104{ 1105 int a3, a5, a7; 1106 1107 if (i == 0 || i == 1) 1108 return 1; 1109 for (a3 = 3, a5 = 5, a7 = 7; 1110 a3 <= i || a5 <= i || a7 <= i; 1111 a3 *= 3, a5 *= 5, a7 *= 7) 1112 if (i == a3 || i == a5 || i == a7) 1113 return 1; 1114 return 0; 1115} 1116