ext2_bmap.c revision 294545
113240Sprr/*- 213240Sprr * Copyright (c) 1989, 1991, 1993 313240Sprr * The Regents of the University of California. All rights reserved. 413240Sprr * (c) UNIX System Laboratories, Inc. 513240Sprr * All or some portions of this file are derived from material licensed 613240Sprr * to the University of California by American Telephone and Telegraph 713240Sprr * Co. or Unix System Laboratories, Inc. and are reproduced herein with 813240Sprr * the permission of UNIX System Laboratories, Inc. 913240Sprr * 1013240Sprr * Redistribution and use in source and binary forms, with or without 1113240Sprr * modification, are permitted provided that the following conditions 1213240Sprr * are met: 1313240Sprr * 1. Redistributions of source code must retain the above copyright 1413240Sprr * notice, this list of conditions and the following disclaimer. 1513240Sprr * 2. Redistributions in binary form must reproduce the above copyright 1613240Sprr * notice, this list of conditions and the following disclaimer in the 1713240Sprr * documentation and/or other materials provided with the distribution. 1813240Sprr * 4. Neither the name of the University nor the names of its contributors 1913240Sprr * may be used to endorse or promote products derived from this software 2013240Sprr * without specific prior written permission. 2113240Sprr * 2213240Sprr * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 2313240Sprr * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2413240Sprr * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2513240Sprr * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 2613240Sprr * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2713240Sprr * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2813240Sprr * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2913240Sprr * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3013240Sprr * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3113240Sprr * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3213240Sprr * SUCH DAMAGE. 3313240Sprr * 3413240Sprr * @(#)ufs_bmap.c 8.7 (Berkeley) 3/21/95 3513240Sprr * $FreeBSD: stable/10/sys/fs/ext2fs/ext2_bmap.c 294545 2016-01-22 03:25:06Z pfg $ 3613240Sprr */ 3713240Sprr 3813240Sprr#include <sys/param.h> 3913240Sprr#include <sys/systm.h> 4013240Sprr#include <sys/bio.h> 4113240Sprr#include <sys/buf.h> 4213240Sprr#include <sys/proc.h> 4313240Sprr#include <sys/vnode.h> 4413240Sprr#include <sys/mount.h> 4513240Sprr#include <sys/resourcevar.h> 4613240Sprr#include <sys/stat.h> 4713240Sprr 4813240Sprr#include <fs/ext2fs/inode.h> 4913240Sprr#include <fs/ext2fs/fs.h> 5013240Sprr#include <fs/ext2fs/ext2fs.h> 5113240Sprr#include <fs/ext2fs/ext2_dinode.h> 5213240Sprr#include <fs/ext2fs/ext2_extern.h> 5313240Sprr#include <fs/ext2fs/ext2_mount.h> 5413240Sprr 5513240Sprrstatic int ext4_bmapext(struct vnode *, int32_t, int64_t *, int *, int *); 5613240Sprr 5713240Sprr/* 5813240Sprr * Bmap converts the logical block number of a file to its physical block 5913240Sprr * number on the disk. The conversion is done by using the logical block 6013240Sprr * number to index into the array of block pointers described by the dinode. 6113240Sprr */ 6213240Sprrint 6313240Sprrext2_bmap(struct vop_bmap_args *ap) 6413240Sprr{ 6513240Sprr daddr_t blkno; 6613240Sprr int error; 6713240Sprr 6813240Sprr /* 6913240Sprr * Check for underlying vnode requests and ensure that logical 7013240Sprr * to physical mapping is requested. 7113240Sprr */ 7213240Sprr if (ap->a_bop != NULL) 7313240Sprr *ap->a_bop = &VTOI(ap->a_vp)->i_devvp->v_bufobj; 7413240Sprr if (ap->a_bnp == NULL) 7513240Sprr return (0); 7613240Sprr 7713240Sprr if (VTOI(ap->a_vp)->i_flag & IN_E4EXTENTS) 7813240Sprr error = ext4_bmapext(ap->a_vp, ap->a_bn, &blkno, 7913240Sprr ap->a_runp, ap->a_runb); 8013240Sprr else 8113240Sprr error = ext2_bmaparray(ap->a_vp, ap->a_bn, &blkno, 8213240Sprr ap->a_runp, ap->a_runb); 8313240Sprr *ap->a_bnp = blkno; 8413240Sprr return (error); 8513240Sprr} 8613240Sprr 8713240Sprr/* 8813240Sprr * This function converts the logical block number of a file to 8913240Sprr * its physical block number on the disk within ext4 extents. 9013240Sprr */ 9113240Sprrstatic int 9213240Sprrext4_bmapext(struct vnode *vp, int32_t bn, int64_t *bnp, int *runp, int *runb) 9313240Sprr{ 9413240Sprr struct inode *ip; 9513240Sprr struct m_ext2fs *fs; 9613240Sprr struct ext4_extent *ep; 9713240Sprr struct ext4_extent_path path = { .ep_bp = NULL }; 9813240Sprr daddr_t lbn; 9913240Sprr int ret = 0; 10013240Sprr 10113240Sprr ip = VTOI(vp); 10213240Sprr fs = ip->i_e2fs; 10313240Sprr lbn = bn; 10413240Sprr 10513240Sprr /* 10613240Sprr * TODO: need to implement read ahead to improve the performance. 10713240Sprr */ 10813240Sprr if (runp != NULL) 10913240Sprr *runp = 0; 11013240Sprr 11113240Sprr if (runb != NULL) 11213240Sprr *runb = 0; 11313240Sprr 11413240Sprr ext4_ext_find_extent(fs, ip, lbn, &path); 11513240Sprr ep = path.ep_ext; 11613240Sprr if (ep == NULL) 11713240Sprr ret = EIO; 11813240Sprr else { 11913240Sprr *bnp = fsbtodb(fs, lbn - ep->e_blk + 12013240Sprr (ep->e_start_lo | (daddr_t)ep->e_start_hi << 32)); 12113240Sprr 12213240Sprr if (*bnp == 0) 12313240Sprr *bnp = -1; 12413240Sprr } 12513240Sprr 12613240Sprr if (path.ep_bp != NULL) { 12713240Sprr brelse(path.ep_bp); 12813240Sprr path.ep_bp = NULL; 12913240Sprr } 13013240Sprr 13113240Sprr return (ret); 13213240Sprr} 13313240Sprr 13413240Sprr/* 13513240Sprr * Indirect blocks are now on the vnode for the file. They are given negative 13613240Sprr * logical block numbers. Indirect blocks are addressed by the negative 13713240Sprr * address of the first data block to which they point. Double indirect blocks 13813240Sprr * are addressed by one less than the address of the first indirect block to 13913240Sprr * which they point. Triple indirect blocks are addressed by one less than 14013240Sprr * the address of the first double indirect block to which they point. 14113240Sprr * 14213240Sprr * ext2_bmaparray does the bmap conversion, and if requested returns the 14313240Sprr * array of logical blocks which must be traversed to get to a block. 14413240Sprr * Each entry contains the offset into that block that gets you to the 14513240Sprr * next block and the disk address of the block (if it is assigned). 14613240Sprr */ 14713240Sprr 14813240Sprrint 14913240Sprrext2_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, int *runp, int *runb) 15013240Sprr{ 15113240Sprr struct inode *ip; 15213240Sprr struct buf *bp; 15313240Sprr struct ext2mount *ump; 15413240Sprr struct mount *mp; 15513240Sprr struct indir a[NIADDR+1], *ap; 15615400Sprr daddr_t daddr; 15713240Sprr e2fs_lbn_t metalbn; 15813240Sprr int error, num, maxrun = 0, bsize; 15913240Sprr int *nump; 16013240Sprr 16113240Sprr ap = NULL; 16213240Sprr ip = VTOI(vp); 16313240Sprr mp = vp->v_mount; 16413240Sprr ump = VFSTOEXT2(mp); 16513240Sprr 16613240Sprr bsize = EXT2_BLOCK_SIZE(ump->um_e2fs); 16713240Sprr 16813240Sprr if (runp) { 16913240Sprr maxrun = mp->mnt_iosize_max / bsize - 1; 17013240Sprr *runp = 0; 17113240Sprr } 17213240Sprr 17313240Sprr if (runb) { 17413240Sprr *runb = 0; 17513240Sprr } 17613240Sprr 17713240Sprr 17813240Sprr ap = a; 17916538Sprr nump = # 18016538Sprr error = ext2_getlbns(vp, bn, ap, nump); 18116538Sprr if (error) 18213240Sprr return (error); 18313240Sprr 18413240Sprr num = *nump; 18513240Sprr if (num == 0) { 18613240Sprr *bnp = blkptrtodb(ump, ip->i_db[bn]); 18713240Sprr if (*bnp == 0) { 18813240Sprr *bnp = -1; 18913240Sprr } else if (runp) { 19013240Sprr daddr_t bnb = bn; 19113240Sprr for (++bn; bn < NDADDR && *runp < maxrun && 19213240Sprr is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]); 19316538Sprr ++bn, ++*runp); 19416538Sprr bn = bnb; 19516538Sprr if (runb && (bn > 0)) { 19616538Sprr for (--bn; (bn >= 0) && (*runb < maxrun) && 19716538Sprr is_sequential(ump, ip->i_db[bn], 19816538Sprr ip->i_db[bn + 1]); 19916538Sprr --bn, ++*runb); 20016538Sprr } 20113240Sprr } 20213240Sprr return (0); 20313240Sprr } 20413240Sprr 20513240Sprr 20613240Sprr /* Get disk address out of indirect block array */ 20713240Sprr daddr = ip->i_ib[ap->in_off]; 20813240Sprr 20913240Sprr for (bp = NULL, ++ap; --num; ++ap) { 21015400Sprr /* 21115400Sprr * Exit the loop if there is no disk address assigned yet and 21213240Sprr * the indirect block isn't in the cache, or if we were 21313240Sprr * looking for an indirect block and we've found it. 21413240Sprr */ 21513240Sprr 21613240Sprr metalbn = ap->in_lbn; 21713240Sprr if ((daddr == 0 && !incore(&vp->v_bufobj, metalbn)) || metalbn == bn) 21813240Sprr break; 21913240Sprr /* 22013240Sprr * If we get here, we've either got the block in the cache 22113240Sprr * or we have a disk address for it, go fetch it. 22213240Sprr */ 22313240Sprr if (bp) 22413240Sprr bqrelse(bp); 22513240Sprr 22613240Sprr bp = getblk(vp, metalbn, bsize, 0, 0, 0); 22713240Sprr if ((bp->b_flags & B_CACHE) == 0) { 22813240Sprr#ifdef INVARIANTS 22913240Sprr if (!daddr) 23013240Sprr panic("ext2_bmaparray: indirect block not in cache"); 23113240Sprr#endif 23213240Sprr bp->b_blkno = blkptrtodb(ump, daddr); 23313240Sprr bp->b_iocmd = BIO_READ; 23413240Sprr bp->b_flags &= ~B_INVAL; 23513240Sprr bp->b_ioflags &= ~BIO_ERROR; 23613240Sprr vfs_busy_pages(bp, 0); 23713240Sprr bp->b_iooffset = dbtob(bp->b_blkno); 23813240Sprr bstrategy(bp); 23913240Sprr curthread->td_ru.ru_inblock++; 240 error = bufwait(bp); 241 if (error) { 242 brelse(bp); 243 return (error); 244 } 245 } 246 247 daddr = ((e2fs_daddr_t *)bp->b_data)[ap->in_off]; 248 if (num == 1 && daddr && runp) { 249 for (bn = ap->in_off + 1; 250 bn < MNINDIR(ump) && *runp < maxrun && 251 is_sequential(ump, 252 ((e2fs_daddr_t *)bp->b_data)[bn - 1], 253 ((e2fs_daddr_t *)bp->b_data)[bn]); 254 ++bn, ++*runp); 255 bn = ap->in_off; 256 if (runb && bn) { 257 for (--bn; bn >= 0 && *runb < maxrun && 258 is_sequential(ump, 259 ((e2fs_daddr_t *)bp->b_data)[bn], 260 ((e2fs_daddr_t *)bp->b_data)[bn + 1]); 261 --bn, ++*runb); 262 } 263 } 264 } 265 if (bp) 266 bqrelse(bp); 267 268 /* 269 * Since this is FFS independent code, we are out of scope for the 270 * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they 271 * will fall in the range 1..um_seqinc, so we use that test and 272 * return a request for a zeroed out buffer if attempts are made 273 * to read a BLK_NOCOPY or BLK_SNAP block. 274 */ 275 if ((ip->i_flags & SF_SNAPSHOT) && daddr > 0 && daddr < ump->um_seqinc){ 276 *bnp = -1; 277 return (0); 278 } 279 *bnp = blkptrtodb(ump, daddr); 280 if (*bnp == 0) { 281 *bnp = -1; 282 } 283 return (0); 284} 285 286/* 287 * Create an array of logical block number/offset pairs which represent the 288 * path of indirect blocks required to access a data block. The first "pair" 289 * contains the logical block number of the appropriate single, double or 290 * triple indirect block and the offset into the inode indirect block array. 291 * Note, the logical block number of the inode single/double/triple indirect 292 * block appears twice in the array, once with the offset into the i_ib and 293 * once with the offset into the page itself. 294 */ 295int 296ext2_getlbns(struct vnode *vp, daddr_t bn, struct indir *ap, int *nump) 297{ 298 long blockcnt; 299 e2fs_lbn_t metalbn, realbn; 300 struct ext2mount *ump; 301 int i, numlevels, off; 302 int64_t qblockcnt; 303 304 ump = VFSTOEXT2(vp->v_mount); 305 if (nump) 306 *nump = 0; 307 numlevels = 0; 308 realbn = bn; 309 if ((long)bn < 0) 310 bn = -(long)bn; 311 312 /* The first NDADDR blocks are direct blocks. */ 313 if (bn < NDADDR) 314 return (0); 315 316 /* 317 * Determine the number of levels of indirection. After this loop 318 * is done, blockcnt indicates the number of data blocks possible 319 * at the previous level of indirection, and NIADDR - i is the number 320 * of levels of indirection needed to locate the requested block. 321 */ 322 for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) { 323 if (i == 0) 324 return (EFBIG); 325 /* 326 * Use int64_t's here to avoid overflow for triple indirect 327 * blocks when longs have 32 bits and the block size is more 328 * than 4K. 329 */ 330 qblockcnt = (int64_t)blockcnt * MNINDIR(ump); 331 if (bn < qblockcnt) 332 break; 333 blockcnt = qblockcnt; 334 } 335 336 /* Calculate the address of the first meta-block. */ 337 if (realbn >= 0) 338 metalbn = -(realbn - bn + NIADDR - i); 339 else 340 metalbn = -(-realbn - bn + NIADDR - i); 341 342 /* 343 * At each iteration, off is the offset into the bap array which is 344 * an array of disk addresses at the current level of indirection. 345 * The logical block number and the offset in that block are stored 346 * into the argument array. 347 */ 348 ap->in_lbn = metalbn; 349 ap->in_off = off = NIADDR - i; 350 ap++; 351 for (++numlevels; i <= NIADDR; i++) { 352 /* If searching for a meta-data block, quit when found. */ 353 if (metalbn == realbn) 354 break; 355 356 off = (bn / blockcnt) % MNINDIR(ump); 357 358 ++numlevels; 359 ap->in_lbn = metalbn; 360 ap->in_off = off; 361 ++ap; 362 363 metalbn -= -1 + off * blockcnt; 364 blockcnt /= MNINDIR(ump); 365 } 366 if (nump) 367 *nump = numlevels; 368 return (0); 369} 370