1# 2# msdosfs -- Helper module for dealing with FAT (MS-DOS) on-disk structures. 3# 4 5CLUST_FREE = 0 6CLUST_FIRST = 2 7CLUST_MAX12 = 0x00000FFF 8CLUST_MAX16 = 0x0000FFFF 9CLUST_MAX32 = 0x0FFFFFFF 10CLUST_RSRVD = 0xFFFFFFF6 11CLUST_BAD = 0xFFFFFFF7 12CLUST_EOFS = 0xFFFFFFF8 13CLUST_EOF = 0xFFFFFFFF 14 15ATTR_READ_ONLY = 0x01 16ATTR_HIDDEN = 0x02 17ATTR_SYSTEM = 0x04 18ATTR_VOLUME_ID = 0x08 19ATTR_DIRECTORY = 0x10 20ATTR_ARCHIVE = 0x20 21ATTR_LONG_NAME = ATTR_READ_ONLY | ATTR_HIDDEN | ATTR_SYSTEM | ATTR_VOLUME_ID 22ATTR_LONG_NAME_MASK = ATTR_LONG_NAME | ATTR_DIRECTORY | ATTR_ARCHIVE 23 24LAST_LONG_ENTRY = 0x40 25 26import struct 27import math 28 29def make_dirent(name, attr, head=0, length=0, ntres=0, 30 create_time_tenth=0, create_time=0, create_date=0, 31 access_date=0, mod_time=0, mod_date=0): 32 "Create the raw bytes for a directory entry" 33 assert len(name) == 11 34 entry = name + struct.pack("<3B7HI", attr, ntres, create_time_tenth, 35 create_time, create_date, access_date, head>>16, mod_time, 36 mod_date, head&0xFFFF, length) 37 assert len(entry) == 32 38 return entry 39 40def parse_dirent(bytes): 41 """ 42 Parse the raw bytes of a directory entry, returning a dictionary of 43 all of the fields. 44 """ 45 assert len(bytes) == 32 46 fields = struct.unpack("<3B7HI", bytes[11:]) 47 return dict(name = bytes[0:11], 48 attr = fields[0], 49 ntres = fields[1], 50 create_time_tenth = fields[2], 51 create_time = fields[3], 52 create_date = fields[4], 53 access_date = fields[5], 54 mod_time = fields[7], 55 mod_date = fields[8], 56 length = fields[10], 57 head = (fields[6] << 16) + fields[9]) 58 59SHORTNAME_CHARS = "".join([chr(x) for x in xrange(33,128) if chr(x) not in '"*+,./:;<=>?[\]|']) 60 61def mixed_case(s): 62 return s.lower() != s and s.upper() != s 63 64def fat_checksum(s): 65 "Return the FAT checksum for a short name" 66 assert len(s) == 11 67 sum = 0 68 for c in s: 69 if sum & 1: 70 sum = 0x80 + (sum >> 1) 71 else: 72 sum = sum >> 1 73 sum = (sum + ord(c)) & 0xFF 74 return sum 75 76def make_ldir(name, order, checksum): 77 """ 78 Construct a FAT long name directory entry. 79 """ 80 assert 1 <= len(name) <= 13 81 82 # Pad name with NUL and FFFF's 83 name = (name + u'\u0000' + u'\uFFFF'*11)[0:13] 84 85 entry = "".join([chr(order), name[0:5].encode("UTF-16LE"), 86 chr(ATTR_LONG_NAME), chr(0), chr(checksum), 87 name[5:11].encode("UTF-16LE"), "\x00\x00", 88 name[11:13].encode("UTF-16LE")]) 89 assert len(entry) == 32 90 return entry 91 92def make_long_dirent(name, attr, head=0, length=0, 93 create_time_tenth=0, create_time=0, create_date=0, 94 access_date=0, mod_time=0, mod_date=0, tail='1'): 95 """ 96 Create one or more directory entries -- a short name entry (like make_dirent) 97 and preceding long name entries (if any are needed). This routine handles 98 names of arbitrary length (one to 255 Unicode characters), and will set 99 the "ntres" field correctly for short names whose base and/or extension 100 contains lower case letters. 101 """ 102 name = unicode(name) 103 assert 1 <= len(name) <= 255 104 105 # 106 # Split the name into base and extension (if any) 107 # 108 if u'.' in name: 109 base, ext = name.rsplit(u'.', 1) 110 else: 111 base = name 112 ext = u'' 113 114 # 115 # See if the name will fit the "8.3" form (possibly by using ntres for 116 # base/ext containing lower case characters) 117 # 118 needs_long_name = True 119 if (1 <= len(base) <= 8) and (len(ext) <= 3): 120 needs_long_name = False 121 for c in base+ext: 122 if c not in SHORTNAME_CHARS: 123 needs_long_name = True 124 break 125 if mixed_case(base) or mixed_case(ext): 126 needs_long_name = True 127 128 # "entries" is a list of raw directory entry structures 129 entries = [] 130 ntres = 0 131 132 # 133 # If we need to generate a long name, do it; otherwise determine 134 # the "ntres" value for a short name. 135 # 136 if needs_long_name: 137 # Convert invalid ASCII chars to Services For Macintosh equivalents 138 139 # Construct the associated short name 140 lossy = False 141 basis = "" 142 for c in base.upper(): 143 if c in SHORTNAME_CHARS: 144 basis = basis + c 145 elif c in u' .': 146 # Remove all spaces and periods 147 lossy = True 148 else: 149 # Collapse multiple invalid characters to single '_' 150 if basis[-1] != u'_': 151 basis = basis + u'_' 152 lossy = True 153 if len(basis) > 8: 154 lossy = True 155 basis = (basis + u' ')[0:8] 156 for c in ext.upper(): 157 if c in SHORTNAME_CHARS: 158 basis = basis + c 159 else: 160 # Should really collapse multiple '_' to one '_' 161 basis = basis + u'_' 162 lossy = True 163 basis = (basis + u' ')[0:11] 164 assert len(basis) == 11 165 basis = basis.encode('ASCII') 166 if lossy: 167 basis = basis[0:7-len(tail)] + '~' + tail + basis[8:11] 168 basis = basis.encode('ASCII') 169 170 # Make sure short name is unique in parent directory? 171 # Try other numeric tails 172 173 # Get the checksum of the short name 174 checksum = fat_checksum(basis) 175 176 # Generate the long name entries 177 order = 1 178 while len(name) > 0: 179 if len(name) < 14: 180 order += LAST_LONG_ENTRY 181 entries.insert(0, make_ldir(name[:13], order, checksum)) 182 name = name[13:] 183 order +=1 184 185 # Add the short name entry 186 entries.append(make_dirent(basis, attr, head, length, ntres, 187 create_time_tenth, create_time, create_date, access_date, 188 mod_time, mod_date)) 189 else: 190 # Space pad the base and extension 191 base = (str(base) + " ")[0:8] 192 ext = (str(ext) + " ")[0:3] 193 194 # Determine the value for "ntres" 195 if base.islower(): 196 base = base.upper() 197 ntres |= 0x08 198 if ext.islower(): 199 ext = ext.upper() 200 ntres |= 0x10 201 202 entries = [make_dirent(base+ext, attr, head, length, ntres, 203 create_time_tenth, create_time, create_date, access_date, 204 mod_time, mod_date)] 205 206 return "".join(entries) 207 208class Timestamp(object): 209 def __init__(self, month=1, day=1, year=2000, hour=0, minute=0, second=0.0): 210 """Timestamp(month, day, year, hour, minute, second) -> timestamp 211 212 The default timestamp is 00:00:00 January 1, 2000. 213 """ 214 self.month = month 215 self.day = day 216 self.year = year 217 self.hour = hour 218 self.minute = minute 219 frac, whole = math.modf(second) 220 self.second = int(whole) 221 self.ms = int(1000 * frac) 222 223 @classmethod 224 def from_raw(klass, _date, _time, _ms): 225 # Unpack the bit fields 226 year = 1980 + ((_date >> 9) & 127) 227 month = (_date >> 5) & 15 228 day = _date & 31 229 hour = (_time >> 11) & 31 230 minute = (_time >> 5) & 63 231 seconds = (_time & 31) * 2 + (_ms // 100) 232 ms = 10 * (_ms % 100) 233 234 assert(0 <= month < 16) 235 assert(0 <= day < 32) 236 assert(1980 <= year < 2108) 237 assert(0 <= hour < 32) 238 assert(0 <= minute < 64) 239 assert(0 <= seconds < 64) 240 assert(0 <= ms < 1000) 241 242 return klass(month, day, year, hour, minute, seconds+ms/1000.0) 243 244 @property 245 def raw(self): 246 "A tuple containing the raw 16-bit datestamp, 16-bit timestamp, and 8-bit milliseconds" 247 assert(0 <= self.month < 16) 248 assert(0 <= self.day < 32) 249 assert(1980 <= self.year < 2108) 250 assert(0 <= self.hour < 32) 251 assert(0 <= self.minute < 64) 252 assert(0 <= self.second < 64) 253 assert(0 <= self.ms < 1000) 254 datestamp = (self.year - 1980) << 9 255 datestamp += self.month << 5 256 datestamp += self.day 257 timestamp = self.hour << 11 258 timestamp += self.minute << 5 259 timestamp += self.second // 2 260 ms = (self.second % 2) * 100 + self.ms // 10 261 return (datestamp, timestamp, ms) 262 263 def __repr__(self): 264 fmt = '{0}({1:04d}-{2:02d}-{3:02d} {4:02d}:{5:02d}:{6:02d}.{7:02d})' 265 return fmt.format(self.__class__.__name__, self.year, self.month, self.day, self.hour, self.minute, self.second, self.ms//10) 266 267class msdosfs(object): 268 def __init__(self, dev): 269 self.dev = dev 270 271 # 272 # TODO: Set the sector size based on the device's info. 273 # 274 275 # 276 # Read and parse the boot block 277 # 278 dev.seek(0) 279 bs = dev.read(512) 280 if ord(bs[0]) != 0xE9 and ord(bs[0]) != 0xEB: 281 raise RuntimeError("Invalid boot jump signature") 282 v = struct.unpack("<H", bs[11:13])[0] 283 if v < 512 or v > 4096 or (v & (v-1)): 284 raise RuntimeError("Invalid bytes per sector") 285 self.bytesPerSector = v 286 v = ord(bs[13]) 287 if (v & (v - 1)) or (self.bytesPerSector * v) > 65536: 288 raise RuntimeError("Invalid sectors per cluster") 289 sectorsPerCluster = v 290 self.bytesPerCluster = v * self.bytesPerSector 291 self.reservedSectors = struct.unpack("<H", bs[14:16])[0] 292 self.numFATs = ord(bs[16]) 293 self.rootEntryCount = struct.unpack("<H", bs[17:19])[0] 294 v = struct.unpack("<H", bs[19:21])[0] 295 if v: 296 self.totalSectors = v 297 else: 298 self.totalSectors = struct.unpack("<I", bs[32:36])[0] 299 v = struct.unpack("<H", bs[22:24])[0] 300 if v: 301 self.fatSectors = v 302 else: 303 self.fatSectors = struct.unpack("<I", bs[36:40])[0] 304 self.fsinfo = None 305 306 # 307 # Figure out the bits per FAT entry, and create an object for the FAT 308 # 309 rootSectors = ((self.rootEntryCount * 32) + self.bytesPerSector - 1) / self.bytesPerSector 310 self.rootSectors = rootSectors 311 clustersStart = self.reservedSectors + (self.numFATs * self.fatSectors) + rootSectors 312 self.clusterStart = clustersStart 313 dataSectors = self.totalSectors - clustersStart 314 self.clusters = clusters = dataSectors / sectorsPerCluster 315 self.maxcluster = clusters+1 316 if clusters < 4085: 317 self.type = 12 318 self.fat = self.FAT12(self) 319 elif clusters < 65525: 320 self.type = 16 321 self.fat = self.FAT16(self) 322 else: 323 self.type = 32 324 self.fat = self.FAT32(self) 325 self.rootCluster = struct.unpack("<I", bs[44:48])[0] 326 fsinfo = struct.unpack("<H", bs[48:50])[0] 327 if fsinfo: 328 self.fsinfo = self.FSInfo(self, fsinfo) 329 330 def ReadCluster(self, cluster, count=1): 331 """Return the contents of cluster <cluster>""" 332 assert cluster >= 2 333 offset = (self.clusterStart * self.bytesPerSector) + ((cluster-2) * self.bytesPerCluster) 334 self.dev.seek(offset) 335 return self.dev.read(count * self.bytesPerCluster) 336 337 def WriteCluster(self, cluster, bytes): 338 """Return the contents of cluster <cluster>""" 339 assert cluster >= 2 340 assert (len(bytes) % self.bytesPerSector) == 0 341 offset = (self.clusterStart * self.bytesPerSector) + ((cluster-2) * self.bytesPerCluster) 342 self.dev.seek(offset) 343 return self.dev.write(bytes) 344 345 def flush(self): 346 self.fat.flush() 347 if self.fsinfo: 348 self.fsinfo.flush() 349 350 def allocate(self, count, start=CLUST_FIRST, last=CLUST_EOF): 351 """ 352 Allocate and create a chain of count clusters. 353 Searching begins at cluster #start. 354 The last cluster of the chain will point at cluster #last. 355 Returns the first cluster of the chain. 356 357 Unlike FAT.allocate, this routine will adjust the free space in the 358 FSInfo sector (applies to FAT32 only). 359 """ 360 cluster = self.fat.chain(self.fat.find(count, start), last) 361 if cluster and self.fsinfo: 362 self.fsinfo.allocate(count) 363 return cluster 364 365 class FAT(object): 366 """Base class to represent the File Allocation Table. Do not 367 instantiate this class; use FAT12, FAT16 or FAT32 instead.""" 368 369 def __init__(self): 370 raise NotImplementedError("Do not instantiate class FAT directly") 371 372 def __del__(self): 373 self.flush() 374 375 def find(self, count, start=CLUST_FIRST): 376 """Return a list of <count> free clusters""" 377 378 if count < 1: 379 raise ValueError("Invalid count of clusters") 380 381 clusters = [] 382 for cluster in xrange(start, self.maxcluster+1): 383 if self[cluster] == CLUST_FREE: 384 clusters.append(cluster) 385 count -= 1 386 if count == 0: 387 break 388 389 if count: 390 raise RuntimeError("Insufficient free space") 391 392 return clusters 393 394 def find_contig(self, count, start=CLUST_FIRST): 395 """Return an iterable of <count> contiguous free clusters""" 396 raise NotImplementedError() 397 398 def chain(self, clusters, last=CLUST_EOF): 399 """Create a cluster chain (allocate clusters). The cluster numbers 400 are in <clusters>. The last cluster in the chain will point to 401 cluster <last>. The cluster number of the first cluster in the 402 chain is returned.""" 403 it = iter(clusters) 404 head = prev = it.next() 405 for cluster in it: 406 self[prev] = cluster 407 prev = cluster 408 self[prev] = last 409 return head 410 411 def allocate(self, count, start=CLUST_FIRST, last=CLUST_EOF): 412 return self.chain(self.find(count, start), last) 413 414 def allocate_contig(self, count, start=CLUST_FIRST, last=CLUST_EOF): 415 return self.chain(self.find_contig(count, start), last) 416 417 class FAT32(FAT): 418 "A File Allocation Table with 32-bit entries." 419 420 def __init__(self, vol): 421 self.dev = vol.dev # File object to use for I/O 422 self.reservedSectors = vol.reservedSectors 423 self.bytesPerSector = vol.bytesPerSector 424 self.maxcluster = vol.maxcluster 425 self.sector = None # Sector number of cached sector 426 self.bytes = None # Raw bytes from cached sector 427 self.entries = None # Array of entries from cached sector 428 self.entriesPerSector = self.bytesPerSector / 4 429 self.dirty = False 430 431 def _cache(self, cluster): 432 """Make sure the FAT sector containing <cluster> is cached.""" 433 434 sector = cluster / self.entriesPerSector 435 if sector != self.sector: 436 self.flush() 437 self.sector = sector 438 self.dev.seek((sector + self.reservedSectors) * self.bytesPerSector) 439 self.bytes = self.dev.read(self.bytesPerSector) 440 self.entries = list(struct.unpack("<%dI" % self.entriesPerSector, self.bytes)) 441 442 def __getitem__(self, key): 443 # 444 # Make sure the requested cluster number is valid 445 # 446 if key < 0 or key > self.maxcluster: 447 raise IndexError("cluster number %d out of range" % key) 448 449 # 450 # Make sure we have the correct sector cached 451 # 452 self._cache(key) 453 454 # 455 # Return the desired entry from the current sector. 456 # For reserved/bad/EOF values, extend to full 32 bits. 457 # 458 value = self.entries[key % self.entriesPerSector] & 0x0FFFFFFF 459 if value >= (CLUST_RSRVD & 0x0FFFFFFF): 460 value |= 0xF0000000 461 return value 462 463 def __setitem__(self, key, value): 464 # 465 # Make sure the requested cluster number is valid 466 # 467 if key < 0 or key > self.maxcluster: 468 raise IndexError("cluster number %d out of range" % key) 469 470 # 471 # Make sure the value is valid 472 # 473 if CLUST_RSRVD <= value <= CLUST_EOF: 474 value = value & 0x0FFFFFFF 475 if value < 0 or value > CLUST_MAX32: 476 raise ValueError("cluster value out of range") 477 478 # 479 # Make sure we have the correct sector cached 480 # 481 self._cache(key) 482 483 # 484 # Set the desired entry to the given value. 485 # Only updates the low 28 bits. 486 # 487 old = self.entries[key % self.entriesPerSector] 488 value = (value & 0x0FFFFFFF) | (old & 0xF0000000) 489 self.entries[key % self.entriesPerSector] = value 490 self.dirty = True 491 492 def flush(self): 493 "Write any pending changes to disk" 494 495 if not self.dirty: 496 return 497 498 if len(self.entries) != self.entriesPerSector: 499 raise RuntimeError("FAT entries corrupt!") 500 501 # 502 # Only write to disk if the data has changed 503 # 504 bytes = struct.pack("<%dI" % self.entriesPerSector, *self.entries) 505 if bytes != self.bytes: 506 self.bytes = bytes 507 self.dev.seek((self.sector + self.reservedSectors) * self.bytesPerSector) 508 self.dev.write(bytes) 509 510 self.dirty = False 511 512 class FAT16(FAT): 513 "A File Allocation Table with 16-bit entries." 514 515 def __init__(self, vol): 516 self.dev = vol.dev # File object to use for I/O 517 self.reservedSectors = vol.reservedSectors 518 self.bytesPerSector = vol.bytesPerSector 519 self.maxcluster = vol.maxcluster 520 self.sector = None # Sector number of cached sector 521 self.bytes = None # Raw bytes from cached sector 522 self.entries = None # Array of entries from cached sector 523 self.entriesPerSector = self.bytesPerSector / 2 524 self.dirty = False 525 526 def _cache(self, cluster): 527 """Make sure the FAT sector containing <cluster> is cached.""" 528 529 sector = cluster / self.entriesPerSector 530 if sector != self.sector: 531 self.flush() 532 self.sector = sector 533 self.dev.seek((sector + self.reservedSectors) * self.bytesPerSector) 534 self.bytes = self.dev.read(self.bytesPerSector) 535 self.entries = list(struct.unpack("<%dH" % self.entriesPerSector, self.bytes)) 536 537 def __getitem__(self, key): 538 # 539 # Make sure the requested cluster number is valid 540 # 541 if key < 0 or key > self.maxcluster: 542 raise IndexError("cluster number %d out of range" % key) 543 544 # 545 # Make sure we have the correct sector cached 546 # 547 self._cache(key) 548 549 # 550 # Return the desired entry from the current sector. 551 # For reserved/bad/EOF values, extend to full 32 bits. 552 # 553 value = self.entries[key % self.entriesPerSector] 554 if value >= (CLUST_RSRVD & 0x0000FFFF): 555 value |= 0xFFFF0000 556 return value 557 558 def __setitem__(self, key, value): 559 # 560 # Make sure the requested cluster number is valid 561 # 562 if key < 0 or key > self.maxcluster: 563 raise IndexError("cluster number %d out of range" % key) 564 565 # 566 # Make sure the value is valid 567 # 568 if CLUST_RSRVD <= value <= CLUST_EOF: 569 value = value & 0x0000FFFF 570 if value < 0 or value > CLUST_MAX16: 571 raise ValueError("cluster value out of range") 572 573 # 574 # Make sure we have the correct sector cached 575 # 576 self._cache(key) 577 578 # 579 # Set the desired entry to the given value. 580 # 581 self.entries[key % self.entriesPerSector] = value 582 self.dirty = True 583 584 def flush(self): 585 "Write any pending changes to disk" 586 587 if not self.dirty: 588 return 589 590 if len(self.entries) != self.entriesPerSector: 591 raise RuntimeError("FAT entries corrupt!") 592 593 # 594 # Only write to disk if the data has changed 595 # 596 bytes = struct.pack("<%dH" % self.entriesPerSector, *self.entries) 597 if bytes != self.bytes: 598 self.bytes = bytes 599 self.dev.seek((self.sector + self.reservedSectors) * self.bytesPerSector) 600 self.dev.write(bytes) 601 602 self.dirty = False 603 604 class FAT12(FAT): 605 "A File Allocation Table with 12-bit entries." 606 607 def __init__(self, vol): 608 self.vol = vol 609 self.dev = vol.dev # File object to use for I/O 610 self.maxcluster = vol.maxcluster 611 self.dirty = False 612 613 # Read in the entire FAT, converting it to the self.entries array. 614 self.dev.seek(vol.reservedSectors * vol.bytesPerSector) 615 bytes = self.dev.read(vol.fatSectors * vol.bytesPerSector) 616 617 # We always unpack a multiple of two entries, for convenience. 618 self.entries = [0] * (self.maxcluster + 2) 619 for i in xrange(0, self.maxcluster + 1, 2): 620 index = i * 3 / 2 621 self.entries[i] = struct.unpack("<H", bytes[index:index+2])[0] & 0x0FFF 622 self.entries[i+1] = struct.unpack("<H", bytes[index+1:index+3])[0] >> 4 623 624 def __getitem__(self, key): 625 # 626 # Make sure the requested cluster number is valid 627 # 628 if key < 0 or key > self.maxcluster: 629 raise IndexError("cluster number %d out of range" % key) 630 631 # 632 # Return the desired entry from the current sector. 633 # For reserved/bad/EOF values, extend to full 32 bits. 634 # 635 value = self.entries[key] 636 if value >= (CLUST_RSRVD & 0x00000FFF): 637 value |= 0xFFFFF000 638 return value 639 640 def __setitem__(self, key, value): 641 # 642 # Make sure the requested cluster number is valid 643 # 644 if key < 0 or key > self.maxcluster: 645 raise IndexError("cluster number %d out of range" % key) 646 647 # 648 # Make sure the value is valid 649 # 650 if CLUST_RSRVD <= value <= CLUST_EOF: 651 value = value & 0x00000FFF 652 if value < 0 or value > CLUST_MAX12: 653 raise ValueError("cluster value out of range") 654 655 # 656 # Set the desired entry to the given value. 657 # 658 self.entries[key] = value 659 self.dirty = True 660 661 def flush(self): 662 "Write any pending changes to disk" 663 664 if not self.dirty: 665 return 666 667 if len(self.entries) != self.maxcluster + 2: 668 raise RuntimeError("FAT entries corrupt!") 669 670 vol = self.vol 671 672 # Read the old data from disk 673 self.dev.seek(vol.reservedSectors * vol.bytesPerSector) 674 bytes = self.dev.read(vol.fatSectors * vol.bytesPerSector) 675 676 # Update the bytes with values from self.entries 677 for i in xrange(0, self.maxcluster + 1, 2): 678 index = i * 3 / 2 679 pair = struct.pack("<I", self.entries[i] + (self.entries[i+1] << 12)) 680 bytes = bytes[:index] + pair[:3] + bytes[index+3:] 681 assert len(bytes) == (vol.fatSectors * vol.bytesPerSector) 682 683 # Write back to disk 684 self.dev.seek(vol.reservedSectors * vol.bytesPerSector) 685 self.dev.write(bytes) 686 687 self.dirty = False 688 689 class Chain(object): 690 """A chain of clusters (i.e. a file or directory)""" 691 def __init__(self, volume, head, length=0): 692 self.volume = volume 693 self.head = head 694 self.length = length 695 696 def cmap(self, offset): 697 """Return the cluster containing the chain's given byte <offset>. 698 Returns <None> if the given offset is not part of the cluster 699 chain.""" 700 701 bytesPerCluster = self.volume.bytesPerCluster 702 cluster = self.head 703 if cluster == CLUST_FREE: 704 return None 705 706 while offset >= bytesPerCluster: 707 cluster = self.volume.fat[cluster] 708 if cluster == CLUST_FREE or cluster >= CLUST_RSRVD: 709 return None 710 offset -= bytesPerCluster 711 712 return cluster 713 714 def pread(self, offset=0, count=None): 715 """Read up to <count> bytes at <offset> bytes from the start of 716 the chain. If <count> is <None>, then read all bytes until the 717 end of the chain.""" 718 719 if count == 0: 720 return "" 721 722 result = [] 723 bytesPerCluster = self.volume.bytesPerCluster 724 cluster = self.cmap(offset) 725 if not cluster: 726 return "" 727 728 # Handle non-aligned start of read 729 ofs = offset % bytesPerCluster 730 if ofs: 731 buf = self.volume.ReadCluster(cluster) 732 length = bytesPerCluster - ofs 733 if count is not None and count < length: 734 length = count 735 result.append(buf[ofs:ofs+length]) 736 if count is not None: 737 count -= length 738 cluster = self.volume.fat[cluster] 739 if cluster == CLUST_FREE or cluster >= CLUST_RSRVD: 740 return result[0] 741 742 # Handle whole clusters 743 while count != 0: 744 if count is not None and count < bytesPerCluster: 745 break 746 result.append(self.volume.ReadCluster(cluster)) 747 cluster = self.volume.fat[cluster] 748 if cluster == CLUST_FREE or cluster >= CLUST_RSRVD: 749 return "".join(result) 750 if count is not None: 751 count -= bytesPerCluster 752 753 # Handle non-aligned end of read 754 if count is not None and count > 0: 755 buf = self.volume.ReadCluster(cluster) 756 result.append(buf[0:count]) 757 758 return "".join(result) 759 760 def pwrite(self, offset, bytes): 761 """Write <bytes> at <offset> bytes from the start of the chain.""" 762 count = len(bytes) 763 if count == 0: 764 return 765 766 bytesPerCluster = self.volume.bytesPerCluster 767 cluster = self.cmap(offset) 768 if not cluster: 769 raise RuntimeError("Write beyond end of cluster chain") 770 771 # Handle non-aligned start of write 772 ofs = offset % bytesPerCluster 773 if ofs: 774 buf = self.volume.ReadCluster(cluster) 775 length = bytesPerCluster - ofs 776 if count < length: 777 length = count 778 buf = buf[0:ofs] + bytes[0:length] + buf[ofs+length:] 779 assert len(buf) == bytesPerCluster 780 self.volume.WriteCluster(cluster, buf) 781 ofs = length 782 count -= length 783 cluster = self.volume.fat[cluster] 784 785 # Handle whole clusters 786 while count >= bytesPerCluster: 787 if cluster == CLUST_FREE or cluster >= CLUST_RSRVD: 788 raise RuntimeError("Write beyond end of cluster chain") 789 self.volume.WriteCluster(cluster, bytes[ofs:ofs+bytesPerCluster]) 790 ofs += bytesPerCluster 791 count -= bytesPerCluster 792 cluster = self.volume.fat[cluster] 793 794 # Handle non-aligned end of write 795 if count > 0: 796 if cluster == CLUST_FREE or cluster >= CLUST_RSRVD: 797 raise RuntimeError("Write beyond end of cluster chain") 798 buf = self.volume.ReadCluster(cluster) 799 buf = bytes[ofs:ofs+count] + buf[count:] 800 assert len(buf) == bytesPerCluster 801 self.volume.WriteCluster(cluster, buf) 802 803 def _truncateGrow(self, oldClusters, newClusters): 804 addClusters = newClusters - oldClusters 805 806 # Find and allocate some free space 807 # TODO: Try to keep the file contiguous? 808 head = self.volume.allocate(addClusters) 809 810 # Point file's current last cluster at the first new cluster 811 if self.head == 0: 812 self.head = head 813 else: 814 prev = None 815 cluster = self.head 816 while cluster >= CLUST_FIRST and cluster <= CLUST_RSRVD: 817 prev = cluster 818 cluster = self.volume.fat[cluster] 819 self.volume.fat[prev] = head 820 821 def truncate(self, length): 822 bytesPerCluster = self.volume.bytesPerCluster 823 oldClusters = (self.length + bytesPerCluster - 1) // bytesPerCluster 824 newClusters = (length + bytesPerCluster - 1) // bytesPerCluster 825 826 if newClusters < oldClusters: 827 raise NotImplementedError("shrinking chains is not implemented yet") 828 elif newClusters > oldClusters: 829 self._truncateGrow(oldClusters, newClusters) 830 else: 831 return 832 833 self.length = length 834 835 # If the chain has an associate directory entry, then update its 836 # head and length, and write it back to disk 837 if hasattr(self, 'parent'): 838 raw = self.parent.read_slots(self.slot) 839 # Update head in raw[26:28] (low) and raw[20:22] (high) 840 # Update length in raw[28:32] 841 raw = raw[0:20] + struct.pack("<H", self.head>>16) + raw[22:26] + struct.pack("<HI", self.head&0xFFFF, self.length) 842 self.parent.write_slots(self.slot, raw) 843 844 class Directory(Chain): 845 def __init__(self, volume, head): 846 super(volume.Directory, self).__init__(volume, head) 847 if volume.type == 32 and head == volume.rootCluster: 848 self.is_root = True 849 else: 850 self.is_root = False 851 852 def find_slots(self, count, grow=True): 853 """Find <count> contiguous free slots. If <grow> is true then extend 854 the directory to make enough space. If there is insufficient free 855 space, and the directory can't be grown, raise an error. Returns 856 the slot index of the found space.""" 857 assert count > 0 858 bytesPerCluster = self.volume.bytesPerCluster 859 slot = 0 860 found = 0 861 cluster = self.head 862 while cluster >= CLUST_FIRST and cluster < CLUST_RSRVD: 863 buf = self.volume.ReadCluster(cluster) 864 offset = 0 865 while offset < bytesPerCluster: 866 if buf[offset] == '\x00' or buf[offset] == '\xE5': 867 found += 1 868 if found >= count: 869 # <slot> is the index of the last of the slots 870 return slot-count+1 871 else: 872 found = 0 873 offset += 32 874 slot += 1 875 cluster = self.volume.fat[cluster] 876 877 if grow: 878 raise NotImplementedError("Growing directories not implemented") 879 880 raise RuntimeError("Insufficient space in directory") 881 882 def fill_slots(self, slot, grow=True): 883 """Mark all never-used directory entries at index less than "slot" 884 as deleted. This is useful if you then want to create an entry 885 starting at a specific slot, and guarantee that the directory 886 doesn't "end" before that point.""" 887 bytesPerCluster = self.volume.bytesPerCluster 888 cluster = self.head 889 i = 0 890 while cluster >= CLUST_FIRST and cluster < CLUST_RSRVD and i < slot: 891 buf = bytearray(self.volume.ReadCluster(cluster)) 892 dirty = False 893 offset = 0 894 while offset < bytesPerCluster and i < slot: 895 if buf[offset] == 0: 896 buf[offset] = 0xE5 897 dirty = True 898 offset += 32 899 i += 1 900 if dirty: 901 self.volume.WriteCluster(cluster, buf) 902 cluster = self.volume.fat[cluster] 903 904 if i >= slot: 905 return 906 907 if grow: 908 raise NotImplementedError("Growing directories not implemented") 909 raise RuntimeError("Directory too short") 910 911 def read_slots(self, slot, count=1): 912 """Read and return <count> consecutive directory slots, starting 913 at slot number <slot>.""" 914 return self.pread(slot*32, count*32) 915 916 def write_slots(self, slot, bytes): 917 """Write <data> to consecutive directory slots, starting at slot 918 number <slot>.""" 919 assert len(bytes) > 0 and (len(bytes) % 32) == 0 920 self.pwrite(slot*32, bytes) 921 922 def mkfile(self, name, head=None, length=None, attributes=ATTR_ARCHIVE, deleted=False, 923 createDate=None, accessDate=None, modDate=None, 924 content=None, clusters=None): 925 "Create a file entry in the directory." 926 927 # Default length to size of content, or zero 928 if length is None: 929 if content is None: 930 length = 0 931 else: 932 length = len(content) 933 934 # If the file is supposed to be non-empty, and they didn't explicitly 935 # specify the clusters, then allocate enough to hold the given length 936 if length > 0 and clusters is None and head is None: 937 bytesPerCluster = self.volume.bytesPerCluster 938 numClusters = (length + bytesPerCluster + 1) // bytesPerCluster 939 clusters = self.volume.fat.find(numClusters) 940 head = self.volume.fat.chain(clusters) 941 if head and self.volume.fsinfo: 942 self.volume.fsinfo.allocate(numClusters) 943 944 # Default the head to the first cluster in the chain 945 if head is None: 946 if clusters is None: 947 head = 0 948 else: 949 head = clusters[0] 950 951 # 952 # Construct the raw directory entry (entries if long name) 953 # 954 dates = {} 955 if createDate is not None: 956 _date, _time, _ms = createDate.raw 957 dates["create_time_tenth"] = _ms 958 dates["create_time"] = _time 959 dates["create_date"] = _date 960 if accessDate is not None: 961 _date, _time, _ms = accessDate.raw 962 dates["access_date"] = _date 963 if modDate is not None: 964 _date, _time, _ms = modDate.raw 965 dates["mod_time"] = _time 966 dates["mod_date"] = _date 967 raw = make_long_dirent(name, attributes, head=head, length=length, **dates) 968 969 # 970 # Find enough free slots, growing the directory if needed 971 # 972 slots = len(raw)/32 973 slot = self.find_slots(slots) 974 975 # If the file is to be marked deleted, then set the first byte of 976 # each slot to 0xE5. 977 if deleted: 978 raw = bytearray(raw) 979 # Set the first byte of every slot to 0xE5 980 for i in range(slots): 981 raw[i * 32] = 0xE5 982 raw = bytes(raw) 983 984 # 985 # Write the raw entry/entries to the free slots 986 # 987 self.write_slots(slot, raw) 988 989 # Create a stream (chain) for the file. Write the initial content 990 stream = msdosfs.Chain(self.volume, head, length) 991 if content is not None: 992 stream.pwrite(0, content) 993 994 # Remember where the short name entry exists, so it can be updated 995 stream.parent = self 996 stream.slot = slot + slots - 1 # Slot index of last (short name) entry 997 998 return stream 999 1000 def mkdir(self, name, clusters=None, length=0, makeDot=True, makeDotDot=True, zeroFill=True): 1001 """ 1002 Create a subdirectory entry in the parent directory. 1003 By default, it also creates the "." and ".." entries in the first 1004 two slots of the first cluster of the subdirectory. By default, 1005 the remaining slots and remaining clusters will be zero filled. 1006 1007 The "clusters" parameter should be an iterable containing the 1008 clusters to be allocated to the subdirectory. If the default, 1009 None, is used, then one cluster will be found and allocated. 1010 1011 The "length" parameter overrides the length/size field in the 1012 subdirectory's entry. This should normally be left at the 1013 default, 0. 1014 1015 The "makeDot" and "makeDotDot" parameters may be set to False to 1016 avoid initializing the "." and ".." entries, respectively. 1017 1018 The "zeroFill" parameter may be set to False to avoid filling the 1019 clusters with zeroes. 1020 1021 If one or more clusters were successfully allocated and the 1022 subdirectory created, a Directory object for the new subdirectory 1023 is returned. 1024 """ 1025 1026 if clusters is None: 1027 clusters = self.volume.fat.find(1) 1028 1029 # See if there is a first cluster in the chain 1030 head = 0 1031 try: 1032 head = clusters[0] 1033 except IndexError: 1034 pass 1035 except TypeError: 1036 pass 1037 1038 # Create the subdirectory's entry in the parent 1039 bytes = make_long_dirent(name, ATTR_DIRECTORY, head=head, length=length) 1040 slots = len(bytes) / 32 1041 self.write_slots(self.find_slots(slots), bytes) 1042 1043 subdir = None 1044 # If there is at least one cluster, then zero fill and 1045 # create the "." and ".." entries 1046 if head: 1047 # Zero fill unless told not to 1048 if zeroFill: 1049 zeroes = "\x00" * self.volume.bytesPerCluster 1050 for cluster in clusters: 1051 self.volume.WriteCluster(cluster, zeroes) 1052 1053 # Allocate the cluster(s) 1054 self.volume.fat.chain(clusters) 1055 if self.volume.fsinfo: 1056 self.volume.fsinfo.allocate(len(clusters)) 1057 1058 # Create a Directory object for the new subdirectory 1059 # so that we can write to it conveniently 1060 subdir = self.volume.Directory(self.volume, head) 1061 1062 # Create "." 1063 if makeDot: 1064 bytes = make_dirent('. ', ATTR_DIRECTORY, head=head) 1065 subdir.write_slots(0, bytes) 1066 1067 # Create ".." 1068 if makeDotDot: 1069 # If "self" is the root directory, then the first cluster 1070 # for ".." should be set to 0. 1071 if self.is_root: 1072 headDotDot = 0 1073 else: 1074 headDotDot = self.head 1075 bytes = make_dirent('.. ', ATTR_DIRECTORY, head=headDotDot) 1076 subdir.write_slots(1, bytes) 1077 1078 # Return the subdirectory's Directory object (if any) 1079 return subdir 1080 1081 class FixedRootDir(Directory): 1082 def __init__(self, volume): 1083 self.volume = volume 1084 self.head = 0 1085 self.is_root = True 1086 1087 def pread(self, offset=0, count=None): 1088 """Read up to <count> bytes at <offset> bytes from the start of 1089 the chain. If <count> is <None>, then read all bytes until the 1090 end of the chain.""" 1091 1092 if count == 0: 1093 return "" 1094 1095 volume = self.volume 1096 bytesPerSector = volume.bytesPerSector 1097 firstSector = volume.reservedSectors + (volume.numFATs * volume.fatSectors) 1098 1099 if count is None: 1100 count = (volume.rootSectors * bytesPerSector) - offset 1101 1102 if (offset + count) > (volume.rootSectors * bytesPerSector): 1103 raise RuntimeError("Read past end of fixed root directory") 1104 1105 # Figure out the starting and ending sectors (relative to root dir) 1106 sector = offset / bytesPerSector 1107 endSector = (offset + count + bytesPerSector - 1) / bytesPerSector 1108 1109 # Read whole sectors 1110 volume.dev.seek((sector + firstSector) * bytesPerSector) 1111 bytes = volume.dev.read((endSector - sector) * bytesPerSector) 1112 1113 # Return just the portion the caller asked for 1114 offset = offset % bytesPerSector 1115 return bytes[offset:offset+count] 1116 1117 def pwrite(self, offset, bytes): 1118 """Write <bytes> at <offset> bytes from the start of the chain.""" 1119 count = len(bytes) 1120 if count == 0: 1121 return 1122 1123 volume = self.volume 1124 bytesPerSector = volume.bytesPerSector 1125 firstSector = volume.reservedSectors + (volume.numFATs * volume.fatSectors) 1126 1127 if count is None: 1128 count = (volume.rootSectors * bytesPerSector) - offset 1129 1130 if (offset + count) > (volume.rootSectors * bytesPerSector): 1131 raise RuntimeError("Write past end of fixed root directory") 1132 1133 # Figure out the starting and ending sectors (relative to root dir) 1134 sector = offset / bytesPerSector 1135 endSector = (offset + count + bytesPerSector - 1) / bytesPerSector 1136 1137 if offset % bytesPerSector or count % bytesPerSector: 1138 offset = offset % bytesPerSector 1139 1140 # Read whole sectors 1141 volume.dev.seek((sector + firstSector) * bytesPerSector) 1142 original = volume.dev.read((endSector - sector) * bytesPerSector) 1143 1144 # Overwrite with caller supplied data 1145 bytes = original[:offset] + bytes + original[offset+count:] 1146 1147 # Write out the (updated) sectors 1148 volume.dev.seek((sector + firstSector) * bytesPerSector) 1149 volume.dev.write(bytes) 1150 1151 def find_slots(self, count, grow=False): 1152 """Find <count> contiguous free slots. If there is insufficient 1153 free space, then raise an error. Returns the slot index of the 1154 found space.""" 1155 assert count > 0 1156 volume = self.volume 1157 bytesPerSector = volume.bytesPerSector 1158 firstSector = volume.reservedSectors + (volume.numFATs * volume.fatSectors) 1159 slot = 0 1160 found = 0 1161 for sector in xrange(firstSector, firstSector+volume.rootSectors): 1162 volume.dev.seek(sector * bytesPerSector) 1163 buf = volume.dev.read(bytesPerSector) 1164 offset = 0 1165 while offset < bytesPerSector: 1166 if buf[offset] == '\x00' or buf[offset] == '\xE5': 1167 found += 1 1168 if found >= count: 1169 # <slot> is the index of the last of the slots 1170 return slot-count+1 1171 else: 1172 found = 0 1173 offset += 32 1174 slot += 1 1175 1176 raise RuntimeError("Insufficient space in directory") 1177 1178 def root(self): 1179 "Return an object for the root directory." 1180 if self.type == 32: 1181 return self.Directory(self, self.rootCluster) 1182 else: 1183 return self.FixedRootDir(self) 1184 1185 class FSInfo(object): 1186 def __init__(self, volume, sector): 1187 self.volume = volume 1188 self.sector = sector 1189 self.valid = False 1190 self.dirty = False 1191 self.free = 0xFFFFFFFF 1192 self.nextFree= 0 1193 1194 volume.dev.seek(volume.bytesPerSector * sector) 1195 fsinfo = volume.dev.read(volume.bytesPerSector) 1196 if fsinfo[0:4] == 'RRaA' and fsinfo[484:488] == 'rrAa': 1197 self.valid = True 1198 self.free, self.nextFree = struct.unpack("<II", fsinfo[488:496]) 1199 1200 def allocate(self, clusters): 1201 if self.valid: 1202 self.free -= clusters 1203 self.dirty = True 1204 1205 def flush(self): 1206 if self.valid and self.dirty: 1207 self.volume.dev.seek(self.volume.bytesPerSector * self.sector) 1208 fsinfo = self.volume.dev.read(self.volume.bytesPerSector) 1209 fsinfo = fsinfo[0:488] + struct.pack("<II", self.free, self.nextFree) + fsinfo[496:] 1210 self.volume.dev.seek(self.volume.bytesPerSector * self.sector) 1211 self.volume.dev.write(fsinfo) 1212 self.dirty = False 1213 1214def chain_extents(chain): 1215 """ 1216 Given a chain of extents (a list of the clusters in the chain), generate 1217 a list of extents in the form (start, count). 1218 """ 1219 if len(chain) == 0: 1220 return 1221 from itertools import izip 1222 start = chain[0] 1223 count = 1 1224 for last,next in izip(chain, chain[1:]): 1225 if next == last+1: 1226 count += 1 1227 else: 1228 yield (start, count) 1229 start = next 1230 count = 1 1231 yield (start, count) 1232 1233# 1234# Walk the FAT looking for chains, and reporting what percentage of 1235# logically contiguous clusters are physically contiguous. 1236# 1237# NOTE: This examines all FAT entries/chains, including those that 1238# are not referenced by any directory entry. This routine will fail 1239# if there is a cycle in a cluster chain. 1240# 1241def info_fragmentation(argv): 1242 """ 1243 Display information about fragmentation on the volume. Pass -v 1244 to see the largest number of extents per file/directory. Pass 1245 -v a second time to see the list of extents for that file/directory. 1246 Pass -v a third time to see the list of extents for all fragmented 1247 files and directories. 1248 """ 1249 1250 verbose = 0 1251 while "-v" in argv: 1252 verbose += 1 1253 argv.remove("-v") 1254 1255 dev = file(argv[0], "r") 1256 v = msdosfs(dev) 1257 fat = v.fat 1258 ignore = set([CLUST_FREE, 1, CLUST_RSRVD, CLUST_BAD]) # FAT values to ignore 1259 1260 # Build a dictionary of all cluster chains. The key is the first cluster 1261 # in the chain. The value is a list of all clusters in the chain, in 1262 # logical order within the file. 1263 chains = dict() 1264 1265 # To start, each cluster is a chain of just itself 1266 for cl in xrange(CLUST_FIRST, v.maxcluster+1): 1267 next = fat[cl] 1268 if next not in ignore: 1269 chains[cl] = [cl] 1270 1271 # Connect clusters into chains 1272 again = True 1273 while again: 1274 again = False 1275 for cl in chains.keys(): 1276 if cl in chains: # May have already been removed 1277 next = fat[chains[cl][-1]] 1278 if next in ignore: 1279 print "Warning: %d -> 0x%x" % (chains[cl][-1], next) 1280 if next in chains: 1281 chains[cl].extend(chains[next]) 1282 del chains[next] 1283 again = True 1284 1285 # Examine each chain, gathering statistics 1286 total = 0 # Number of logically contiguous links 1287 frags = 0 # Number of physically discontiguous links 1288 max_extents = [] 1289 contigs = set() # Set of lengths of contiguous extents 1290 for chain in chains.itervalues(): 1291 extents = list(chain_extents(chain)) 1292 total += len(chain) - 1 1293 frags += len(extents) - 1 1294 for start, count in extents: 1295 contigs.add(count) 1296 if verbose and len(extents) > 1: 1297 if verbose > 2: 1298 print "{0}: {1}".format(len(extents), extents) 1299 if len(extents) > len(max_extents): 1300 max_extents = extents 1301 1302 # Print the stats 1303 try: 1304 percent = 100.0 * float(frags)/float(total) 1305 except ZeroDivisionError: 1306 percent = 0.0 1307 print "Fragmentation: %f%% (%d of %d links)" % (percent, frags, total) 1308 if verbose: 1309 print "Most extents per file: {0}".format(len(max_extents)) 1310 if verbose > 1: 1311 print max_extents 1312 print "Cluster chains: {0}".format(len(chains)) 1313 print "Longest fragment: %d clusters, Shortest: %d clusters" % (max(contigs), min(contigs)) 1314 1315def info(argv): 1316 """Display general information about a volume. 1317 1318 argv[0] "frag" or path to device 1319 """ 1320 if argv[0] == "frag": 1321 return info_fragmentation(argv[1:]) 1322 1323 dev = file(argv[0], "r") 1324 v = msdosfs(dev) 1325 1326 print "bytesPerSector: ", v.bytesPerSector 1327 print "totalSectors: ", v.totalSectors 1328 print "size in bytes: ", v.bytesPerSector * v.totalSectors 1329 print "reservedSectors:", v.reservedSectors 1330 print "fatSectors: ", v.fatSectors 1331 print "rootSectors: ", v.rootSectors 1332 print "clusterStart: ", v.clusterStart 1333 print "numFATs: ", v.numFATs 1334 print "rootEntryCount: ", v.rootEntryCount 1335 print "clusters: ", v.clusters, " (FAT%d)" % v.type 1336 print "maxCluster: ", v.maxcluster 1337 print "bytesPerCluster:", v.bytesPerCluster 1338 # TODO: Print FSInfo stuff, if it exists 1339 1340def fragment_free(argv): 1341 """Fragment the free space on a volume by marking various clusters 'bad' 1342 so they can't be allocated. 1343 1344 argv[0] Path to device 1345 argv[1] Interval between clusters 1346 """ 1347 dev = file(argv[0], "r+") 1348 v = msdosfs(dev) 1349 skip = int(argv[1],0) 1350 clusters = 0 1351 for cl in xrange(CLUST_FIRST, v.maxcluster+1, skip): 1352 if v.fat[cl] == CLUST_FREE: 1353 v.fat[cl] = CLUST_BAD 1354 clusters += 1 1355 if v.fsinfo: 1356 v.fsinfo.allocate(clusters) 1357 v.flush() 1358 dev.close() 1359 1360def free_bad(argv): 1361 """Free and clusters currently marked 'bad' 1362 1363 argv[0] Path to device 1364 """ 1365 dev = file(argv[0], "r+") 1366 v = msdosfs(dev) 1367 fat = v.fat 1368 clusters = 0 1369 for cl in xrange(CLUST_FIRST, v.maxcluster+1): 1370 if fat[cl] == CLUST_BAD: 1371 fat[cl] = CLUST_FREE 1372 clusters += 1 1373 if v.fsinfo: 1374 v.fsinfo.allocate(-clusters) 1375 v.flush() 1376 dev.close() 1377 1378def usage(): 1379 print "Usage:" 1380 print " python msdosfs.py info <device_path>" 1381 print " python msdosfs.py info frag <device_path>" 1382 print " python msdosfs.py fragment <device_path> <interval>" 1383 print " python msdosfs.py freebad <device_path>" 1384 1385if __name__ == "__main__": 1386 import sys 1387 if len(sys.argv) == 1: 1388 usage() 1389 elif sys.argv[1] == 'info': 1390 info(sys.argv[2:]) 1391 elif sys.argv[1] == 'fragment': 1392 fragment_free(sys.argv[2:]) 1393 elif sys.argv[1] == 'freebad': 1394 free_bad(sys.argv[2:]) 1395 else: 1396 print 'Unknown command:', sys.argv[1] 1397 usage() 1398 1399