1/*-
2 * Copyright (c) 2010-2012 Semihalf.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD$");
29
30#include <sys/param.h>
31#include <sys/queue.h>
32#include <sys/stdint.h>
33#include <ufs/ufs/dinode.h>
34#include <fs/nandfs/nandfs_fs.h>
35#include "stand.h"
36#include "string.h"
37#include "zlib.h"
38
/*
 * Debug tracing.  DEBUG is defined and then immediately undefined, so
 * NANDFS_DEBUG() compiles to nothing (its arguments are not evaluated).
 * Remove the #undef line to get the messages printed with printf().
 */
#define DEBUG
#undef DEBUG
#ifndef DEBUG
#define NANDFS_DEBUG(fmt, args...)
#else
#define NANDFS_DEBUG(fmt, args...) do { \
    printf("NANDFS_DEBUG:" fmt "\n", ##args); } while (0)
#endif
47
/*
 * Layout constants of a metadata (MDT) file.  They are filled in by
 * nandfs_calc_mdt_consts() and used by nandfs_mdt_trans() to turn an entry
 * index into a file block number and a slot inside that block.
 */
struct nandfs_mdt {
	/* Entries that fit in one block (blocksize / entry size). */
	uint32_t	entries_per_block;
	/* Entries covered by one group (blocksize * 8). */
	uint32_t	entries_per_group;
	/* File blocks occupied by one group. */
	uint32_t	blocks_per_group;
	/* Groups described by one descriptor block; desc is super group. */
	uint32_t	groups_per_desc_block;
	/* File blocks spanned by one descriptor block; desc is super group. */
	uint32_t	blocks_per_desc_block;
};
55
56struct bmap_buf {
57	LIST_ENTRY(bmap_buf)	list;
58	nandfs_daddr_t		blknr;
59	uint64_t		*map;
60};
61
62struct nandfs_node {
63	struct nandfs_inode	*inode;
64	LIST_HEAD(, bmap_buf)	bmap_bufs;
65};
66struct nandfs {
67	int	nf_blocksize;
68	int	nf_sectorsize;
69	int	nf_cpno;
70
71	struct open_file	*nf_file;
72	struct nandfs_node	*nf_opened_node;
73	u_int			nf_offset;
74	uint8_t			*nf_buf;
75	int64_t			nf_buf_blknr;
76
77	struct nandfs_fsdata		*nf_fsdata;
78	struct nandfs_super_block	*nf_sb;
79	struct nandfs_segment_summary	nf_segsum;
80	struct nandfs_checkpoint	nf_checkpoint;
81	struct nandfs_super_root	nf_sroot;
82	struct nandfs_node		nf_ifile;
83	struct nandfs_node		nf_datfile;
84	struct nandfs_node		nf_cpfile;
85	struct nandfs_mdt		nf_datfile_mdt;
86	struct nandfs_mdt		nf_ifile_mdt;
87
88	int nf_nindir[NIADDR];
89};
90
91static int nandfs_open(const char *, struct open_file *);
92static int nandfs_close(struct open_file *);
93static int nandfs_read(struct open_file *, void *, size_t, size_t *);
94static off_t nandfs_seek(struct open_file *, off_t, int);
95static int nandfs_stat(struct open_file *, struct stat *);
96static int nandfs_readdir(struct open_file *, struct dirent *);
97
98static int nandfs_buf_read(struct nandfs *, void **, size_t *);
99static struct nandfs_node *nandfs_lookup_path(struct nandfs *, const char *);
100static int nandfs_read_inode(struct nandfs *, struct nandfs_node *,
101    nandfs_lbn_t, u_int, void *, int);
102static int nandfs_read_blk(struct nandfs *, nandfs_daddr_t, void *, int);
103static int nandfs_bmap_lookup(struct nandfs *, struct nandfs_node *,
104    nandfs_lbn_t, nandfs_daddr_t *, int);
105static int nandfs_get_checkpoint(struct nandfs *, uint64_t,
106    struct nandfs_checkpoint *);
107static nandfs_daddr_t nandfs_vtop(struct nandfs *, nandfs_daddr_t);
108static void nandfs_calc_mdt_consts(int, struct nandfs_mdt *, int);
109static void nandfs_mdt_trans(struct nandfs_mdt *, uint64_t,
110    nandfs_daddr_t *, uint32_t *);
111static int ioread(struct open_file *, off_t, void *, u_int);
112static int nandfs_probe_sectorsize(struct open_file *);
113
114struct fs_ops nandfs_fsops = {
115	"nandfs",
116	nandfs_open,
117	nandfs_close,
118	nandfs_read,
119	null_write,
120	nandfs_seek,
121	nandfs_stat,
122	nandfs_readdir
123};
124
/* Number of block addresses that fit in one indirect block of `fs'. */
#define	NINDIR(fs)	((fs)->nf_blocksize / sizeof(nandfs_daddr_t))
126
/*
 * CRC-32 (reflected polynomial 0xedb88320), computed four bits at a time
 * with a 16-entry table.  From NetBSD's src/sys/net/if_ethersubr.c.
 *
 * crc is the starting value (0 for a fresh checksum); buf/len describe the
 * bytes to checksum.  The running value is inverted on entry and on exit.
 */
static uint32_t
nandfs_crc32(uint32_t crc, const uint8_t *buf, size_t len)
{
	static const uint32_t nibble_tab[16] = {
		0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
		0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
		0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
		0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
	};
	const uint8_t *p = buf;
	uint32_t state = crc ^ ~0U;
	int half;

	while (len-- != 0) {
		state ^= *p++;
		/* Low nibble first, then the high nibble. */
		for (half = 0; half < 2; half++)
			state = (state >> 4) ^ nibble_tab[state & 0xf];
	}

	return (state ^ ~0U);
}
147
148static int
149nandfs_check_fsdata_crc(struct nandfs_fsdata *fsdata)
150{
151	uint32_t fsdata_crc, comp_crc;
152
153	if (fsdata->f_magic != NANDFS_FSDATA_MAGIC)
154		return (0);
155
156	/* Preserve crc */
157	fsdata_crc = fsdata->f_sum;
158
159	/* Calculate */
160	fsdata->f_sum = (0);
161	comp_crc = nandfs_crc32(0, (uint8_t *)fsdata, fsdata->f_bytes);
162
163	/* Restore */
164	fsdata->f_sum = fsdata_crc;
165
166	/* Check CRC */
167	return (fsdata_crc == comp_crc);
168}
169
170static int
171nandfs_check_superblock_crc(struct nandfs_fsdata *fsdata,
172    struct nandfs_super_block *super)
173{
174	uint32_t super_crc, comp_crc;
175
176	/* Check super block magic */
177	if (super->s_magic != NANDFS_SUPER_MAGIC)
178		return (0);
179
180	/* Preserve CRC */
181	super_crc = super->s_sum;
182
183	/* Calculate */
184	super->s_sum = (0);
185	comp_crc = nandfs_crc32(0, (uint8_t *)super, fsdata->f_sbbytes);
186
187	/* Restore */
188	super->s_sum = super_crc;
189
190	/* Check CRC */
191	return (super_crc == comp_crc);
192}
193
194static int
195nandfs_find_super_block(struct nandfs *fs, struct open_file *f)
196{
197	struct nandfs_super_block *sb;
198	int i, j, n, s;
199	int sectors_to_read, error;
200
201	sb = malloc(fs->nf_sectorsize);
202	if (sb == NULL)
203		return (ENOMEM);
204
205	memset(fs->nf_sb, 0, sizeof(*fs->nf_sb));
206
207	sectors_to_read = (NANDFS_NFSAREAS * fs->nf_fsdata->f_erasesize) /
208	    fs->nf_sectorsize;
209	for (i = 0; i < sectors_to_read; i++) {
210		NANDFS_DEBUG("reading i %d offset %d\n", i,
211		    i * fs->nf_sectorsize);
212		error = ioread(f, i * fs->nf_sectorsize, (char *)sb,
213		    fs->nf_sectorsize);
214		if (error) {
215			NANDFS_DEBUG("error %d\n", error);
216			continue;
217		}
218		n = fs->nf_sectorsize / sizeof(struct nandfs_super_block);
219		s = 0;
220		if ((i * fs->nf_sectorsize) % fs->nf_fsdata->f_erasesize == 0) {
221			if (fs->nf_sectorsize == sizeof(struct nandfs_fsdata))
222				continue;
223			else {
224				s += (sizeof(struct nandfs_fsdata) /
225				    sizeof(struct nandfs_super_block));
226			}
227		}
228
229		for (j = s; j < n; j++) {
230			if (!nandfs_check_superblock_crc(fs->nf_fsdata, &sb[j]))
231				continue;
232			NANDFS_DEBUG("magic %x wtime %jd, lastcp 0x%jx\n",
233			    sb[j].s_magic, sb[j].s_wtime, sb[j].s_last_cno);
234			if (sb[j].s_last_cno > fs->nf_sb->s_last_cno)
235				memcpy(fs->nf_sb, &sb[j], sizeof(*fs->nf_sb));
236		}
237	}
238
239	free(sb);
240
241	return (fs->nf_sb->s_magic != 0 ? 0 : EINVAL);
242}
243
244static int
245nandfs_find_fsdata(struct nandfs *fs, struct open_file *f)
246{
247	int offset, error, i;
248
249	NANDFS_DEBUG("starting\n");
250
251	offset = 0;
252	for (i = 0; i < 64 * NANDFS_NFSAREAS; i++) {
253		error = ioread(f, offset, (char *)fs->nf_fsdata,
254		    sizeof(struct nandfs_fsdata));
255		if (error)
256			return (error);
257		if (fs->nf_fsdata->f_magic == NANDFS_FSDATA_MAGIC) {
258			NANDFS_DEBUG("found at %x, volume %s\n", offset,
259			    fs->nf_fsdata->f_volume_name);
260			if (nandfs_check_fsdata_crc(fs->nf_fsdata))
261				break;
262		}
263		offset += fs->nf_sectorsize;
264	}
265
266	return (error);
267}
268
269static int
270nandfs_read_structures(struct nandfs *fs, struct open_file *f)
271{
272	int error;
273
274	error = nandfs_find_fsdata(fs, f);
275	if (error)
276		return (error);
277
278	error = nandfs_find_super_block(fs, f);
279
280	if (error == 0)
281		NANDFS_DEBUG("selected sb with w_time %jd last_pseg %jx\n",
282		    fs->nf_sb->s_wtime, fs->nf_sb->s_last_pseg);
283
284	return (error);
285}
286
287static int
288nandfs_mount(struct nandfs *fs, struct open_file *f)
289{
290	int err = 0, level;
291	uint64_t last_pseg;
292
293	fs->nf_fsdata = malloc(sizeof(struct nandfs_fsdata));
294	fs->nf_sb = malloc(sizeof(struct nandfs_super_block));
295
296	err = nandfs_read_structures(fs, f);
297	if (err) {
298		free(fs->nf_fsdata);
299		free(fs->nf_sb);
300		return (err);
301	}
302
303	fs->nf_blocksize = 1 << (fs->nf_fsdata->f_log_block_size + 10);
304
305	NANDFS_DEBUG("using superblock with wtime %jd\n", fs->nf_sb->s_wtime);
306
307	fs->nf_cpno = fs->nf_sb->s_last_cno;
308	last_pseg = fs->nf_sb->s_last_pseg;
309
310	/*
311	 * Calculate indirect block levels.
312	 */
313	nandfs_daddr_t mult;
314
315	mult = 1;
316	for (level = 0; level < NIADDR; level++) {
317		mult *= NINDIR(fs);
318		fs->nf_nindir[level] = mult;
319	}
320
321	nandfs_calc_mdt_consts(fs->nf_blocksize, &fs->nf_datfile_mdt,
322	    fs->nf_fsdata->f_dat_entry_size);
323
324	nandfs_calc_mdt_consts(fs->nf_blocksize, &fs->nf_ifile_mdt,
325	    fs->nf_fsdata->f_inode_size);
326
327	err = ioread(f, last_pseg * fs->nf_blocksize, &fs->nf_segsum,
328	    sizeof(struct nandfs_segment_summary));
329	if (err) {
330		free(fs->nf_sb);
331		free(fs->nf_fsdata);
332		return (err);
333	}
334
335	err = ioread(f, (last_pseg + fs->nf_segsum.ss_nblocks - 1) *
336	    fs->nf_blocksize, &fs->nf_sroot, sizeof(struct nandfs_super_root));
337	if (err) {
338		free(fs->nf_sb);
339		free(fs->nf_fsdata);
340		return (err);
341	}
342
343	fs->nf_datfile.inode = &fs->nf_sroot.sr_dat;
344	LIST_INIT(&fs->nf_datfile.bmap_bufs);
345	fs->nf_cpfile.inode = &fs->nf_sroot.sr_cpfile;
346	LIST_INIT(&fs->nf_cpfile.bmap_bufs);
347
348	err = nandfs_get_checkpoint(fs, fs->nf_cpno, &fs->nf_checkpoint);
349	if (err) {
350		free(fs->nf_sb);
351		free(fs->nf_fsdata);
352		return (err);
353	}
354
355	NANDFS_DEBUG("checkpoint cp_cno=%lld\n", fs->nf_checkpoint.cp_cno);
356	NANDFS_DEBUG("checkpoint cp_inodes_count=%lld\n",
357	    fs->nf_checkpoint.cp_inodes_count);
358	NANDFS_DEBUG("checkpoint cp_ifile_inode.i_blocks=%lld\n",
359	    fs->nf_checkpoint.cp_ifile_inode.i_blocks);
360
361	fs->nf_ifile.inode = &fs->nf_checkpoint.cp_ifile_inode;
362	LIST_INIT(&fs->nf_ifile.bmap_bufs);
363	return (0);
364}
365
/*
 * NINDIR() is already defined near the top of this file.  Only provide it
 * here if that definition is missing instead of redefining it.
 */
#ifndef NINDIR
#define NINDIR(fs)	((fs)->nf_blocksize / sizeof(nandfs_daddr_t))
#endif
367
368static int
369nandfs_open(const char *path, struct open_file *f)
370{
371	struct nandfs *fs;
372	struct nandfs_node *node;
373	int err, bsize, level;
374
375	NANDFS_DEBUG("nandfs_open('%s', %p)\n", path, f);
376
377	fs = malloc(sizeof(struct nandfs));
378	f->f_fsdata = fs;
379	fs->nf_file = f;
380
381	bsize = nandfs_probe_sectorsize(f);
382	if (bsize < 0) {
383		printf("Cannot probe medium sector size\n");
384		return (EINVAL);
385	}
386
387	fs->nf_sectorsize = bsize;
388
389	/*
390	 * Calculate indirect block levels.
391	 */
392	nandfs_daddr_t mult;
393
394	mult = 1;
395	for (level = 0; level < NIADDR; level++) {
396		mult *= NINDIR(fs);
397		fs->nf_nindir[level] = mult;
398	}
399
400	NANDFS_DEBUG("fs %p nf_sectorsize=%x\n", fs, fs->nf_sectorsize);
401
402	err = nandfs_mount(fs, f);
403	if (err) {
404		NANDFS_DEBUG("Cannot mount nandfs: %s\n", strerror(err));
405		return (err);
406	}
407
408	node = nandfs_lookup_path(fs, path);
409	if (node == NULL)
410		return (EINVAL);
411
412	fs->nf_offset = 0;
413	fs->nf_buf = NULL;
414	fs->nf_buf_blknr = -1;
415	fs->nf_opened_node = node;
416	LIST_INIT(&fs->nf_opened_node->bmap_bufs);
417	return (0);
418}
419
420static void
421nandfs_free_node(struct nandfs_node *node)
422{
423	struct bmap_buf *bmap, *tmp;
424
425	free(node->inode);
426	LIST_FOREACH_SAFE(bmap, &node->bmap_bufs, list, tmp) {
427		LIST_REMOVE(bmap, list);
428		free(bmap->map);
429		free(bmap);
430	}
431	free(node);
432}
433
434static int
435nandfs_close(struct open_file *f)
436{
437	struct nandfs *fs = f->f_fsdata;
438
439	NANDFS_DEBUG("nandfs_close(%p)\n", f);
440
441	if (fs->nf_buf != NULL)
442		free(fs->nf_buf);
443
444	nandfs_free_node(fs->nf_opened_node);
445	free(fs->nf_sb);
446	free(fs);
447	return (0);
448}
449
450static int
451nandfs_read(struct open_file *f, void *addr, size_t size, size_t *resid)
452{
453	struct nandfs *fs = (struct nandfs *)f->f_fsdata;
454	size_t csize, buf_size;
455	void *buf;
456	int error = 0;
457
458	NANDFS_DEBUG("nandfs_read(file=%p, addr=%p, size=%d)\n", f, addr, size);
459
460	while (size != 0) {
461		if (fs->nf_offset >= fs->nf_opened_node->inode->i_size)
462			break;
463
464		error = nandfs_buf_read(fs, &buf, &buf_size);
465		if (error)
466			break;
467
468		csize = size;
469		if (csize > buf_size)
470			csize = buf_size;
471
472		bcopy(buf, addr, csize);
473
474		fs->nf_offset += csize;
475		addr = (char *)addr + csize;
476		size -= csize;
477	}
478
479	if (resid)
480		*resid = size;
481	return (error);
482}
483
484static off_t
485nandfs_seek(struct open_file *f, off_t offset, int where)
486{
487	struct nandfs *fs = f->f_fsdata;
488	off_t off;
489	u_int size;
490
491	NANDFS_DEBUG("nandfs_seek(file=%p, offset=%lld, where=%d)\n", f,
492	    offset, where);
493
494	size = fs->nf_opened_node->inode->i_size;
495
496	switch (where) {
497	case SEEK_SET:
498		off = 0;
499		break;
500	case SEEK_CUR:
501		off = fs->nf_offset;
502		break;
503	case SEEK_END:
504		off = size;
505		break;
506	default:
507		errno = EINVAL;
508		return (-1);
509	}
510
511	off += offset;
512	if (off < 0 || off > size) {
513		errno = EINVAL;
514		return(-1);
515	}
516
517	fs->nf_offset = (u_int)off;
518
519	return (off);
520}
521
522static int
523nandfs_stat(struct open_file *f, struct stat *sb)
524{
525	struct nandfs *fs = f->f_fsdata;
526
527	NANDFS_DEBUG("nandfs_stat(file=%p, stat=%p)\n", f, sb);
528
529	sb->st_size = fs->nf_opened_node->inode->i_size;
530	sb->st_mode = fs->nf_opened_node->inode->i_mode;
531	sb->st_uid = fs->nf_opened_node->inode->i_uid;
532	sb->st_gid = fs->nf_opened_node->inode->i_gid;
533	return (0);
534}
535
536static int
537nandfs_readdir(struct open_file *f, struct dirent *d)
538{
539	struct nandfs *fs = f->f_fsdata;
540	struct nandfs_dir_entry *dirent;
541	void *buf;
542	size_t buf_size;
543
544	NANDFS_DEBUG("nandfs_readdir(file=%p, dirent=%p)\n", f, d);
545
546	if (fs->nf_offset >= fs->nf_opened_node->inode->i_size) {
547		NANDFS_DEBUG("nandfs_readdir(file=%p, dirent=%p) ENOENT\n",
548		    f, d);
549		return (ENOENT);
550	}
551
552	if (nandfs_buf_read(fs, &buf, &buf_size)) {
553		NANDFS_DEBUG("nandfs_readdir(file=%p, dirent=%p)"
554		    "buf_read failed\n", f, d);
555		return (EIO);
556	}
557
558	NANDFS_DEBUG("nandfs_readdir(file=%p, dirent=%p) moving forward\n",
559	    f, d);
560
561	dirent = (struct nandfs_dir_entry *)buf;
562	fs->nf_offset += dirent->rec_len;
563	strncpy(d->d_name, dirent->name, dirent->name_len);
564	d->d_name[dirent->name_len] = '\0';
565	d->d_type = dirent->file_type;
566	return (0);
567}
568
569static int
570nandfs_buf_read(struct nandfs *fs, void **buf_p, size_t *size_p)
571{
572	nandfs_daddr_t blknr, blkoff;
573
574	blknr = fs->nf_offset / fs->nf_blocksize;
575	blkoff = fs->nf_offset % fs->nf_blocksize;
576
577	if (blknr != fs->nf_buf_blknr) {
578		if (fs->nf_buf == NULL)
579			fs->nf_buf = malloc(fs->nf_blocksize);
580
581		if (nandfs_read_inode(fs, fs->nf_opened_node, blknr, 1,
582		    fs->nf_buf, 0))
583			return (EIO);
584
585		fs->nf_buf_blknr = blknr;
586	}
587
588	*buf_p = fs->nf_buf + blkoff;
589	*size_p = fs->nf_blocksize - blkoff;
590
591	NANDFS_DEBUG("nandfs_buf_read buf_p=%p size_p=%d\n", *buf_p, *size_p);
592
593	if (*size_p > fs->nf_opened_node->inode->i_size - fs->nf_offset)
594		*size_p = fs->nf_opened_node->inode->i_size - fs->nf_offset;
595
596	return (0);
597}
598
599static struct nandfs_node *
600nandfs_lookup_node(struct nandfs *fs, uint64_t ino)
601{
602	uint64_t blocknr;
603	int entrynr;
604	struct nandfs_inode *buffer;
605	struct nandfs_node *node;
606	struct nandfs_inode *inode;
607
608	NANDFS_DEBUG("nandfs_lookup_node ino=%lld\n", ino);
609
610	if (ino == 0) {
611		printf("nandfs_lookup_node: invalid inode requested\n");
612		return (NULL);
613	}
614
615	buffer = malloc(fs->nf_blocksize);
616	inode = malloc(sizeof(struct nandfs_inode));
617	node = malloc(sizeof(struct nandfs_node));
618
619	nandfs_mdt_trans(&fs->nf_ifile_mdt, ino, &blocknr, &entrynr);
620
621	if (nandfs_read_inode(fs, &fs->nf_ifile, blocknr, 1, buffer, 0))
622		return (NULL);
623
624	memcpy(inode, &buffer[entrynr], sizeof(struct nandfs_inode));
625	node->inode = inode;
626	free(buffer);
627	return (node);
628}
629
630static struct nandfs_node *
631nandfs_lookup_path(struct nandfs *fs, const char *path)
632{
633	struct nandfs_node *node;
634	struct nandfs_dir_entry *dirent;
635	char *namebuf;
636	uint64_t i, done, pinode, inode;
637	int nlinks = 0, counter, len, link_len, nameidx;
638	uint8_t *buffer, *orig;
639	char *strp, *lpath;
640
641	buffer = malloc(fs->nf_blocksize);
642	orig = buffer;
643
644	namebuf = malloc(2 * MAXPATHLEN + 2);
645	strncpy(namebuf, path, MAXPATHLEN);
646	namebuf[MAXPATHLEN] = '\0';
647	done = nameidx = 0;
648	lpath = namebuf;
649
650	/* Get the root inode */
651	node = nandfs_lookup_node(fs, NANDFS_ROOT_INO);
652	inode = NANDFS_ROOT_INO;
653
654	while ((strp = strsep(&lpath, "/")) != NULL) {
655		if (*strp == '\0')
656			continue;
657		if ((node->inode->i_mode & IFMT) != IFDIR) {
658			nandfs_free_node(node);
659			node = NULL;
660			goto out;
661		}
662
663		len = strlen(strp);
664		NANDFS_DEBUG("%s: looking for %s\n", __func__, strp);
665		for (i = 0; i < node->inode->i_blocks; i++) {
666			if (nandfs_read_inode(fs, node, i, 1, orig, 0)) {
667				node = NULL;
668				goto out;
669			}
670
671			buffer = orig;
672			done = counter = 0;
673			while (1) {
674				dirent =
675				    (struct nandfs_dir_entry *)(void *)buffer;
676				NANDFS_DEBUG("%s: dirent.name = %s\n",
677				    __func__, dirent->name);
678				NANDFS_DEBUG("%s: dirent.rec_len = %d\n",
679				    __func__, dirent->rec_len);
680				NANDFS_DEBUG("%s: dirent.inode = %lld\n",
681				    __func__, dirent->inode);
682				if (len == dirent->name_len &&
683				    (strncmp(strp, dirent->name, len) == 0) &&
684				    dirent->inode != 0) {
685					nandfs_free_node(node);
686					node = nandfs_lookup_node(fs,
687					    dirent->inode);
688					pinode = inode;
689					inode = dirent->inode;
690					done = 1;
691					break;
692				}
693
694				counter += dirent->rec_len;
695				buffer += dirent->rec_len;
696
697				if (counter == fs->nf_blocksize)
698					break;
699			}
700
701			if (done)
702				break;
703		}
704
705		if (!done) {
706			node = NULL;
707			goto out;
708		}
709
710		NANDFS_DEBUG("%s: %.*s has mode %o\n", __func__,
711		    dirent->name_len, dirent->name, node->inode->i_mode);
712
713		if ((node->inode->i_mode & IFMT) == IFLNK) {
714			NANDFS_DEBUG("%s: %.*s is symlink\n",
715			    __func__, dirent->name_len, dirent->name);
716			link_len = node->inode->i_size;
717
718			if (++nlinks > MAXSYMLINKS) {
719				nandfs_free_node(node);
720				node = NULL;
721				goto out;
722			}
723
724			if (nandfs_read_inode(fs, node, 0, 1, orig, 0)) {
725				nandfs_free_node(node);
726				node = NULL;
727				goto out;
728			}
729
730			NANDFS_DEBUG("%s: symlink is  %.*s\n",
731			    __func__, link_len, (char *)orig);
732
733			nameidx = (nameidx == 0) ? MAXPATHLEN + 1 : 0;
734			bcopy((char *)orig, namebuf + nameidx,
735			    (unsigned)link_len);
736			if (lpath != NULL) {
737				namebuf[nameidx + link_len++] = '/';
738				strncpy(namebuf + nameidx + link_len, lpath,
739				    MAXPATHLEN - link_len);
740				namebuf[nameidx + MAXPATHLEN] = '\0';
741			} else
742				namebuf[nameidx + link_len] = '\0';
743
744			NANDFS_DEBUG("%s: strp=%s, lpath=%s, namebuf0=%s, "
745			    "namebuf1=%s, idx=%d\n", __func__, strp, lpath,
746			    namebuf + 0, namebuf + MAXPATHLEN + 1, nameidx);
747
748			lpath = namebuf + nameidx;
749
750			nandfs_free_node(node);
751
752			/*
753			 * If absolute pathname, restart at root. Otherwise
754			 * continue with out parent inode.
755			 */
756			inode = (orig[0] == '/') ? NANDFS_ROOT_INO : pinode;
757			node = nandfs_lookup_node(fs, inode);
758		}
759	}
760
761out:
762	free(namebuf);
763	free(orig);
764	return (node);
765}
766
767static int
768nandfs_read_inode(struct nandfs *fs, struct nandfs_node *node,
769    nandfs_daddr_t blknr, u_int nblks, void *buf, int raw)
770{
771	uint64_t *pblks;
772	uint64_t *vblks;
773	u_int i;
774	int error;
775
776	pblks = malloc(nblks * sizeof(uint64_t));
777	vblks = malloc(nblks * sizeof(uint64_t));
778
779	NANDFS_DEBUG("nandfs_read_inode fs=%p node=%p blknr=%lld nblks=%d\n",
780	    fs, node, blknr, nblks);
781	for (i = 0; i < nblks; i++) {
782		error = nandfs_bmap_lookup(fs, node, blknr + i, &vblks[i], raw);
783		if (error) {
784			free(pblks);
785			free(vblks);
786			return (error);
787		}
788		if (raw == 0)
789			pblks[i] = nandfs_vtop(fs, vblks[i]);
790		else
791			pblks[i] = vblks[i];
792	}
793
794	for (i = 0; i < nblks; i++) {
795		if (ioread(fs->nf_file, pblks[i] * fs->nf_blocksize, buf,
796		    fs->nf_blocksize)) {
797			free(pblks);
798			free(vblks);
799			return (EIO);
800		}
801
802		buf = (void *)((uintptr_t)buf + fs->nf_blocksize);
803	}
804
805	free(pblks);
806	free(vblks);
807	return (0);
808}
809
810static int
811nandfs_read_blk(struct nandfs *fs, nandfs_daddr_t blknr, void *buf, int phys)
812{
813	uint64_t pblknr;
814
815	pblknr = (phys ? blknr : nandfs_vtop(fs, blknr));
816
817	return (ioread(fs->nf_file, pblknr * fs->nf_blocksize, buf,
818	    fs->nf_blocksize));
819}
820
821static int
822nandfs_get_checkpoint(struct nandfs *fs, uint64_t cpno,
823    struct nandfs_checkpoint *cp)
824{
825	uint64_t blocknr;
826	int blockoff, cp_per_block, dlen;
827	uint8_t *buf;
828
829	NANDFS_DEBUG("nandfs_get_checkpoint(fs=%p cpno=%lld)\n", fs, cpno);
830
831	buf = malloc(fs->nf_blocksize);
832
833	cpno += NANDFS_CPFILE_FIRST_CHECKPOINT_OFFSET - 1;
834	dlen = fs->nf_fsdata->f_checkpoint_size;
835	cp_per_block = fs->nf_blocksize / dlen;
836	blocknr = cpno / cp_per_block;
837	blockoff = (cpno % cp_per_block) * dlen;
838
839	if (nandfs_read_inode(fs, &fs->nf_cpfile, blocknr, 1, buf, 0)) {
840		free(buf);
841		return (EINVAL);
842	}
843
844	memcpy(cp, buf + blockoff, sizeof(struct nandfs_checkpoint));
845	free(buf);
846
847	return (0);
848}
849
850static uint64_t *
851nandfs_get_map(struct nandfs *fs, struct nandfs_node *node, nandfs_daddr_t blknr,
852    int phys)
853{
854	struct bmap_buf *bmap;
855	uint64_t *map;
856
857	LIST_FOREACH(bmap, &node->bmap_bufs, list) {
858		if (bmap->blknr == blknr)
859			return (bmap->map);
860	}
861
862	map = malloc(fs->nf_blocksize);
863	if (nandfs_read_blk(fs, blknr, map, phys)) {
864		free(map);
865		return (NULL);
866	}
867
868	bmap = malloc(sizeof(struct bmap_buf));
869	bmap->blknr = blknr;
870	bmap->map = map;
871
872	LIST_INSERT_HEAD(&node->bmap_bufs, bmap, list);
873
874	NANDFS_DEBUG("%s:(node=%p, map=%p)\n", __func__, node, map);
875	return (map);
876}
877
878static int
879nandfs_bmap_lookup(struct nandfs *fs, struct nandfs_node *node,
880    nandfs_lbn_t lblknr, nandfs_daddr_t *vblknr, int phys)
881{
882	struct nandfs_inode *ino;
883	nandfs_daddr_t ind_block_num;
884	uint64_t *map;
885	int idx;
886	int level;
887
888	ino = node->inode;
889
890	if (lblknr < NDADDR) {
891		*vblknr = ino->i_db[lblknr];
892		return (0);
893	}
894
895	lblknr -= NDADDR;
896
897	/*
898	 * nindir[0] = NINDIR
899	 * nindir[1] = NINDIR**2
900	 * nindir[2] = NINDIR**3
901	 *	etc
902	 */
903	for (level = 0; level < NIADDR; level++) {
904		NANDFS_DEBUG("lblknr=%jx fs->nf_nindir[%d]=%d\n", lblknr, level, fs->nf_nindir[level]);
905		if (lblknr < fs->nf_nindir[level])
906			break;
907		lblknr -= fs->nf_nindir[level];
908	}
909
910	if (level == NIADDR) {
911		/* Block number too high */
912		NANDFS_DEBUG("lblknr %jx too high\n", lblknr);
913		return (EFBIG);
914	}
915
916	ind_block_num = ino->i_ib[level];
917
918	for (; level >= 0; level--) {
919		if (ind_block_num == 0) {
920			*vblknr = 0;	/* missing */
921			return (0);
922		}
923
924		twiddle();
925		NANDFS_DEBUG("calling get_map with %jx\n", ind_block_num);
926		map = nandfs_get_map(fs, node, ind_block_num, phys);
927		if (map == NULL)
928			return (EIO);
929
930		if (level > 0) {
931			idx = lblknr / fs->nf_nindir[level - 1];
932			lblknr %= fs->nf_nindir[level - 1];
933		} else
934			idx = lblknr;
935
936		ind_block_num = ((nandfs_daddr_t *)map)[idx];
937	}
938
939	*vblknr = ind_block_num;
940
941	return (0);
942}
943
944static nandfs_daddr_t
945nandfs_vtop(struct nandfs *fs, nandfs_daddr_t vblocknr)
946{
947	nandfs_lbn_t blocknr;
948	nandfs_daddr_t pblocknr;
949	int entrynr;
950	struct nandfs_dat_entry *dat;
951
952	dat = malloc(fs->nf_blocksize);
953	nandfs_mdt_trans(&fs->nf_datfile_mdt, vblocknr, &blocknr, &entrynr);
954
955	if (nandfs_read_inode(fs, &fs->nf_datfile, blocknr, 1, dat, 1)) {
956		free(dat);
957		return (0);
958	}
959
960	NANDFS_DEBUG("nandfs_vtop entrynr=%d vblocknr=%lld pblocknr=%lld\n",
961	    entrynr, vblocknr, dat[entrynr].de_blocknr);
962
963	pblocknr = dat[entrynr].de_blocknr;
964	free(dat);
965	return (pblocknr);
966}
967
968static void
969nandfs_calc_mdt_consts(int blocksize, struct nandfs_mdt *mdt, int entry_size)
970{
971
972	mdt->entries_per_group = blocksize * 8;	   /* bits in sector */
973	mdt->entries_per_block = blocksize / entry_size;
974	mdt->blocks_per_group  =
975	    (mdt->entries_per_group -1) / mdt->entries_per_block + 1 + 1;
976	mdt->groups_per_desc_block =
977	    blocksize / sizeof(struct nandfs_block_group_desc);
978	mdt->blocks_per_desc_block =
979	    mdt->groups_per_desc_block * mdt->blocks_per_group + 1;
980}
981
982static void
983nandfs_mdt_trans(struct nandfs_mdt *mdt, uint64_t index,
984    nandfs_daddr_t *blocknr, uint32_t *entry_in_block)
985{
986	nandfs_daddr_t blknr;
987	uint64_t group, group_offset, blocknr_in_group;
988	uint64_t desc_block, desc_offset;
989
990	/* Calculate our offset in the file */
991	group = index / mdt->entries_per_group;
992	group_offset = index % mdt->entries_per_group;
993	desc_block = group / mdt->groups_per_desc_block;
994	desc_offset = group % mdt->groups_per_desc_block;
995	blocknr_in_group = group_offset / mdt->entries_per_block;
996
997	/* To descgroup offset */
998	blknr = 1 + desc_block * mdt->blocks_per_desc_block;
999
1000	/* To group offset */
1001	blknr += desc_offset * mdt->blocks_per_group;
1002
1003	/* To actual file block */
1004	blknr += 1 + blocknr_in_group;
1005
1006	*blocknr        = blknr;
1007	*entry_in_block = group_offset % mdt->entries_per_block;
1008}
1009
1010static int
1011ioread(struct open_file *f, off_t pos, void *buf, u_int length)
1012{
1013	void *buffer;
1014	int err;
1015	int bsize = ((struct nandfs *)f->f_fsdata)->nf_sectorsize;
1016	u_int off, nsec;
1017
1018	off = pos % bsize;
1019	pos /= bsize;
1020	nsec = (length + (bsize - 1)) / bsize;
1021
1022	NANDFS_DEBUG("pos=%lld length=%d off=%d nsec=%d\n", pos, length,
1023	    off, nsec);
1024
1025	buffer = malloc(nsec * bsize);
1026
1027	err = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, pos,
1028	    nsec * bsize, buffer, NULL);
1029
1030	memcpy(buf, (void *)((uintptr_t)buffer + off), length);
1031	free(buffer);
1032
1033	return (err);
1034}
1035
1036static int
1037nandfs_probe_sectorsize(struct open_file *f)
1038{
1039	void *buffer;
1040	int i, err;
1041
1042	buffer = malloc(16 * 1024);
1043
1044	NANDFS_DEBUG("probing for sector size: ");
1045
1046	for (i = 512; i < (16 * 1024); i <<= 1) {
1047		NANDFS_DEBUG("%d ", i);
1048		err = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, 0, i,
1049		    buffer, NULL);
1050
1051		if (err == 0) {
1052			NANDFS_DEBUG("found");
1053			free(buffer);
1054			return (i);
1055		}
1056	}
1057
1058	free(buffer);
1059	NANDFS_DEBUG("not found\n");
1060	return (-1);
1061}
1062