1/*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1998 Apple Computer, Inc.  All rights reserved.
29 *
30 *	File:	bsd/kern/kern_symfile.c
31 *
32 * HISTORY
33 */
34
35#include <mach/vm_param.h>
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/signalvar.h>
40#include <sys/resourcevar.h>
41#include <sys/namei.h>
42#include <sys/vnode_internal.h>
43#include <sys/proc_internal.h>
44#include <sys/kauth.h>
45#include <sys/timeb.h>
46#include <sys/times.h>
47#include <sys/acct.h>
48#include <sys/file_internal.h>
49#include <sys/uio.h>
50#include <sys/kernel.h>
51#include <sys/stat.h>
52#include <sys/disk.h>
53#include <sys/conf.h>
54
55#include <mach-o/loader.h>
56#include <mach-o/nlist.h>
57
58#include <kern/kalloc.h>
59#include <vm/vm_kern.h>
60#include <pexpert/pexpert.h>
61#include <IOKit/IOHibernatePrivate.h>
62
63/* This function is called from kern_sysctl in the current process context;
64 * it is exported with the System6.0.exports, but this appears to be a legacy
65 * export, as there are no internal consumers.
66 */
67int
68get_kernel_symfile(__unused proc_t p, __unused char const **symfile);
69int
70get_kernel_symfile(__unused proc_t p, __unused char const **symfile)
71{
72    return KERN_FAILURE;
73}
74
/*
 * State kept for a file or partition opened by kern_open_file_for_direct_io()
 * and torn down by kern_close_file_for_direct_io().
 */
struct kern_direct_file_io_ref_t
{
    vfs_context_t  ctx;         /* context from vfs_context_create(); used for vnode calls, released on close */
    struct vnode * vp;          /* vnode returned by vnode_open(); NULLVP once closed */
    dev_t          device;      /* va_fsid for a regular file, va_rdev for a block/char device */
    uint32_t	   blksize;     /* block size reported by DKIOCGETBLOCKSIZE on the device above */
    off_t          filelength;  /* bytes covered: file size/allocation, or block count * blksize for a device */
    char           pinned;      /* nonzero when _DKIOCCSPINEXTENT succeeded; close then skips DKIOCUNMAP */
};
84
85
86static int file_ioctl(void * p1, void * p2, u_long theIoctl, caddr_t result)
87{
88    dev_t device = *(dev_t*) p1;
89
90    return ((*bdevsw[major(device)].d_ioctl)
91		    (device, theIoctl, result, S_IFBLK, p2));
92}
93
94static int device_ioctl(void * p1, __unused void * p2, u_long theIoctl, caddr_t result)
95{
96    return (VNOP_IOCTL(p1, theIoctl, result, 0, p2));
97}
98
99static int
100kern_ioctl_file_extents(struct kern_direct_file_io_ref_t * ref, u_long theIoctl, off_t offset, off_t end)
101{
102    int error;
103    int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result);
104    void * p1;
105    void * p2;
106    uint64_t    fileblk;
107    size_t      filechunk;
108    dk_extent_t  extent;
109    dk_unmap_t   unmap;
110    _dk_cs_pin_t pin;
111
112    bzero(&extent, sizeof(dk_extent_t));
113    bzero(&unmap, sizeof(dk_unmap_t));
114    bzero(&pin, sizeof(pin));
115    if (ref->vp->v_type == VREG)
116    {
117	 p1 = &ref->device;
118	 p2 = kernproc;
119	 do_ioctl = &file_ioctl;
120    }
121    else
122    {
123	/* Partition. */
124	p1 = ref->vp;
125	p2 = ref->ctx;
126	do_ioctl = &device_ioctl;
127    }
128    while (offset < end)
129    {
130        if (ref->vp->v_type == VREG)
131        {
132            daddr64_t blkno;
133	    filechunk = 1*1024*1024*1024;
134	    if (filechunk > (size_t)(end - offset))
135	    filechunk = (size_t)(end - offset);
136            error = VNOP_BLOCKMAP(ref->vp, offset, filechunk, &blkno, &filechunk, NULL, 0, NULL);
137			if (error) break;
138            fileblk = blkno * ref->blksize;
139        }
140        else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
141        {
142            fileblk = offset;
143            filechunk = ref->filelength;
144        }
145
146	if (DKIOCUNMAP == theIoctl)
147	{
148	    extent.offset = fileblk;
149	    extent.length = filechunk;
150	    unmap.extents = &extent;
151	    unmap.extentsCount = 1;
152	    error = do_ioctl(p1, p2, theIoctl, (caddr_t)&unmap);
153// 	    printf("DKIOCUNMAP(%d) 0x%qx, 0x%qx\n", error, extent.offset, extent.length);
154	}
155	else if (_DKIOCCSPINEXTENT == theIoctl)
156	{
157	    pin.cp_extent.offset = fileblk;
158	    pin.cp_extent.length = filechunk;
159	    pin.cp_flags = _DKIOCSPINDISCARDDATA;
160	    error = do_ioctl(p1, p2, theIoctl, (caddr_t)&pin);
161	    if (error && (ENOTTY != error))
162	    {
163		printf("_DKIOCCSPINEXTENT(%d) 0x%qx, 0x%qx\n",
164			error, pin.cp_extent.offset, pin.cp_extent.length);
165	    }
166	}
167	else error = EINVAL;
168
169	if (error) break;
170        offset += filechunk;
171    }
172    return (error);
173}
174
/*
 * Forward declaration: kern_write_file() is defined further down in this file
 * but is called from kern_open_file_for_direct_io() ahead of that definition.
 */
int
kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len);
177
178struct kern_direct_file_io_ref_t *
179kern_open_file_for_direct_io(const char * name,
180			     kern_get_file_extents_callback_t callback,
181			     void * callback_ref,
182                             off_t set_file_size,
183                             off_t write_file_offset,
184                             caddr_t write_file_addr,
185                             vm_size_t write_file_len,
186			     dev_t * partition_device_result,
187			     dev_t * image_device_result,
188                             uint64_t * partitionbase_result,
189                             uint64_t * maxiocount_result,
190                             uint32_t * oflags)
191{
192    struct kern_direct_file_io_ref_t * ref;
193
194    proc_t			p;
195    struct vnode_attr		va;
196    int				error;
197    off_t			f_offset;
198    uint64_t                    fileblk;
199    size_t                      filechunk;
200    uint64_t                    physoffset;
201    dev_t			device;
202    dev_t			target = 0;
203    int			        isssd = 0;
204    uint32_t                    flags = 0;
205    uint32_t			blksize;
206    off_t 			maxiocount, count;
207    boolean_t                   locked = FALSE;
208
209    int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result);
210    void * p1 = NULL;
211    void * p2 = NULL;
212
213    error = EFAULT;
214
215    ref = (struct kern_direct_file_io_ref_t *) kalloc(sizeof(struct kern_direct_file_io_ref_t));
216    if (!ref)
217    {
218	error = EFAULT;
219    	goto out;
220    }
221
222    bzero(ref, sizeof(*ref));
223    p = kernproc;
224    ref->ctx = vfs_context_create(vfs_context_current());
225
226    if ((error = vnode_open(name, (O_CREAT | FWRITE), (0), 0, &ref->vp, ref->ctx)))
227        goto out;
228
229    if (write_file_addr && write_file_len)
230    {
231	if ((error = kern_write_file(ref, write_file_offset, write_file_addr, write_file_len)))
232	    goto out;
233    }
234
235    VATTR_INIT(&va);
236    VATTR_WANTED(&va, va_rdev);
237    VATTR_WANTED(&va, va_fsid);
238    VATTR_WANTED(&va, va_data_size);
239    VATTR_WANTED(&va, va_nlink);
240    error = EFAULT;
241    if (vnode_getattr(ref->vp, &va, ref->ctx))
242    	goto out;
243
244    kprintf("vp va_rdev major %d minor %d\n", major(va.va_rdev), minor(va.va_rdev));
245    kprintf("vp va_fsid major %d minor %d\n", major(va.va_fsid), minor(va.va_fsid));
246    kprintf("vp size %qd\n", va.va_data_size);
247
248    if (ref->vp->v_type == VREG)
249    {
250	/* Don't dump files with links. */
251	if (va.va_nlink != 1)
252	    goto out;
253
254        device = va.va_fsid;
255        ref->filelength = va.va_data_size;
256
257        p1 = &device;
258        p2 = p;
259        do_ioctl = &file_ioctl;
260
261	if (set_file_size)
262	{
263	    off_t     bytesallocated = 0;
264	    u_int32_t alloc_flags = PREALLOCATE | ALLOCATEFROMPEOF | ALLOCATEALL;
265	    error = VNOP_ALLOCATE(ref->vp, set_file_size, alloc_flags,
266				  &bytesallocated, 0 /*fst_offset*/,
267				  ref->ctx);
268	    // F_SETSIZE:
269	    if (!error) error = vnode_setsize(ref->vp, set_file_size, IO_NOZEROFILL, ref->ctx);
270	    kprintf("vnode_setsize(%d) %qd\n", error, set_file_size);
271	    ref->filelength = bytesallocated;
272	}
273    }
274    else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
275    {
276	/* Partition. */
277        device = va.va_rdev;
278
279        p1 = ref->vp;
280        p2 = ref->ctx;
281        do_ioctl = &device_ioctl;
282    }
283    else
284    {
285	/* Don't dump to non-regular files. */
286	error = EFAULT;
287        goto out;
288    }
289    ref->device = device;
290
291    // get block size
292
293    error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &ref->blksize);
294    if (error)
295        goto out;
296
297    if (ref->vp->v_type != VREG)
298    {
299        error = do_ioctl(p1, p2, DKIOCGETBLOCKCOUNT, (caddr_t) &fileblk);
300        if (error)
301            goto out;
302	ref->filelength = fileblk * ref->blksize;
303    }
304
305    // pin logical extents
306
307    error = kern_ioctl_file_extents(ref, _DKIOCCSPINEXTENT, 0, ref->filelength);
308    if (error && (ENOTTY != error)) goto out;
309    ref->pinned = (error == 0);
310
311    // generate the block list
312
313    error = do_ioctl(p1, p2, DKIOCLOCKPHYSICALEXTENTS, NULL);
314    if (error)
315        goto out;
316    locked = TRUE;
317
318    f_offset = 0;
319    while (f_offset < ref->filelength)
320    {
321        if (ref->vp->v_type == VREG)
322        {
323            filechunk = 1*1024*1024*1024;
324            daddr64_t blkno;
325
326            error = VNOP_BLOCKMAP(ref->vp, f_offset, filechunk, &blkno, &filechunk, NULL, 0, NULL);
327            if (error)
328                goto out;
329
330            fileblk = blkno * ref->blksize;
331        }
332        else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
333        {
334            fileblk = f_offset;
335            filechunk = f_offset ? 0 : ref->filelength;
336        }
337
338        physoffset = 0;
339        while (physoffset < filechunk)
340        {
341            dk_physical_extent_t getphysreq;
342            bzero(&getphysreq, sizeof(getphysreq));
343
344            getphysreq.offset = fileblk + physoffset;
345            getphysreq.length = (filechunk - physoffset);
346            error = do_ioctl(p1, p2, DKIOCGETPHYSICALEXTENT, (caddr_t) &getphysreq);
347            if (error)
348                goto out;
349            if (!target)
350            {
351                target = getphysreq.dev;
352            }
353            else if (target != getphysreq.dev)
354            {
355                error = ENOTSUP;
356                goto out;
357            }
358            callback(callback_ref, getphysreq.offset, getphysreq.length);
359            physoffset += getphysreq.length;
360        }
361        f_offset += filechunk;
362    }
363    callback(callback_ref, 0ULL, 0ULL);
364
365    if (ref->vp->v_type == VREG)
366        p1 = &target;
367
368    // get partition base
369
370    error = do_ioctl(p1, p2, DKIOCGETBASE, (caddr_t) partitionbase_result);
371    if (error)
372        goto out;
373
374    // get block size & constraints
375
376    error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &blksize);
377    if (error)
378        goto out;
379
380    maxiocount = 1*1024*1024*1024;
381
382    error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTREAD, (caddr_t) &count);
383    if (error)
384        count = 0;
385    count *= blksize;
386    if (count && (count < maxiocount))
387        maxiocount = count;
388
389    error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTWRITE, (caddr_t) &count);
390    if (error)
391        count = 0;
392    count *= blksize;
393    if (count && (count < maxiocount))
394        maxiocount = count;
395
396    error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTREAD, (caddr_t) &count);
397    if (error)
398        count = 0;
399    if (count && (count < maxiocount))
400        maxiocount = count;
401
402    error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTWRITE, (caddr_t) &count);
403    if (error)
404        count = 0;
405    if (count && (count < maxiocount))
406        maxiocount = count;
407
408    error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTREAD, (caddr_t) &count);
409    if (error)
410        count = 0;
411    if (count && (count < maxiocount))
412        maxiocount = count;
413
414    error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTWRITE, (caddr_t) &count);
415    if (error)
416        count = 0;
417    if (count && (count < maxiocount))
418        maxiocount = count;
419
420    kprintf("max io 0x%qx bytes\n", maxiocount);
421    if (maxiocount_result)
422        *maxiocount_result = maxiocount;
423
424    error = do_ioctl(p1, p2, DKIOCISSOLIDSTATE, (caddr_t)&isssd);
425    if (!error && isssd)
426        flags |= kIOHibernateOptionSSD;
427
428    if (partition_device_result)
429        *partition_device_result = device;
430    if (image_device_result)
431        *image_device_result = target;
432    if (flags)
433        *oflags = flags;
434
435out:
436    kprintf("kern_open_file_for_direct_io(%d)\n", error);
437
438    if (error && locked)
439    {
440        p1 = &device;
441        (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL);
442    }
443
444    if (error && ref)
445    {
446	if (ref->vp)
447	{
448	    vnode_close(ref->vp, FWRITE, ref->ctx);
449	    ref->vp = NULLVP;
450	}
451	vfs_context_rele(ref->ctx);
452	kfree(ref, sizeof(struct kern_direct_file_io_ref_t));
453	ref = NULL;
454    }
455
456    return(ref);
457}
458
459int
460kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len)
461{
462    return (vn_rdwr(UIO_WRITE, ref->vp,
463			addr, len, offset,
464			UIO_SYSSPACE, IO_SYNC|IO_NODELOCKED|IO_UNIT,
465                        vfs_context_ucred(ref->ctx), (int *) 0,
466			vfs_context_proc(ref->ctx)));
467}
468
469
470void
471kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref,
472			      off_t write_offset, caddr_t addr, vm_size_t write_length,
473			      off_t discard_offset, off_t discard_end)
474{
475    int error;
476    kprintf("kern_close_file_for_direct_io\n");
477
478    if (!ref) return;
479
480    if (ref->vp)
481    {
482        int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result);
483        void * p1;
484        void * p2;
485
486        if (ref->vp->v_type == VREG)
487        {
488            p1 = &ref->device;
489            p2 = kernproc;
490            do_ioctl = &file_ioctl;
491        }
492        else
493        {
494            /* Partition. */
495            p1 = ref->vp;
496            p2 = ref->ctx;
497            do_ioctl = &device_ioctl;
498        }
499        (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL);
500
501        if (addr && write_length)
502        {
503            (void) kern_write_file(ref, write_offset, addr, write_length);
504        }
505        if (discard_offset && discard_end && !ref->pinned)
506        {
507            (void) kern_ioctl_file_extents(ref, DKIOCUNMAP, discard_offset, discard_end);
508        }
509
510        error = vnode_close(ref->vp, FWRITE, ref->ctx);
511
512        ref->vp = NULLVP;
513        kprintf("vnode_close(%d)\n", error);
514    }
515    vfs_context_rele(ref->ctx);
516    ref->ctx = NULL;
517    kfree(ref, sizeof(struct kern_direct_file_io_ref_t));
518}
519
520