1/**
2 * \file
3 * \brief Kernel capability management implementation.
4 */
5
6/*
7 * Copyright (c) 2007-2012,2015,2016 ETH Zurich.
8 * Copyright (c) 2015, 2016 Hewlett Packard Enterprise Development LP.
9 * All rights reserved.
10 *
11 * This file is distributed under the terms in the attached LICENSE file.
12 * If you do not find this file, copies can be found by writing to:
13 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group.
14 */
15
16#include <stdio.h>
17#include <string.h>
18#include <kernel.h>
19#include <barrelfish_kpi/syscalls.h>
20#include <barrelfish_kpi/paging_arch.h>
21#include <barrelfish_kpi/lmp.h>
22#include <offsets.h>
23#include <capabilities.h>
24#include <cap_predicates.h>
25#include <distcaps.h>
26#include <dispatch.h>
27#include <kcb.h>
28#include <paging_kernel_arch.h>
29#include <mdb/mdb.h>
30#include <mdb/mdb_tree.h>
31#include <trace/trace.h>
32#include <trace_definitions/trace_defs.h>
33#include <wakeup.h>
34#include <bitmacros.h>
35
36// XXX: remove
37#pragma GCC diagnostic ignored "-Wsuggest-attribute=noreturn"
38
#ifdef TRACE_PMEM_CAPS
// Bitmask of capability types for which physical-memory tracing is active.
uint64_t   trace_types_enabled = TRACE_TYPES_ENABLED_INITIAL;
// Start of the physical address window being traced.
genpaddr_t TRACE_PMEM_BEGIN    = TRACE_PMEM_BEGIN_INITIAL;
// Size (in bytes) of the physical address window being traced.
gensize_t  TRACE_PMEM_SIZE     = TRACE_PMEM_SIZE_INITIAL;

/**
 * \brief Configure physical-memory capability tracing.
 *
 * \param types  Bitmask of cap types to trace; 0 disables tracing entirely.
 * \param start  Base of the physical address window to trace (ignored if
 *               \p types is 0).
 * \param size   Size of the window in bytes (ignored if \p types is 0).
 */
void caps_trace_ctrl(uint64_t types, genpaddr_t start, gensize_t size)
{
    if (types) {
        trace_types_enabled = types;
        TRACE_PMEM_BEGIN = start;
        TRACE_PMEM_SIZE = size;
    } else {
        // Disabling: leave the window values untouched, just mask all types.
        trace_types_enabled = 0;
    }
}
#endif
55
56struct capability monitor_ep;
57
58STATIC_ASSERT(68 == ObjType_Num, "Knowledge of all cap types");
59int sprint_cap(char *buf, size_t len, struct capability *cap)
60{
61    char *mappingtype;
62    switch (cap->type) {
63    case ObjType_PhysAddr:
64        return snprintf(buf, len,
65                        "physical address range cap (0x%" PRIxGENPADDR ":0x%" PRIxGENSIZE ")",
66                        cap->u.physaddr.base, cap->u.physaddr.bytes);
67
68    case ObjType_RAM:
69        return snprintf(buf, len, "RAM cap (0x%" PRIxGENPADDR ":0x%" PRIxGENSIZE ")",
70                        cap->u.ram.base, cap->u.ram.bytes);
71
72    case ObjType_L1CNode: {
73        int ret = snprintf(buf, len, "L1 CNode cap "
74                           "(base=%#"PRIxGENPADDR", allocated bytes %#"PRIxGENSIZE
75                           ", rights mask %#"PRIxCAPRIGHTS")",
76                           get_address(cap), get_size(cap),
77                           cap->u.l1cnode.rightsmask);
78        return ret;
79    }
80
81    case ObjType_L2CNode: {
82        int ret = snprintf(buf, len, "L2 CNode cap "
83                           "(base=%#"PRIxGENPADDR", rights mask %#"PRIxCAPRIGHTS")",
84                           get_address(cap), cap->u.l1cnode.rightsmask);
85        return ret;
86    }
87
88    case ObjType_Dispatcher:
89        return snprintf(buf, len, "Dispatcher cap %p", cap->u.dispatcher.dcb);
90
91    case ObjType_Frame:
92        return snprintf(buf, len, "Frame cap (0x%" PRIxGENPADDR ":0x%" PRIxGENSIZE ")",
93                        cap->u.frame.base, cap->u.frame.bytes);
94
95    case ObjType_EndPointUMP:
96        return snprintf(buf, len, "EndPointUMP cap (0x%" PRIxGENPADDR ":0x%"
97                                   PRIxGENSIZE ") If:%" PRIu32,
98                        cap->u.endpointump.base, cap->u.endpointump.bytes,
99                        cap->u.endpointump.iftype);
100
101    case ObjType_DevFrame:
102        return snprintf(buf, len, "Device Frame cap (0x%" PRIxGENPADDR ":0x%" PRIxGENSIZE ")",
103                        cap->u.devframe.base, cap->u.devframe.bytes);
104
105    case ObjType_VNode_ARM_l1:
106        return snprintf(buf, len, "ARM L1 table at 0x%" PRIxGENPADDR,
107                        cap->u.vnode_arm_l1.base);
108
109    case ObjType_VNode_ARM_l2:
110        return snprintf(buf, len, "ARM L2 table at 0x%" PRIxGENPADDR,
111                        cap->u.vnode_arm_l2.base);
112
113    case ObjType_VNode_AARCH64_l0:
114        return snprintf(buf, len, "AARCH64 L0 table at 0x%" PRIxGENPADDR,
115                        cap->u.vnode_aarch64_l0.base);
116
117    case ObjType_VNode_AARCH64_l1:
118        return snprintf(buf, len, "AARCH64 L1 table at 0x%" PRIxGENPADDR,
119                        cap->u.vnode_aarch64_l1.base);
120
121    case ObjType_VNode_AARCH64_l2:
122        return snprintf(buf, len, "AARCH64 L2 table at 0x%" PRIxGENPADDR,
123                        cap->u.vnode_aarch64_l2.base);
124
125    case ObjType_VNode_AARCH64_l3:
126        return snprintf(buf, len, "AARCH64 L3 table at 0x%" PRIxGENPADDR,
127                        cap->u.vnode_aarch64_l3.base);
128
129    case ObjType_VNode_x86_32_ptable:
130        return snprintf(buf, len, "x86_32 Page table at 0x%" PRIxGENPADDR,
131                        cap->u.vnode_x86_32_ptable.base);
132
133    case ObjType_VNode_x86_32_pdir:
134        return snprintf(buf, len, "x86_32 Page directory at 0x%" PRIxGENPADDR,
135                        cap->u.vnode_x86_32_pdir.base);
136
137    case ObjType_VNode_x86_32_pdpt:
138        return snprintf(buf, len, "x86_32 PDPT at 0x%" PRIxGENPADDR,
139                        cap->u.vnode_x86_32_pdpt.base);
140
141    case ObjType_VNode_x86_64_ptable:
142        return snprintf(buf, len, "x86_64 Page table at 0x%" PRIxGENPADDR,
143                        cap->u.vnode_x86_64_ptable.base);
144
145    case ObjType_VNode_x86_64_pdir:
146        return snprintf(buf, len, "x86_64 Page directory at 0x%" PRIxGENPADDR,
147                        cap->u.vnode_x86_64_pdir.base);
148
149    case ObjType_VNode_x86_64_pdpt:
150        return snprintf(buf, len, "x86_64 PDPT at 0x%" PRIxGENPADDR,
151                        cap->u.vnode_x86_64_pdpt.base);
152
153    case ObjType_VNode_x86_64_pml4:
154        return snprintf(buf, len, "x86_64 PML4 at 0x%" PRIxGENPADDR,
155                        cap->u.vnode_x86_64_pml4.base);
156    case ObjType_VNode_x86_64_pml5:
157        return snprintf(buf, len, "x86_64 PML5 at 0x%" PRIxGENPADDR,
158                        cap->u.vnode_x86_64_pml4.base);
159    case ObjType_VNode_VTd_root_table:
160        return snprintf(buf, len, "VTd root table at 0x%" PRIxGENPADDR,
161                        cap->u.vnode_x86_64_pml4.base);
162    case ObjType_VNode_VTd_ctxt_table:
163        return snprintf(buf, len, "VTd ctxt table at 0x%" PRIxGENPADDR,
164                        cap->u.vnode_x86_64_pml4.base);
165
166    case ObjType_VNode_x86_64_ept_ptable:
167        return snprintf(buf, len, "x86_64 EPT Page table at 0x%" PRIxGENPADDR,
168                        cap->u.vnode_x86_64_ept_ptable.base);
169
170    case ObjType_VNode_x86_64_ept_pdir:
171        return snprintf(buf, len, "x86_64 EPT Page directory at 0x%" PRIxGENPADDR,
172                        cap->u.vnode_x86_64_ept_pdir.base);
173
174    case ObjType_VNode_x86_64_ept_pdpt:
175        return snprintf(buf, len, "x86_64 EPT PDPT at 0x%" PRIxGENPADDR,
176                        cap->u.vnode_x86_64_ept_pdpt.base);
177
178    case ObjType_VNode_x86_64_ept_pml4:
179        return snprintf(buf, len, "x86_64 EPT PML4 at 0x%" PRIxGENPADDR,
180                        cap->u.vnode_x86_64_ept_pml4.base);
181
182    case ObjType_Frame_Mapping:
183        mappingtype = "Frame";
184        goto ObjType_Mapping;
185    case ObjType_DevFrame_Mapping:
186        mappingtype = "DevFrame";
187        goto ObjType_Mapping;
188    case ObjType_EndPointUMP_Mapping:
189        mappingtype = "EndPointUMP";
190        goto ObjType_Mapping;
191
192    case ObjType_VNode_x86_64_pml5_Mapping:
193        mappingtype = "x86_64 PML4";
194        goto ObjType_Mapping;
195    case ObjType_VNode_x86_64_pml4_Mapping:
196        mappingtype = "x86_64 PML4";
197        goto ObjType_Mapping;
198    case ObjType_VNode_x86_64_pdpt_Mapping:
199        mappingtype = "x86_64 PDPT";
200        goto ObjType_Mapping;
201    case ObjType_VNode_x86_64_pdir_Mapping:
202        mappingtype = "x86_64 PDIR";
203        goto ObjType_Mapping;
204    case ObjType_VNode_x86_64_ptable_Mapping:
205        mappingtype = "x86_64 PTABLE";
206        goto ObjType_Mapping;
207
208    case ObjType_VNode_x86_64_ept_pml4_Mapping:
209        mappingtype = "x86_64 EPT PML4";
210        goto ObjType_Mapping;
211    case ObjType_VNode_x86_64_ept_pdpt_Mapping:
212        mappingtype = "x86_64 EPT PDPT";
213        goto ObjType_Mapping;
214    case ObjType_VNode_x86_64_ept_pdir_Mapping:
215        mappingtype = "x86_64 EPT PDIR";
216        goto ObjType_Mapping;
217    case ObjType_VNode_x86_64_ept_ptable_Mapping:
218        mappingtype = "x86_64 EPT PTABLE";
219        goto ObjType_Mapping;
220
221    case ObjType_VNode_x86_32_pdpt_Mapping:
222        mappingtype = "x86_32 PDPT";
223        goto ObjType_Mapping;
224    case ObjType_VNode_x86_32_pdir_Mapping:
225        mappingtype = "x86_32 PDIR";
226        goto ObjType_Mapping;
227    case ObjType_VNode_x86_32_ptable_Mapping:
228        mappingtype = "x86_32 PTABLE";
229        goto ObjType_Mapping;
230
231    case ObjType_VNode_ARM_l1_Mapping:
232        mappingtype = "ARM l1";
233        goto ObjType_Mapping;
234    case ObjType_VNode_ARM_l2_Mapping:
235        mappingtype = "ARM l2";
236        goto ObjType_Mapping;
237
238    case ObjType_VNode_AARCH64_l0_Mapping:
239        mappingtype = "AARCH64 l0";
240        goto ObjType_Mapping;
241    case ObjType_VNode_AARCH64_l1_Mapping:
242        mappingtype = "AARCH64 l1";
243        goto ObjType_Mapping;
244    case ObjType_VNode_AARCH64_l2_Mapping:
245        mappingtype = "AARCH64 l2";
246        goto ObjType_Mapping;
247    case ObjType_VNode_AARCH64_l3_Mapping:
248        mappingtype = "AARCH64 l3";
249        goto ObjType_Mapping;
250
251    case ObjType_VNode_VTd_root_table_Mapping:
252        mappingtype = "VTd root table";
253        goto ObjType_Mapping;
254    case ObjType_VNode_VTd_ctxt_table_Mapping:
255        mappingtype = "VTd ctxt table";
256        goto ObjType_Mapping;
257ObjType_Mapping:
258        return snprintf(buf, len, "%s Mapping (%s cap @%p, "
259                                  "ptable cap @0x%p, entry=%hu, pte_count=%hu)",
260                                  mappingtype, mappingtype,
261                                  cap->u.frame_mapping.cap,
262                                  cap->u.frame_mapping.ptable,
263                                  cap->u.frame_mapping.entry,
264                                  cap->u.frame_mapping.pte_count);
265
266    case ObjType_IRQTable:
267        return snprintf(buf, len, "IRQTable cap");
268
269    case ObjType_IRQDest:
270        return snprintf(buf, len, "IRQDest cap (vec: %"PRIu64", cpu: %"PRIu64")",
271                cap->u.irqdest.vector, cap->u.irqdest.cpu);
272
273    case ObjType_EndPointLMP:
274        return snprintf(buf, len, "EndPoint cap (disp %p offset 0x%" PRIxLVADDR ")",
275                        cap->u.endpointlmp.listener, cap->u.endpointlmp.epoffset);
276
277    case ObjType_IO:
278        return snprintf(buf, len, "IO cap (0x%hx-0x%hx)",
279                        cap->u.io.start, cap->u.io.end);
280
281    case ObjType_Kernel:
282        return snprintf(buf, len, "Kernel cap");
283
284    case ObjType_KernelControlBlock:
285        return snprintf(buf, len, "Kernel control block");
286
287    case ObjType_ID:
288        return snprintf(buf, len, "ID capability (coreid 0x%" PRIxCOREID
289                        " core_local_id 0x%" PRIx32 ")", cap->u.id.coreid,
290                        cap->u.id.core_local_id);
291    case ObjType_ProcessManager:
292        return snprintf(buf, len, "Process manager capability");
293
294    case ObjType_Domain:
295        return snprintf(buf, len, "Domain capability (coreid 0x%" PRIxCOREID
296                        " core_local_id 0x%" PRIx32 ")", cap->u.domain.coreid,
297                        cap->u.domain.core_local_id);
298
299    case ObjType_PerfMon:
300        return snprintf(buf, len, "PerfMon cap");
301
302    case ObjType_Null:
303        return snprintf(buf, len, "Null capability (empty slot)");
304
305    case ObjType_IPI:
306        return snprintf(buf, len, "IPI cap");
307
308    case ObjType_DeviceID:
309        return snprintf(buf, len, "DeviceID %u.%u.%u",
310                        cap->u.deviceid.bus, cap->u.deviceid.device,
311                        cap->u.deviceid.function);
312    case ObjType_DeviceIDManager:
313        return snprintf(buf, len, "DeviceID Manager cap");
314
315
316        default:
317        return snprintf(buf, len, "UNKNOWN TYPE! (%d)", cap->type);
318    }
319}
320
321void caps_trace(const char *func, int line, struct cte *cte, const char *msg)
322{
323    char cap_buf[512];
324    sprint_cap(cap_buf, 512, &cte->cap);
325
326    char disp_buf[64];
327    if (dcb_current) {
328        dispatcher_handle_t handle = dcb_current->disp;
329        struct dispatcher_shared_generic *disp =
330            get_dispatcher_shared_generic(handle);
331        snprintf(disp_buf, 64, "from %.*s", DISP_NAME_LEN, disp->name);
332    }
333    else {
334        strcpy(disp_buf, "no disp");
335    }
336
337    printk(LOG_WARN, "%s: %s:%d: %s %p %s"
338           " (owner:%" PRIuCOREID ", rc:%d/ra:%d/rd:%d)\n",
339           disp_buf, func, line, (msg ? : ""), cte, cap_buf, cte->mdbnode.owner,
340           cte->mdbnode.remote_copies, cte->mdbnode.remote_ancs,
341           cte->mdbnode.remote_descs);
342}
343
344/**
345 * ID capability core_local_id counter.
346 */
347static uint32_t id_cap_counter = 1;
348
349/**
350 * Domain capability core_local_id counter.
351 */
352static uint32_t domain_cap_counter = 1;
353
354/**
355 * Tracing sequence number for retypes
356 */
357static uint64_t retype_seqnum = 0;
358
359/**
360 *  Sets #dest equal to #src
361 *
362 * #dest cannot be in use.
363 */
364static errval_t set_cap(struct capability *dest, struct capability *src)
365{
366    /* Parameter checking */
367    assert(src  != NULL);
368    assert(dest != NULL);
369
370    debug(SUBSYS_CAPS, "Copying cap from %#"PRIxLPADDR" to %#"PRIxLPADDR"\n",
371            mem_to_local_phys((lvaddr_t)cte_for_cap(src)),
372            mem_to_local_phys((lvaddr_t)cte_for_cap(dest)));
373
374    // Reserved object bits must always be greater/equal to actual object size
375    assert((1UL << OBJBITS_CTE) >= sizeof(struct cte));
376
377    // Cannot overwrite an already existing cap
378    if (dest->type != ObjType_Null) {
379        return SYS_ERR_SLOT_IN_USE;
380    }
381
382    memcpy(dest, src, sizeof(struct capability));
383    return SYS_ERR_OK;
384}
385
386/**
387 * \brief Determine how many objects can be created in a specified region.
388 *
389 * This function computes the number of objects that can be created by a call
390 * to caps_create().
391 *
392 * \param type          Type of objects to create.
393 * \param srcsize       Size of memory area in bytes
394 * \param objsize       For variable-sized objects, size multiplier
395 *
396 * \return Number of objects to be created, or zero on error
397 */
398
399// If you create more capability types you need to deal with them
400// in the table below.
401STATIC_ASSERT(68 == ObjType_Num, "Knowledge of all cap types");
402static size_t caps_max_numobjs(enum objtype type, gensize_t srcsize, gensize_t objsize)
403{
404    switch(type) {
405    case ObjType_PhysAddr:
406    case ObjType_RAM:
407    case ObjType_Frame:
408    case ObjType_EndPointUMP:
409    case ObjType_DevFrame:
410        if (objsize > srcsize) {
411            return 0;
412        } else {
413            return srcsize / objsize;
414        }
415
416    case ObjType_L1CNode:
417        if (srcsize < OBJSIZE_L2CNODE || objsize < OBJSIZE_L2CNODE) {
418            // disallow L1 CNode to be smaller than 16kB.
419            return 0;
420        } else {
421            return srcsize / objsize;
422        }
423
424    case ObjType_L2CNode:
425        if (srcsize < OBJSIZE_L2CNODE || objsize != OBJSIZE_L2CNODE) {
426            // disallow L2 CNode creation if source too small or objsize wrong
427            return 0;
428        } else {
429            return srcsize / objsize;
430        }
431    case ObjType_VNode_VTd_root_table :
432    case ObjType_VNode_VTd_ctxt_table :
433    case ObjType_VNode_x86_64_pml5:
434    case ObjType_VNode_x86_64_pml4:
435    case ObjType_VNode_x86_64_pdpt:
436    case ObjType_VNode_x86_64_pdir:
437    case ObjType_VNode_x86_64_ptable:
438    case ObjType_VNode_x86_64_ept_pml4:
439    case ObjType_VNode_x86_64_ept_pdpt:
440    case ObjType_VNode_x86_64_ept_pdir:
441    case ObjType_VNode_x86_64_ept_ptable:
442    case ObjType_VNode_x86_32_pdpt:
443    case ObjType_VNode_x86_32_pdir:
444    case ObjType_VNode_x86_32_ptable:
445    case ObjType_VNode_ARM_l1:
446    case ObjType_VNode_ARM_l2:
447    case ObjType_VNode_AARCH64_l0:
448    case ObjType_VNode_AARCH64_l1:
449    case ObjType_VNode_AARCH64_l2:
450    case ObjType_VNode_AARCH64_l3:
451    {
452        if (srcsize < vnode_objsize(type)) {
453            return 0;
454        } else {
455            return srcsize / vnode_objsize(type);
456        }
457    }
458
459    case ObjType_Dispatcher:
460        if (srcsize < OBJSIZE_DISPATCHER) {
461            return 0;
462        } else {
463            return srcsize / OBJSIZE_DISPATCHER;
464        }
465
466    case ObjType_KernelControlBlock:
467        if (srcsize < OBJSIZE_KCB) {
468            return 0;
469        } else {
470            return srcsize / OBJSIZE_KCB;
471        }
472
473    case ObjType_Domain:
474        return L2_CNODE_SLOTS;
475
476    case ObjType_Kernel:
477    case ObjType_IRQTable:
478    case ObjType_IRQDest:
479    case ObjType_IRQSrc:
480    case ObjType_IO:
481    case ObjType_EndPointLMP:
482    case ObjType_ID:
483    case ObjType_Notify_IPI:
484    case ObjType_PerfMon:
485    case ObjType_IPI:
486    case ObjType_ProcessManager:
487    case ObjType_DeviceID:
488    case ObjType_DeviceIDManager:
489    case ObjType_VNode_ARM_l1_Mapping:
490    case ObjType_VNode_ARM_l2_Mapping:
491    case ObjType_VNode_AARCH64_l0_Mapping:
492    case ObjType_VNode_AARCH64_l1_Mapping:
493    case ObjType_VNode_AARCH64_l2_Mapping:
494    case ObjType_VNode_AARCH64_l3_Mapping:
495    case ObjType_VNode_x86_64_pml4_Mapping:
496    case ObjType_VNode_x86_64_pdpt_Mapping:
497    case ObjType_VNode_x86_64_pdir_Mapping:
498    case ObjType_VNode_x86_64_ptable_Mapping:
499    case ObjType_VNode_x86_64_ept_pml4_Mapping:
500    case ObjType_VNode_x86_64_ept_pdpt_Mapping:
501    case ObjType_VNode_x86_64_ept_pdir_Mapping:
502    case ObjType_VNode_x86_64_ept_ptable_Mapping:
503    case ObjType_VNode_x86_32_pdpt_Mapping:
504    case ObjType_VNode_x86_32_pdir_Mapping:
505    case ObjType_VNode_x86_32_ptable_Mapping:
506    case ObjType_DevFrame_Mapping:
507    case ObjType_Frame_Mapping:
508        return 1;
509
510    default:
511        panic("invalid type");
512        return 0;
513    }
514}
515
516/**
517 * \brief Initialize the objects for which local caps are about to be created.
518 *
519 * For the meaning of the parameters, see the 'caps_create' function.
520 */
521STATIC_ASSERT(68 == ObjType_Num, "Knowledge of all cap types");
522static errval_t caps_zero_objects(enum objtype type, lpaddr_t lpaddr,
523                                  gensize_t objsize, size_t count)
524{
525    TRACE(KERNEL_CAPOPS, ZERO_OBJECTS, retype_seqnum);
526    assert(type < ObjType_Num);
527
528    // Virtual address of the memory the kernel object resides in
529    // XXX: A better of doing this,
530    // this is creating caps that the kernel cannot address.
531    // It assumes that the cap is not of the type which will have to zeroed out.
532    lvaddr_t lvaddr;
533    if(lpaddr < PADDR_SPACE_LIMIT) {
534        lvaddr = local_phys_to_mem(lpaddr);
535    } else {
536        lvaddr = 0;
537    }
538
539    switch (type) {
540
541    case ObjType_Frame:
542    case ObjType_EndPointUMP :
543        debug(SUBSYS_CAPS, "Frame: zeroing %zu bytes @%#"PRIxLPADDR"\n",
544                (size_t)objsize * count, lpaddr);
545        TRACE(KERNEL, BZERO, 1);
546        memset((void*)lvaddr, 0, objsize * count);
547        TRACE(KERNEL, BZERO, 0);
548        break;
549
550    case ObjType_L1CNode:
551    case ObjType_L2CNode:
552        debug(SUBSYS_CAPS, "L%dCNode: zeroing %zu bytes @%#"PRIxLPADDR"\n",
553                type == ObjType_L1CNode ? 1 : 2, (size_t)objsize * count,
554                lpaddr);
555        TRACE(KERNEL, BZERO, 1);
556        memset((void*)lvaddr, 0, objsize * count);
557        TRACE(KERNEL, BZERO, 0);
558        break;
559
560    case ObjType_VNode_ARM_l1:
561    case ObjType_VNode_ARM_l2:
562    case ObjType_VNode_AARCH64_l0:
563    case ObjType_VNode_AARCH64_l1:
564    case ObjType_VNode_AARCH64_l2:
565    case ObjType_VNode_AARCH64_l3:
566    case ObjType_VNode_x86_32_ptable:
567    case ObjType_VNode_x86_32_pdir:
568    case ObjType_VNode_x86_32_pdpt:
569    case ObjType_VNode_x86_64_ptable:
570    case ObjType_VNode_x86_64_pdir:
571    case ObjType_VNode_x86_64_pdpt:
572    case ObjType_VNode_x86_64_pml4:
573    case ObjType_VNode_x86_64_ept_ptable:
574    case ObjType_VNode_x86_64_ept_pdir:
575    case ObjType_VNode_x86_64_ept_pdpt:
576    case ObjType_VNode_x86_64_ept_pml4:
577    case ObjType_VNode_x86_64_pml5:
578    case ObjType_VNode_VTd_root_table:
579    case ObjType_VNode_VTd_ctxt_table:
580        // objsize is size of VNode; but not given as such
581        objsize = vnode_objsize(type);
582        debug(SUBSYS_CAPS, "VNode: zeroing %zu bytes @%#"PRIxLPADDR"\n",
583                (size_t)objsize * count, lpaddr);
584        TRACE(KERNEL, BZERO, 1);
585        memset((void*)lvaddr, 0, objsize * count);
586        TRACE(KERNEL, BZERO, 0);
587        break;
588
589    case ObjType_Dispatcher:
590        debug(SUBSYS_CAPS, "Dispatcher: zeroing %zu bytes @%#"PRIxLPADDR"\n",
591                ((size_t) OBJSIZE_DISPATCHER) * count, lpaddr);
592        TRACE(KERNEL, BZERO, 1);
593        memset((void*)lvaddr, 0, OBJSIZE_DISPATCHER * count);
594        TRACE(KERNEL, BZERO, 0);
595        break;
596
597    case ObjType_KernelControlBlock:
598        debug(SUBSYS_CAPS, "KCB: zeroing %zu bytes @%#"PRIxLPADDR"\n",
599                ((size_t) OBJSIZE_KCB) * count, lpaddr);
600        TRACE(KERNEL, BZERO, 1);
601        memset((void*)lvaddr, 0, OBJSIZE_KCB * count);
602        TRACE(KERNEL, BZERO, 0);
603        break;
604
605    default:
606        debug(SUBSYS_CAPS, "Not zeroing %zu bytes @%#"PRIxLPADDR" for type %d\n",
607                (size_t)objsize * count, lpaddr, (int)type);
608        break;
609
610    }
611
612    TRACE(KERNEL_CAPOPS, ZERO_OBJECTS_DONE, retype_seqnum);
613    return SYS_ERR_OK;
614}
615
616/**
617 * \brief Create capabilities to kernel objects.
618 *
619 * This function creates 'count' kernel objects of 'type' into the memory
620 * area, based at 'addr' and of size 'objsize'. For each created kernel
621 * object, a capability is created to it and put consecutively into the array
622 * of CTEs pointed to by 'caps'. The array needs to have the appropriate size
623 * to hold all created caps. Some kernel objects can have a variable size. In
624 * that case, 'objsize' should be non-zero. and give the size multiplier. *
625 *
626 * \param type          Type of objects to create.
627 * \param lpaddr        Base address in the local address space.
628 * \param size          Size of memory area as bytes.
629 * \param objsize       For variable-sized objects, size in bytes.
630 * \param count         Number of objects to be created
631 *                      (count <= caps_max_numobjs(type, size, objsize))
632 * \param dest_caps     Pointer to array of CTEs to hold created caps.
633 *
634 * \return Error code
635 */
636// If you create more capability types you need to deal with them
637// in the table below.
638STATIC_ASSERT(68 == ObjType_Num, "Knowledge of all cap types");
639
640static errval_t caps_create(enum objtype type, lpaddr_t lpaddr, gensize_t size,
641                            gensize_t objsize, size_t count, coreid_t owner,
642                            struct cte *dest_caps)
643{
644    errval_t err;
645
646    /* Parameter checking */
647    assert(dest_caps != NULL);
648    assert(type != ObjType_Null);
649    assert(type < ObjType_Num);
650    assert(count > 0);
651    // objsize is 0 for non-sized types (e.g. VNodes)
652    // TODO cleanup semantics for type == CNode
653    //assert(objsize % BASE_PAGE_SIZE == 0);
654    assert(!type_is_mapping(type));
655
656    genpaddr_t genpaddr = local_phys_to_gen_phys(lpaddr);
657
658    debug(SUBSYS_CAPS, "creating caps for %#"PRIxGENPADDR
659                       ", %" PRIuGENSIZE " bytes, objsize=%"PRIuGENSIZE
660                       ", count=%zu, owner=%d, type=%d\n",
661            genpaddr, size, objsize, count, (int)owner, (int)type);
662
663    // Virtual address of the memory the kernel object resides in
664    // XXX: A better of doing this,
665    // this is creating caps that the kernel cannot address.
666    // It assumes that the cap is not of the type which will have to zeroed out.
667    lvaddr_t lvaddr;
668    if(lpaddr < PADDR_SPACE_LIMIT) {
669        lvaddr = local_phys_to_mem(lpaddr);
670    } else {
671        lvaddr = 0;
672    }
673
674    /* Initialize the created capability */
675    struct capability temp_cap;
676    memset(&temp_cap, 0, sizeof(struct capability));
677    temp_cap.type = type;
678    // XXX: Handle rights!
679    temp_cap.rights = CAPRIGHTS_ALLRIGHTS;
680
681    debug(SUBSYS_CAPS, "owner = %d, my_core_id = %d\n", owner, my_core_id);
682    if (owner == my_core_id) {
683        // If we're creating new local objects, they need to be cleared
684        err = caps_zero_objects(type, lpaddr, objsize, count);
685        if (err_is_fail(err)) {
686            return err;
687        }
688    }
689
690    size_t dest_i = 0;
691    err = SYS_ERR_OK;
692    bool is_ept = false;
693
694    /* Set the type specific fields and insert into #dest_caps */
695    switch(type) {
696    case ObjType_Frame:
697        for(dest_i = 0; dest_i < count; dest_i++) {
698            // Initialize type specific fields
699            temp_cap.u.frame.base = genpaddr + dest_i * objsize;
700            temp_cap.u.frame.bytes = objsize;
701            assert((get_size(&temp_cap) & BASE_PAGE_MASK) == 0);
702            // Insert the capability
703            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
704            if (err_is_fail(err)) {
705                break;
706            }
707        }
708        break;
709    case ObjType_EndPointUMP:
710        for(dest_i = 0; dest_i < count; dest_i++) {
711            // Initialize type specific fields
712            temp_cap.u.endpointump.base = genpaddr + dest_i * objsize;
713            temp_cap.u.endpointump.bytes = objsize;
714            assert((get_size(&temp_cap) & BASE_PAGE_MASK) == 0);
715            // Insert the capability
716            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
717            if (err_is_fail(err)) {
718                break;
719            }
720        }
721        break;
722
723    case ObjType_PhysAddr:
724        for(dest_i = 0; dest_i < count; dest_i++) {
725            // Initialize type specific fields
726            temp_cap.u.physaddr.base = genpaddr + dest_i * objsize;
727            temp_cap.u.physaddr.bytes = objsize;
728            // Insert the capability
729            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
730            if (err_is_fail(err)) {
731                break;
732            }
733        }
734        break;
735
736    case ObjType_RAM:
737        for(dest_i = 0; dest_i < count; dest_i++) {
738            // Initialize type specific fields
739            temp_cap.u.ram.base = genpaddr + dest_i * objsize;
740            temp_cap.u.ram.bytes = objsize;
741            // Insert the capabilities
742            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
743            if (err_is_fail(err)) {
744                break;
745            }
746        }
747        break;
748
749    case ObjType_DevFrame:
750        for(dest_i = 0; dest_i < count; dest_i++) {
751            // Initialize type specific fields
752            temp_cap.u.devframe.base = genpaddr + dest_i * objsize;
753            temp_cap.u.devframe.bytes = objsize;
754            // Insert the capabilities
755            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
756            if (err_is_fail(err)) {
757                break;
758            }
759        }
760        break;
761
762    case ObjType_L1CNode:
763        for (dest_i = 0; dest_i < count; dest_i++) {
764            assert(objsize >= OBJSIZE_L2CNODE);
765            assert(objsize % OBJSIZE_L2CNODE == 0);
766            temp_cap.u.l1cnode.cnode = lpaddr + dest_i * objsize;
767            temp_cap.u.l1cnode.allocated_bytes = objsize;
768            // XXX: implement CNode cap rights
769            temp_cap.u.l1cnode.rightsmask = CAPRIGHTS_ALLRIGHTS;
770            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
771            if (err_is_fail(err)) {
772                break;
773            }
774        }
775        break;
776
777    case ObjType_L2CNode:
778        for (dest_i = 0; dest_i < count; dest_i++) {
779            temp_cap.u.l2cnode.cnode = lpaddr + dest_i * objsize;
780            // XXX: implement CNode cap rights
781            temp_cap.u.l2cnode.rightsmask = CAPRIGHTS_ALLRIGHTS;
782            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
783            if (err_is_fail(err)) {
784                break;
785            }
786        }
787        break;
788
789    case ObjType_VNode_ARM_l1:
790    {
791        size_t objsize_vnode = vnode_objsize(type);
792
793        for(dest_i = 0; dest_i < count; dest_i++) {
794            // Initialize type specific fields
795            temp_cap.u.vnode_arm_l1.base =
796                genpaddr + dest_i * objsize_vnode;
797
798
799            // Insert the capability
800            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
801            if (err_is_fail(err)) {
802                break;
803            }
804        }
805
806        break;
807    }
808
    // VNode creation cases: each carves the source region into
    // vnode_objsize(type)-sized tables and creates one cap per table.
    // NOTE: dest_i deliberately persists past each loop — the cleanup code
    // after the switch uses it to undo partially created caps on error.
    case ObjType_VNode_ARM_l2:
    {
        // Size in bytes of one ARM L2 page table.
        size_t objsize_vnode = vnode_objsize(type);

        for(dest_i = 0; dest_i < count; dest_i++) {
            // Initialize type specific fields
            temp_cap.u.vnode_arm_l2.base =
                genpaddr + dest_i * objsize_vnode;

            // Insert the capability
            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
            if (err_is_fail(err)) {
                break;
            }
        }
        break;
    }

    // AArch64 level-0 translation tables.
    case ObjType_VNode_AARCH64_l0:
    {
        size_t objsize_vnode = vnode_objsize(type);

        for(dest_i = 0; dest_i < count; dest_i++) {
            // Initialize type specific fields
            temp_cap.u.vnode_aarch64_l0.base =
                genpaddr + dest_i * objsize_vnode;

            // Insert the capability
            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
            if (err_is_fail(err)) {
                break;
            }
        }

        break;
    }

    // AArch64 level-1 translation tables.
    case ObjType_VNode_AARCH64_l1:
    {
        size_t objsize_vnode = vnode_objsize(type);

        for(dest_i = 0; dest_i < count; dest_i++) {
            // Initialize type specific fields
            temp_cap.u.vnode_aarch64_l1.base =
                genpaddr + dest_i * objsize_vnode;

            // Insert the capability
            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
            if (err_is_fail(err)) {
                break;
            }
        }

        break;
    }

    // AArch64 level-2 translation tables.
    case ObjType_VNode_AARCH64_l2:
    {
        size_t objsize_vnode = vnode_objsize(type);

        for(dest_i = 0; dest_i < count; dest_i++) {
            // Initialize type specific fields
            temp_cap.u.vnode_aarch64_l2.base =
                genpaddr + dest_i * objsize_vnode;

            // Insert the capability
            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);

            if (err_is_fail(err)) {
                break;
            }
        }
        break;
    }

    // AArch64 level-3 (leaf) translation tables.
    case ObjType_VNode_AARCH64_l3:
    {
        size_t objsize_vnode = vnode_objsize(type);

        for(dest_i = 0; dest_i < count; dest_i++) {
            // Initialize type specific fields
            temp_cap.u.vnode_aarch64_l3.base =
                genpaddr + dest_i * objsize_vnode;

            // Insert the capability
            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
            if (err_is_fail(err)) {
                break;
            }
        }
        break;
    }
901
    // x86-32 page tables (leaf level).
    case ObjType_VNode_x86_32_ptable:
    {
        size_t objsize_vnode = vnode_objsize(type);

        for(dest_i = 0; dest_i < count; dest_i++) {
            // Initialize type specific fields
            temp_cap.u.vnode_x86_32_ptable.base =
                genpaddr + dest_i * objsize_vnode;

            // Insert the capability
            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
            if (err_is_fail(err)) {
                break;
            }
        }
        break;
    }

    // x86-32 page directories. On non-PAE i386 these are the top-level
    // tables, so kernel/memory mappings are stamped in at creation time.
    case ObjType_VNode_x86_32_pdir:
    {
        size_t objsize_vnode = vnode_objsize(type);

        for(dest_i = 0; dest_i < count; dest_i++) {
            // Initialize type specific fields
            temp_cap.u.vnode_x86_32_pdir.base =
                genpaddr + dest_i * objsize_vnode;

#if defined(__i386__) && !defined(CONFIG_PAE)
            // Make it a good PDE by inserting kernel/mem VSpaces
            lpaddr = gen_phys_to_local_phys(temp_cap.u.vnode_x86_32_pdir.base);
            paging_x86_32_make_good_pdir(lpaddr);
#endif

            // Insert the capability
            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
            if (err_is_fail(err)) {
                break;
            }
        }
        break;
    }
943
944    case ObjType_VNode_x86_32_pdpt:
945    {
946        size_t objsize_vnode = vnode_objsize(type);
947
948        for(dest_i = 0; dest_i < count; dest_i++) {
949            // Initialize type specific fields
950            temp_cap.u.vnode_x86_32_pdir.base =
951                genpaddr + dest_i * objsize_vnode;
952
953#if defined(__i386__) && defined(CONFIG_PAE)
954            // Make it a good PDPTE by inserting kernel/mem VSpaces
955            lpaddr_t var =
956                gen_phys_to_local_phys(temp_cap.u.vnode_x86_32_pdpt.base);
957            paging_x86_32_make_good_pdpte(var);
958#endif
959
960            // Insert the capability
961            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
962            if (err_is_fail(err)) {
963                break;
964            }
965        }
966        break;
967    }
968
969    case ObjType_VNode_x86_64_ptable:
970        is_ept = true;
971    case ObjType_VNode_x86_64_ept_ptable:
972    {
973        size_t objsize_vnode = vnode_objsize(type);
974
975        for(dest_i = 0; dest_i < count; dest_i++) {
976            // Initialize type specific fields
977            temp_cap.u.vnode_x86_64_ptable.base =
978                genpaddr + dest_i * objsize_vnode;
979
980            // Insert the capability
981            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
982            if (err_is_fail(err)) {
983                break;
984            }
985        }
986        break;
987    }
988
989    case ObjType_VNode_x86_64_pdir:
990        is_ept = true;
991    case ObjType_VNode_x86_64_ept_pdir:
992    {
993        size_t objsize_vnode = vnode_objsize(type);
994
995        for(dest_i = 0; dest_i < count; dest_i++) {
996            // Initialize type specific fields
997            temp_cap.u.vnode_x86_64_pdir.base =
998                genpaddr + dest_i * objsize_vnode;
999
1000            // Insert the capability
1001            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
1002            if (err_is_fail(err)) {
1003                break;
1004            }
1005        }
1006        break;
1007    }
1008
1009    case ObjType_VNode_x86_64_pdpt:
1010        is_ept = true;
1011    case ObjType_VNode_x86_64_ept_pdpt:
1012    {
1013        size_t objsize_vnode = vnode_objsize(type);
1014
1015        for(dest_i = 0; dest_i < count; dest_i++) {
1016            // Initialize type specific fields
1017            temp_cap.u.vnode_x86_64_pdpt.base =
1018                genpaddr + dest_i * objsize_vnode;
1019
1020            // Insert the capability
1021            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
1022            if (err_is_fail(err)) {
1023                break;
1024            }
1025        }
1026        break;
1027    }
1028
1029    case ObjType_VNode_x86_64_ept_pml4:
1030        is_ept = true;
1031    case ObjType_VNode_x86_64_pml4:
1032    {
1033        size_t objsize_vnode = vnode_objsize(type);
1034
1035        for(dest_i = 0; dest_i < count; dest_i++) {
1036            // Initialize type specific fields
1037            temp_cap.u.vnode_x86_64_pml4.base =
1038                genpaddr + dest_i * objsize_vnode;
1039
1040#if defined(__x86_64__) || defined(__k1om__)
1041            // Make it a good PML4 by inserting kernel/mem VSpaces
1042            lpaddr_t var = gen_phys_to_local_phys(get_address(&temp_cap));
1043            paging_x86_64_make_good_pml4(var);
1044            if (is_ept) {
1045                paging_x86_64_make_good_ept_pml4(var);
1046            } else {
1047                paging_x86_64_make_good_pml4(var);
1048            }
1049#endif
1050
1051            // Insert the capability
1052            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
1053            if (err_is_fail(err)) {
1054                break;
1055            }
1056        }
1057
1058        break;
1059    }
    // x86-64 5-level paging root tables (PML5).
    case ObjType_VNode_x86_64_pml5:
    {
        size_t objsize_vnode = vnode_objsize(type);

        for(dest_i = 0; dest_i < count; dest_i++) {
            // Initialize type specific fields
            temp_cap.u.vnode_x86_64_pml5.base =
                    genpaddr + dest_i * objsize_vnode;

            // Insert the capability
            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
            if (err_is_fail(err)) {
                break;
            }
        }

        break;
    }
    // VT-d (IOMMU) root tables.
    case ObjType_VNode_VTd_root_table:
    {
        size_t objsize_vnode = vnode_objsize(type);

        for(dest_i = 0; dest_i < count; dest_i++) {
            // Initialize type specific fields
            temp_cap.u.vnode_vtd_root_table.base =
                    genpaddr + dest_i * objsize_vnode;

            // Insert the capability
            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
            if (err_is_fail(err)) {
                break;
            }
        }

        break;
    }
    // VT-d (IOMMU) context tables.
    case ObjType_VNode_VTd_ctxt_table:
    {
        size_t objsize_vnode = vnode_objsize(type);

        for(dest_i = 0; dest_i < count; dest_i++) {
            // Initialize type specific fields
            temp_cap.u.vnode_vtd_ctxt_table.base =
                    genpaddr + dest_i * objsize_vnode;

            // Insert the capability
            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
            if (err_is_fail(err)) {
                break;
            }
        }

        break;
    }
1114
    // Dispatcher objects: one DCB per OBJSIZE_DISPATCHER chunk of the
    // (kernel-virtual) region at lvaddr.
    case ObjType_Dispatcher:
        assert(OBJSIZE_DISPATCHER >= sizeof(struct dcb));

        for(dest_i = 0; dest_i < count; dest_i++) {
            // Initialize type specific fields
            temp_cap.u.dispatcher.dcb = (struct dcb *)
                (lvaddr + dest_i * OBJSIZE_DISPATCHER);
            // Insert the capability
            err = set_cap(&dest_caps[dest_i].cap, &temp_cap);
            if (err_is_fail(err)) {
                break;
            }
        }
        break;

    case ObjType_ID:
        // ID type does not refer to a kernel object
        assert(lpaddr  == 0);
        assert(size    == 0);
        assert(objsize == 0);
        assert(count   == 1);

        // Prevent wrap around
        if (id_cap_counter >= UINT32_MAX) {
            return SYS_ERR_ID_SPACE_EXHAUSTED;
        }

        // Generate a new ID, core_local_id monotonically increases
        temp_cap.u.id.coreid = my_core_id;
        temp_cap.u.id.core_local_id = id_cap_counter++;

        // Insert the capability.
        // NOTE(review): unlike the misc-type case below, this path does not
        // set dest_i on success, so the post-switch owner-assignment loop
        // (which iterates up to dest_i) appears to skip ID caps — confirm
        // whether that is intended.
        err = set_cap(&dest_caps->cap, &temp_cap);
        break;
1149
    case ObjType_Domain:
        // Domain type does not refer to a kernel object
        assert(lpaddr  == 0);
        assert(size    == 0);
        assert(objsize == 0);
        assert(count   <= L2_CNODE_SLOTS);

        // Prevent wrap around
        if (domain_cap_counter + count >= UINT32_MAX) {
            return SYS_ERR_DOMAIN_SPACE_EXHAUSTED;
        }

        // NOTE(review): this loop uses a local index `i`, not dest_i, so
        // dest_i keeps whatever value it had on entry — verify the
        // post-switch cleanup/owner loops behave as intended for Domain.
        for(size_t i = 0; i < count; i++) {
            // Initialize type specific fields
            temp_cap.u.domain.coreid = my_core_id;
            temp_cap.u.domain.core_local_id = domain_cap_counter++;
            // Insert the capability
            err = set_cap(&dest_caps[i].cap, &temp_cap);
            if (err_is_fail(err)) {
                break;
            }
        }
        break;
    case ObjType_IO:
        // IO caps span the full x86 I/O port range by default.
        temp_cap.u.io.start = 0;
        temp_cap.u.io.end   = 65535;
        /* fall through */

    case ObjType_IRQSrc:
        /* Caller has to set vec_start and vec_end */
    case ObjType_Kernel:
    case ObjType_IPI:
    case ObjType_IRQTable:
    case ObjType_IRQDest:
    case ObjType_EndPointLMP:
    case ObjType_Notify_IPI:
    case ObjType_PerfMon:
    case ObjType_ProcessManager:
    case ObjType_DeviceID :
    case ObjType_DeviceIDManager :
        // These types do not refer to a kernel object
        assert(lpaddr  == 0);
        assert(size    == 0);
        assert(objsize == 0);
        assert(count   == 1);

        // Insert the capability
        err = set_cap(&dest_caps->cap, &temp_cap);
        if (err_is_ok(err)) {
            // Record the single successful creation for the owner loop below.
            dest_i = 1;
        }
        break;

    case ObjType_KernelControlBlock:
        assert(OBJSIZE_KCB >= sizeof(struct kcb));

        for(size_t i = 0; i < count; i++) {
            // Initialize type specific fields
            temp_cap.u.kernelcontrolblock.kcb = (struct kcb *)
                (lvaddr + i * OBJSIZE_KCB);
            // Insert the capability
            err = set_cap(&dest_caps[i].cap, &temp_cap);
            if (err_is_fail(err)) {
                return err;
            }
        }
        // NOTE(review): early return bypasses the owner-assignment loop
        // below — confirm KCB caps are meant to keep the default owner.
        return SYS_ERR_OK;

    default:
        panic("Unhandled capability type or capability of this type cannot"
              " be created");
    }

    if (err_is_fail(err)) {
        // Revert the partially initialized caps to zero.
        // dest_i holds the number of caps successfully created above.
        for (size_t i = 0; i < dest_i; i++) {
            memset(&dest_caps[i], 0, sizeof(dest_caps[i]));
        }
        return err;
    }
    else {
        // Set the owner for all the new caps
        for (size_t i = 0; i < dest_i; i++) {
            dest_caps[i].mdbnode.owner = owner;
        }
    }

    return SYS_ERR_OK;
}
1239
1240/**
1241 * Look up a capability in two-level cspace rooted at `rootcn`.
1242 */
errval_t caps_lookup_slot(struct capability *rootcn, capaddr_t cptr,
                          uint8_t level, struct cte **ret, CapRights rights)
{
    TRACE(KERNEL, CAP_LOOKUP_SLOT, 0);

    // Split the cspace address into the L1 CNode index (upper bits) and
    // the L2 CNode index (lower L2_CNODE_BITS bits).
    cslot_t l1index, l2index;
    l1index = (cptr >> L2_CNODE_BITS) & MASK(CPTR_BITS-L2_CNODE_BITS);
    l2index = cptr & MASK(L2_CNODE_BITS);

    assert(ret != NULL);
    assert(rootcn != NULL);

    // The two-level cspace supports resolution depths 0..2 only.
    if (level > 2) {
        debug(SUBSYS_CAPS, "%s called with level=%hhu, from %p\n",
                __FUNCTION__, level,
                (void*)kernel_virt_to_elf_addr(__builtin_return_address(0)));
        TRACE(KERNEL, CAP_LOOKUP_SLOT, 1);
        return SYS_ERR_CAP_LOOKUP_DEPTH;
    }
    assert(level <= 2);

    // level 0 means that we do not do any resolution and just return the cte
    // for rootcn.
    if (level == 0) {
        *ret = cte_for_cap(rootcn);
        TRACE(KERNEL, CAP_LOOKUP_SLOT, 1);
        return SYS_ERR_OK;
    }

    // Any deeper lookup requires the root to be an L1 CNode.
    if (rootcn->type != ObjType_L1CNode) {
        debug(SUBSYS_CAPS, "%s: rootcn->type = %d, called from %p\n",
                __FUNCTION__, rootcn->type,
                (void*)kernel_virt_to_elf_addr(__builtin_return_address(0)));
        TRACE(KERNEL, CAP_LOOKUP_SLOT, 1);
        // XXX: think about errors
        return SYS_ERR_CNODE_TYPE;
    }
    assert(rootcn->type == ObjType_L1CNode);

    // L1 CNodes can be resized, so bounds-check the index dynamically.
    if (l1index >= cnode_get_slots(rootcn)) {
        TRACE(KERNEL, CAP_LOOKUP_SLOT, 1);
        debug(SUBSYS_CAPS, "%s: l1index = %"PRIuCSLOT", slots= %zu\n",
                __FUNCTION__, l1index, cnode_get_slots(rootcn));
        return SYS_ERR_L1_CNODE_INDEX;
    }

    /* Apply rights to L1 CNode */
    if ((rootcn->rights & rights) != rights) {
        debug(SUBSYS_CAPS, "caps_lookup_slot: Rights mismatch\n"
              "Passed rights = %u, cnode_cap->rights = %u\n",
              rights, rootcn->rights);
        TRACE(KERNEL, CAP_LOOKUP_SLOT, 1);
        return SYS_ERR_CNODE_RIGHTS;
    }

    struct cte *l2cnode = caps_locate_slot(get_address(rootcn), l1index);

    // level == 1 means that we terminate after looking up the slot in the L1
    // cnode.
    if (level == 1) {
        if (l2cnode->cap.type == ObjType_Null) {
            TRACE(KERNEL, CAP_LOOKUP_SLOT, 1);
            return SYS_ERR_CAP_NOT_FOUND;
        }
        *ret = l2cnode;
        TRACE(KERNEL, CAP_LOOKUP_SLOT, 1);
        return SYS_ERR_OK;
    }

    // L2 CNode in given L1 slot does not exist
    if (l2cnode->cap.type == ObjType_Null) {
        TRACE(KERNEL, CAP_LOOKUP_SLOT, 1);
        debug(SUBSYS_CAPS, "%s: l2cnode is NULL\n", __FUNCTION__);
        return SYS_ERR_CNODE_NOT_FOUND;
    }
    if (l2cnode->cap.type != ObjType_L2CNode) {
        TRACE(KERNEL, CAP_LOOKUP_SLOT, 1);
        debug(SUBSYS_CAPS, "%s: l2cnode->type = %d\n", __FUNCTION__,
               l2cnode->cap.type);
        return SYS_ERR_CNODE_TYPE;
    }
    assert(l2cnode->cap.type == ObjType_L2CNode);

    // L2 CNodes are fixed size, so the masked index is always in range.
    assert(l2index < L2_CNODE_SLOTS);

    /* Apply rights to L2 CNode */
    if ((l2cnode->cap.rights & rights) != rights) {
        debug(SUBSYS_CAPS, "caps_lookup_slot: Rights mismatch\n"
              "Passed rights = %u, cnode_cap->rights = %u\n",
              rights, l2cnode->cap.rights);
        TRACE(KERNEL, CAP_LOOKUP_SLOT, 1);
        return SYS_ERR_CNODE_RIGHTS;
    }

    // Finally resolve the leaf slot inside the L2 CNode.
    struct cte *cte = caps_locate_slot(get_address(&l2cnode->cap), l2index);
    if (cte->cap.type == ObjType_Null) {
        TRACE(KERNEL, CAP_LOOKUP_SLOT, 1);
        return SYS_ERR_CAP_NOT_FOUND;
    }

    *ret = cte;

    TRACE(KERNEL, CAP_LOOKUP_SLOT, 1);
    return SYS_ERR_OK;
}
1348
1349/**
1350 * Wrapper for caps_lookup_slot returning capability instead of cte.
1351 */
1352errval_t caps_lookup_cap(struct capability *cnode_cap, capaddr_t cptr,
1353                         uint8_t level, struct capability **ret, CapRights rights)
1354{
1355    TRACE(KERNEL, CAP_LOOKUP_CAP, 0);
1356
1357    struct cte *ret_cte;
1358    errval_t err = caps_lookup_slot(cnode_cap, cptr, level, &ret_cte, rights);
1359    if (err_is_fail(err)) {
1360        return err;
1361    }
1362    *ret = &ret_cte->cap;
1363    TRACE(KERNEL, CAP_LOOKUP_CAP, 1);
1364    return SYS_ERR_OK;
1365}
1366
1367/**
1368 * \brief Create a capability from an existing capability metadata.
1369 *
1370 * Used when sending capabilities across cores. The metadata is sent across
1371 * cores and the receiving monitor can create the new capability on its core.
1372 *
1373 * \bug Does not check that supplied owner matches existing copies of cap.
1374 */
errval_t caps_create_from_existing(struct capability *root, capaddr_t cnode_cptr,
                                   int cnode_level, cslot_t dest_slot, coreid_t owner,
                                   struct capability *src)
{
    TRACE(KERNEL, CAP_CREATE_FROM_EXISTING, 0);
    errval_t err;

    // Locate the destination CNode in the caller-supplied cspace.
    struct capability *cnode;
    err = caps_lookup_cap(root, cnode_cptr, cnode_level, &cnode,
                          CAPRIGHTS_READ_WRITE);
    if (err_is_fail(err)) {
        return err_push(err, SYS_ERR_SLOT_LOOKUP_FAIL);
    }
    if (cnode->type != ObjType_L1CNode &&
        cnode->type != ObjType_L2CNode)
    {
        return SYS_ERR_CNODE_TYPE;
    }

    struct cte *dest = caps_locate_slot(get_address(cnode), dest_slot);

    // Copy the received metadata into the destination slot.
    err = set_cap(&dest->cap, src);
    if (err_is_fail(err)) {
        return err;
    }

    dest->mdbnode.owner = owner;

    err = mdb_insert(dest);
    assert(err_is_ok(err));

    // Look for an existing local copy adjacent in the MDB; if one exists,
    // its MDB attributes are authoritative and are copied over.
    // Each probe resets neighbour to NULL if the adjacent cte is not an
    // actual copy of dest.
    struct cte *neighbour = NULL;
    if (!neighbour
        && (neighbour = mdb_predecessor(dest))
        && !is_copy(&dest->cap, &neighbour->cap))
    {
        neighbour = NULL;
    }
    if (!neighbour
        && (neighbour = mdb_successor(dest))
        && !is_copy(&dest->cap, &neighbour->cap))
    {
        neighbour = NULL;
    }

    if (neighbour) {
        assert(!neighbour->mdbnode.in_delete);
        assert(neighbour->mdbnode.owner == owner);
        // Inherit the neighbour's remote-relation flags wholesale.
#define CP_ATTR(a) dest->mdbnode.a = neighbour->mdbnode.a
        CP_ATTR(locked);
        CP_ATTR(remote_copies);
        CP_ATTR(remote_ancs);
        CP_ATTR(remote_descs);
#undef CP_ATTR
    }
    else {
        dest->mdbnode.locked = false;
        if (owner != my_core_id) {
            // For foreign caps it does not really matter if ancestors or
            // descendants exist
            dest->mdbnode.remote_copies = true;
            dest->mdbnode.remote_ancs = false;
            dest->mdbnode.remote_descs = false;
        }
        else {
            // We just created a new copy of a owned capability from nothing.
            // This is either caused by a retype, or by sharing a capability
            // that does not care about locality.
            // XXX: This should probably be done more explicitly -MN
            if (distcap_needs_locality(dest->cap.type)) {
                // Retype, so have ancestors and no descendants
                dest->mdbnode.remote_copies = false;
                dest->mdbnode.remote_ancs = true;
                dest->mdbnode.remote_descs = false;
            }
            else {
                dest->mdbnode.remote_copies = false;
                dest->mdbnode.remote_ancs = false;
                dest->mdbnode.remote_descs = false;
            }
        }
    }

    TRACE_CAP_MSG("created", dest);

    TRACE(KERNEL, CAP_CREATE_FROM_EXISTING, 1);
    return SYS_ERR_OK;
}
1462
1463//{{{1 Capability creation
1464
1465/// check arguments, return true iff ok
1466STATIC_ASSERT(68 == ObjType_Num, "Knowledge of all cap types");
1467#ifndef NDEBUG
1468static bool check_caps_create_arguments(enum objtype type,
1469                                        size_t bytes, size_t objsize,
1470                                        bool exact)
1471{
1472    gensize_t base_mask = BASE_PAGE_MASK;
1473    if (type_is_vnode(type)) {
1474        base_mask = vnode_objsize(type) - 1;
1475    }
1476    /* mappable types need to be at least BASE_PAGE_SIZEd */
1477    if (type_is_mappable(type)) {
1478        /* source size not multiple of or not aligned to BASE_PAGE_SIZE */
1479        if (bytes & base_mask) {
1480            debug(SUBSYS_CAPS, "source size not multiple of BASE_PAGE_SIZE\n");
1481            return false;
1482        }
1483        /* objsize > 0 and not multiple of BASE_PAGE_SIZE */
1484        if (objsize > 0 && objsize & base_mask) {
1485            debug(SUBSYS_CAPS, "object size not multiple of BASE_PAGE_SIZE\n");
1486            return false;
1487        }
1488
1489        /* check that bytes can be evenly divided into objsize sized chunks */
1490        if (exact && bytes > 0 && objsize > 0) {
1491            if (bytes % objsize) {
1492                debug(SUBSYS_CAPS, "source size cannot be evenly divided into object size-sized chunks\n");
1493            }
1494            return bytes % objsize == 0;
1495        }
1496
1497        return true;
1498    }
1499
1500    if (type == ObjType_L1CNode) {
1501        /* L1 CNode minimum size is OBJSIZE_L2CNODE */
1502        if (bytes < OBJSIZE_L2CNODE || objsize < OBJSIZE_L2CNODE) {
1503            debug(SUBSYS_CAPS, "source size or L1 CNode objsize < OBJSIZE_L2CNODE\n");
1504            return false;
1505        }
1506        /* check that bytes can be evenly divided into L1 CNodes of objsize */
1507        if (exact && (bytes % objsize != 0)) {
1508            debug(SUBSYS_CAPS, "source not evenly divisible into L1 CNodes of objsize\n");
1509            return false;
1510        }
1511        /* L1 CNode size must be multiple of 1UL << OBJBITS_CTE */
1512        return objsize % (1UL << OBJBITS_CTE) == 0;
1513    }
1514
1515    if (type == ObjType_L2CNode) {
1516        /* L2 CNode size must be OBJSIZE_L2CNODE */
1517        if (bytes < OBJSIZE_L2CNODE || objsize != OBJSIZE_L2CNODE) {
1518            debug(SUBSYS_CAPS, "source size < or L2 CNode objsize != OBJSIZE_L2CNODE\n");
1519            return false;
1520        }
1521        if (exact && (bytes % objsize != 0)) {
1522            debug(SUBSYS_CAPS, "source not evenly divisible into L2 CNodes of objsize\n");
1523            return false;
1524        }
1525        return true;
1526    }
1527
1528    /* special case Dispatcher which is 1kB right now */
1529    if (type == ObjType_Dispatcher) {
1530        if (bytes & (OBJSIZE_DISPATCHER - 1)) {
1531            return false;
1532        }
1533        if (objsize > 0 && objsize != OBJSIZE_DISPATCHER) {
1534            return false;
1535        }
1536
1537        return true;
1538    }
1539
1540    // All other types do not need special alignments/offsets
1541    return true;
1542}
1543#else
1544#define check_caps_create_arguments(a,b,c,d) 0
1545#endif
1546
1547/** Create caps to new kernel objects.
1548 * This takes the size of the memory region in bytes, and the size of
1549 * individual objects in bytes. The following needs to hold:
1550 *      bytes % objbytes == 0
1551 */
1552errval_t caps_create_new(enum objtype type, lpaddr_t addr, size_t bytes,
1553                         size_t objsize, coreid_t owner, struct cte *caps)
1554{
1555    TRACE(KERNEL, CAP_CREATE_NEW, 0);
1556    /* Parameter checking */
1557    assert(type != ObjType_EndPointLMP); // Cap of this type cannot be created
1558    debug(SUBSYS_CAPS, "caps_create_new: type = %d, addr = %#"PRIxLPADDR
1559            ", bytes=%zu, objsize=%zu\n", type, addr, bytes, objsize);
1560
1561    assert(check_caps_create_arguments(type, bytes, objsize, false));
1562    assert(addr == 0 || check_caps_create_arguments(type, bytes, objsize, true));
1563
1564    size_t numobjs = caps_max_numobjs(type, bytes, objsize);
1565    assert(numobjs > 0);
1566    // XXX: Dispatcher creation is kind of hacky right now :(
1567    // Consider allowing non-mappable types to be < BASE_PAGE_SIZE
1568    //if (type == ObjType_Dispatcher) {
1569    //    numobjs = 1;
1570    //}
1571
1572    /* Create the new capabilities */
1573    errval_t err = caps_create(type, addr, bytes, objsize, numobjs, owner, caps);
1574    if (err_is_fail(err)) {
1575        return err;
1576    }
1577
1578    // Handle the mapping database
1579    set_init_mapping(caps, numobjs);
1580
1581    TRACE_CAP_MSG("created", &caps[0]);
1582
1583    TRACE(KERNEL, CAP_CREATE_NEW, 1);
1584    return SYS_ERR_OK;
1585}
1586
1587STATIC_ASSERT(68 == ObjType_Num, "Knowledge of all cap types");
1588/// Retype caps
1589/// Create `count` new caps of `type` from `offset` in src, and put them in
1590/// `dest_cnode` starting at `dest_slot`.
1591errval_t caps_retype(enum objtype type, gensize_t objsize, size_t count,
1592                     struct capability *dest_cnode, cslot_t dest_slot,
1593                     struct cte *src_cte, gensize_t offset,
1594                     bool from_monitor)
1595{
1596    TRACE(KERNEL, CAP_RETYPE, 0);
1597    TRACE(KERNEL_CAPOPS, RETYPE_ENTER, ++retype_seqnum);
1598    size_t maxobjs;
1599    genpaddr_t base = 0;
1600    gensize_t size = 0;
1601    errval_t err;
1602    bool do_range_check = false;
1603    struct capability *src_cap = &src_cte->cap;
1604
1605    /* Parameter checking */
1606    assert(type != ObjType_Null);
1607    assert(type < ObjType_Num);
1608    if (type == ObjType_Null || type >= ObjType_Num) {
1609        TRACE(KERNEL_CAPOPS, RETYPE_DONE, retype_seqnum);
1610        return SYS_ERR_INVALID_RETYPE;
1611    }
1612
1613    debug(SUBSYS_CAPS, "%s: Retyping to type=%d, from offset=%" PRIuGENSIZE
1614            ", objsize=%" PRIuGENSIZE ", count=%zu\n",
1615            __FUNCTION__, type, offset, objsize, count);
1616
1617    /*
1618     * check that offset into source cap is multiple of destination object
1619     * size, or base page size, whichever is smaller.
1620     */
1621    gensize_t dest_obj_alignment = BASE_PAGE_SIZE;
1622    if (type_is_vnode(type) && vnode_objsize(type) < BASE_PAGE_SIZE) {
1623        dest_obj_alignment = vnode_objsize(type);
1624    } else if (type == ObjType_Dispatcher) {
1625        dest_obj_alignment = OBJSIZE_DISPATCHER;
1626    }
1627    if (src_cap->type != ObjType_IRQSrc && offset % dest_obj_alignment != 0) {
1628        TRACE(KERNEL_CAPOPS, RETYPE_DONE, retype_seqnum);
1629        return SYS_ERR_RETYPE_INVALID_OFFSET;
1630    }
1631    assert(offset % dest_obj_alignment == 0 || src_cap->type == ObjType_IRQSrc);
1632
1633    // check that size is multiple of BASE_PAGE_SIZE for mappable types
1634    gensize_t base_size = BASE_PAGE_SIZE;
1635    if (type_is_vnode(type)) {
1636        base_size = vnode_objsize(type);
1637    }
1638    if (type_is_mappable(type) && objsize % base_size != 0) {
1639        debug(SUBSYS_CAPS, "%s: objsize = %"PRIuGENSIZE"\n", __FUNCTION__, objsize);
1640        TRACE(KERNEL_CAPOPS, RETYPE_DONE, retype_seqnum);
1641        return SYS_ERR_INVALID_SIZE;
1642    }
1643    else if (type == ObjType_L1CNode && objsize % OBJSIZE_L2CNODE != 0)
1644    {
1645        printk(LOG_WARN, "%s: CNode: objsize = %" PRIuGENSIZE "\n", __FUNCTION__, objsize);
1646        TRACE(KERNEL_CAPOPS, RETYPE_DONE, retype_seqnum);
1647        return SYS_ERR_INVALID_SIZE;
1648    }
1649    else if (type == ObjType_L2CNode && objsize != OBJSIZE_L2CNODE)
1650    {
1651        printk(LOG_WARN, "%s: L2CNode: objsize = %"PRIuGENSIZE"\n", __FUNCTION__, objsize);
1652        TRACE(KERNEL_CAPOPS, RETYPE_DONE, retype_seqnum);
1653        return SYS_ERR_INVALID_SIZE;
1654    }
1655    assert((type_is_mappable(type) && objsize % base_size == 0) ||
1656           (type == ObjType_L1CNode && objsize % OBJSIZE_L2CNODE == 0 &&
1657            objsize >= OBJSIZE_L2CNODE) ||
1658           (type == ObjType_L2CNode && objsize == OBJSIZE_L2CNODE) ||
1659           !type_is_mappable(type));
1660
1661    /* No explicit retypes to Mapping allowed */
1662    if (type_is_mapping(type)) {
1663        TRACE(KERNEL_CAPOPS, RETYPE_DONE, retype_seqnum);
1664        return SYS_ERR_RETYPE_MAPPING_EXPLICIT;
1665    }
1666
1667
1668    TRACE_CAP_MSG("retyping", src_cte);
1669
1670    TRACE(KERNEL_CAPOPS, RETYPE_IS_RETYPEABLE, retype_seqnum);
1671    /* Check retypability */
1672    err = is_retypeable(src_cte, src_cap->type, type, from_monitor);
1673    if (err_is_fail(err)) {
1674        if (err_no(err) != SYS_ERR_REVOKE_FIRST) {
1675            debug(SUBSYS_CAPS, "caps_retype: is_retypeable failed\n");
1676            TRACE(KERNEL_CAPOPS, RETYPE_DONE, retype_seqnum);
1677            return err;
1678        } else {
1679            debug(SUBSYS_CAPS,
1680                    "caps_retype: is_retypeable() returned SYS_ERR_REVOKE_FIRST, doing range check\n");
1681            // We handle err_revoke_first fine-grained checking below, as it
1682            // might happen for non-overlapping regions.
1683
1684            // TODO: move the range checking into is_retypeable() or even
1685            // is_revoked_first(), -SG 2016-04-18
1686            do_range_check = true;
1687        }
1688    }
1689    TRACE(KERNEL_CAPOPS, RETYPE_IS_RETYPEABLE_DONE, retype_seqnum);
1690    // from here: src cap type is one of these.
1691    assert(src_cap->type == ObjType_PhysAddr ||
1692           src_cap->type == ObjType_RAM ||
1693           src_cap->type == ObjType_Dispatcher ||
1694           src_cap->type == ObjType_Frame ||
1695           src_cap->type == ObjType_DevFrame ||
1696           src_cap->type == ObjType_IRQSrc ||
1697           src_cap->type == ObjType_ProcessManager ||
1698           src_cap->type == ObjType_DeviceIDManager);
1699
1700    if (src_cap->type != ObjType_Dispatcher && src_cap->type != ObjType_IRQSrc) {
1701        base = get_address(src_cap);
1702        size = get_size(src_cap);
1703    }
1704
1705    maxobjs = caps_max_numobjs(type, get_size(src_cap), objsize);
1706    debug(SUBSYS_CAPS, "maximum possible new object count: %zu\n", maxobjs);
1707
1708    if (maxobjs == 0) {
1709        debug(SUBSYS_CAPS, "caps_retype: maxobjs == 0\n");
1710        TRACE(KERNEL_CAPOPS, RETYPE_DONE, retype_seqnum);
1711        return SYS_ERR_INVALID_SIZE;
1712    }
1713
1714    if (count > maxobjs) {
1715        debug(SUBSYS_CAPS, "caps_retype: maxobjs = %zu, count = %zu\n", maxobjs, count);
1716        TRACE(KERNEL_CAPOPS, RETYPE_DONE, retype_seqnum);
1717        return SYS_ERR_RETYPE_INVALID_COUNT;
1718    }
1719    // from here: count <= maxobjs
1720    assert(count <= maxobjs);
1721    // make sure nobody calls with the old behaviour
1722    if (count == 0) {
1723        TRACE(KERNEL_CAPOPS, RETYPE_DONE, retype_seqnum);
1724        return SYS_ERR_RETYPE_INVALID_COUNT;
1725    }
1726    assert(count > 0);
1727
1728    /* check that we can create `count` objs from `offset` in source, and
1729     * update base accordingly */
1730    if (src_cap->type != ObjType_Dispatcher && src_cap->type != ObjType_IRQSrc
1731            && src_cap->type != ObjType_Domain) {
1732        // TODO: convince ourselves that this is the only condition on offset
1733        if (offset + count * objsize > get_size(src_cap)) {
1734            debug(SUBSYS_CAPS, "caps_retype: cannot create all %zu objects"
1735                    " of size 0x%" PRIxGENSIZE " from offset 0x%" PRIxGENSIZE "\n",
1736                    count, objsize, offset);
1737            TRACE(KERNEL_CAPOPS, RETYPE_DONE, retype_seqnum);
1738            return SYS_ERR_RETYPE_INVALID_OFFSET;
1739        }
1740        // adjust base address for new objects
1741        base += offset;
1742
1743        // Check whether we got SYS_ERR_REVOKE_FIRST because of
1744        // non-overlapping child
1745        if (do_range_check) {
1746            TRACE(KERNEL_CAPOPS, RETYPE_RANGE_CHECK, retype_seqnum);
1747            int find_range_result = 0;
1748            struct cte *found_cte = NULL;
1749            err = mdb_find_range(get_type_root(src_cap->type), base, objsize * count,
1750                    MDB_RANGE_FOUND_SURROUNDING, &found_cte, &find_range_result);
1751            // this should never return an error unless we mess up the
1752            // non-user supplied arguments
1753            if (err_is_fail(err)) {
1754                printk(LOG_WARN, "mdb_find_range returned: %"PRIuERRV"\n", err);
1755            }
1756            assert(err_is_ok(err));
1757            // return REVOKE_FIRST, if we found a cap inside the region
1758            // (FOUND_INNER == 2) or overlapping the region (FOUND_PARTIAL == 3)
1759            if (find_range_result >= MDB_RANGE_FOUND_INNER) {
1760                debug(SUBSYS_CAPS,
1761                    "%s: found existing region inside, or overlapping requested region:\n",
1762                    __FUNCTION__);
1763                debug(SUBSYS_CAPS, "%s: our region: %#"PRIxGENPADDR"--%#"PRIxGENPADDR"\n",
1764                        __FUNCTION__, base, base+objsize*count);
1765                if (found_cte && kernel_loglevel >= LOG_DEBUG &&
1766                    kernel_log_subsystem_mask & SUBSYS_CAPS)
1767                {
1768                    char capbuf[128];
1769                    sprint_cap(capbuf, 128, &found_cte->cap);
1770                    printk(LOG_NOTE, "%s: cap=%s\n", __FUNCTION__, capbuf);
1771                    if (type_is_mapping(found_cte->cap.type)) {
1772                        sprint_cap(capbuf, 128, found_cte->cap.u.frame_mapping.cap);
1773                        printk(LOG_NOTE, "%s: ... is mapping for cap=%s\n",
1774                                __FUNCTION__, capbuf);
1775                    }
1776                    assert(get_address(&found_cte->cap) >= base &&
1777                           get_address(&found_cte->cap) < base+objsize*count);
1778                }
1779                TRACE(KERNEL_CAPOPS, RETYPE_DONE, retype_seqnum);
1780                return SYS_ERR_REVOKE_FIRST;
1781            }
1782            // return REVOKE_FIRST, if we found a cap that isn't our source
1783            // (or a copy of our source) covering the whole requested region.
1784            else if (find_range_result == MDB_RANGE_FOUND_SURROUNDING &&
1785                     !is_copy(&found_cte->cap, src_cap))
1786            {
1787                debug(SUBSYS_CAPS,
1788                       "%s: found non source region fully covering requested region\n",
1789                       __FUNCTION__);
1790                TRACE(KERNEL_CAPOPS, RETYPE_DONE, retype_seqnum);
1791                return SYS_ERR_REVOKE_FIRST;
1792            }
1793            TRACE(KERNEL_CAPOPS, RETYPE_RANGE_CHECK_DONE, retype_seqnum);
1794        }
1795    }
1796
1797    /* check that destination slots all fit within target cnode */
1798    if (dest_slot + count > cnode_get_slots(dest_cnode)) {
1799        debug(SUBSYS_CAPS, "caps_retype: dest slots don't fit in cnode\n");
1800        TRACE(KERNEL_CAPOPS, RETYPE_DONE, retype_seqnum);
1801        return SYS_ERR_SLOTS_INVALID;
1802    }
1803
1804    /* check that destination slots are all empty */
1805    debug(SUBSYS_CAPS, "caps_retype: dest cnode is %#" PRIxLPADDR
1806          " dest_slot %d\n",
1807          get_address(dest_cnode), (int)dest_slot);
1808    for (cslot_t i = 0; i < count; i++) {
1809        if (caps_locate_slot(get_address(dest_cnode), dest_slot + i)->cap.type
1810            != ObjType_Null) {
1811            debug(SUBSYS_CAPS, "caps_retype: dest slot %d in use\n",
1812                  (int)(dest_slot + i));
1813            TRACE(KERNEL_CAPOPS, RETYPE_DONE, retype_seqnum);
1814            return SYS_ERR_SLOTS_IN_USE;
1815        }
1816    }
1817
1818    /* Check that L1 CNode is destination when creating L2 CNode */
1819    if (type == ObjType_L2CNode) {
1820        debug(SUBSYS_CAPS, "caps_retype: check that dest cnode is L1"
1821                           " when creating L2 CNodes\n");
1822        if (dest_cnode->type != ObjType_L1CNode &&
1823            dest_cnode->type != ObjType_L2CNode)
1824        {
1825            panic("L2 CNode can only be created in L1 or L2 CNode\n");
1826        }
1827    }
1828
1829    // IRQSrc specific checks
1830    uint64_t vec_start_new = offset;
1831    uint64_t vec_end_new = objsize;
1832    if(src_cap->type == ObjType_IRQSrc){
1833
1834        // Check new range is valid
1835        if(vec_start_new > vec_end_new){
1836            TRACE(KERNEL_CAPOPS, RETYPE_DONE, retype_seqnum);
1837            return SYS_ERR_RETYPE_INVALID_OFFSET;
1838        }
1839
1840        // Check vec_start_new in range
1841        if(!(src_cap->u.irqsrc.vec_start <= vec_start_new &&
1842                vec_start_new <= src_cap->u.irqsrc.vec_end)){
1843            TRACE(KERNEL_CAPOPS, RETYPE_DONE, retype_seqnum);
1844            return SYS_ERR_RETYPE_INVALID_OFFSET;
1845        }
1846
1847        // Check vec_end_new in range
1848        if(!(src_cap->u.irqsrc.vec_start <= vec_end_new &&
1849                vec_end_new <= src_cap->u.irqsrc.vec_end)){
1850            TRACE(KERNEL_CAPOPS, RETYPE_DONE, retype_seqnum);
1851            return SYS_ERR_RETYPE_INVALID_OBJSIZE;
1852        }
1853    }
1854
1855
1856    TRACE(KERNEL_CAPOPS, RETYPE_CREATE_CAPS, retype_seqnum);
1857    /* create new caps */
1858    struct cte *dest_cte =
1859        caps_locate_slot(get_address(dest_cnode), dest_slot);
1860    if(type == ObjType_IRQSrc){
1861        // Pass special arguments
1862        err = caps_create(type, 0, 0, 0, 1, my_core_id, dest_cte);
1863        if(err_is_ok(err)){
1864            dest_cte->cap.u.irqsrc.vec_start = vec_start_new;
1865            dest_cte->cap.u.irqsrc.vec_end = vec_end_new;
1866        }
1867    } else {
1868        err = caps_create(type, base, size, objsize, count, my_core_id, dest_cte);
1869    }
1870    if (err_is_fail(err)) {
1871        debug(SUBSYS_CAPS, "caps_retype: failed to create a dest cap\n");
1872        TRACE(KERNEL_CAPOPS, RETYPE_DONE, retype_seqnum);
1873        return err_push(err, SYS_ERR_RETYPE_CREATE);
1874    }
1875    TRACE(KERNEL_CAPOPS, RETYPE_CREATE_CAPS_DONE, retype_seqnum);
1876
1877    /* special initialisation for endpoint caps */
1878    if (type == ObjType_EndPointLMP) {
1879        assert(src_cap->type == ObjType_Dispatcher);
1880        assert(count == 1);
1881        struct capability *dest_cap = &dest_cte->cap;
1882        dest_cap->u.endpointlmp.listener = src_cap->u.dispatcher.dcb;
1883    }
1884
1885    // XXX: Treat full object retypes to same type as copies as calling
1886    // is_copy(dst, src) will return true for such retypes.
1887    if (count == 1 && objsize == get_size(src_cap) && type == src_cap->type) {
1888        // sanity check: is_copy() really returns true for the two caps
1889        assert(is_copy(&dest_cte[0].cap, src_cap));
1890        // If we're not owner, and type needs locality
1891        if (src_cte->mdbnode.owner != my_core_id &&
1892            distcap_needs_locality(dest_cte[0].cap.type))
1893        {
1894            // fix owner for new cap and set remote_copies bit
1895            dest_cte[0].mdbnode.owner = src_cte->mdbnode.owner;
1896            dest_cte[0].mdbnode.remote_copies = true;
1897        }
1898    }
1899
1900    TRACE(KERNEL_CAPOPS, RETYPE_MDB_INSERT, retype_seqnum);
1901    /* Handle mapping */
1902    for (size_t i = 0; i < count; i++) {
1903        mdb_insert(&dest_cte[i]);
1904    }
1905    TRACE(KERNEL_CAPOPS, RETYPE_MDB_INSERT_DONE, retype_seqnum);
1906
1907#ifdef TRACE_PMEM_CAPS
1908    for (size_t i = 0; i < count; i++) {
1909        TRACE_CAP_MSG("created", &dest_cte[i]);
1910    }
1911#endif
1912
1913    TRACE(KERNEL, CAP_RETYPE, 1);
1914    TRACE(KERNEL_CAPOPS, RETYPE_DONE, retype_seqnum);
1915    return SYS_ERR_OK;
1916}
1917
1918/// Check the validity of a retype operation
1919errval_t is_retypeable(struct cte *src_cte, enum objtype src_type,
1920                       enum objtype dest_type, bool from_monitor)
1921{
1922    if (!is_well_founded(src_type, dest_type)) {
1923        return SYS_ERR_INVALID_RETYPE;
1924    } else if (!is_revoked_first(src_cte, src_type)){
1925        //printf("err_revoke_first: (%p, %d, %d)\n", src_cte, src_type, dest_type);
1926        return SYS_ERR_REVOKE_FIRST;
1927    } else if (dest_type == ObjType_EndPointLMP && src_cte->mdbnode.owner == my_core_id) {
1928        // XXX: because of the current "multi-retype" hack for endpoints, a
1929        // dispatcher->endpoint retype can happen irrespective of the existence
1930        // of descendants on any core.
1931        // However, we only do this for locally owned caps as the owner should
1932        // be notified that the cap has remote descendants
1933        return SYS_ERR_OK;
1934    } else if (!from_monitor && (src_cte->mdbnode.owner != my_core_id
1935                                 || src_cte->mdbnode.remote_descs)) {
1936        return SYS_ERR_RETRY_THROUGH_MONITOR;
1937    } else {
1938        return SYS_ERR_OK;
1939    }
1940}
1941
1942/// Create copies to a slot within a cnode
1943errval_t caps_copy_to_cnode(struct cte *dest_cnode_cte, cslot_t dest_slot,
1944                            struct cte *src_cte, bool mint, uintptr_t param1,
1945                            uintptr_t param2)
1946{
1947    /* Parameter Checking */
1948    assert(dest_cnode_cte->cap.type == ObjType_L1CNode ||
1949           dest_cnode_cte->cap.type == ObjType_L2CNode);
1950
1951    // only allow L2 CNodes and BSP KCB in L1 CNode
1952    // XXX: BSPKCB should not be in rootcn...
1953    if (dest_cnode_cte->cap.type == ObjType_L1CNode &&
1954        src_cte->cap.type != ObjType_L2CNode &&
1955        src_cte->cap.type != ObjType_KernelControlBlock)
1956    {
1957        printk(LOG_WARN, "trying to copy cap type %d into cap type %d\n",
1958                src_cte->cap.type, dest_cnode_cte->cap.type);
1959        return SYS_ERR_DEST_TYPE_INVALID;
1960    }
1961
1962    struct cte *dest_cte;
1963    dest_cte = caps_locate_slot(get_address(&dest_cnode_cte->cap), dest_slot);
1964    return caps_copy_to_cte(dest_cte, src_cte, mint, param1, param2);
1965
1966}
1967
1968/// Create copies to a cte
1969STATIC_ASSERT(68 == ObjType_Num, "Knowledge of all cap types");
1970errval_t caps_copy_to_cte(struct cte *dest_cte, struct cte *src_cte, bool mint,
1971                          uintptr_t param1, uintptr_t param2)
1972{
1973    errval_t err;
1974    /* Parameter checking */
1975    // Null checking
1976    assert(dest_cte != NULL);
1977    assert(src_cte != NULL);
1978
1979    struct capability *src_cap  = &src_cte->cap;
1980    struct capability *dest_cap = &dest_cte->cap;
1981    // NULL caps cannot be copied/minted
1982    if (src_cap->type == ObjType_Null) {
1983        return SYS_ERR_CAP_NOT_FOUND;
1984    }
1985    // Parameters should be 0 if not minting
1986    if (!mint) {
1987        assert(param1 == 0);
1988        assert(param2 == 0);
1989    }
1990
1991    assert(!src_cte->mdbnode.in_delete);
1992
1993    /* Insert #source_cap into #dest_cap */
1994    err = set_cap(dest_cap, src_cap);
1995    if (err_is_fail(err)) {
1996        return err;
1997    }
1998
1999    /* Transfer MDB attributes that must be equal for all copies */
2000#define CP_ATTR(at) dest_cte->mdbnode.at = src_cte->mdbnode.at
2001    CP_ATTR(owner);
2002    CP_ATTR(locked);
2003    CP_ATTR(remote_copies);
2004    CP_ATTR(remote_ancs);
2005    CP_ATTR(remote_descs);
2006#undef CP_ATTR
2007
2008    /* Copy is done */
2009    if(!mint) {
2010        TRACE_CAP_MSG("copied to", dest_cte);
2011        // Handle mapping here only for non-mint operations
2012        // (mint can change eq fields which would make the early insertion
2013        // invalid in some cases)
2014        mdb_insert(dest_cte);
2015        return SYS_ERR_OK;
2016    }
2017    else {
2018        TRACE_CAP_MSG("minting to", dest_cte);
2019    }
2020
2021    /* For minting, set the specified parameters */
2022    // Process source-specific parameters for minting
2023    // XXX: If failure, revert the insertion
2024    switch(src_cap->type) {
2025    case ObjType_EndPointLMP:
2026        // XXX: FIXME: check that buffer offset lies wholly within the disp frame
2027        // can't easily enforce this here, because the dispatcher frame may not
2028        // yet be setup
2029/*        if (param1 < sizeof(struct dispatcher) ||
2030            dest_cap->u.endpointlmp.endpointlmp->disp == NULL ||
2031            param2 < IDC_RECV_LENGTH ||
2032            param1 + sizeof(struct idc_endpoint) + param2 * sizeof(uintptr_t) >
2033            (1UL << dest_cap->u.endpointlmp.listener->disp_cte.cap.u.frame.bits)) {
2034            return SYS_ERR_INVALID_EPBUF;
2035        }*/
2036        if (param2 < LMP_RECV_HEADER_LENGTH) {
2037            return SYS_ERR_INVALID_EPLEN;
2038        }
2039        uint16_t iftype = param2 >> 16;
2040        uint16_t buflen = param2 & 0xFFFF;
2041        dest_cap->u.endpointlmp.epoffset = param1;
2042        dest_cap->u.endpointlmp.epbuflen = buflen;
2043        dest_cap->u.endpointlmp.iftype = iftype;
2044        break;
2045
2046    case ObjType_EndPointUMP:
2047        assert(param2 == 0);
2048        dest_cap->u.endpointump.iftype = param1;
2049        break;
2050
2051    case ObjType_IO:
2052        if(src_cap->u.io.start  <= param1) {
2053            dest_cap->u.io.start = param1;
2054        }
2055        if(src_cap->u.io.end  >= param2) {
2056            dest_cap->u.io.end = param2;
2057        }
2058        break;
2059    default:
2060        // Mint the caprights by default
2061        dest_cap->rights = src_cap->rights & param1;
2062    }
2063
2064    // Insert after doing minting operation
2065    mdb_insert(dest_cte);
2066
2067    return SYS_ERR_OK;
2068}
2069
2070STATIC_ASSERT(68 == ObjType_Num, "Knowledge of all cap types");
2071errval_t redact_capability(struct capability *cap)
2072{
2073    // TODO: figure out which other types need redacting
2074    switch (cap->type) {
2075        case ObjType_KernelControlBlock:
2076            // don't leak KCB kernel pointer in KCB cap
2077            cap->u.kernelcontrolblock.kcb = NULL;
2078        default:
2079            // Don't redact all other capability types
2080            break;
2081    }
2082    return SYS_ERR_OK;
2083}
2084