/*-
 * Copyright (c) 2003,2004 Marcel Moolenaar
 * Copyright (c) 2000,2001 Doug Rabson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/ia64/ia64/machdep.c 261985 2014-02-16 19:12:50Z marcel $");

#include "opt_compat.h"
#include "opt_ddb.h"
#include "opt_kstack_pages.h"
#include "opt_sched.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/bus.h>
#include <sys/cons.h>
#include <sys/cpu.h>
#include <sys/eventhandler.h>
#include <sys/exec.h>
#include <sys/imgact.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/msgbuf.h>
#include <sys/pcpu.h>
#include <sys/ptrace.h>
#include <sys/random.h>
#include <sys/reboot.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/syscall.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/ucontext.h>
#include <sys/uio.h>
#include <sys/uuid.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <ddb/ddb.h>

#include <net/netisr.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>

#include <machine/bootinfo.h>
#include <machine/cpu.h>
#include <machine/efi.h>
#include <machine/elf.h>
#include <machine/fpu.h>
#include <machine/intr.h>
#include <machine/mca.h>
#include <machine/md_var.h>
#include <machine/pal.h>
#include <machine/pcb.h>
#include <machine/reg.h>
#include <machine/sal.h>
#include <machine/sigframe.h>
#ifdef SMP
#include <machine/smp.h>
#endif
#include <machine/unwind.h>
#include <machine/vmparam.h>

/*
 * For atomicity reasons, we demand that pc_curthread is the first
 * field in the struct pcpu. It allows us to read the pointer with
 * a single atomic instruction:
 *	ld8 %curthread = [r13]
 * Otherwise we would first have to calculate the load address, store
 * the result in a temporary register and use that for the load:
 *	add %temp = %offsetof(struct pcpu), r13
 *	ld8 %curthread = [%temp]
 * A context switch in between the add and the ld8 could have the
 * thread migrate to a different core. In that case, %curthread
 * would be the thread running on the original core and not actually
 * the current thread.
 */
CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
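
/*
 * Illustrative sketch (not part of the original file): with pc_curthread
 * at offset 0, as asserted above, reading the current thread takes just
 * the single ld8 from the comment. The hypothetical helper below spells
 * it out; it is kept under "notdef" so it is never compiled.
 */
#ifdef notdef
static __inline struct thread *
curthread_sketch(void)
{
	struct thread *td;

	/* r13 holds the struct pcpu pointer; pc_curthread is at offset 0. */
	__asm __volatile("ld8 %0=[r13]" : "=r"(td));
	return (td);
}
#endif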

static SYSCTL_NODE(_hw, OID_AUTO, freq, CTLFLAG_RD, 0, "");
static SYSCTL_NODE(_machdep, OID_AUTO, cpu, CTLFLAG_RD, 0, "");

static u_int bus_freq;
SYSCTL_UINT(_hw_freq, OID_AUTO, bus, CTLFLAG_RD, &bus_freq, 0,
    "Bus clock frequency");

static u_int cpu_freq;
SYSCTL_UINT(_hw_freq, OID_AUTO, cpu, CTLFLAG_RD, &cpu_freq, 0,
    "CPU clock frequency");

static u_int itc_freq;
SYSCTL_UINT(_hw_freq, OID_AUTO, itc, CTLFLAG_RD, &itc_freq, 0,
    "ITC frequency");

int cold = 1;

struct bootinfo *bootinfo;

struct pcpu pcpu0;

extern u_int64_t kernel_text[], _end[];

extern u_int64_t ia64_gateway_page[];
extern u_int64_t break_sigtramp[];
extern u_int64_t epc_sigtramp[];

struct fpswa_iface *fpswa_iface;

vm_size_t ia64_pal_size;
vm_paddr_t ia64_pal_base;
vm_offset_t ia64_port_base;

u_int64_t ia64_lapic_addr = PAL_PIB_DEFAULT_ADDR;

struct ia64_pib *ia64_pib;

static int ia64_sync_icache_needed;

char machine[] = MACHINE;
SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "");

static char cpu_model[64];
SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, 0,
    "The CPU model name");

static char cpu_family[64];
SYSCTL_STRING(_hw, OID_AUTO, family, CTLFLAG_RD, cpu_family, 0,
    "The CPU family name");

#ifdef DDB
extern vm_offset_t ksym_start, ksym_end;
#endif

struct msgbuf *msgbufp = NULL;

/* Other subsystems (e.g., ACPI) can hook this later. */
void (*cpu_idle_hook)(sbintime_t) = NULL;

struct kva_md_info kmi;

#define	Mhz	1000000L
#define	Ghz	(1000L*Mhz)

static void
identifycpu(void)
{
	char vendor[17];
	char *family_name, *model_name;
	u_int64_t features, tmp;
	int number, revision, model, family, archrev;

	/*
	 * Assumes little-endian.
	 */
	*(u_int64_t *) &vendor[0] = ia64_get_cpuid(0);
	*(u_int64_t *) &vendor[8] = ia64_get_cpuid(1);
	vendor[16] = '\0';

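	/* CPUID register 3 (version info) packs byte-wide fields. */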
	tmp = ia64_get_cpuid(3);
	number = (tmp >> 0) & 0xff;
	revision = (tmp >> 8) & 0xff;
	model = (tmp >> 16) & 0xff;
	family = (tmp >> 24) & 0xff;
	archrev = (tmp >> 32) & 0xff;

	family_name = model_name = "unknown";
	switch (family) {
	case 0x07:
		family_name = "Itanium";
		model_name = "Merced";
		break;
	case 0x1f:
		family_name = "Itanium 2";
		switch (model) {
		case 0x00:
			model_name = "McKinley";
			break;
		case 0x01:
			/*
			 * Deerfield is a low-voltage variant based on the
			 * Madison core. We need circumstantial evidence
			 * (i.e. the clock frequency) to identify those.
			 * Allow for roughly 1% error margin.
			 */
			if (cpu_freq > 990 && cpu_freq < 1010)
				model_name = "Deerfield";
			else
				model_name = "Madison";
			break;
		case 0x02:
			model_name = "Madison II";
			break;
		}
		break;
	case 0x20:
		ia64_sync_icache_needed = 1;

		family_name = "Itanium 2";
		switch (model) {
		case 0x00:
			model_name = "Montecito";
			break;
		case 0x01:
			model_name = "Montvale";
			break;
		}
		break;
	}
	snprintf(cpu_family, sizeof(cpu_family), "%s", family_name);
	snprintf(cpu_model, sizeof(cpu_model), "%s", model_name);

	features = ia64_get_cpuid(4);

	printf("CPU: %s (", model_name);
	if (cpu_freq)
		printf("%u MHz ", cpu_freq);
	printf("%s)\n", family_name);
	printf("  Origin = \"%s\"  Revision = %d\n", vendor, revision);
	printf("  Features = 0x%b\n", (u_int32_t) features,
	    "\020"
	    "\001LB"	/* long branch (brl) instruction. */
	    "\002SD"	/* Spontaneous deferral. */
	    "\003AO"	/* 16-byte atomic operations (ld, st, cmpxchg). */ );
}

static void
cpu_startup(void *dummy)
{
	char nodename[16];
	struct pcpu *pc;
	struct pcpu_stats *pcs;

	/*
	 * Good {morning,afternoon,evening,night}.
	 */
	identifycpu();

#ifdef PERFMON
	perfmon_init();
#endif
	printf("real memory  = %ld (%ld MB)\n", ptoa(realmem),
	    ptoa(realmem) / 1048576);

	vm_ksubmap_init(&kmi);

	printf("avail memory = %ld (%ld MB)\n", ptoa(cnt.v_free_count),
	    ptoa(cnt.v_free_count) / 1048576);

	if (fpswa_iface == NULL)
		printf("Warning: no FPSWA package supplied\n");
	else
		printf("FPSWA Revision = 0x%lx, Entry = %p\n",
		    (long)fpswa_iface->if_rev, (void *)fpswa_iface->if_fpswa);

	/*
	 * Set up buffers, so they can be used to read disk labels.
	 */
	bufinit();
	vm_pager_bufferinit();

	/*
	 * Traverse the MADT to discover IOSAPIC and Local SAPIC
	 * information.
	 */
	ia64_probe_sapics();
	ia64_pib = pmap_mapdev(ia64_lapic_addr, sizeof(*ia64_pib));

	ia64_mca_init();

	/*
	 * Create sysctl tree for per-CPU information.
	 */
	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
		snprintf(nodename, sizeof(nodename), "%u", pc->pc_cpuid);
		sysctl_ctx_init(&pc->pc_md.sysctl_ctx);
		pc->pc_md.sysctl_tree = SYSCTL_ADD_NODE(&pc->pc_md.sysctl_ctx,
		    SYSCTL_STATIC_CHILDREN(_machdep_cpu), OID_AUTO, nodename,
		    CTLFLAG_RD, NULL, "");
		if (pc->pc_md.sysctl_tree == NULL)
			continue;

		pcs = &pc->pc_md.stats;

		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
		    "nasts", CTLFLAG_RD, &pcs->pcs_nasts,
		    "Number of IPI_AST interrupts");

		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
		    "nclks", CTLFLAG_RD, &pcs->pcs_nclks,
		    "Number of clock interrupts");

		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
		    "nextints", CTLFLAG_RD, &pcs->pcs_nextints,
		    "Number of ExtINT interrupts");

		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
		    "nhardclocks", CTLFLAG_RD, &pcs->pcs_nhardclocks,
		    "Number of IPI_HARDCLOCK interrupts");

		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
		    "nhighfps", CTLFLAG_RD, &pcs->pcs_nhighfps,
		    "Number of IPI_HIGH_FP interrupts");

		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
		    "nhwints", CTLFLAG_RD, &pcs->pcs_nhwints,
		    "Number of hardware (device) interrupts");

		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
		    "npreempts", CTLFLAG_RD, &pcs->pcs_npreempts,
		    "Number of IPI_PREEMPT interrupts");

		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
		    "nrdvs", CTLFLAG_RD, &pcs->pcs_nrdvs,
		    "Number of IPI_RENDEZVOUS interrupts");

		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
		    "nstops", CTLFLAG_RD, &pcs->pcs_nstops,
		    "Number of IPI_STOP interrupts");

		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
		    "nstrays", CTLFLAG_RD, &pcs->pcs_nstrays,
		    "Number of stray interrupts");
	}
}
SYSINIT(cpu_startup, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);

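/*
 * Flush the data cache over [ptr, ptr + len): one fc instruction per
 * 32-byte line, followed by a data serialization.
 */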
void
cpu_flush_dcache(void *ptr, size_t len)
{
	vm_offset_t lim, va;

	va = (uintptr_t)ptr & ~31;
	lim = (uintptr_t)ptr + len;
	while (va < lim) {
		ia64_fc(va);
		va += 32;
	}

	ia64_srlz_d();
}

/* Get current clock frequency for the given cpu id. */
int
cpu_est_clockrate(int cpu_id, uint64_t *rate)
{

	if (pcpu_find(cpu_id) == NULL || rate == NULL)
		return (EINVAL);
	*rate = (u_long)cpu_freq * 1000000ul;
	return (0);
}

void
cpu_halt()
{

	efi_reset_system();
}

void
cpu_idle(int busy)
{
	register_t ie;
	sbintime_t sbt = -1;

	if (!busy) {
		critical_enter();
		sbt = cpu_idleclock();
	}

	ie = intr_disable();
	KASSERT(ie != 0, ("%s called with interrupts disabled\n", __func__));

	if (sched_runnable())
		ia64_enable_intr();
	else if (cpu_idle_hook != NULL) {
		(*cpu_idle_hook)(sbt);
		/* The hook must enable interrupts! */
	} else {
		ia64_call_pal_static(PAL_HALT_LIGHT, 0, 0, 0);
		ia64_enable_intr();
	}

	if (!busy) {
		cpu_activeclock();
		critical_exit();
	}
}

int
cpu_idle_wakeup(int cpu)
{

	return (0);
}

void
cpu_reset()
{

	efi_reset_system();
}

void
cpu_switch(struct thread *old, struct thread *new, struct mtx *mtx)
{
	struct pcb *oldpcb, *newpcb;

	oldpcb = old->td_pcb;
#ifdef COMPAT_FREEBSD32
	ia32_savectx(oldpcb);
#endif
	if (PCPU_GET(fpcurthread) == old)
		old->td_frame->tf_special.psr |= IA64_PSR_DFH;
	if (!savectx(oldpcb)) {
		newpcb = new->td_pcb;
		oldpcb->pcb_current_pmap =
		    pmap_switch(newpcb->pcb_current_pmap);

		atomic_store_rel_ptr(&old->td_lock, mtx);

#if defined(SCHED_ULE) && defined(SMP)
		while (atomic_load_acq_ptr(&new->td_lock) == &blocked_lock)
			cpu_spinwait();
#endif

		PCPU_SET(curthread, new);

#ifdef COMPAT_FREEBSD32
		ia32_restorectx(newpcb);
#endif

		if (PCPU_GET(fpcurthread) == new)
			new->td_frame->tf_special.psr &= ~IA64_PSR_DFH;
		restorectx(newpcb);
		/* We should not get here. */
		panic("cpu_switch: restorectx() returned");
		/* NOTREACHED */
	}
}

void
cpu_throw(struct thread *old __unused, struct thread *new)
{
	struct pcb *newpcb;

	newpcb = new->td_pcb;
	(void)pmap_switch(newpcb->pcb_current_pmap);

#if defined(SCHED_ULE) && defined(SMP)
	while (atomic_load_acq_ptr(&new->td_lock) == &blocked_lock)
		cpu_spinwait();
#endif

	PCPU_SET(curthread, new);

#ifdef COMPAT_FREEBSD32
	ia32_restorectx(newpcb);
#endif

	restorectx(newpcb);
	/* We should not get here. */
	panic("cpu_throw: restorectx() returned");
	/* NOTREACHED */
}

void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	/*
	 * Set pc_acpi_id to "uninitialized".
	 * See sys/dev/acpica/acpi_cpu.c
	 */
	pcpu->pc_acpi_id = 0xffffffff;
}

void
cpu_pcpu_setup(struct pcpu *pc, u_int acpi_id, u_int sapic_id)
{

	pc->pc_acpi_id = acpi_id;
	pc->pc_md.lid = IA64_LID_SET_SAPIC_ID(sapic_id);
}

void
spinlock_enter(void)
{
	struct thread *td;
	int intr;

	td = curthread;
	if (td->td_md.md_spinlock_count == 0) {
		intr = intr_disable();
		td->td_md.md_spinlock_count = 1;
		td->td_md.md_saved_intr = intr;
	} else
		td->td_md.md_spinlock_count++;
	critical_enter();
}

void
spinlock_exit(void)
{
	struct thread *td;
	int intr;

	td = curthread;
	critical_exit();
	intr = td->td_md.md_saved_intr;
	td->td_md.md_spinlock_count--;
	if (td->td_md.md_spinlock_count == 0)
		intr_restore(intr);
}

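/*
 * The map_*() functions below each pin a mapping with a translation
 * register (TR): purge any overlapping TR entry, disable interrupts
 * and interruption collection (psr.ic|psr.i), program ifa/itir with
 * the address and page size, and insert the entry with itr.d (plus
 * itr.i for executable mappings).
 */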
void
map_vhpt(uintptr_t vhpt)
{
	pt_entry_t pte;
	uint64_t psr;

	pte = PTE_PRESENT | PTE_MA_WB | PTE_ACCESSED | PTE_DIRTY |
	    PTE_PL_KERN | PTE_AR_RW;
	pte |= vhpt & PTE_PPN_MASK;

	__asm __volatile("ptr.d %0,%1" :: "r"(vhpt),
	    "r"(pmap_vhpt_log2size << 2));

	__asm __volatile("mov   %0=psr" : "=r"(psr));
	__asm __volatile("rsm   psr.ic|psr.i");
	ia64_srlz_i();
	ia64_set_ifa(vhpt);
	ia64_set_itir(pmap_vhpt_log2size << 2);
	ia64_srlz_d();
	__asm __volatile("itr.d dtr[%0]=%1" :: "r"(3), "r"(pte));
	__asm __volatile("mov   psr.l=%0" :: "r" (psr));
	ia64_srlz_i();
}

void
map_pal_code(void)
{
	pt_entry_t pte;
	vm_offset_t va;
	vm_size_t sz;
	uint64_t psr;
	u_int shft;

	if (ia64_pal_size == 0)
		return;

	va = IA64_PHYS_TO_RR7(ia64_pal_base);

	sz = ia64_pal_size;
	shft = 0;
	while (sz > 1) {
		shft++;
		sz >>= 1;
	}

	pte = PTE_PRESENT | PTE_MA_WB | PTE_ACCESSED | PTE_DIRTY |
	    PTE_PL_KERN | PTE_AR_RWX;
	pte |= ia64_pal_base & PTE_PPN_MASK;

	__asm __volatile("ptr.d %0,%1; ptr.i %0,%1" :: "r"(va), "r"(shft<<2));

	__asm __volatile("mov	%0=psr" : "=r"(psr));
	__asm __volatile("rsm	psr.ic|psr.i");
	ia64_srlz_i();
	ia64_set_ifa(va);
	ia64_set_itir(shft << 2);
	ia64_srlz_d();
	__asm __volatile("itr.d	dtr[%0]=%1" :: "r"(4), "r"(pte));
	ia64_srlz_d();
	__asm __volatile("itr.i	itr[%0]=%1" :: "r"(1), "r"(pte));
	__asm __volatile("mov	psr.l=%0" :: "r" (psr));
	ia64_srlz_i();
}

void
map_gateway_page(void)
{
	pt_entry_t pte;
	uint64_t psr;

	pte = PTE_PRESENT | PTE_MA_WB | PTE_ACCESSED | PTE_DIRTY |
	    PTE_PL_KERN | PTE_AR_X_RX;
	pte |= ia64_tpa((uint64_t)ia64_gateway_page) & PTE_PPN_MASK;

	__asm __volatile("ptr.d %0,%1; ptr.i %0,%1" ::
	    "r"(VM_MAXUSER_ADDRESS), "r"(PAGE_SHIFT << 2));

	__asm __volatile("mov	%0=psr" : "=r"(psr));
	__asm __volatile("rsm	psr.ic|psr.i");
	ia64_srlz_i();
	ia64_set_ifa(VM_MAXUSER_ADDRESS);
	ia64_set_itir(PAGE_SHIFT << 2);
	ia64_srlz_d();
	__asm __volatile("itr.d	dtr[%0]=%1" :: "r"(5), "r"(pte));
	ia64_srlz_d();
	__asm __volatile("itr.i	itr[%0]=%1" :: "r"(2), "r"(pte));
	__asm __volatile("mov	psr.l=%0" :: "r" (psr));
	ia64_srlz_i();

	/* Expose the mapping to userland in ar.k5 */
	ia64_set_k5(VM_MAXUSER_ADDRESS);
}

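/*
 * PAL_FREQ_RATIOS reports each ratio with the numerator in the upper
 * 32 bits and the denominator in the lower 32 bits; freq_ratio()
 * scales the SAL base frequency by that ratio and rounds to the
 * nearest MHz. For example, a 200000000 Hz platform clock and an 11/2
 * processor ratio give (200000000 * 11) / 2 = 1100000000 Hz, which
 * rounds to 1100 MHz.
 */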
static u_int
freq_ratio(u_long base, u_long ratio)
{
	u_long f;

	f = (base * (ratio >> 32)) / (ratio & 0xfffffffful);
	return ((f + 500000) / 1000000);
}

static void
calculate_frequencies(void)
{
	struct ia64_sal_result sal;
	struct ia64_pal_result pal;
	register_t ie;

	ie = intr_disable();
	sal = ia64_sal_entry(SAL_FREQ_BASE, 0, 0, 0, 0, 0, 0, 0);
	pal = ia64_call_pal_static(PAL_FREQ_RATIOS, 0, 0, 0);
	intr_restore(ie);

	if (sal.sal_status == 0 && pal.pal_status == 0) {
		if (bootverbose) {
			printf("Platform clock frequency %ld Hz\n",
			       sal.sal_result[0]);
			printf("Processor ratio %ld/%ld, Bus ratio %ld/%ld, "
			       "ITC ratio %ld/%ld\n",
			       pal.pal_result[0] >> 32,
			       pal.pal_result[0] & ((1L << 32) - 1),
			       pal.pal_result[1] >> 32,
			       pal.pal_result[1] & ((1L << 32) - 1),
			       pal.pal_result[2] >> 32,
			       pal.pal_result[2] & ((1L << 32) - 1));
		}
		cpu_freq = freq_ratio(sal.sal_result[0], pal.pal_result[0]);
		bus_freq = freq_ratio(sal.sal_result[0], pal.pal_result[1]);
		itc_freq = freq_ratio(sal.sal_result[0], pal.pal_result[2]);
	}
}

struct ia64_init_return
ia64_init(void)
{
	struct ia64_init_return ret;
	struct efi_md *md;
	pt_entry_t *pbvm_pgtbl_ent, *pbvm_pgtbl_lim;
	char *p;
	vm_size_t mdlen;
	int metadata_missing;

	/*
	 * NO OUTPUT ALLOWED UNTIL FURTHER NOTICE.
	 */

	ia64_set_fpsr(IA64_FPSR_DEFAULT);

	/*
	 * Region 6 is direct mapped UC and region 7 is direct mapped
	 * WC. The details of this are controlled by the Alt {I,D}TLB
	 * handlers. Here we just make sure that they have the largest
	 * possible page size to minimise TLB usage.
	 */
	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (LOG2_ID_PAGE_SIZE << 2));
	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (LOG2_ID_PAGE_SIZE << 2));
	ia64_srlz_d();

	/* Initialize/setup physical memory data structures */
	ia64_physmem_init();

	/*
	 * Process the memory map. This gives us the PAL locations,
	 * the I/O port base address, and the available memory regions
	 * used to initialize the physical memory map.
	 */
	for (md = efi_md_first(); md != NULL; md = efi_md_next(md)) {
		mdlen = md->md_pages * EFI_PAGE_SIZE;
		switch (md->md_type) {
		case EFI_MD_TYPE_IOPORT:
			ia64_port_base = (uintptr_t)pmap_mapdev(md->md_phys,
			    mdlen);
			break;
		case EFI_MD_TYPE_PALCODE:
			ia64_pal_base = md->md_phys;
			ia64_pal_size = mdlen;
			/*FALLTHROUGH*/
		case EFI_MD_TYPE_BAD:
		case EFI_MD_TYPE_FIRMWARE:
		case EFI_MD_TYPE_RECLAIM:
		case EFI_MD_TYPE_RT_CODE:
		case EFI_MD_TYPE_RT_DATA:
			/* Don't use these memory regions. */
			ia64_physmem_track(md->md_phys, mdlen);
			break;
		case EFI_MD_TYPE_BS_CODE:
		case EFI_MD_TYPE_BS_DATA:
		case EFI_MD_TYPE_CODE:
		case EFI_MD_TYPE_DATA:
		case EFI_MD_TYPE_FREE:
			/* These are ok to use. */
			ia64_physmem_add(md->md_phys, mdlen);
			break;
		}
	}

	/*
	 * Remove the PBVM and its page table from phys_avail. The loader
	 * passes the physical address of the page table to us. The virtual
	 * address of the page table is fixed.
	 * Track the PBVM limit for later use.
	 */
	ia64_physmem_delete(bootinfo->bi_pbvm_pgtbl, bootinfo->bi_pbvm_pgtblsz);
	pbvm_pgtbl_ent = (void *)IA64_PBVM_PGTBL;
	pbvm_pgtbl_lim = (void *)(IA64_PBVM_PGTBL + bootinfo->bi_pbvm_pgtblsz);
	while (pbvm_pgtbl_ent < pbvm_pgtbl_lim) {
		if ((*pbvm_pgtbl_ent & PTE_PRESENT) == 0)
			break;
		ia64_physmem_delete(*pbvm_pgtbl_ent & PTE_PPN_MASK,
		    IA64_PBVM_PAGE_SIZE);
		pbvm_pgtbl_ent++;
	}

	/* Finalize physical memory data structures */
	ia64_physmem_fini();

	metadata_missing = 0;
	if (bootinfo->bi_modulep)
		preload_metadata = (caddr_t)bootinfo->bi_modulep;
	else
		metadata_missing = 1;

	if (envmode == 0 && bootinfo->bi_envp)
		kern_envp = (caddr_t)bootinfo->bi_envp;
	else
		kern_envp = static_env;

	/*
	 * Look at arguments passed to us and compute boothowto.
	 */
	boothowto = bootinfo->bi_boothowto;

	if (boothowto & RB_VERBOSE)
		bootverbose = 1;

	/*
	 * Wire things up so we can call the firmware.
	 */
	map_pal_code();
	efi_boot_minimal(bootinfo->bi_systab);
	ia64_xiv_init();
	ia64_sal_init();
	calculate_frequencies();

	set_cputicker(ia64_get_itc, (u_long)itc_freq * 1000000, 0);

	/*
	 * Setup the PCPU data for the bootstrap processor. It is needed
	 * by printf(). Also, since printf() has critical sections, we
	 * need to initialize at least pc_curthread.
	 */
	pcpup = &pcpu0;
	ia64_set_k4((u_int64_t)pcpup);
	pcpu_init(pcpup, 0, sizeof(pcpu0));
	dpcpu_init(ia64_physmem_alloc(DPCPU_SIZE, PAGE_SIZE), 0);
	cpu_pcpu_setup(pcpup, ~0U, ia64_get_lid());
	PCPU_SET(curthread, &thread0);

	/*
	 * Initialize the console before we print anything out.
	 */
	cninit();

	/* OUTPUT NOW ALLOWED */

	if (metadata_missing)
		printf("WARNING: loader(8) metadata is missing!\n");

	/* Get FPSWA interface */
	fpswa_iface = (bootinfo->bi_fpswa == 0) ? NULL :
	    (struct fpswa_iface *)IA64_PHYS_TO_RR7(bootinfo->bi_fpswa);

	/* Init basic tunables, including hz */
	init_param1();

	p = getenv("kernelname");
	if (p != NULL) {
		strlcpy(kernelname, p, sizeof(kernelname));
		freeenv(p);
	}

	init_param2(physmem);

	/*
	 * Initialize error message buffer (at end of core).
	 */
	msgbufp = ia64_physmem_alloc(msgbufsize, PAGE_SIZE);
	msgbufinit(msgbufp, msgbufsize);

	proc_linkup0(&proc0, &thread0);
	/*
	 * Init mapping for kernel stack for proc 0
	 */
	p = ia64_physmem_alloc(KSTACK_PAGES * PAGE_SIZE, PAGE_SIZE);
	thread0.td_kstack = (uintptr_t)p;
	thread0.td_kstack_pages = KSTACK_PAGES;

	mutex_init();

	/*
	 * Initialize the rest of proc 0's PCB.
	 *
	 * Set the kernel sp, reserving space for an (empty) trapframe,
	 * and make proc0's trapframe pointer point to it for sanity.
	 * Initialise proc0's backing store to start after u area.
	 */
	cpu_thread_alloc(&thread0);
	thread0.td_frame->tf_flags = FRAME_SYSCALL;
	thread0.td_pcb->pcb_special.sp =
	    (u_int64_t)thread0.td_frame - 16;
	thread0.td_pcb->pcb_special.bspstore = thread0.td_kstack;

	/*
	 * Initialize the virtual memory system.
	 */
	pmap_bootstrap();

	/*
	 * Initialize debuggers, and break into them if appropriate.
	 */
#ifdef DDB
	ksym_start = bootinfo->bi_symtab;
	ksym_end = bootinfo->bi_esymtab;
#endif

	kdb_init();

#ifdef KDB
	if (boothowto & RB_KDB)
		kdb_enter(KDB_WHY_BOOTFLAGS,
		    "Boot flags requested debugger\n");
#endif

	ia64_set_tpr(0);
	ia64_srlz_d();

	ret.bspstore = thread0.td_pcb->pcb_special.bspstore;
	ret.sp = thread0.td_pcb->pcb_special.sp;
	return (ret);
}

uint64_t
ia64_get_hcdp(void)
{

	return (bootinfo->bi_hcdp);
}

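/*
 * Zero a buffer: byte stores until the pointer is word aligned, an
 * unrolled loop of eight word stores for the bulk, then word and
 * byte stores for the tail.
 */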
void
bzero(void *buf, size_t len)
{
	caddr_t p = buf;

	while (((vm_offset_t) p & (sizeof(u_long) - 1)) && len) {
		*p++ = 0;
		len--;
	}
	while (len >= sizeof(u_long) * 8) {
		*(u_long*) p = 0;
		*((u_long*) p + 1) = 0;
		*((u_long*) p + 2) = 0;
		*((u_long*) p + 3) = 0;
		len -= sizeof(u_long) * 8;
		*((u_long*) p + 4) = 0;
		*((u_long*) p + 5) = 0;
		*((u_long*) p + 6) = 0;
		*((u_long*) p + 7) = 0;
		p += sizeof(u_long) * 8;
	}
	while (len >= sizeof(u_long)) {
		*(u_long*) p = 0;
		len -= sizeof(u_long);
		p += sizeof(u_long);
	}
	while (len) {
		*p++ = 0;
		len--;
	}
}

u_int
ia64_itc_freq(void)
{

	return (itc_freq);
}

void
DELAY(int n)
{
	u_int64_t start, end, now;

	sched_pin();

	start = ia64_get_itc();
	end = start + itc_freq * n;
	/* printf("DELAY from 0x%lx to 0x%lx\n", start, end); */
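	/*
	 * The second test below keeps us spinning across ITC wraparound:
	 * if "end" overflowed (end < start), wait until "now" wraps too.
	 */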
	do {
		now = ia64_get_itc();
	} while (now < end || (now > start && end < start));

	sched_unpin();
}

/*
 * Send an interrupt (signal) to a process.
 */
void
sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct proc *p;
	struct thread *td;
	struct trapframe *tf;
	struct sigacts *psp;
	struct sigframe sf, *sfp;
	u_int64_t sbs, sp;
	int oonstack;
	int sig;
	u_long code;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sig = ksi->ksi_signo;
	code = ksi->ksi_code;
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	tf = td->td_frame;
	sp = tf->tf_special.sp;
	oonstack = sigonstack(sp);
	sbs = 0;

	/* save user context */
	bzero(&sf, sizeof(struct sigframe));
	sf.sf_uc.uc_sigmask = *mask;
	sf.sf_uc.uc_stack = td->td_sigstk;
	sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;

	/*
	 * Allocate and validate space for the signal handler
	 * context. Note that if the stack is in P0 space, the
	 * call to grow() is a nop, and the useracc() check
	 * will fail if the process has not already allocated
	 * the space with a `brk'.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		sbs = (u_int64_t)td->td_sigstk.ss_sp;
		sbs = (sbs + 15) & ~15;
		sfp = (struct sigframe *)(sbs + td->td_sigstk.ss_size);
#if defined(COMPAT_43)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
#endif
	} else
		sfp = (struct sigframe *)sp;
	sfp = (struct sigframe *)((u_int64_t)(sfp - 1) & ~15);

	/* Fill in the siginfo structure for POSIX handlers. */
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		sf.sf_si = ksi->ksi_info;
		sf.sf_si.si_signo = sig;
		/*
		 * XXX this shouldn't be here after code in trap.c
		 * is fixed
		 */
		sf.sf_si.si_addr = (void*)tf->tf_special.ifa;
		code = (u_int64_t)&sfp->sf_si;
	}

	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	get_mcontext(td, &sf.sf_uc.uc_mcontext, 0);

	/* Copy the frame out to userland. */
	if (copyout(&sf, sfp, sizeof(sf)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
		return;
	}

	if ((tf->tf_flags & FRAME_SYSCALL) == 0) {
		tf->tf_special.psr &= ~IA64_PSR_RI;
		tf->tf_special.iip = ia64_get_k5() +
		    ((uint64_t)break_sigtramp - (uint64_t)ia64_gateway_page);
	} else
		tf->tf_special.iip = ia64_get_k5() +
		    ((uint64_t)epc_sigtramp - (uint64_t)ia64_gateway_page);

	/*
	 * Setup the trapframe to return to the signal trampoline. We pass
	 * information to the trampoline in the following registers:
	 *
	 *	gp	new backing store or NULL
	 *	r8	signal number
	 *	r9	signal code or siginfo pointer
	 *	r10	signal handler (function descriptor)
	 */
	tf->tf_special.sp = (u_int64_t)sfp - 16;
	tf->tf_special.gp = sbs;
	tf->tf_special.bspstore = sf.sf_uc.uc_mcontext.mc_special.bspstore;
	tf->tf_special.ndirty = 0;
	tf->tf_special.rnat = sf.sf_uc.uc_mcontext.mc_special.rnat;
	tf->tf_scratch.gr8 = sig;
	tf->tf_scratch.gr9 = code;
	tf->tf_scratch.gr10 = (u_int64_t)catcher;

	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}

/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * state to gain improper privileges.
 *
 * MPSAFE
 */
int
sys_sigreturn(struct thread *td,
	struct sigreturn_args /* {
		ucontext_t *sigcntxp;
	} */ *uap)
{
	ucontext_t uc;
	struct trapframe *tf;
	struct pcb *pcb;

	tf = td->td_frame;
	pcb = td->td_pcb;

	/*
	 * Fetch the entire context structure at once for speed.
	 * We don't use a normal argument to simplify RSE handling.
	 */
	if (copyin(uap->sigcntxp, (caddr_t)&uc, sizeof(uc)))
		return (EFAULT);

	set_mcontext(td, &uc.uc_mcontext);

#if defined(COMPAT_43)
	if (sigonstack(tf->tf_special.sp))
		td->td_sigstk.ss_flags |= SS_ONSTACK;
	else
		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
#endif
	kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0);

	return (EJUSTRETURN);
}

#ifdef COMPAT_FREEBSD4
int
freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap)
{

	return sys_sigreturn(td, (struct sigreturn_args *)uap);
}
#endif

/*
 * Construct a PCB from a trapframe. This is called from kdb_trap() where
 * we want to start a backtrace from the function that caused us to enter
 * the debugger. We have the context in the trapframe, but base the trace
 * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 * enough for a backtrace.
 */
void
makectx(struct trapframe *tf, struct pcb *pcb)
{

	pcb->pcb_special = tf->tf_special;
	pcb->pcb_special.__spare = ~0UL;	/* XXX see unwind.c */
	save_callee_saved(&pcb->pcb_preserved);
	save_callee_saved_fp(&pcb->pcb_preserved_fp);
}

int
ia64_flush_dirty(struct thread *td, struct _special *r)
{
	struct iovec iov;
	struct uio uio;
	uint64_t bspst, kstk, rnat;
	int error, locked;

	if (r->ndirty == 0)
		return (0);

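	/*
	 * Keep the kernel stack address congruent with the user backing
	 * store modulo 512 bytes, so the RNAT collection slots (one at
	 * every 0x1f8 boundary) line up in both.
	 */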
	kstk = td->td_kstack + (r->bspstore & 0x1ffUL);
	if (td == curthread) {
		__asm __volatile("mov	ar.rsc=0;;");
		__asm __volatile("mov	%0=ar.bspstore" : "=r"(bspst));
		/* Make sure we have all the user registers written out. */
		if (bspst - kstk < r->ndirty) {
			__asm __volatile("flushrs;;");
			__asm __volatile("mov	%0=ar.bspstore" : "=r"(bspst));
		}
		__asm __volatile("mov	%0=ar.rnat;;" : "=r"(rnat));
		__asm __volatile("mov	ar.rsc=3");
		error = copyout((void*)kstk, (void*)r->bspstore, r->ndirty);
		kstk += r->ndirty;
		r->rnat = (bspst > kstk && (bspst & 0x1ffL) < (kstk & 0x1ffL))
		    ? *(uint64_t*)(kstk | 0x1f8L) : rnat;
	} else {
		locked = PROC_LOCKED(td->td_proc);
		if (!locked)
			PHOLD(td->td_proc);
		iov.iov_base = (void*)(uintptr_t)kstk;
		iov.iov_len = r->ndirty;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = r->bspstore;
		uio.uio_resid = r->ndirty;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_rw = UIO_WRITE;
		uio.uio_td = td;
		error = proc_rwmem(td->td_proc, &uio);
		/*
		 * XXX proc_rwmem() doesn't currently return ENOSPC,
		 * so I think it can bogusly return 0. Neither do
		 * we allow short writes.
		 */
		if (uio.uio_resid != 0 && error == 0)
			error = ENOSPC;
		if (!locked)
			PRELE(td->td_proc);
	}

	r->bspstore += r->ndirty;
	r->ndirty = 0;
	return (error);
}

int
get_mcontext(struct thread *td, mcontext_t *mc, int flags)
{
	struct trapframe *tf;
	int error;

	tf = td->td_frame;
	bzero(mc, sizeof(*mc));
	mc->mc_special = tf->tf_special;
	error = ia64_flush_dirty(td, &mc->mc_special);
	if (tf->tf_flags & FRAME_SYSCALL) {
		mc->mc_flags |= _MC_FLAGS_SYSCALL_CONTEXT;
		mc->mc_scratch = tf->tf_scratch;
		if (flags & GET_MC_CLEAR_RET) {
			mc->mc_scratch.gr8 = 0;
			mc->mc_scratch.gr9 = 0;
			mc->mc_scratch.gr10 = 0;
			mc->mc_scratch.gr11 = 0;
		}
	} else {
		mc->mc_flags |= _MC_FLAGS_ASYNC_CONTEXT;
		mc->mc_scratch = tf->tf_scratch;
		mc->mc_scratch_fp = tf->tf_scratch_fp;
		/*
		 * XXX If the thread never used the high FP registers, we
		 * probably shouldn't waste time saving them.
		 */
		ia64_highfp_save(td);
		mc->mc_flags |= _MC_FLAGS_HIGHFP_VALID;
		mc->mc_high_fp = td->td_pcb->pcb_high_fp;
	}
	save_callee_saved(&mc->mc_preserved);
	save_callee_saved_fp(&mc->mc_preserved_fp);
	return (error);
}

int
set_mcontext(struct thread *td, const mcontext_t *mc)
{
	struct _special s;
	struct trapframe *tf;
	uint64_t psrmask;

	tf = td->td_frame;

	KASSERT((tf->tf_special.ndirty & ~PAGE_MASK) == 0,
	    ("Whoa there! We have more than 8KB of dirty registers!"));

	s = mc->mc_special;
	/*
	 * Only copy the user mask and the restart instruction bit from
	 * the new context.
	 */
	psrmask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL |
	    IA64_PSR_MFH | IA64_PSR_RI;
	s.psr = (tf->tf_special.psr & ~psrmask) | (s.psr & psrmask);
	/* We don't have any dirty registers of the new context. */
	s.ndirty = 0;
	if (mc->mc_flags & _MC_FLAGS_ASYNC_CONTEXT) {
		/*
		 * We can get an async context passed to us while we
		 * entered the kernel through a syscall: sigreturn(2)
		 * takes contexts that could previously be the result of
		 * a trap or interrupt.
		 * Hence, we cannot assert that the trapframe is not
		 * a syscall frame, but we can assert that it's at
		 * least an expected syscall.
		 */
		if (tf->tf_flags & FRAME_SYSCALL) {
			KASSERT(tf->tf_scratch.gr15 == SYS_sigreturn, ("foo"));
			tf->tf_flags &= ~FRAME_SYSCALL;
		}
		tf->tf_scratch = mc->mc_scratch;
		tf->tf_scratch_fp = mc->mc_scratch_fp;
		if (mc->mc_flags & _MC_FLAGS_HIGHFP_VALID)
			td->td_pcb->pcb_high_fp = mc->mc_high_fp;
	} else {
		KASSERT((tf->tf_flags & FRAME_SYSCALL) != 0, ("foo"));
		if ((mc->mc_flags & _MC_FLAGS_SYSCALL_CONTEXT) == 0) {
			s.cfm = tf->tf_special.cfm;
			s.iip = tf->tf_special.iip;
			tf->tf_scratch.gr15 = 0;	/* Clear syscall nr. */
		} else
			tf->tf_scratch = mc->mc_scratch;
	}
	tf->tf_special = s;
	restore_callee_saved(&mc->mc_preserved);
	restore_callee_saved_fp(&mc->mc_preserved_fp);

	return (0);
}

/*
 * Clear registers on exec.
 */
void
exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
{
	struct trapframe *tf;
	uint64_t *ksttop, *kst;

	tf = td->td_frame;
	ksttop = (uint64_t*)(td->td_kstack + tf->tf_special.ndirty +
	    (tf->tf_special.bspstore & 0x1ffUL));

	/*
	 * We can ignore up to 8KB of dirty registers by masking off the
	 * lower 13 bits in exception_restore() or epc_syscall(). This
	 * should be enough for a couple of years, but if there are more
	 * than 8KB of dirty registers, we lose track of the bottom of
	 * the kernel stack. The solution is to copy the active part of
	 * the kernel stack down 1 page (or 2, but not more than that)
	 * so that we always have less than 8KB of dirty registers.
	 */
	KASSERT((tf->tf_special.ndirty & ~PAGE_MASK) == 0,
	    ("Whoa there! We have more than 8KB of dirty registers!"));

	bzero(&tf->tf_special, sizeof(tf->tf_special));
	if ((tf->tf_flags & FRAME_SYSCALL) == 0) {	/* break syscalls. */
		bzero(&tf->tf_scratch, sizeof(tf->tf_scratch));
		bzero(&tf->tf_scratch_fp, sizeof(tf->tf_scratch_fp));
		tf->tf_special.cfm = (1UL<<63) | (3UL<<7) | 3UL;
		tf->tf_special.bspstore = IA64_BACKINGSTORE;
		/*
		 * Copy the arguments onto the kernel register stack so that
		 * they get loaded by the loadrs instruction. Skip over the
		 * NaT collection points.
		 */
		kst = ksttop - 1;
		if (((uintptr_t)kst & 0x1ff) == 0x1f8)
			*kst-- = 0;
		*kst-- = 0;
		if (((uintptr_t)kst & 0x1ff) == 0x1f8)
			*kst-- = 0;
		*kst-- = imgp->ps_strings;
		if (((uintptr_t)kst & 0x1ff) == 0x1f8)
			*kst-- = 0;
		*kst = stack;
		tf->tf_special.ndirty = (ksttop - kst) << 3;
	} else {				/* epc syscalls (default). */
		tf->tf_special.cfm = (3UL<<62) | (3UL<<7) | 3UL;
		tf->tf_special.bspstore = IA64_BACKINGSTORE + 24;
		/*
		 * Write values for out0, out1 and out2 to the user's backing
		 * store and arrange for them to be restored into the user's
		 * initial register frame.
		 * Assumes that (bspstore & 0x1f8) < 0x1e0.
		 */
		suword((caddr_t)tf->tf_special.bspstore - 24, stack);
		suword((caddr_t)tf->tf_special.bspstore - 16, imgp->ps_strings);
		suword((caddr_t)tf->tf_special.bspstore -  8, 0);
	}

	tf->tf_special.iip = imgp->entry_addr;
	tf->tf_special.sp = (stack & ~15) - 16;
	tf->tf_special.rsc = 0xf;
	tf->tf_special.fpsr = IA64_FPSR_DEFAULT;
	tf->tf_special.psr = IA64_PSR_IC | IA64_PSR_I | IA64_PSR_IT |
	    IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_DFH | IA64_PSR_BN |
	    IA64_PSR_CPL_USER;
}

int
ptrace_set_pc(struct thread *td, unsigned long addr)
{
	uint64_t slot;

	switch (addr & 0xFUL) {
	case 0:
		slot = IA64_PSR_RI_0;
		break;
	case 1:
		/* XXX we need to deal with MLX bundles here */
		slot = IA64_PSR_RI_1;
		break;
	case 2:
		slot = IA64_PSR_RI_2;
		break;
	default:
		return (EINVAL);
	}

	td->td_frame->tf_special.iip = addr & ~0x0FULL;
	td->td_frame->tf_special.psr =
	    (td->td_frame->tf_special.psr & ~IA64_PSR_RI) | slot;
	return (0);
}

int
ptrace_single_step(struct thread *td)
{
	struct trapframe *tf;

	/*
	 * There's no way to set single stepping when we're leaving the
	 * kernel through the EPC syscall path. The way we solve this is
	 * by enabling the lower-privilege trap so that we re-enter the
	 * kernel as soon as the privilege level changes. See trap.c for
	 * how we proceed from there.
	 */
	tf = td->td_frame;
	if (tf->tf_flags & FRAME_SYSCALL)
		tf->tf_special.psr |= IA64_PSR_LP;
	else
		tf->tf_special.psr |= IA64_PSR_SS;
	return (0);
}

int
ptrace_clear_single_step(struct thread *td)
{
	struct trapframe *tf;

	/*
	 * Clear any and all status bits we may use to implement single
	 * stepping.
	 */
	tf = td->td_frame;
	tf->tf_special.psr &= ~IA64_PSR_SS;
	tf->tf_special.psr &= ~IA64_PSR_LP;
	tf->tf_special.psr &= ~IA64_PSR_TB;
	return (0);
}

int
fill_regs(struct thread *td, struct reg *regs)
{
	struct trapframe *tf;

	tf = td->td_frame;
	regs->r_special = tf->tf_special;
	regs->r_scratch = tf->tf_scratch;
	save_callee_saved(&regs->r_preserved);
	return (0);
}

int
set_regs(struct thread *td, struct reg *regs)
{
	struct trapframe *tf;
	int error;

	tf = td->td_frame;
	error = ia64_flush_dirty(td, &tf->tf_special);
	if (!error) {
		tf->tf_special = regs->r_special;
		tf->tf_special.bspstore += tf->tf_special.ndirty;
		tf->tf_special.ndirty = 0;
		tf->tf_scratch = regs->r_scratch;
		restore_callee_saved(&regs->r_preserved);
	}
	return (error);
}

int
fill_dbregs(struct thread *td, struct dbreg *dbregs)
{

	return (ENOSYS);
}

int
set_dbregs(struct thread *td, struct dbreg *dbregs)
{

	return (ENOSYS);
}

int
fill_fpregs(struct thread *td, struct fpreg *fpregs)
{
	struct trapframe *frame = td->td_frame;
	struct pcb *pcb = td->td_pcb;

	/* Save the high FP registers. */
	ia64_highfp_save(td);

	fpregs->fpr_scratch = frame->tf_scratch_fp;
	save_callee_saved_fp(&fpregs->fpr_preserved);
	fpregs->fpr_high = pcb->pcb_high_fp;
	return (0);
}

int
set_fpregs(struct thread *td, struct fpreg *fpregs)
{
	struct trapframe *frame = td->td_frame;
	struct pcb *pcb = td->td_pcb;

	/* Throw away the high FP registers (should be redundant). */
	ia64_highfp_drop(td);

	frame->tf_scratch_fp = fpregs->fpr_scratch;
	restore_callee_saved_fp(&fpregs->fpr_preserved);
	pcb->pcb_high_fp = fpregs->fpr_high;
	return (0);
}

void
ia64_sync_icache(vm_offset_t va, vm_offset_t sz)
{
	vm_offset_t lim;

	if (!ia64_sync_icache_needed)
		return;

	lim = va + sz;
	while (va < lim) {
		ia64_fc_i(va);
		va += 32;	/* XXX */
	}

	ia64_sync_i();
	ia64_srlz_i();
}
