/*-
 * Copyright (c) 2009 Advanced Computing Technologies LLC
 * Written by: John H. Baldwin <jhb@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Support for x86 machine check architecture.
 */
31192050Sjhb
32192050Sjhb#include <sys/cdefs.h>
33192050Sjhb__FBSDID("$FreeBSD$");
34192050Sjhb
35208921Sjhb#ifdef __amd64__
36208921Sjhb#define	DEV_APIC
37208921Sjhb#else
38208556Sjhb#include "opt_apic.h"
39208921Sjhb#endif
40208556Sjhb
41192050Sjhb#include <sys/param.h>
42208507Sjhb#include <sys/bus.h>
43208507Sjhb#include <sys/interrupt.h>
44192050Sjhb#include <sys/kernel.h>
45192050Sjhb#include <sys/lock.h>
46192050Sjhb#include <sys/malloc.h>
47192050Sjhb#include <sys/mutex.h>
48192050Sjhb#include <sys/proc.h>
49192050Sjhb#include <sys/sched.h>
50192050Sjhb#include <sys/smp.h>
51192050Sjhb#include <sys/sysctl.h>
52192050Sjhb#include <sys/systm.h>
53233793Sjhb#include <sys/taskqueue.h>
54208507Sjhb#include <machine/intr_machdep.h>
55208507Sjhb#include <machine/apicvar.h>
56200064Savg#include <machine/cputypes.h>
57214630Sjhb#include <x86/mca.h>
58192050Sjhb#include <machine/md_var.h>
59192050Sjhb#include <machine/specialreg.h>
60192050Sjhb
/* Modes for mca_scan() */
enum scan_mode {
	POLLED,		/* Periodic scan from the timer/taskqueue (may sleep). */
	MCE,		/* Scan from the machine check exception handler. */
	CMCI,		/* Scan from a corrected MC interrupt. */
};
67208507Sjhb
#ifdef DEV_APIC
/*
 * State maintained for each monitored MCx bank to control the
 * corrected machine check interrupt threshold.
 */
struct cmc_state {
	int	max_threshold;	/* Largest threshold this bank supports
				   (probed in cmci_monitor()). */
	int	last_intr;	/* Time (in ticks) of the last CMCI. */
};
#endif
78208507Sjhb
/* Internal wrapper linking an MC record onto mca_records/mca_freelist. */
struct mca_internal {
	struct mca_record rec;			/* The recorded event. */
	int		logged;			/* Non-zero once reported via mca_log(). */
	STAILQ_ENTRY(mca_internal) link;	/* List linkage. */
};
84192050Sjhb
static MALLOC_DEFINE(M_MCA, "MCA", "Machine Check Architecture");

static int mca_count;		/* Number of records stored. */
static int mca_banks;		/* Number of per-CPU register banks. */

static SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RD, NULL,
    "Machine Check Architecture");

/* Tunable: disable all MCA support when set to 0. */
static int mca_enabled = 1;
TUNABLE_INT("hw.mca.enabled", &mca_enabled);
SYSCTL_INT(_hw_mca, OID_AUTO, enabled, CTLFLAG_RDTUN, &mca_enabled, 0,
    "Administrative toggle for machine check support");

/* Tunable: selects which AMD Family 10h Erratum 383 workaround is used. */
static int amd10h_L1TP = 1;
TUNABLE_INT("hw.mca.amd10h_L1TP", &amd10h_L1TP);
SYSCTL_INT(_hw_mca, OID_AUTO, amd10h_L1TP, CTLFLAG_RDTUN, &amd10h_L1TP, 0,
    "Administrative toggle for logging of level one TLB parity (L1TP) errors");

int workaround_erratum383;
SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RD, &workaround_erratum383, 0,
    "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?");

/* Preallocated records for contexts that cannot call a sleeping malloc(). */
static STAILQ_HEAD(, mca_internal) mca_freelist;
static int mca_freecount;
/* All recorded events, exported via the hw.mca.records sysctl. */
static STAILQ_HEAD(, mca_internal) mca_records;
static struct callout mca_timer;
static int mca_ticks = 3600;	/* Check hourly by default. */
static struct taskqueue *mca_tq;
static struct task mca_refill_task, mca_scan_task;
static struct mtx mca_lock;	/* Spin lock for the lists/counts above. */

#ifdef DEV_APIC
static struct cmc_state **cmc_state;	/* Indexed by cpuid, bank */
static int cmc_throttle = 60;	/* Time in seconds to throttle CMCI. */
#endif
120192050Sjhb
121192050Sjhbstatic int
122208507Sjhbsysctl_positive_int(SYSCTL_HANDLER_ARGS)
123192050Sjhb{
124192050Sjhb	int error, value;
125192050Sjhb
126208507Sjhb	value = *(int *)arg1;
127192050Sjhb	error = sysctl_handle_int(oidp, &value, 0, req);
128192050Sjhb	if (error || req->newptr == NULL)
129192050Sjhb		return (error);
130192050Sjhb	if (value <= 0)
131192050Sjhb		return (EINVAL);
132208507Sjhb	*(int *)arg1 = value;
133192050Sjhb	return (0);
134192050Sjhb}
135192050Sjhb
/*
 * Sysctl handler exporting a single machine check record to userland.
 * The record index is the sole name component; the matching entry of
 * mca_records is copied out as a struct mca_record.
 */
static int
sysctl_mca_records(SYSCTL_HANDLER_ARGS)
{
	int *name = (int *)arg1;
	u_int namelen = arg2;
	struct mca_record record;
	struct mca_internal *rec;
	int i;

	if (namelen != 1)
		return (EINVAL);

	/* Cheap unlocked rejection of out-of-range indices. */
	if (name[0] < 0 || name[0] >= mca_count)
		return (EINVAL);

	mtx_lock_spin(&mca_lock);
	/* Re-check under the lock in case mca_count changed meanwhile. */
	if (name[0] >= mca_count) {
		mtx_unlock_spin(&mca_lock);
		return (EINVAL);
	}
	i = 0;
	/* Walk to the requested index; copy so we can drop the lock. */
	STAILQ_FOREACH(rec, &mca_records, link) {
		if (i == name[0]) {
			record = rec->rec;
			break;
		}
		i++;
	}
	mtx_unlock_spin(&mca_lock);
	return (SYSCTL_OUT(req, &record, sizeof(record)));
}
167192050Sjhb
/*
 * Decode the transaction type field (bits 3:2) of an MCA error code:
 * Instruction, Data, or Generic.
 */
static const char *
mca_error_ttype(uint16_t mca_error)
{
	static const char *ttypes[] = { "I", "D", "G" };
	int tt;

	tt = (mca_error & 0x000c) >> 2;
	if (tt < 3)
		return (ttypes[tt]);
	return ("?");
}
182192050Sjhb
/*
 * Decode the memory hierarchy level field (bits 1:0) of an MCA error
 * code: L0, L1, L2, or LG (generic).
 */
static const char *
mca_error_level(uint16_t mca_error)
{
	static const char *levels[] = { "L0", "L1", "L2", "LG" };

	/* The two-bit field always indexes within the table. */
	return (levels[mca_error & 0x0003]);
}
199192050Sjhb
/*
 * Decode the request type field (bits 7:4) of a cache or bus MCA
 * error code.
 */
static const char *
mca_error_request(uint16_t mca_error)
{
	static const char *reqs[] = {
		"ERR", "RD", "WR", "DRD", "DWR", "IRD", "PREFETCH",
		"EVICT", "SNOOP"
	};
	int req;

	req = (mca_error & 0x00f0) >> 4;
	if (req <= 0x8)
		return (reqs[req]);
	return ("???");
}
226192050Sjhb
/*
 * Decode the memory transaction type field (bits 6:4) of a memory
 * controller MCA error code.
 */
static const char *
mca_error_mmtype(uint16_t mca_error)
{
	static const char *mmtypes[] = { "GEN", "RD", "WR", "AC", "MS" };
	int mmtype;

	mmtype = (mca_error & 0x0070) >> 4;
	if (mmtype <= 0x4)
		return (mmtypes[mmtype]);
	return ("???");
}
245205214Sjhb
/*
 * Dump details about a single machine check to the console.  The
 * low 16 bits of the status word hold the architectural MCA error
 * code; simple codes are matched exactly and compound codes are
 * matched by mask below.
 */
static void __nonnull(1)
mca_log(const struct mca_record *rec)
{
	uint16_t mca_error;

	printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank,
	    (long long)rec->mr_status);
	printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n",
	    (long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status);
	printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor,
	    rec->mr_cpu_id, rec->mr_apic_id);
	printf("MCA: CPU %d ", rec->mr_cpu);
	if (rec->mr_status & MC_STATUS_UC)
		printf("UNCOR ");
	else {
		printf("COR ");
		/* The corrected-error count is only defined with CMCI. */
		if (rec->mr_mcg_cap & MCG_CAP_CMCI_P)
			printf("(%lld) ", ((long long)rec->mr_status &
			    MC_STATUS_COR_COUNT) >> 38);
	}
	if (rec->mr_status & MC_STATUS_PCC)
		printf("PCC ");
	if (rec->mr_status & MC_STATUS_OVER)
		printf("OVER ");
	mca_error = rec->mr_status & MC_STATUS_MCA_ERROR;
	switch (mca_error) {
		/* Simple error codes. */
	case 0x0000:
		printf("no error");
		break;
	case 0x0001:
		printf("unclassified error");
		break;
	case 0x0002:
		printf("ucode ROM parity error");
		break;
	case 0x0003:
		printf("external error");
		break;
	case 0x0004:
		printf("FRC error");
		break;
	case 0x0005:
		printf("internal parity error");
		break;
	case 0x0400:
		printf("internal timer error");
		break;
	default:
		/* Remaining 0000 01xx ... codes are internal errors. */
		if ((mca_error & 0xfc00) == 0x0400) {
			printf("internal error %x", mca_error & 0x03ff);
			break;
		}

		/* Compound error codes. */

		/* Memory hierarchy error. */
		if ((mca_error & 0xeffc) == 0x000c) {
			printf("%s memory error", mca_error_level(mca_error));
			break;
		}

		/* TLB error. */
		if ((mca_error & 0xeff0) == 0x0010) {
			printf("%sTLB %s error", mca_error_ttype(mca_error),
			    mca_error_level(mca_error));
			break;
		}

		/* Memory controller error. */
		if ((mca_error & 0xef80) == 0x0080) {
			printf("%s channel ", mca_error_mmtype(mca_error));
			/* Channel number 0xf means "unknown channel". */
			if ((mca_error & 0x000f) != 0x000f)
				printf("%d", mca_error & 0x000f);
			else
				printf("??");
			printf(" memory error");
			break;
		}

		/* Cache error. */
		if ((mca_error & 0xef00) == 0x0100) {
			printf("%sCACHE %s %s error",
			    mca_error_ttype(mca_error),
			    mca_error_level(mca_error),
			    mca_error_request(mca_error));
			break;
		}

		/* Bus and/or Interconnect error. */
		if ((mca_error & 0xe800) == 0x0800) {
			printf("BUS%s ", mca_error_level(mca_error));
			/* Participation: bits 10:9. */
			switch ((mca_error & 0x0600) >> 9) {
			case 0:
				printf("Source");
				break;
			case 1:
				printf("Responder");
				break;
			case 2:
				printf("Observer");
				break;
			default:
				printf("???");
				break;
			}
			printf(" %s ", mca_error_request(mca_error));
			/* Memory/IO: bits 3:2. */
			switch ((mca_error & 0x000c) >> 2) {
			case 0:
				printf("Memory");
				break;
			case 2:
				printf("I/O");
				break;
			case 3:
				printf("Other");
				break;
			default:
				printf("???");
				break;
			}
			if (mca_error & 0x0100)
				printf(" timed out");
			break;
		}

		printf("unknown error %x", mca_error);
		break;
	}
	printf("\n");
	if (rec->mr_status & MC_STATUS_ADDRV)
		printf("MCA: Address 0x%llx\n", (long long)rec->mr_addr);
	if (rec->mr_status & MC_STATUS_MISCV)
		printf("MCA: Misc 0x%llx\n", (long long)rec->mr_misc);
}
382192050Sjhb
/*
 * Check a single machine check bank on the current CPU.  Returns 0 if
 * the bank holds no valid event.  Otherwise the event, the global MC
 * state, and the identity of the current CPU are copied into '*rec',
 * the bank is cleared (correctable errors only), and 1 is returned.
 * Must run on the CPU whose bank is being examined.
 */
static int __nonnull(2)
mca_check_status(int bank, struct mca_record *rec)
{
	uint64_t status;
	u_int p[4];

	status = rdmsr(MSR_MC_STATUS(bank));
	if (!(status & MC_STATUS_VAL))
		return (0);

	/* Save exception information. */
	rec->mr_status = status;
	rec->mr_bank = bank;
	rec->mr_addr = 0;
	if (status & MC_STATUS_ADDRV)
		rec->mr_addr = rdmsr(MSR_MC_ADDR(bank));
	rec->mr_misc = 0;
	if (status & MC_STATUS_MISCV)
		rec->mr_misc = rdmsr(MSR_MC_MISC(bank));
	rec->mr_tsc = rdtsc();
	rec->mr_apic_id = PCPU_GET(apic_id);
	rec->mr_mcg_cap = rdmsr(MSR_MCG_CAP);
	rec->mr_mcg_status = rdmsr(MSR_MCG_STATUS);
	rec->mr_cpu_id = cpu_id;
	rec->mr_cpu_vendor_id = cpu_vendor_id;
	rec->mr_cpu = PCPU_GET(cpuid);

	/*
	 * Clear machine check.  Don't do this for uncorrectable
	 * errors so that the BIOS can see them.
	 */
	if (!(rec->mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) {
		wrmsr(MSR_MC_STATUS(bank), 0);
		/* CPUID serializes the preceding status-clearing write. */
		do_cpuid(0, p);
	}
	return (1);
}
420200033Savg
/*
 * Top up the preallocated record freelist so that scans running in
 * interrupt/exception context (which cannot sleep in malloc) always
 * have records available.  May sleep; callers must be in a context
 * where M_WAITOK allocation is legal.
 */
static void
mca_fill_freelist(void)
{
	struct mca_internal *rec;
	int desired;

	/*
	 * Ensure we have at least one record for each bank and one
	 * record per CPU.
	 */
	desired = imax(mp_ncpus, mca_banks);
	mtx_lock_spin(&mca_lock);
	while (mca_freecount < desired) {
		/* Drop the spin lock around the sleeping allocation. */
		mtx_unlock_spin(&mca_lock);
		rec = malloc(sizeof(*rec), M_MCA, M_WAITOK);
		mtx_lock_spin(&mca_lock);
		STAILQ_INSERT_TAIL(&mca_freelist, rec, link);
		mca_freecount++;
	}
	mtx_unlock_spin(&mca_lock);
}
442200033Savg
/* Task handler: replenish the record freelist from sleepable context. */
static void
mca_refill(void *context, int pending)
{

	mca_fill_freelist();
}
449233709Sjhb
/*
 * Store a copy of 'record' on the global event list.  In POLLED mode
 * we may sleep, so the record is allocated directly; in MCE/CMCI
 * context we must take a preallocated record from the freelist and,
 * for CMCI, schedule a task to refill it afterwards.
 */
static void __nonnull(2)
mca_record_entry(enum scan_mode mode, const struct mca_record *record)
{
	struct mca_internal *rec;

	if (mode == POLLED) {
		rec = malloc(sizeof(*rec), M_MCA, M_WAITOK);
		mtx_lock_spin(&mca_lock);
	} else {
		mtx_lock_spin(&mca_lock);
		rec = STAILQ_FIRST(&mca_freelist);
		if (rec == NULL) {
			/* Freelist exhausted: log the event and drop it. */
			printf("MCA: Unable to allocate space for an event.\n");
			mca_log(record);
			mtx_unlock_spin(&mca_lock);
			return;
		}
		STAILQ_REMOVE_HEAD(&mca_freelist, link);
		mca_freecount--;
	}

	rec->rec = *record;
	rec->logged = 0;
	STAILQ_INSERT_TAIL(&mca_records, rec, link);
	mca_count++;
	mtx_unlock_spin(&mca_lock);
	/* Refill the freelist from task context (we cannot sleep here). */
	if (mode == CMCI)
		taskqueue_enqueue_fast(mca_tq, &mca_refill_task);
}
479200033Savg
480208556Sjhb#ifdef DEV_APIC
481192050Sjhb/*
482208507Sjhb * Update the interrupt threshold for a CMCI.  The strategy is to use
483208507Sjhb * a low trigger that interrupts as soon as the first event occurs.
484208507Sjhb * However, if a steady stream of events arrive, the threshold is
485208507Sjhb * increased until the interrupts are throttled to once every
486208507Sjhb * cmc_throttle seconds or the periodic scan.  If a periodic scan
487208507Sjhb * finds that the threshold is too high, it is lowered.
488208507Sjhb */
489208507Sjhbstatic void
490208507Sjhbcmci_update(enum scan_mode mode, int bank, int valid, struct mca_record *rec)
491208507Sjhb{
492208507Sjhb	struct cmc_state *cc;
493208507Sjhb	uint64_t ctl;
494208507Sjhb	u_int delta;
495208507Sjhb	int count, limit;
496208507Sjhb
497208507Sjhb	/* Fetch the current limit for this bank. */
498208507Sjhb	cc = &cmc_state[PCPU_GET(cpuid)][bank];
499208507Sjhb	ctl = rdmsr(MSR_MC_CTL2(bank));
500208507Sjhb	count = (rec->mr_status & MC_STATUS_COR_COUNT) >> 38;
501208507Sjhb	delta = (u_int)(ticks - cc->last_intr);
502208507Sjhb
503208507Sjhb	/*
504208507Sjhb	 * If an interrupt was received less than cmc_throttle seconds
505208507Sjhb	 * since the previous interrupt and the count from the current
506208507Sjhb	 * event is greater than or equal to the current threshold,
507208507Sjhb	 * double the threshold up to the max.
508208507Sjhb	 */
509208507Sjhb	if (mode == CMCI && valid) {
510208507Sjhb		limit = ctl & MC_CTL2_THRESHOLD;
511208507Sjhb		if (delta < cmc_throttle && count >= limit &&
512208507Sjhb		    limit < cc->max_threshold) {
513208507Sjhb			limit = min(limit << 1, cc->max_threshold);
514208507Sjhb			ctl &= ~MC_CTL2_THRESHOLD;
515208507Sjhb			ctl |= limit;
516208507Sjhb			wrmsr(MSR_MC_CTL2(bank), limit);
517208507Sjhb		}
518208507Sjhb		cc->last_intr = ticks;
519208507Sjhb		return;
520208507Sjhb	}
521208507Sjhb
522208507Sjhb	/*
523208507Sjhb	 * When the banks are polled, check to see if the threshold
524208507Sjhb	 * should be lowered.
525208507Sjhb	 */
526208507Sjhb	if (mode != POLLED)
527208507Sjhb		return;
528208507Sjhb
529208507Sjhb	/* If a CMCI occured recently, do nothing for now. */
530208507Sjhb	if (delta < cmc_throttle)
531208507Sjhb		return;
532208507Sjhb
533208507Sjhb	/*
534208507Sjhb	 * Compute a new limit based on the average rate of events per
535208507Sjhb	 * cmc_throttle seconds since the last interrupt.
536208507Sjhb	 */
537208507Sjhb	if (valid) {
538208507Sjhb		count = (rec->mr_status & MC_STATUS_COR_COUNT) >> 38;
539208507Sjhb		limit = count * cmc_throttle / delta;
540208507Sjhb		if (limit <= 0)
541208507Sjhb			limit = 1;
542208507Sjhb		else if (limit > cc->max_threshold)
543208507Sjhb			limit = cc->max_threshold;
544208507Sjhb	} else
545208507Sjhb		limit = 1;
546208507Sjhb	if ((ctl & MC_CTL2_THRESHOLD) != limit) {
547208507Sjhb		ctl &= ~MC_CTL2_THRESHOLD;
548208507Sjhb		ctl |= limit;
549208507Sjhb		wrmsr(MSR_MC_CTL2(bank), limit);
550208507Sjhb	}
551208507Sjhb}
552208556Sjhb#endif
553208507Sjhb
/*
 * This scans all the machine check banks of the current CPU to see if
 * there are any machine checks.  Any non-recoverable errors are
 * reported immediately via mca_log().  The current thread must be
 * pinned when this is called.  The 'mode' parameter indicates if we
 * are being called from the MC exception handler, the CMCI handler,
 * or the periodic poller.  In the MC exception case this function
 * returns true if the system is restartable.  Otherwise, it returns a
 * count of the number of valid MC records found.
 */
static int
mca_scan(enum scan_mode mode)
{
	struct mca_record rec;
	uint64_t mcg_cap, ucmask;
	int count, i, recoverable, valid;

	count = 0;
	recoverable = 1;
	/* Uncorrected or processor-context-corrupt errors are fatal. */
	ucmask = MC_STATUS_UC | MC_STATUS_PCC;

	/* When handling a MCE#, treat the OVER flag as non-restartable. */
	if (mode == MCE)
		ucmask |= MC_STATUS_OVER;
	mcg_cap = rdmsr(MSR_MCG_CAP);
	for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
#ifdef DEV_APIC
		/*
		 * For a CMCI, only check banks this CPU is
		 * responsible for.
		 */
		if (mode == CMCI && !(PCPU_GET(cmci_mask) & 1 << i))
			continue;
#endif

		valid = mca_check_status(i, &rec);
		if (valid) {
			count++;
			/* Log unrecoverable errors straight away. */
			if (rec.mr_status & ucmask) {
				recoverable = 0;
				mtx_lock_spin(&mca_lock);
				mca_log(&rec);
				mtx_unlock_spin(&mca_lock);
			}
			mca_record_entry(mode, &rec);
		}

#ifdef DEV_APIC
		/*
		 * If this is a bank this CPU monitors via CMCI,
		 * update the threshold.
		 */
		if (PCPU_GET(cmci_mask) & 1 << i)
			cmci_update(mode, i, valid, &rec);
#endif
	}
	/* Only polled context may sleep, so refill the freelist here. */
	if (mode == POLLED)
		mca_fill_freelist();
	return (mode == MCE ? recoverable : count);
}
614192050Sjhb
/*
 * Scan the machine check banks on all CPUs by binding to each CPU in
 * turn.  If any of the CPUs contained new machine check records, log
 * them to the console.  Runs as the mca_scan_task handler on the MCA
 * taskqueue.
 */
static void
mca_scan_cpus(void *context, int pending)
{
	struct mca_internal *mca;
	struct thread *td;
	int count, cpu;

	/* Top up the freelist before scanning. */
	mca_fill_freelist();
	td = curthread;
	count = 0;
	thread_lock(td);
	CPU_FOREACH(cpu) {
		/* Bind to 'cpu' so mca_scan() reads that CPU's MSRs. */
		sched_bind(td, cpu);
		thread_unlock(td);
		count += mca_scan(POLLED);
		thread_lock(td);
		sched_unbind(td);
	}
	thread_unlock(td);
	/* Report any records not already logged. */
	if (count != 0) {
		mtx_lock_spin(&mca_lock);
		STAILQ_FOREACH(mca, &mca_records, link) {
			if (!mca->logged) {
				mca->logged = 1;
				mca_log(&mca->rec);
			}
		}
		mtx_unlock_spin(&mca_lock);
	}
}
650192050Sjhb
/*
 * Callout handler: defer the actual scan to the taskqueue (the scan
 * may sleep) and re-arm the timer for the next interval.
 */
static void
mca_periodic_scan(void *arg)
{

	taskqueue_enqueue_fast(mca_tq, &mca_scan_task);
	callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, NULL);
}
658192050Sjhb
659192050Sjhbstatic int
660192050Sjhbsysctl_mca_scan(SYSCTL_HANDLER_ARGS)
661192050Sjhb{
662192050Sjhb	int error, i;
663192050Sjhb
664192050Sjhb	i = 0;
665192050Sjhb	error = sysctl_handle_int(oidp, &i, 0, req);
666192050Sjhb	if (error)
667192050Sjhb		return (error);
668192050Sjhb	if (i)
669233793Sjhb		taskqueue_enqueue_fast(mca_tq, &mca_scan_task);
670192050Sjhb	return (0);
671192050Sjhb}
672192050Sjhb
/*
 * Create the taskqueue used for freelist refills and bank scans.
 * mca_banks is only set by mca_setup(), so it is zero when MCA is
 * disabled or unsupported.
 */
static void
mca_createtq(void *dummy)
{
	if (mca_banks <= 0)
		return;

	mca_tq = taskqueue_create_fast("mca", M_WAITOK,
	    taskqueue_thread_enqueue, &mca_tq);
	taskqueue_start_threads(&mca_tq, 1, PI_SWI(SWI_TQ), "mca taskq");
}
SYSINIT(mca_createtq, SI_SUB_CONFIGURE, SI_ORDER_ANY, mca_createtq, NULL);
684233793Sjhb
/* Arm the periodic scan timer once all CPUs are up (SI_SUB_SMP). */
static void
mca_startup(void *dummy)
{

	/* Nothing to do when MCA was not set up. */
	if (mca_banks <= 0)
		return;

	callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, NULL);
}
SYSINIT(mca_startup, SI_SUB_SMP, SI_ORDER_ANY, mca_startup, NULL);
695192050Sjhb
696208556Sjhb#ifdef DEV_APIC
697192050Sjhbstatic void
698233709Sjhbcmci_setup(void)
699192050Sjhb{
700208507Sjhb	int i;
701192050Sjhb
702208507Sjhb	cmc_state = malloc((mp_maxid + 1) * sizeof(struct cmc_state **),
703208507Sjhb	    M_MCA, M_WAITOK);
704208507Sjhb	for (i = 0; i <= mp_maxid; i++)
705233709Sjhb		cmc_state[i] = malloc(sizeof(struct cmc_state) * mca_banks,
706208507Sjhb		    M_MCA, M_WAITOK | M_ZERO);
707208507Sjhb	SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
708208507Sjhb	    "cmc_throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
709208507Sjhb	    &cmc_throttle, 0, sysctl_positive_int, "I",
710208507Sjhb	    "Interval in seconds to throttle corrected MC interrupts");
711208507Sjhb}
712208556Sjhb#endif
713208507Sjhb
/*
 * One-time global initialization of the MCA machinery: lock, lists,
 * tasks, timer, and sysctl nodes.  Called from _mca_init() on the
 * boot CPU only.
 */
static void
mca_setup(uint64_t mcg_cap)
{

	/*
	 * On AMD Family 10h processors, unless logging of level one TLB
	 * parity (L1TP) errors is disabled, enable the recommended workaround
	 * for Erratum 383.
	 */
	if (cpu_vendor_id == CPU_VENDOR_AMD &&
	    CPUID_TO_FAMILY(cpu_id) == 0x10 && amd10h_L1TP)
		workaround_erratum383 = 1;

	mca_banks = mcg_cap & MCG_CAP_COUNT;
	mtx_init(&mca_lock, "mca", NULL, MTX_SPIN);
	STAILQ_INIT(&mca_records);
	TASK_INIT(&mca_scan_task, 0, mca_scan_cpus, NULL);
	callout_init(&mca_timer, CALLOUT_MPSAFE);
	STAILQ_INIT(&mca_freelist);
	TASK_INIT(&mca_refill_task, 0, mca_refill, NULL);
	mca_fill_freelist();
	SYSCTL_ADD_INT(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
	    "count", CTLFLAG_RD, &mca_count, 0, "Record count");
	SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
	    "interval", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &mca_ticks,
	    0, sysctl_positive_int, "I",
	    "Periodic interval in seconds to scan for machine checks");
	SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
	    "records", CTLFLAG_RD, sysctl_mca_records, "Machine check records");
	SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
	    "force_scan", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
	    sysctl_mca_scan, "I", "Force an immediate scan for machine checks");
#ifdef DEV_APIC
	if (mcg_cap & MCG_CAP_CMCI_P)
		cmci_setup();
#endif
}
751192050Sjhb
752208556Sjhb#ifdef DEV_APIC
/*
 * See if we should monitor CMCI for this bank.  If CMCI_EN is already
 * set in MC_CTL2, then another CPU is responsible for this bank, so
 * ignore it.  If CMCI_EN returns zero after being set, then this bank
 * does not support CMCI_EN.  If this CPU sets CMCI_EN, then it should
 * now monitor this bank.
 */
static void
cmci_monitor(int i)
{
	struct cmc_state *cc;
	uint64_t ctl;

	KASSERT(i < mca_banks, ("CPU %d has more MC banks", PCPU_GET(cpuid)));

	ctl = rdmsr(MSR_MC_CTL2(i));
	if (ctl & MC_CTL2_CMCI_EN)
		/* Already monitored by another CPU. */
		return;

	/* Set the threshold to one event for now. */
	ctl &= ~MC_CTL2_THRESHOLD;
	ctl |= MC_CTL2_CMCI_EN | 1;
	wrmsr(MSR_MC_CTL2(i), ctl);
	ctl = rdmsr(MSR_MC_CTL2(i));
	if (!(ctl & MC_CTL2_CMCI_EN))
		/* This bank does not support CMCI. */
		return;

	cc = &cmc_state[PCPU_GET(cpuid)][i];

	/*
	 * Determine maximum threshold: write all-ones to the threshold
	 * field and read back what the hardware actually latched.
	 */
	ctl &= ~MC_CTL2_THRESHOLD;
	ctl |= 0x7fff;
	wrmsr(MSR_MC_CTL2(i), ctl);
	ctl = rdmsr(MSR_MC_CTL2(i));
	cc->max_threshold = ctl & MC_CTL2_THRESHOLD;

	/* Start off with a threshold of 1. */
	ctl &= ~MC_CTL2_THRESHOLD;
	ctl |= 1;
	wrmsr(MSR_MC_CTL2(i), ctl);

	/* Mark this bank as monitored. */
	PCPU_SET(cmci_mask, PCPU_GET(cmci_mask) | 1 << i);
}
799209212Sjhb
/*
 * For resume, reset the threshold for any banks we monitor back to
 * one and throw away the timestamp of the last interrupt.
 */
static void
cmci_resume(int i)
{
	struct cmc_state *cc;
	uint64_t ctl;

	KASSERT(i < mca_banks, ("CPU %d has more MC banks", PCPU_GET(cpuid)));

	/* Ignore banks not monitored by this CPU. */
	if (!(PCPU_GET(cmci_mask) & 1 << i))
		return;

	cc = &cmc_state[PCPU_GET(cpuid)][i];
	/* Make the next delta computed in cmci_update() look large. */
	cc->last_intr = -ticks;
	ctl = rdmsr(MSR_MC_CTL2(i));
	ctl &= ~MC_CTL2_THRESHOLD;
	ctl |= MC_CTL2_CMCI_EN | 1;
	wrmsr(MSR_MC_CTL2(i), ctl);
}
823208556Sjhb#endif
824208507Sjhb
/*
 * Initializes per-CPU machine check registers and enables corrected
 * machine check interrupts.  Runs on every CPU; 'boot' distinguishes
 * first-time initialization from resume.
 */
static void
_mca_init(int boot)
{
	uint64_t mcg_cap;
	uint64_t ctl, mask;
	int i, skip;

	/* MCE is required. */
	if (!mca_enabled || !(cpu_feature & CPUID_MCE))
		return;

	if (cpu_feature & CPUID_MCA) {
		if (boot)
			PCPU_SET(cmci_mask, 0);

		mcg_cap = rdmsr(MSR_MCG_CAP);
		if (mcg_cap & MCG_CAP_CTL_P)
			/* Enable MCA features. */
			wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE);
		/* Global state is set up once, on the BSP at boot. */
		if (PCPU_GET(cpuid) == 0 && boot)
			mca_setup(mcg_cap);

		/*
		 * Disable logging of level one TLB parity (L1TP) errors by
		 * the data cache as an alternative workaround for AMD Family
		 * 10h Erratum 383.  Unlike the recommended workaround, there
		 * is no performance penalty to this workaround.  However,
		 * L1TP errors will go unreported.
		 */
		if (cpu_vendor_id == CPU_VENDOR_AMD &&
		    CPUID_TO_FAMILY(cpu_id) == 0x10 && !amd10h_L1TP) {
			mask = rdmsr(MSR_MC0_CTL_MASK);
			if ((mask & (1UL << 5)) == 0)
				wrmsr(MSR_MC0_CTL_MASK, mask | (1UL << 5));
		}
		for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
			/* By default enable logging of all errors. */
			ctl = 0xffffffffffffffffUL;
			skip = 0;

			if (cpu_vendor_id == CPU_VENDOR_INTEL) {
				/*
				 * For P6 models before Nehalem MC0_CTL is
				 * always enabled and reserved.
				 */
				if (i == 0 && CPUID_TO_FAMILY(cpu_id) == 0x6
				    && CPUID_TO_MODEL(cpu_id) < 0x1a)
					skip = 1;
			} else if (cpu_vendor_id == CPU_VENDOR_AMD) {
				/* BKDG for Family 10h: unset GartTblWkEn. */
				if (i == 4 && CPUID_TO_FAMILY(cpu_id) >= 0xf)
					ctl &= ~(1UL << 10);
			}

			if (!skip)
				wrmsr(MSR_MC_CTL(i), ctl);

#ifdef DEV_APIC
			/* Claim (boot) or re-arm (resume) CMCI on this bank. */
			if (mcg_cap & MCG_CAP_CMCI_P) {
				if (boot)
					cmci_monitor(i);
				else
					cmci_resume(i);
			}
#endif

			/* Clear all errors. */
			wrmsr(MSR_MC_STATUS(i), 0);
		}

#ifdef DEV_APIC
		/* Enable the CMCI LVT entry once any bank is monitored. */
		if (PCPU_GET(cmci_mask) != 0 && boot)
			lapic_enable_cmc();
#endif
	}

	/* Finally, allow machine check exceptions on this CPU. */
	load_cr4(rcr4() | CR4_MCE);
}
907192050Sjhb
/*
 * Boot-time entry point: performs the full machine-check setup.  Must
 * be executed on each CPU as it is brought up.
 */
void
mca_init(void)
{

	_mca_init(1);
}
915209212Sjhb
/*
 * Resume entry point: re-programs the machine-check registers without
 * repeating the one-time boot work.  Must be executed on each CPU when
 * the system resumes.
 */
void
mca_resume(void)
{

	_mca_init(0);
}
923209212Sjhb
924208621Sjhb/*
925208621Sjhb * The machine check registers for the BSP cannot be initialized until
926208621Sjhb * the local APIC is initialized.  This happens at SI_SUB_CPU,
927208621Sjhb * SI_ORDER_SECOND.
928208621Sjhb */
929208621Sjhbstatic void
930208621Sjhbmca_init_bsp(void *arg __unused)
931208621Sjhb{
932208621Sjhb
933208621Sjhb	mca_init();
934208621Sjhb}
935208621SjhbSYSINIT(mca_init_bsp, SI_SUB_CPU, SI_ORDER_ANY, mca_init_bsp, NULL);
936208621Sjhb
/* Called when a machine check exception fires. */
void
mca_intr(void)
{
	uint64_t mcg_status;
	int recoverable;

	if (!(cpu_feature & CPUID_MCA)) {
		/*
		 * Just print the values of the old Pentium registers
		 * and panic.
		 */
		printf("MC Type: 0x%jx  Address: 0x%jx\n",
		    (uintmax_t)rdmsr(MSR_P5_MC_TYPE),
		    (uintmax_t)rdmsr(MSR_P5_MC_ADDR));
		panic("Machine check");
	}

	/*
	 * Scan the banks and check for any non-recoverable errors.
	 *
	 * NOTE(review): mca_scan(MCE)'s return value is treated here as a
	 * recoverability flag, while cmc_intr() treats mca_scan(CMCI) as a
	 * record count; the dual contract lives in mca_scan() (not visible
	 * in this chunk) -- confirm there.
	 */
	recoverable = mca_scan(MCE);
	mcg_status = rdmsr(MSR_MCG_STATUS);
	/* If RIPV is clear, the interrupted RIP cannot be returned to. */
	if (!(mcg_status & MCG_STATUS_RIPV))
		recoverable = 0;

	/* Clear MCIP. */
	wrmsr(MSR_MCG_STATUS, mcg_status & ~MCG_STATUS_MCIP);
	if (!recoverable)
		panic("Unrecoverable machine check exception");
}
966208507Sjhb
#ifdef DEV_APIC
/* Called for a CMCI (correctable machine check interrupt). */
void
cmc_intr(void)
{
	struct mca_internal *mca;
	int count;

	/*
	 * Serialize MCA bank scanning to prevent collisions from
	 * sibling threads.
	 *
	 * NOTE(review): no lock is visibly taken around mca_scan() here;
	 * presumably the serialization is internal to mca_scan() or comes
	 * from the per-CPU CMCI bank ownership (cmci_mask) -- confirm.
	 */
	count = mca_scan(CMCI);

	/*
	 * If we found anything, log them to the console.  mca_lock
	 * protects the shared record list; each record is logged once,
	 * with the 'logged' flag marking records already reported.
	 */
	if (count != 0) {
		mtx_lock_spin(&mca_lock);
		STAILQ_FOREACH(mca, &mca_records, link) {
			if (!mca->logged) {
				mca->logged = 1;
				mca_log(&mca->rec);
			}
		}
		mtx_unlock_spin(&mca_lock);
	}
}
#endif
994