// SPDX-License-Identifier: GPL-2.0-only
#include <linux/module.h>
#include <linux/slab.h>

#include <asm/cpu.h>

#include "mce_amd.h"

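/* Per-family MCA bank decoder callbacks, selected in mce_amd_init(). */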
static struct amd_decoder_ops fam_ops;

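/*
 * Number of valid extended-error-code bits to look at: four by default,
 * widened by mce_amd_init() on Fam15h/16h and on SMCA systems.
 */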
static u8 xec_mask	 = 0xf;

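/*
 * Optional DRAM ECC decoder hook. A platform EDAC driver (e.g. amd64_edac)
 * can register a handler here so that DRAM ECC errors are additionally
 * decoded down to the node/DIMM level.
 */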
static void (*decode_dram_ecc)(int node_id, struct mce *m);

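/**
 * amd_register_ecc_decoder - register a DRAM ECC decoder callback
 * @f: callback invoked with the AMD node id and the MCE being decoded
 *
 * Only one decoder is supported at a time; a later call replaces the
 * previously registered one.
 */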
void amd_register_ecc_decoder(void (*f)(int, struct mce *))
{
	decode_dram_ecc = f;
}
EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);

void amd_unregister_ecc_decoder(void (*f)(int, struct mce *))
{
	if (decode_dram_ecc) {
		WARN_ON(decode_dram_ecc != f);

		decode_dram_ecc = NULL;
	}
}
EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);

/*
 * string representation for the different MCA reported error types, see F3x48
 * or MSR0000_0411.
 */

/* transaction type */
static const char * const tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" };

/* cache level */
static const char * const ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" };

/* memory transaction type */
static const char * const rrrr_msgs[] = {
       "GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP"
};

/* participating processor */
const char * const pp_msgs[] = { "SRC", "RES", "OBS", "GEN" };
EXPORT_SYMBOL_GPL(pp_msgs);

/* request timeout */
static const char * const to_msgs[] = { "no timeout", "timed out" };

/* memory or i/o */
static const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };

/* internal error type */
static const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" };

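/*
 * Fam15h MC1 (instruction fetch) extended error code strings. The array is
 * packed: xec 0x0-0xa index it directly, higher xec values are remapped by
 * the offsets applied in f15h_mc1_mce().
 */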
static const char * const f15h_mc1_mce_desc[] = {
	"UC during a demand linefill from L2",
	"Parity error during data load from IC",
	"Parity error for IC valid bit",
	"Main tag parity error",
	"Parity error in prediction queue",
	"PFB data/address parity error",
	"Parity error in the branch status reg",
	"PFB promotion address error",
	"Tag error during probe/victimization",
	"Parity error for IC probe tag valid bit",
	"PFB non-cacheable bit parity error",
	"PFB valid bit parity error",			/* xec = 0xd */
	"Microcode Patch Buffer",			/* xec = 0x10 */
	"uop queue",
	"insn buffer",
	"predecode buffer",
	"fetch address FIFO",
	"dispatch uop queue"
};

static const char * const f15h_mc2_mce_desc[] = {
	"Fill ECC error on data fills",			/* xec = 0x4 */
	"Fill parity error on insn fills",
	"Prefetcher request FIFO parity error",
	"PRQ address parity error",
	"PRQ data parity error",
	"WCC Tag ECC error",
	"WCC Data ECC error",
	"WCB Data parity error",
	"VB Data ECC or parity error",
	"L2 Tag ECC error",				/* xec = 0x10 */
	"Hard L2 Tag ECC error",
	"Multiple hits on L2 tag",
	"XAB parity error",
	"PRB address parity error"
};

static const char * const mc4_mce_desc[] = {
	"DRAM ECC error detected on the NB",
	"CRC error detected on HT link",
	"Link-defined sync error packets detected on HT link",
	"HT Master abort",
	"HT Target abort",
	"Invalid GART PTE entry during GART table walk",
	"Unsupported atomic RMW received from an IO link",
	"Watchdog timeout due to lack of progress",
	"DRAM ECC error detected on the NB",
	"SVM DMA Exclusion Vector error",
	"HT data error detected on link",
	"Protocol error (link, L3, probe filter)",
	"NB internal arrays parity error",
	"DRAM addr/ctl signals parity error",
	"IO link transmission error",
	"L3 data cache ECC error",			/* xec = 0x1c */
	"L3 cache tag error",
	"L3 LRU parity bits error",
	"ECC Error in the Probe Filter directory"
};

static const char * const mc5_mce_desc[] = {
	"CPU Watchdog timer expire",
	"Wakeup array dest tag",
	"AG payload array",
	"EX payload array",
	"IDRF array",
	"Retire dispatch queue",
	"Mapper checkpoint array",
	"Physical register file EX0 port",
	"Physical register file EX1 port",
	"Physical register file AG0 port",
	"Physical register file AG1 port",
	"Flag register file",
	"DE error occurred",
	"Retire status queue"
};

static const char * const mc6_mce_desc[] = {
	"Hardware Assertion",
	"Free List",
	"Physical Register File",
	"Retire Queue",
	"Scheduler table",
	"Status Register File",
};

static bool f12h_mc0_mce(u16 ec, u8 xec)
{
	bool ret = false;

	if (MEM_ERROR(ec)) {
		u8 ll = LL(ec);
		ret = true;

		if (ll == LL_L2)
			pr_cont("during L1 linefill from L2.\n");
		else if (ll == LL_L1)
			pr_cont("Data/Tag %s error.\n", R4_MSG(ec));
		else
			ret = false;
	}
	return ret;
}

static bool f10h_mc0_mce(u16 ec, u8 xec)
{
	if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
		pr_cont("during data scrub.\n");
		return true;
	}
	return f12h_mc0_mce(ec, xec);
}

static bool k8_mc0_mce(u16 ec, u8 xec)
{
	if (BUS_ERROR(ec)) {
		pr_cont("during system linefill.\n");
		return true;
	}

	return f10h_mc0_mce(ec, xec);
}

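/*
 * MC0 decoder for the small-core "cat" families (0x14 Bobcat, 0x16 Jaguar),
 * see the fam_ops assignments in mce_amd_init().
 */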
static bool cat_mc0_mce(u16 ec, u8 xec)
{
	u8 r4	 = R4(ec);
	bool ret = true;

	if (MEM_ERROR(ec)) {

		if (TT(ec) != TT_DATA || LL(ec) != LL_L1)
			return false;

		switch (r4) {
		case R4_DRD:
		case R4_DWR:
			pr_cont("Data/Tag parity error due to %s.\n",
				(r4 == R4_DRD ? "load/hw prf" : "store"));
			break;
		case R4_EVICT:
			pr_cont("Copyback parity error on a tag miss.\n");
			break;
		case R4_SNOOP:
			pr_cont("Tag parity error during snoop.\n");
			break;
		default:
			ret = false;
		}
	} else if (BUS_ERROR(ec)) {

		if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG)
			return false;

		pr_cont("System read data error on a ");

		switch (r4) {
		case R4_RD:
			pr_cont("TLB reload.\n");
			break;
		case R4_DWR:
			pr_cont("store.\n");
			break;
		case R4_DRD:
			pr_cont("load.\n");
			break;
		default:
			ret = false;
		}
	} else {
		ret = false;
	}

	return ret;
}

static bool f15h_mc0_mce(u16 ec, u8 xec)
{
	bool ret = true;

	if (MEM_ERROR(ec)) {

		switch (xec) {
		case 0x0:
			pr_cont("Data Array access error.\n");
			break;

		case 0x1:
			pr_cont("UC error during a linefill from L2/NB.\n");
			break;

		case 0x2:
		case 0x11:
			pr_cont("STQ access error.\n");
			break;

		case 0x3:
			pr_cont("SCB access error.\n");
			break;

		case 0x10:
			pr_cont("Tag error.\n");
			break;

		case 0x12:
			pr_cont("LDQ access error.\n");
			break;

		default:
			ret = false;
		}
	} else if (BUS_ERROR(ec)) {

		if (!xec)
			pr_cont("System Read Data Error.\n");
		else
			pr_cont(" Internal error condition type %d.\n", xec);
	} else if (INT_ERROR(ec)) {
		if (xec <= 0x1f)
			pr_cont("Hardware Assert.\n");
		else
			ret = false;

	} else
		ret = false;

	return ret;
}

static void decode_mc0_mce(struct mce *m)
{
	u16 ec = EC(m->status);
	u8 xec = XEC(m->status, xec_mask);

	pr_emerg(HW_ERR "MC0 Error: ");

	/* TLB error signatures are the same across families */
	if (TLB_ERROR(ec)) {
		if (TT(ec) == TT_DATA) {
			pr_cont("%s TLB %s.\n", LL_MSG(ec),
				((xec == 2) ? "locked miss"
					    : (xec ? "multimatch" : "parity")));
			return;
		}
	} else if (fam_ops.mc0_mce(ec, xec))
		;
	else
		pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
}

static bool k8_mc1_mce(u16 ec, u8 xec)
{
	u8 ll	 = LL(ec);
	bool ret = true;

	if (!MEM_ERROR(ec))
		return false;

	if (ll == 0x2)
		pr_cont("during a linefill from L2.\n");
	else if (ll == 0x1) {
		switch (R4(ec)) {
		case R4_IRD:
			pr_cont("Parity error during data load.\n");
			break;

		case R4_EVICT:
			pr_cont("Copyback Parity/Victim error.\n");
			break;

		case R4_SNOOP:
			pr_cont("Tag Snoop error.\n");
			break;

		default:
			ret = false;
			break;
		}
	} else
		ret = false;

	return ret;
}

static bool cat_mc1_mce(u16 ec, u8 xec)
{
	u8 r4    = R4(ec);
	bool ret = true;

	if (!MEM_ERROR(ec))
		return false;

	if (TT(ec) != TT_INSTR)
		return false;

	if (r4 == R4_IRD)
		pr_cont("Data/tag array parity error for a tag hit.\n");
	else if (r4 == R4_SNOOP)
		pr_cont("Tag error during snoop/victimization.\n");
	else if (xec == 0x0)
		pr_cont("Tag parity error from victim castout.\n");
	else if (xec == 0x2)
		pr_cont("Microcode patch RAM parity error.\n");
	else
		ret = false;

	return ret;
}

static bool f15h_mc1_mce(u16 ec, u8 xec)
{
	bool ret = true;

	if (!MEM_ERROR(ec))
		return false;

	switch (xec) {
	case 0x0 ... 0xa:
		pr_cont("%s.\n", f15h_mc1_mce_desc[xec]);
		break;

	case 0xd:
		pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]);
		break;

	case 0x10:
		pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]);
		break;

	case 0x11 ... 0x15:
		pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]);
		break;

	default:
		ret = false;
	}
	return ret;
}

static void decode_mc1_mce(struct mce *m)
{
	u16 ec = EC(m->status);
	u8 xec = XEC(m->status, xec_mask);

	pr_emerg(HW_ERR "MC1 Error: ");

	if (TLB_ERROR(ec))
		pr_cont("%s TLB %s.\n", LL_MSG(ec),
			(xec ? "multimatch" : "parity error"));
	else if (BUS_ERROR(ec)) {
		bool k8 = (boot_cpu_data.x86 == 0xf);

		pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
	} else if (INT_ERROR(ec)) {
		if (xec <= 0x3f)
			pr_cont("Hardware Assert.\n");
		else
			goto wrong_mc1_mce;
	} else if (fam_ops.mc1_mce(ec, xec))
		;
	else
		goto wrong_mc1_mce;

	return;

wrong_mc1_mce:
	pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n");
}

static bool k8_mc2_mce(u16 ec, u8 xec)
{
	bool ret = true;

	if (xec == 0x1)
		pr_cont(" in the write data buffers.\n");
	else if (xec == 0x3)
		pr_cont(" in the victim data buffers.\n");
	else if (xec == 0x2 && MEM_ERROR(ec))
		pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec));
	else if (xec == 0x0) {
		if (TLB_ERROR(ec))
			pr_cont("%s error in a Page Descriptor Cache or Guest TLB.\n",
				TT_MSG(ec));
		else if (BUS_ERROR(ec))
			pr_cont(": %s/ECC error in data read from NB: %s.\n",
				R4_MSG(ec), PP_MSG(ec));
		else if (MEM_ERROR(ec)) {
			u8 r4 = R4(ec);

			if (r4 >= 0x7)
				pr_cont(": %s error during data copyback.\n",
					R4_MSG(ec));
			else if (r4 <= 0x1)
				pr_cont(": %s parity/ECC error during data access from L2.\n",
					R4_MSG(ec));
			else
				ret = false;
		} else
			ret = false;
	} else
		ret = false;

	return ret;
}

static bool f15h_mc2_mce(u16 ec, u8 xec)
{
	bool ret = true;

	if (TLB_ERROR(ec)) {
		if (xec == 0x0)
			pr_cont("Data parity TLB read error.\n");
		else if (xec == 0x1)
			pr_cont("Poison data provided for TLB fill.\n");
		else
			ret = false;
	} else if (BUS_ERROR(ec)) {
		if (xec > 2)
			ret = false;

		pr_cont("Error during attempted NB data read.\n");
	} else if (MEM_ERROR(ec)) {
		switch (xec) {
		case 0x4 ... 0xc:
			pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]);
			break;

		case 0x10 ... 0x14:
			pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]);
			break;

		default:
			ret = false;
		}
	} else if (INT_ERROR(ec)) {
		if (xec <= 0x3f)
			pr_cont("Hardware Assert.\n");
		else
			ret = false;
	}

	return ret;
}

static bool f16h_mc2_mce(u16 ec, u8 xec)
{
	u8 r4 = R4(ec);

	if (!MEM_ERROR(ec))
		return false;

	switch (xec) {
	case 0x04 ... 0x05:
		pr_cont("%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O');
		break;

	case 0x09 ... 0x0b:
	case 0x0d ... 0x0f:
		pr_cont("ECC error in L2 tag (%s).\n",
			((r4 == R4_GEN)   ? "BankReq" :
			((r4 == R4_SNOOP) ? "Prb"     : "Fill")));
		break;

	case 0x10 ... 0x19:
	case 0x1b:
		pr_cont("ECC error in L2 data array (%s).\n",
			(((r4 == R4_RD) && !(xec & 0x3)) ? "Hit"  :
			((r4 == R4_GEN)   ? "Attr" :
			((r4 == R4_EVICT) ? "Vict" : "Fill"))));
		break;

	case 0x1c ... 0x1d:
	case 0x1f:
		pr_cont("Parity error in L2 attribute bits (%s).\n",
			((r4 == R4_RD)  ? "Hit"  :
			((r4 == R4_GEN) ? "Attr" : "Fill")));
		break;

	default:
		return false;
	}

	return true;
}

static void decode_mc2_mce(struct mce *m)
{
	u16 ec = EC(m->status);
	u8 xec = XEC(m->status, xec_mask);

	pr_emerg(HW_ERR "MC2 Error: ");

	if (!fam_ops.mc2_mce(ec, xec))
		pr_cont(HW_ERR "Corrupted MC2 MCE info?\n");
}

static void decode_mc3_mce(struct mce *m)
{
	u16 ec = EC(m->status);
	u8 xec = XEC(m->status, xec_mask);

	if (boot_cpu_data.x86 >= 0x14) {
		pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family, please report on LKML.\n");
		return;
	}

	pr_emerg(HW_ERR "MC3 Error");

	if (xec == 0x0) {
		u8 r4 = R4(ec);

		if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
			goto wrong_mc3_mce;

		pr_cont(" during %s.\n", R4_MSG(ec));
	} else
		goto wrong_mc3_mce;

	return;

 wrong_mc3_mce:
	pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n");
}

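/*
 * MC4 is the northbridge bank. xec values 0x1c-0x1f map to the tail of
 * mc4_mce_desc[], hence the offset of 13 applied below.
 */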
static void decode_mc4_mce(struct mce *m)
{
	unsigned int fam = x86_family(m->cpuid);
	int node_id = topology_amd_node_id(m->extcpu);
	u16 ec = EC(m->status);
	u8 xec = XEC(m->status, 0x1f);
	u8 offset = 0;

	pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id);

	switch (xec) {
	case 0x0 ... 0xe:

		/* special handling for DRAM ECCs */
		if (xec == 0x0 || xec == 0x8) {
			/* no ECCs on F11h */
			if (fam == 0x11)
				goto wrong_mc4_mce;

			pr_cont("%s.\n", mc4_mce_desc[xec]);

			if (decode_dram_ecc)
				decode_dram_ecc(node_id, m);
			return;
		}
		break;

	case 0xf:
		if (TLB_ERROR(ec))
			pr_cont("GART Table Walk data error.\n");
		else if (BUS_ERROR(ec))
			pr_cont("DMA Exclusion Vector Table Walk error.\n");
		else
			goto wrong_mc4_mce;
		return;

	case 0x19:
		if (fam == 0x15 || fam == 0x16)
			pr_cont("Compute Unit Data Error.\n");
		else
			goto wrong_mc4_mce;
		return;

	case 0x1c ... 0x1f:
		offset = 13;
		break;

	default:
		goto wrong_mc4_mce;
	}

	pr_cont("%s.\n", mc4_mce_desc[xec - offset]);
	return;

 wrong_mc4_mce:
	pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n");
}

static void decode_mc5_mce(struct mce *m)
{
	unsigned int fam = x86_family(m->cpuid);
	u16 ec = EC(m->status);
	u8 xec = XEC(m->status, xec_mask);

	if (fam == 0xf || fam == 0x11)
		goto wrong_mc5_mce;

	pr_emerg(HW_ERR "MC5 Error: ");

	if (INT_ERROR(ec)) {
		if (xec <= 0x1f) {
			pr_cont("Hardware Assert.\n");
			return;
		} else
			goto wrong_mc5_mce;
	}

	if (xec == 0x0 || xec == 0xc)
		pr_cont("%s.\n", mc5_mce_desc[xec]);
	else if (xec <= 0xd)
		pr_cont("%s parity error.\n", mc5_mce_desc[xec]);
	else
		goto wrong_mc5_mce;

	return;

 wrong_mc5_mce:
	pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n");
}

static void decode_mc6_mce(struct mce *m)
{
	u8 xec = XEC(m->status, xec_mask);

	pr_emerg(HW_ERR "MC6 Error: ");

	if (xec > 0x5)
		goto wrong_mc6_mce;

	pr_cont("%s parity error.\n", mc6_mce_desc[xec]);
	return;

 wrong_mc6_mce:
	pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
}

static const char * const smca_long_names[] = {
	[SMCA_LS ... SMCA_LS_V2]	= "Load Store Unit",
	[SMCA_IF]			= "Instruction Fetch Unit",
	[SMCA_L2_CACHE]			= "L2 Cache",
	[SMCA_DE]			= "Decode Unit",
	[SMCA_RESERVED]			= "Reserved",
	[SMCA_EX]			= "Execution Unit",
	[SMCA_FP]			= "Floating Point Unit",
	[SMCA_L3_CACHE]			= "L3 Cache",
	[SMCA_CS ... SMCA_CS_V2]	= "Coherent Slave",
	[SMCA_PIE]			= "Power, Interrupts, etc.",

	/* UMC v2 is separate because both of them can exist in a single system. */
	[SMCA_UMC]			= "Unified Memory Controller",
	[SMCA_UMC_V2]			= "Unified Memory Controller v2",
	[SMCA_PB]			= "Parameter Block",
	[SMCA_PSP ... SMCA_PSP_V2]	= "Platform Security Processor",
	[SMCA_SMU ... SMCA_SMU_V2]	= "System Management Unit",
	[SMCA_MP5]			= "Microprocessor 5 Unit",
	[SMCA_MPDMA]			= "MPDMA Unit",
	[SMCA_NBIO]			= "Northbridge IO Unit",
	[SMCA_PCIE ... SMCA_PCIE_V2]	= "PCI Express Unit",
	[SMCA_XGMI_PCS]			= "Ext Global Memory Interconnect PCS Unit",
	[SMCA_NBIF]			= "NBIF Unit",
	[SMCA_SHUB]			= "System Hub Unit",
	[SMCA_SATA]			= "SATA Unit",
	[SMCA_USB]			= "USB Unit",
	[SMCA_GMI_PCS]			= "Global Memory Interconnect PCS Unit",
	[SMCA_XGMI_PHY]			= "Ext Global Memory Interconnect PHY Unit",
	[SMCA_WAFL_PHY]			= "WAFL PHY Unit",
	[SMCA_GMI_PHY]			= "Global Memory Interconnect PHY Unit",
};

static const char *smca_get_long_name(enum smca_bank_types t)
{
	if (t >= N_SMCA_BANK_TYPES)
		return NULL;

	return smca_long_names[t];
}

/* Decode errors according to Scalable MCA specification */
static void decode_smca_error(struct mce *m)
{
	enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank);
	u8 xec = XEC(m->status, xec_mask);

	if (bank_type >= N_SMCA_BANK_TYPES)
		return;

	if (bank_type == SMCA_RESERVED) {
		pr_emerg(HW_ERR "Bank %d is reserved.\n", m->bank);
		return;
	}

	pr_emerg(HW_ERR "%s Ext. Error Code: %d", smca_get_long_name(bank_type), xec);

	if ((bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) &&
	    xec == 0 && decode_dram_ecc)
		decode_dram_ecc(topology_amd_node_id(m->extcpu), m);
}

static inline void amd_decode_err_code(u16 ec)
{
	if (INT_ERROR(ec)) {
		pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec));
		return;
	}

	pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec));

	if (BUS_ERROR(ec))
		pr_cont(", mem/io: %s", II_MSG(ec));
	else
		pr_cont(", tx: %s", TT_MSG(ec));

	if (MEM_ERROR(ec) || BUS_ERROR(ec)) {
		pr_cont(", mem-tx: %s", R4_MSG(ec));

		if (BUS_ERROR(ec))
			pr_cont(", part-proc: %s (%s)", PP_MSG(ec), TO_MSG(ec));
	}

	pr_cont("\n");
}

static const char *decode_error_status(struct mce *m)
{
	if (m->status & MCI_STATUS_UC) {
		if (m->status & MCI_STATUS_PCC)
			return "System Fatal error.";
		if (m->mcgstatus & MCG_STATUS_RIPV)
			return "Uncorrected, software restartable error.";
		return "Uncorrected, software containable error.";
	}

	if (m->status & MCI_STATUS_DEFERRED)
		return "Deferred error, no action required.";

	return "Corrected error, no action required.";
}

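/*
 * Notifier callback on the MCE decode chain: pretty-print the MCi_STATUS
 * flags, then hand off to the SMCA decoder or the per-bank legacy decoders.
 */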
static int
amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
{
	struct mce *m = (struct mce *)data;
	unsigned int fam = x86_family(m->cpuid);
	int ecc;

	if (m->kflags & MCE_HANDLED_CEC)
		return NOTIFY_DONE;

	pr_emerg(HW_ERR "%s\n", decode_error_status(m));

	pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
		m->extcpu,
		fam, x86_model(m->cpuid), x86_stepping(m->cpuid),
		m->bank,
		((m->status & MCI_STATUS_OVER)	? "Over"  : "-"),
		((m->status & MCI_STATUS_UC)	? "UE"	  :
		 (m->status & MCI_STATUS_DEFERRED) ? "-"  : "CE"),
		((m->status & MCI_STATUS_MISCV)	? "MiscV" : "-"),
		((m->status & MCI_STATUS_ADDRV)	? "AddrV" : "-"),
		((m->status & MCI_STATUS_PCC)	? "PCC"	  : "-"));

	if (boot_cpu_has(X86_FEATURE_SMCA)) {
		u32 low, high;
		u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);

		if (!rdmsr_safe(addr, &low, &high) &&
		    (low & MCI_CONFIG_MCAX))
			pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));

		pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
	}

	/* Decode MCA_STATUS[46:45] (CECC|UECC), i.e. bits [14:13] of the high half, together. */
	ecc = (m->status >> 45) & 0x3;
	if (ecc)
		pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));

	if (fam >= 0x15) {
		pr_cont("|%s", (m->status & MCI_STATUS_DEFERRED ? "Deferred" : "-"));

		/* F15h, bank4, bit 43 is part of McaStatSubCache. */
		if (fam != 0x15 || m->bank != 4)
			pr_cont("|%s", (m->status & MCI_STATUS_POISON ? "Poison" : "-"));
	}

	if (fam >= 0x17)
		pr_cont("|%s", (m->status & MCI_STATUS_SCRUB ? "Scrub" : "-"));

	pr_cont("]: 0x%016llx\n", m->status);

	if (m->status & MCI_STATUS_ADDRV)
		pr_emerg(HW_ERR "Error Addr: 0x%016llx\n", m->addr);

	if (m->ppin)
		pr_emerg(HW_ERR "PPIN: 0x%016llx\n", m->ppin);

	if (boot_cpu_has(X86_FEATURE_SMCA)) {
		pr_emerg(HW_ERR "IPID: 0x%016llx", m->ipid);

		if (m->status & MCI_STATUS_SYNDV)
			pr_cont(", Syndrome: 0x%016llx", m->synd);

		pr_cont("\n");

		decode_smca_error(m);
		goto err_code;
	}

	if (m->tsc)
		pr_emerg(HW_ERR "TSC: %llu\n", m->tsc);

	/* Doesn't matter which member to test. */
	if (!fam_ops.mc0_mce)
		goto err_code;

	switch (m->bank) {
	case 0:
		decode_mc0_mce(m);
		break;

	case 1:
		decode_mc1_mce(m);
		break;

	case 2:
		decode_mc2_mce(m);
		break;

	case 3:
		decode_mc3_mce(m);
		break;

	case 4:
		decode_mc4_mce(m);
		break;

	case 5:
		decode_mc5_mce(m);
		break;

	case 6:
		decode_mc6_mce(m);
		break;

	default:
		break;
	}

 err_code:
	amd_decode_err_code(m->status & 0xffff);

	m->kflags |= MCE_HANDLED_EDAC;
	return NOTIFY_OK;
}

static struct notifier_block amd_mce_dec_nb = {
	.notifier_call	= amd_decode_mce,
	.priority	= MCE_PRIO_EDAC,
};

static int __init mce_amd_init(void)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;

	if (c->x86_vendor != X86_VENDOR_AMD &&
	    c->x86_vendor != X86_VENDOR_HYGON)
		return -ENODEV;

	if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
		return -ENODEV;

	if (boot_cpu_has(X86_FEATURE_SMCA)) {
		xec_mask = 0x3f;
		goto out;
	}

	switch (c->x86) {
	case 0xf:
		fam_ops.mc0_mce = k8_mc0_mce;
		fam_ops.mc1_mce = k8_mc1_mce;
		fam_ops.mc2_mce = k8_mc2_mce;
		break;

	case 0x10:
		fam_ops.mc0_mce = f10h_mc0_mce;
		fam_ops.mc1_mce = k8_mc1_mce;
		fam_ops.mc2_mce = k8_mc2_mce;
		break;

	case 0x11:
		fam_ops.mc0_mce = k8_mc0_mce;
		fam_ops.mc1_mce = k8_mc1_mce;
		fam_ops.mc2_mce = k8_mc2_mce;
		break;

	case 0x12:
		fam_ops.mc0_mce = f12h_mc0_mce;
		fam_ops.mc1_mce = k8_mc1_mce;
		fam_ops.mc2_mce = k8_mc2_mce;
		break;

	case 0x14:
		fam_ops.mc0_mce = cat_mc0_mce;
		fam_ops.mc1_mce = cat_mc1_mce;
		fam_ops.mc2_mce = k8_mc2_mce;
		break;

	case 0x15:
		xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f;

		fam_ops.mc0_mce = f15h_mc0_mce;
		fam_ops.mc1_mce = f15h_mc1_mce;
		fam_ops.mc2_mce = f15h_mc2_mce;
		break;

	case 0x16:
		xec_mask = 0x1f;
		fam_ops.mc0_mce = cat_mc0_mce;
		fam_ops.mc1_mce = cat_mc1_mce;
		fam_ops.mc2_mce = f16h_mc2_mce;
		break;

	case 0x17:
	case 0x18:
		pr_warn_once("Decoding supported only on Scalable MCA processors.\n");
		return -EINVAL;

	default:
		printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
		return -EINVAL;
	}

out:
	pr_info("MCE: In-kernel MCE decoding enabled.\n");

	mce_register_decode_chain(&amd_mce_dec_nb);

	return 0;
}
early_initcall(mce_amd_init);

#ifdef MODULE
static void __exit mce_amd_exit(void)
{
	mce_unregister_decode_chain(&amd_mce_dec_nb);
}

MODULE_DESCRIPTION("AMD MCE decoder");
MODULE_ALIAS("edac-mce-amd");
MODULE_LICENSE("GPL");
module_exit(mce_amd_exit);
#endif