1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
4 * Author: Joerg Roedel <jroedel@suse.de>
5 *         Leo Duran <leo.duran@amd.com>
6 */
7
8#define pr_fmt(fmt)     "AMD-Vi: " fmt
9#define dev_fmt(fmt)    pr_fmt(fmt)
10
11#include <linux/pci.h>
12#include <linux/acpi.h>
13#include <linux/list.h>
14#include <linux/bitmap.h>
15#include <linux/slab.h>
16#include <linux/syscore_ops.h>
17#include <linux/interrupt.h>
18#include <linux/msi.h>
19#include <linux/irq.h>
20#include <linux/amd-iommu.h>
21#include <linux/export.h>
22#include <linux/kmemleak.h>
23#include <linux/cc_platform.h>
24#include <linux/iopoll.h>
25#include <asm/pci-direct.h>
26#include <asm/iommu.h>
27#include <asm/apic.h>
28#include <asm/gart.h>
29#include <asm/x86_init.h>
30#include <asm/io_apic.h>
31#include <asm/irq_remapping.h>
32#include <asm/set_memory.h>
33#include <asm/sev.h>
34
35#include <linux/crash_dump.h>
36
37#include "amd_iommu.h"
38#include "../irq_remapping.h"
39#include "../iommu-pages.h"
40
41/*
42 * definitions for the ACPI scanning code
43 */
44#define IVRS_HEADER_LENGTH 48
45
46#define ACPI_IVHD_TYPE_MAX_SUPPORTED	0x40
47#define ACPI_IVMD_TYPE_ALL              0x20
48#define ACPI_IVMD_TYPE                  0x21
49#define ACPI_IVMD_TYPE_RANGE            0x22
50
51#define IVHD_DEV_ALL                    0x01
52#define IVHD_DEV_SELECT                 0x02
53#define IVHD_DEV_SELECT_RANGE_START     0x03
54#define IVHD_DEV_RANGE_END              0x04
55#define IVHD_DEV_ALIAS                  0x42
56#define IVHD_DEV_ALIAS_RANGE            0x43
57#define IVHD_DEV_EXT_SELECT             0x46
58#define IVHD_DEV_EXT_SELECT_RANGE       0x47
59#define IVHD_DEV_SPECIAL		0x48
60#define IVHD_DEV_ACPI_HID		0xf0
61
62#define UID_NOT_PRESENT                 0
63#define UID_IS_INTEGER                  1
64#define UID_IS_CHARACTER                2
65
66#define IVHD_SPECIAL_IOAPIC		1
67#define IVHD_SPECIAL_HPET		2
68
69#define IVHD_FLAG_HT_TUN_EN_MASK        0x01
70#define IVHD_FLAG_PASSPW_EN_MASK        0x02
71#define IVHD_FLAG_RESPASSPW_EN_MASK     0x04
72#define IVHD_FLAG_ISOC_EN_MASK          0x08
73
74#define IVMD_FLAG_EXCL_RANGE            0x08
75#define IVMD_FLAG_IW                    0x04
76#define IVMD_FLAG_IR                    0x02
77#define IVMD_FLAG_UNITY_MAP             0x01
78
79#define ACPI_DEVFLAG_INITPASS           0x01
80#define ACPI_DEVFLAG_EXTINT             0x02
81#define ACPI_DEVFLAG_NMI                0x04
82#define ACPI_DEVFLAG_SYSMGT1            0x10
83#define ACPI_DEVFLAG_SYSMGT2            0x20
84#define ACPI_DEVFLAG_LINT0              0x40
85#define ACPI_DEVFLAG_LINT1              0x80
86#define ACPI_DEVFLAG_ATSDIS             0x10000000
87
88#define IVRS_GET_SBDF_ID(seg, bus, dev, fn)	(((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \
89						 | ((dev & 0x1f) << 3) | (fn & 0x7))
90
91/*
92 * ACPI table definitions
93 *
94 * These data structures are laid over the table to parse the important values
95 * out of it.
96 */
97
98/*
99 * structure describing one IOMMU in the ACPI table. Typically followed by one
100 * or more ivhd_entrys.
101 */
102struct ivhd_header {
103	u8 type;
104	u8 flags;
105	u16 length;
106	u16 devid;
107	u16 cap_ptr;
108	u64 mmio_phys;
109	u16 pci_seg;
110	u16 info;
111	u32 efr_attr;
112
113	/* Following only valid on IVHD type 11h and 40h */
114	u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */
115	u64 efr_reg2;
116} __attribute__((packed));
117
118/*
119 * A device entry describing which devices a specific IOMMU translates and
120 * which requestor ids they use.
121 */
122struct ivhd_entry {
123	u8 type;
124	u16 devid;
125	u8 flags;
126	struct_group(ext_hid,
127		u32 ext;
128		u32 hidh;
129	);
130	u64 cid;
131	u8 uidf;
132	u8 uidl;
133	u8 uid;
134} __attribute__((packed));
135
136/*
137 * An AMD IOMMU memory definition structure. It defines things like exclusion
138 * ranges for devices and regions that should be unity mapped.
139 */
140struct ivmd_header {
141	u8 type;
142	u8 flags;
143	u16 length;
144	u16 devid;
145	u16 aux;
146	u16 pci_seg;
147	u8  resv[6];
148	u64 range_start;
149	u64 range_length;
150} __attribute__((packed));
151
152bool amd_iommu_dump;
153bool amd_iommu_irq_remap __read_mostly;
154
155enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1;
156/* Guest page table level */
157int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL;
158
159int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
160static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
161
162static bool amd_iommu_detected;
163static bool amd_iommu_disabled __initdata;
164static bool amd_iommu_force_enable __initdata;
165static bool amd_iommu_irtcachedis;
166static int amd_iommu_target_ivhd_type;
167
168/* Global EFR and EFR2 registers */
169u64 amd_iommu_efr;
170u64 amd_iommu_efr2;
171
172/* SNP is enabled on the system? */
173bool amd_iommu_snp_en;
174EXPORT_SYMBOL(amd_iommu_snp_en);
175
176LIST_HEAD(amd_iommu_pci_seg_list);	/* list of all PCI segments */
177LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
178					   system */
179
180/* Array to assign indices to IOMMUs*/
181struct amd_iommu *amd_iommus[MAX_IOMMUS];
182
183/* Number of IOMMUs present in the system */
184static int amd_iommus_present;
185
186/* IOMMUs have a non-present cache? */
187bool amd_iommu_np_cache __read_mostly;
188bool amd_iommu_iotlb_sup __read_mostly = true;
189
190static bool amd_iommu_pc_present __read_mostly;
191bool amdr_ivrs_remap_support __read_mostly;
192
193bool amd_iommu_force_isolation __read_mostly;
194
195/*
196 * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
197 * to know which ones are already in use.
198 */
199unsigned long *amd_iommu_pd_alloc_bitmap;
200
201enum iommu_init_state {
202	IOMMU_START_STATE,
203	IOMMU_IVRS_DETECTED,
204	IOMMU_ACPI_FINISHED,
205	IOMMU_ENABLED,
206	IOMMU_PCI_INIT,
207	IOMMU_INTERRUPTS_EN,
208	IOMMU_INITIALIZED,
209	IOMMU_NOT_FOUND,
210	IOMMU_INIT_ERROR,
211	IOMMU_CMDLINE_DISABLED,
212};
213
214/* Early ioapic and hpet maps from kernel command line */
215#define EARLY_MAP_SIZE		4
216static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE];
217static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE];
218static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE];
219
220static int __initdata early_ioapic_map_size;
221static int __initdata early_hpet_map_size;
222static int __initdata early_acpihid_map_size;
223
224static bool __initdata cmdline_maps;
225
226static enum iommu_init_state init_state = IOMMU_START_STATE;
227
228static int amd_iommu_enable_interrupts(void);
229static int __init iommu_go_to_state(enum iommu_init_state state);
230static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg);
231
232static bool amd_iommu_pre_enabled = true;
233
234static u32 amd_iommu_ivinfo __initdata;
235
236bool translation_pre_enabled(struct amd_iommu *iommu)
237{
238	return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
239}
240
241static void clear_translation_pre_enabled(struct amd_iommu *iommu)
242{
243	iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
244}
245
246static void init_translation_status(struct amd_iommu *iommu)
247{
248	u64 ctrl;
249
250	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
251	if (ctrl & (1<<CONTROL_IOMMU_EN))
252		iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
253}
254
255static inline unsigned long tbl_size(int entry_size, int last_bdf)
256{
257	unsigned shift = PAGE_SHIFT +
258			 get_order((last_bdf + 1) * entry_size);
259
260	return 1UL << shift;
261}
262
263int amd_iommu_get_num_iommus(void)
264{
265	return amd_iommus_present;
266}
267
268/*
269 * Iterate through all the IOMMUs to get common EFR
270 * masks among all IOMMUs and warn if found inconsistency.
271 */
272static __init void get_global_efr(void)
273{
274	struct amd_iommu *iommu;
275
276	for_each_iommu(iommu) {
277		u64 tmp = iommu->features;
278		u64 tmp2 = iommu->features2;
279
280		if (list_is_first(&iommu->list, &amd_iommu_list)) {
281			amd_iommu_efr = tmp;
282			amd_iommu_efr2 = tmp2;
283			continue;
284		}
285
286		if (amd_iommu_efr == tmp &&
287		    amd_iommu_efr2 == tmp2)
288			continue;
289
290		pr_err(FW_BUG
291		       "Found inconsistent EFR/EFR2 %#llx,%#llx (global %#llx,%#llx) on iommu%d (%04x:%02x:%02x.%01x).\n",
292		       tmp, tmp2, amd_iommu_efr, amd_iommu_efr2,
293		       iommu->index, iommu->pci_seg->id,
294		       PCI_BUS_NUM(iommu->devid), PCI_SLOT(iommu->devid),
295		       PCI_FUNC(iommu->devid));
296
297		amd_iommu_efr &= tmp;
298		amd_iommu_efr2 &= tmp2;
299	}
300
301	pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2);
302}
303
304/*
305 * For IVHD type 0x11/0x40, EFR is also available via IVHD.
306 * Default to IVHD EFR since it is available sooner
307 * (i.e. before PCI init).
308 */
309static void __init early_iommu_features_init(struct amd_iommu *iommu,
310					     struct ivhd_header *h)
311{
312	if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) {
313		iommu->features = h->efr_reg;
314		iommu->features2 = h->efr_reg2;
315	}
316	if (amd_iommu_ivinfo & IOMMU_IVINFO_DMA_REMAP)
317		amdr_ivrs_remap_support = true;
318}
319
320/* Access to l1 and l2 indexed register spaces */
321
322static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
323{
324	u32 val;
325
326	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
327	pci_read_config_dword(iommu->dev, 0xfc, &val);
328	return val;
329}
330
331static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
332{
333	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
334	pci_write_config_dword(iommu->dev, 0xfc, val);
335	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
336}
337
338static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
339{
340	u32 val;
341
342	pci_write_config_dword(iommu->dev, 0xf0, address);
343	pci_read_config_dword(iommu->dev, 0xf4, &val);
344	return val;
345}
346
347static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
348{
349	pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
350	pci_write_config_dword(iommu->dev, 0xf4, val);
351}
352
353/****************************************************************************
354 *
355 * AMD IOMMU MMIO register space handling functions
356 *
357 * These functions are used to program the IOMMU device registers in
358 * MMIO space required for that driver.
359 *
360 ****************************************************************************/
361
362/*
363 * This function set the exclusion range in the IOMMU. DMA accesses to the
364 * exclusion range are passed through untranslated
365 */
366static void iommu_set_exclusion_range(struct amd_iommu *iommu)
367{
368	u64 start = iommu->exclusion_start & PAGE_MASK;
369	u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
370	u64 entry;
371
372	if (!iommu->exclusion_start)
373		return;
374
375	entry = start | MMIO_EXCL_ENABLE_MASK;
376	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
377			&entry, sizeof(entry));
378
379	entry = limit;
380	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
381			&entry, sizeof(entry));
382}
383
384static void iommu_set_cwwb_range(struct amd_iommu *iommu)
385{
386	u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem);
387	u64 entry = start & PM_ADDR_MASK;
388
389	if (!check_feature(FEATURE_SNP))
390		return;
391
392	/* Note:
393	 * Re-purpose Exclusion base/limit registers for Completion wait
394	 * write-back base/limit.
395	 */
396	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
397		    &entry, sizeof(entry));
398
399	/* Note:
400	 * Default to 4 Kbytes, which can be specified by setting base
401	 * address equal to the limit address.
402	 */
403	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
404		    &entry, sizeof(entry));
405}
406
407/* Programs the physical address of the device table into the IOMMU hardware */
408static void iommu_set_device_table(struct amd_iommu *iommu)
409{
410	u64 entry;
411	u32 dev_table_size = iommu->pci_seg->dev_table_size;
412	void *dev_table = (void *)get_dev_table(iommu);
413
414	BUG_ON(iommu->mmio_base == NULL);
415
416	entry = iommu_virt_to_phys(dev_table);
417	entry |= (dev_table_size >> 12) - 1;
418	memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
419			&entry, sizeof(entry));
420}
421
422/* Generic functions to enable/disable certain features of the IOMMU. */
423void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
424{
425	u64 ctrl;
426
427	ctrl = readq(iommu->mmio_base +  MMIO_CONTROL_OFFSET);
428	ctrl |= (1ULL << bit);
429	writeq(ctrl, iommu->mmio_base +  MMIO_CONTROL_OFFSET);
430}
431
432static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
433{
434	u64 ctrl;
435
436	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
437	ctrl &= ~(1ULL << bit);
438	writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
439}
440
441static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
442{
443	u64 ctrl;
444
445	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
446	ctrl &= ~CTRL_INV_TO_MASK;
447	ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
448	writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
449}
450
451/* Function to enable the hardware */
452static void iommu_enable(struct amd_iommu *iommu)
453{
454	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
455}
456
457static void iommu_disable(struct amd_iommu *iommu)
458{
459	if (!iommu->mmio_base)
460		return;
461
462	/* Disable command buffer */
463	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
464
465	/* Disable event logging and event interrupts */
466	iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
467	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
468
469	/* Disable IOMMU GA_LOG */
470	iommu_feature_disable(iommu, CONTROL_GALOG_EN);
471	iommu_feature_disable(iommu, CONTROL_GAINT_EN);
472
473	/* Disable IOMMU PPR logging */
474	iommu_feature_disable(iommu, CONTROL_PPRLOG_EN);
475	iommu_feature_disable(iommu, CONTROL_PPRINT_EN);
476
477	/* Disable IOMMU hardware itself */
478	iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
479
480	/* Clear IRTE cache disabling bit */
481	iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
482}
483
484/*
485 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
486 * the system has one.
487 */
488static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
489{
490	if (!request_mem_region(address, end, "amd_iommu")) {
491		pr_err("Can not reserve memory region %llx-%llx for mmio\n",
492			address, end);
493		pr_err("This is a BIOS bug. Please contact your hardware vendor\n");
494		return NULL;
495	}
496
497	return (u8 __iomem *)ioremap(address, end);
498}
499
500static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
501{
502	if (iommu->mmio_base)
503		iounmap(iommu->mmio_base);
504	release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end);
505}
506
507static inline u32 get_ivhd_header_size(struct ivhd_header *h)
508{
509	u32 size = 0;
510
511	switch (h->type) {
512	case 0x10:
513		size = 24;
514		break;
515	case 0x11:
516	case 0x40:
517		size = 40;
518		break;
519	}
520	return size;
521}
522
523/****************************************************************************
524 *
525 * The functions below belong to the first pass of AMD IOMMU ACPI table
526 * parsing. In this pass we try to find out the highest device id this
527 * code has to handle. Upon this information the size of the shared data
528 * structures is determined later.
529 *
530 ****************************************************************************/
531
532/*
533 * This function calculates the length of a given IVHD entry
534 */
535static inline int ivhd_entry_length(u8 *ivhd)
536{
537	u32 type = ((struct ivhd_entry *)ivhd)->type;
538
539	if (type < 0x80) {
540		return 0x04 << (*ivhd >> 6);
541	} else if (type == IVHD_DEV_ACPI_HID) {
542		/* For ACPI_HID, offset 21 is uid len */
543		return *((u8 *)ivhd + 21) + 22;
544	}
545	return 0;
546}
547
548/*
549 * After reading the highest device id from the IOMMU PCI capability header
550 * this function looks if there is a higher device id defined in the ACPI table
551 */
552static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
553{
554	u8 *p = (void *)h, *end = (void *)h;
555	struct ivhd_entry *dev;
556	int last_devid = -EINVAL;
557
558	u32 ivhd_size = get_ivhd_header_size(h);
559
560	if (!ivhd_size) {
561		pr_err("Unsupported IVHD type %#x\n", h->type);
562		return -EINVAL;
563	}
564
565	p += ivhd_size;
566	end += h->length;
567
568	while (p < end) {
569		dev = (struct ivhd_entry *)p;
570		switch (dev->type) {
571		case IVHD_DEV_ALL:
572			/* Use maximum BDF value for DEV_ALL */
573			return 0xffff;
574		case IVHD_DEV_SELECT:
575		case IVHD_DEV_RANGE_END:
576		case IVHD_DEV_ALIAS:
577		case IVHD_DEV_EXT_SELECT:
578			/* all the above subfield types refer to device ids */
579			if (dev->devid > last_devid)
580				last_devid = dev->devid;
581			break;
582		default:
583			break;
584		}
585		p += ivhd_entry_length(p);
586	}
587
588	WARN_ON(p != end);
589
590	return last_devid;
591}
592
593static int __init check_ivrs_checksum(struct acpi_table_header *table)
594{
595	int i;
596	u8 checksum = 0, *p = (u8 *)table;
597
598	for (i = 0; i < table->length; ++i)
599		checksum += p[i];
600	if (checksum != 0) {
601		/* ACPI table corrupt */
602		pr_err(FW_BUG "IVRS invalid checksum\n");
603		return -ENODEV;
604	}
605
606	return 0;
607}
608
609/*
610 * Iterate over all IVHD entries in the ACPI table and find the highest device
611 * id which we need to handle. This is the first of three functions which parse
612 * the ACPI table. So we check the checksum here.
613 */
614static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_seg)
615{
616	u8 *p = (u8 *)table, *end = (u8 *)table;
617	struct ivhd_header *h;
618	int last_devid, last_bdf = 0;
619
620	p += IVRS_HEADER_LENGTH;
621
622	end += table->length;
623	while (p < end) {
624		h = (struct ivhd_header *)p;
625		if (h->pci_seg == pci_seg &&
626		    h->type == amd_iommu_target_ivhd_type) {
627			last_devid = find_last_devid_from_ivhd(h);
628
629			if (last_devid < 0)
630				return -EINVAL;
631			if (last_devid > last_bdf)
632				last_bdf = last_devid;
633		}
634		p += h->length;
635	}
636	WARN_ON(p != end);
637
638	return last_bdf;
639}
640
641/****************************************************************************
642 *
643 * The following functions belong to the code path which parses the ACPI table
644 * the second time. In this ACPI parsing iteration we allocate IOMMU specific
645 * data structures, initialize the per PCI segment device/alias/rlookup table
646 * and also basically initialize the hardware.
647 *
648 ****************************************************************************/
649
650/* Allocate per PCI segment device table */
651static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg)
652{
653	pci_seg->dev_table = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32,
654					       get_order(pci_seg->dev_table_size));
655	if (!pci_seg->dev_table)
656		return -ENOMEM;
657
658	return 0;
659}
660
661static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg)
662{
663	iommu_free_pages(pci_seg->dev_table,
664			 get_order(pci_seg->dev_table_size));
665	pci_seg->dev_table = NULL;
666}
667
668/* Allocate per PCI segment IOMMU rlookup table. */
669static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
670{
671	pci_seg->rlookup_table = iommu_alloc_pages(GFP_KERNEL,
672						   get_order(pci_seg->rlookup_table_size));
673	if (pci_seg->rlookup_table == NULL)
674		return -ENOMEM;
675
676	return 0;
677}
678
679static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
680{
681	iommu_free_pages(pci_seg->rlookup_table,
682			 get_order(pci_seg->rlookup_table_size));
683	pci_seg->rlookup_table = NULL;
684}
685
686static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
687{
688	pci_seg->irq_lookup_table = iommu_alloc_pages(GFP_KERNEL,
689						      get_order(pci_seg->rlookup_table_size));
690	kmemleak_alloc(pci_seg->irq_lookup_table,
691		       pci_seg->rlookup_table_size, 1, GFP_KERNEL);
692	if (pci_seg->irq_lookup_table == NULL)
693		return -ENOMEM;
694
695	return 0;
696}
697
698static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
699{
700	kmemleak_free(pci_seg->irq_lookup_table);
701	iommu_free_pages(pci_seg->irq_lookup_table,
702			 get_order(pci_seg->rlookup_table_size));
703	pci_seg->irq_lookup_table = NULL;
704}
705
706static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg)
707{
708	int i;
709
710	pci_seg->alias_table = iommu_alloc_pages(GFP_KERNEL,
711						 get_order(pci_seg->alias_table_size));
712	if (!pci_seg->alias_table)
713		return -ENOMEM;
714
715	/*
716	 * let all alias entries point to itself
717	 */
718	for (i = 0; i <= pci_seg->last_bdf; ++i)
719		pci_seg->alias_table[i] = i;
720
721	return 0;
722}
723
724static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg)
725{
726	iommu_free_pages(pci_seg->alias_table,
727			 get_order(pci_seg->alias_table_size));
728	pci_seg->alias_table = NULL;
729}
730
731/*
732 * Allocates the command buffer. This buffer is per AMD IOMMU. We can
733 * write commands to that buffer later and the IOMMU will execute them
734 * asynchronously
735 */
736static int __init alloc_command_buffer(struct amd_iommu *iommu)
737{
738	iommu->cmd_buf = iommu_alloc_pages(GFP_KERNEL,
739					   get_order(CMD_BUFFER_SIZE));
740
741	return iommu->cmd_buf ? 0 : -ENOMEM;
742}
743
744/*
745 * Interrupt handler has processed all pending events and adjusted head
746 * and tail pointer. Reset overflow mask and restart logging again.
747 */
748void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type,
749			   u8 cntrl_intr, u8 cntrl_log,
750			   u32 status_run_mask, u32 status_overflow_mask)
751{
752	u32 status;
753
754	status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
755	if (status & status_run_mask)
756		return;
757
758	pr_info_ratelimited("IOMMU %s log restarting\n", evt_type);
759
760	iommu_feature_disable(iommu, cntrl_log);
761	iommu_feature_disable(iommu, cntrl_intr);
762
763	writel(status_overflow_mask, iommu->mmio_base + MMIO_STATUS_OFFSET);
764
765	iommu_feature_enable(iommu, cntrl_intr);
766	iommu_feature_enable(iommu, cntrl_log);
767}
768
769/*
770 * This function restarts event logging in case the IOMMU experienced
771 * an event log buffer overflow.
772 */
773void amd_iommu_restart_event_logging(struct amd_iommu *iommu)
774{
775	amd_iommu_restart_log(iommu, "Event", CONTROL_EVT_INT_EN,
776			      CONTROL_EVT_LOG_EN, MMIO_STATUS_EVT_RUN_MASK,
777			      MMIO_STATUS_EVT_OVERFLOW_MASK);
778}
779
780/*
781 * This function restarts event logging in case the IOMMU experienced
782 * GA log overflow.
783 */
784void amd_iommu_restart_ga_log(struct amd_iommu *iommu)
785{
786	amd_iommu_restart_log(iommu, "GA", CONTROL_GAINT_EN,
787			      CONTROL_GALOG_EN, MMIO_STATUS_GALOG_RUN_MASK,
788			      MMIO_STATUS_GALOG_OVERFLOW_MASK);
789}
790
791/*
792 * This function resets the command buffer if the IOMMU stopped fetching
793 * commands from it.
794 */
795static void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
796{
797	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
798
799	writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
800	writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
801	iommu->cmd_buf_head = 0;
802	iommu->cmd_buf_tail = 0;
803
804	iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
805}
806
807/*
808 * This function writes the command buffer address to the hardware and
809 * enables it.
810 */
811static void iommu_enable_command_buffer(struct amd_iommu *iommu)
812{
813	u64 entry;
814
815	BUG_ON(iommu->cmd_buf == NULL);
816
817	entry = iommu_virt_to_phys(iommu->cmd_buf);
818	entry |= MMIO_CMD_SIZE_512;
819
820	memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
821		    &entry, sizeof(entry));
822
823	amd_iommu_reset_cmd_buffer(iommu);
824}
825
826/*
827 * This function disables the command buffer
828 */
829static void iommu_disable_command_buffer(struct amd_iommu *iommu)
830{
831	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
832}
833
834static void __init free_command_buffer(struct amd_iommu *iommu)
835{
836	iommu_free_pages(iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
837}
838
839void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp,
840				  size_t size)
841{
842	int order = get_order(size);
843	void *buf = iommu_alloc_pages(gfp, order);
844
845	if (buf &&
846	    check_feature(FEATURE_SNP) &&
847	    set_memory_4k((unsigned long)buf, (1 << order))) {
848		iommu_free_pages(buf, order);
849		buf = NULL;
850	}
851
852	return buf;
853}
854
855/* allocates the memory where the IOMMU will log its events to */
856static int __init alloc_event_buffer(struct amd_iommu *iommu)
857{
858	iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL,
859					      EVT_BUFFER_SIZE);
860
861	return iommu->evt_buf ? 0 : -ENOMEM;
862}
863
864static void iommu_enable_event_buffer(struct amd_iommu *iommu)
865{
866	u64 entry;
867
868	BUG_ON(iommu->evt_buf == NULL);
869
870	entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
871
872	memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
873		    &entry, sizeof(entry));
874
875	/* set head and tail to zero manually */
876	writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
877	writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
878
879	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
880}
881
882/*
883 * This function disables the event log buffer
884 */
885static void iommu_disable_event_buffer(struct amd_iommu *iommu)
886{
887	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
888}
889
890static void __init free_event_buffer(struct amd_iommu *iommu)
891{
892	iommu_free_pages(iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
893}
894
895static void free_ga_log(struct amd_iommu *iommu)
896{
897#ifdef CONFIG_IRQ_REMAP
898	iommu_free_pages(iommu->ga_log, get_order(GA_LOG_SIZE));
899	iommu_free_pages(iommu->ga_log_tail, get_order(8));
900#endif
901}
902
903#ifdef CONFIG_IRQ_REMAP
904static int iommu_ga_log_enable(struct amd_iommu *iommu)
905{
906	u32 status, i;
907	u64 entry;
908
909	if (!iommu->ga_log)
910		return -EINVAL;
911
912	entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
913	memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
914		    &entry, sizeof(entry));
915	entry = (iommu_virt_to_phys(iommu->ga_log_tail) &
916		 (BIT_ULL(52)-1)) & ~7ULL;
917	memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
918		    &entry, sizeof(entry));
919	writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
920	writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
921
922
923	iommu_feature_enable(iommu, CONTROL_GAINT_EN);
924	iommu_feature_enable(iommu, CONTROL_GALOG_EN);
925
926	for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) {
927		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
928		if (status & (MMIO_STATUS_GALOG_RUN_MASK))
929			break;
930		udelay(10);
931	}
932
933	if (WARN_ON(i >= MMIO_STATUS_TIMEOUT))
934		return -EINVAL;
935
936	return 0;
937}
938
939static int iommu_init_ga_log(struct amd_iommu *iommu)
940{
941	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
942		return 0;
943
944	iommu->ga_log = iommu_alloc_pages(GFP_KERNEL, get_order(GA_LOG_SIZE));
945	if (!iommu->ga_log)
946		goto err_out;
947
948	iommu->ga_log_tail = iommu_alloc_pages(GFP_KERNEL, get_order(8));
949	if (!iommu->ga_log_tail)
950		goto err_out;
951
952	return 0;
953err_out:
954	free_ga_log(iommu);
955	return -EINVAL;
956}
957#endif /* CONFIG_IRQ_REMAP */
958
959static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
960{
961	iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL, 1);
962
963	return iommu->cmd_sem ? 0 : -ENOMEM;
964}
965
966static void __init free_cwwb_sem(struct amd_iommu *iommu)
967{
968	if (iommu->cmd_sem)
969		iommu_free_page((void *)iommu->cmd_sem);
970}
971
972static void iommu_enable_xt(struct amd_iommu *iommu)
973{
974#ifdef CONFIG_IRQ_REMAP
975	/*
976	 * XT mode (32-bit APIC destination ID) requires
977	 * GA mode (128-bit IRTE support) as a prerequisite.
978	 */
979	if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir) &&
980	    amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
981		iommu_feature_enable(iommu, CONTROL_XT_EN);
982#endif /* CONFIG_IRQ_REMAP */
983}
984
985static void iommu_enable_gt(struct amd_iommu *iommu)
986{
987	if (!check_feature(FEATURE_GT))
988		return;
989
990	iommu_feature_enable(iommu, CONTROL_GT_EN);
991}
992
993/* sets a specific bit in the device table entry. */
994static void __set_dev_entry_bit(struct dev_table_entry *dev_table,
995				u16 devid, u8 bit)
996{
997	int i = (bit >> 6) & 0x03;
998	int _bit = bit & 0x3f;
999
1000	dev_table[devid].data[i] |= (1UL << _bit);
1001}
1002
1003static void set_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
1004{
1005	struct dev_table_entry *dev_table = get_dev_table(iommu);
1006
1007	return __set_dev_entry_bit(dev_table, devid, bit);
1008}
1009
1010static int __get_dev_entry_bit(struct dev_table_entry *dev_table,
1011			       u16 devid, u8 bit)
1012{
1013	int i = (bit >> 6) & 0x03;
1014	int _bit = bit & 0x3f;
1015
1016	return (dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
1017}
1018
1019static int get_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
1020{
1021	struct dev_table_entry *dev_table = get_dev_table(iommu);
1022
1023	return __get_dev_entry_bit(dev_table, devid, bit);
1024}
1025
1026static bool __copy_device_table(struct amd_iommu *iommu)
1027{
1028	u64 int_ctl, int_tab_len, entry = 0;
1029	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
1030	struct dev_table_entry *old_devtb = NULL;
1031	u32 lo, hi, devid, old_devtb_size;
1032	phys_addr_t old_devtb_phys;
1033	u16 dom_id, dte_v, irq_v;
1034	u64 tmp;
1035
1036	/* Each IOMMU use separate device table with the same size */
1037	lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
1038	hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
1039	entry = (((u64) hi) << 32) + lo;
1040
1041	old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
1042	if (old_devtb_size != pci_seg->dev_table_size) {
1043		pr_err("The device table size of IOMMU:%d is not expected!\n",
1044			iommu->index);
1045		return false;
1046	}
1047
1048	/*
1049	 * When SME is enabled in the first kernel, the entry includes the
1050	 * memory encryption mask(sme_me_mask), we must remove the memory
1051	 * encryption mask to obtain the true physical address in kdump kernel.
1052	 */
1053	old_devtb_phys = __sme_clr(entry) & PAGE_MASK;
1054
1055	if (old_devtb_phys >= 0x100000000ULL) {
1056		pr_err("The address of old device table is above 4G, not trustworthy!\n");
1057		return false;
1058	}
1059	old_devtb = (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) && is_kdump_kernel())
1060		    ? (__force void *)ioremap_encrypted(old_devtb_phys,
1061							pci_seg->dev_table_size)
1062		    : memremap(old_devtb_phys, pci_seg->dev_table_size, MEMREMAP_WB);
1063
1064	if (!old_devtb)
1065		return false;
1066
1067	pci_seg->old_dev_tbl_cpy = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32,
1068						     get_order(pci_seg->dev_table_size));
1069	if (pci_seg->old_dev_tbl_cpy == NULL) {
1070		pr_err("Failed to allocate memory for copying old device table!\n");
1071		memunmap(old_devtb);
1072		return false;
1073	}
1074
1075	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
1076		pci_seg->old_dev_tbl_cpy[devid] = old_devtb[devid];
1077		dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
1078		dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
1079
1080		if (dte_v && dom_id) {
1081			pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
1082			pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
1083			__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
1084			/* If gcr3 table existed, mask it out */
1085			if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
1086				tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
1087				tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
1088				pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp;
1089				tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A;
1090				tmp |= DTE_FLAG_GV;
1091				pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp;
1092			}
1093		}
1094
1095		irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
1096		int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK;
1097		int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK;
1098		if (irq_v && (int_ctl || int_tab_len)) {
1099			if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
1100			    (int_tab_len != DTE_INTTABLEN)) {
1101				pr_err("Wrong old irq remapping flag: %#x\n", devid);
1102				memunmap(old_devtb);
1103				return false;
1104			}
1105
1106			pci_seg->old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2];
1107		}
1108	}
1109	memunmap(old_devtb);
1110
1111	return true;
1112}
1113
1114static bool copy_device_table(void)
1115{
1116	struct amd_iommu *iommu;
1117	struct amd_iommu_pci_seg *pci_seg;
1118
1119	if (!amd_iommu_pre_enabled)
1120		return false;
1121
1122	pr_warn("Translation is already enabled - trying to copy translation structures\n");
1123
1124	/*
1125	 * All IOMMUs within PCI segment shares common device table.
1126	 * Hence copy device table only once per PCI segment.
1127	 */
1128	for_each_pci_segment(pci_seg) {
1129		for_each_iommu(iommu) {
1130			if (pci_seg->id != iommu->pci_seg->id)
1131				continue;
1132			if (!__copy_device_table(iommu))
1133				return false;
1134			break;
1135		}
1136	}
1137
1138	return true;
1139}
1140
1141void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid)
1142{
1143	int sysmgt;
1144
1145	sysmgt = get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1) |
1146		 (get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2) << 1);
1147
1148	if (sysmgt == 0x01)
1149		set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW);
1150}
1151
1152/*
1153 * This function takes the device specific flags read from the ACPI
1154 * table and sets up the device table entry with that information
1155 */
1156static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
1157					   u16 devid, u32 flags, u32 ext_flags)
1158{
1159	if (flags & ACPI_DEVFLAG_INITPASS)
1160		set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS);
1161	if (flags & ACPI_DEVFLAG_EXTINT)
1162		set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS);
1163	if (flags & ACPI_DEVFLAG_NMI)
1164		set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS);
1165	if (flags & ACPI_DEVFLAG_SYSMGT1)
1166		set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1);
1167	if (flags & ACPI_DEVFLAG_SYSMGT2)
1168		set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2);
1169	if (flags & ACPI_DEVFLAG_LINT0)
1170		set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS);
1171	if (flags & ACPI_DEVFLAG_LINT1)
1172		set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS);
1173
1174	amd_iommu_apply_erratum_63(iommu, devid);
1175
1176	amd_iommu_set_rlookup_table(iommu, devid);
1177}
1178
1179int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line)
1180{
1181	struct devid_map *entry;
1182	struct list_head *list;
1183
1184	if (type == IVHD_SPECIAL_IOAPIC)
1185		list = &ioapic_map;
1186	else if (type == IVHD_SPECIAL_HPET)
1187		list = &hpet_map;
1188	else
1189		return -EINVAL;
1190
1191	list_for_each_entry(entry, list, list) {
1192		if (!(entry->id == id && entry->cmd_line))
1193			continue;
1194
1195		pr_info("Command-line override present for %s id %d - ignoring\n",
1196			type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id);
1197
1198		*devid = entry->devid;
1199
1200		return 0;
1201	}
1202
1203	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1204	if (!entry)
1205		return -ENOMEM;
1206
1207	entry->id	= id;
1208	entry->devid	= *devid;
1209	entry->cmd_line	= cmd_line;
1210
1211	list_add_tail(&entry->list, list);
1212
1213	return 0;
1214}
1215
1216static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid,
1217				      bool cmd_line)
1218{
1219	struct acpihid_map_entry *entry;
1220	struct list_head *list = &acpihid_map;
1221
1222	list_for_each_entry(entry, list, list) {
1223		if (strcmp(entry->hid, hid) ||
1224		    (*uid && *entry->uid && strcmp(entry->uid, uid)) ||
1225		    !entry->cmd_line)
1226			continue;
1227
1228		pr_info("Command-line override for hid:%s uid:%s\n",
1229			hid, uid);
1230		*devid = entry->devid;
1231		return 0;
1232	}
1233
1234	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1235	if (!entry)
1236		return -ENOMEM;
1237
1238	memcpy(entry->uid, uid, strlen(uid));
1239	memcpy(entry->hid, hid, strlen(hid));
1240	entry->devid = *devid;
1241	entry->cmd_line	= cmd_line;
1242	entry->root_devid = (entry->devid & (~0x7));
1243
1244	pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n",
1245		entry->cmd_line ? "cmd" : "ivrs",
1246		entry->hid, entry->uid, entry->root_devid);
1247
1248	list_add_tail(&entry->list, list);
1249	return 0;
1250}
1251
1252static int __init add_early_maps(void)
1253{
1254	int i, ret;
1255
1256	for (i = 0; i < early_ioapic_map_size; ++i) {
1257		ret = add_special_device(IVHD_SPECIAL_IOAPIC,
1258					 early_ioapic_map[i].id,
1259					 &early_ioapic_map[i].devid,
1260					 early_ioapic_map[i].cmd_line);
1261		if (ret)
1262			return ret;
1263	}
1264
1265	for (i = 0; i < early_hpet_map_size; ++i) {
1266		ret = add_special_device(IVHD_SPECIAL_HPET,
1267					 early_hpet_map[i].id,
1268					 &early_hpet_map[i].devid,
1269					 early_hpet_map[i].cmd_line);
1270		if (ret)
1271			return ret;
1272	}
1273
1274	for (i = 0; i < early_acpihid_map_size; ++i) {
1275		ret = add_acpi_hid_device(early_acpihid_map[i].hid,
1276					  early_acpihid_map[i].uid,
1277					  &early_acpihid_map[i].devid,
1278					  early_acpihid_map[i].cmd_line);
1279		if (ret)
1280			return ret;
1281	}
1282
1283	return 0;
1284}
1285
1286/*
1287 * Takes a pointer to an AMD IOMMU entry in the ACPI table and
1288 * initializes the hardware and our data structures with it.
1289 */
1290static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
1291					struct ivhd_header *h)
1292{
1293	u8 *p = (u8 *)h;
1294	u8 *end = p, flags = 0;
1295	u16 devid = 0, devid_start = 0, devid_to = 0, seg_id;
1296	u32 dev_i, ext_flags = 0;
1297	bool alias = false;
1298	struct ivhd_entry *e;
1299	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
1300	u32 ivhd_size;
1301	int ret;
1302
1303
1304	ret = add_early_maps();
1305	if (ret)
1306		return ret;
1307
1308	amd_iommu_apply_ivrs_quirks();
1309
1310	/*
1311	 * First save the recommended feature enable bits from ACPI
1312	 */
1313	iommu->acpi_flags = h->flags;
1314
1315	/*
1316	 * Done. Now parse the device entries
1317	 */
1318	ivhd_size = get_ivhd_header_size(h);
1319	if (!ivhd_size) {
1320		pr_err("Unsupported IVHD type %#x\n", h->type);
1321		return -EINVAL;
1322	}
1323
1324	p += ivhd_size;
1325
1326	end += h->length;
1327
1328
1329	while (p < end) {
1330		e = (struct ivhd_entry *)p;
1331		seg_id = pci_seg->id;
1332
1333		switch (e->type) {
1334		case IVHD_DEV_ALL:
1335
1336			DUMP_printk("  DEV_ALL\t\t\tflags: %02x\n", e->flags);
1337
1338			for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i)
1339				set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0);
1340			break;
1341		case IVHD_DEV_SELECT:
1342
1343			DUMP_printk("  DEV_SELECT\t\t\t devid: %04x:%02x:%02x.%x "
1344				    "flags: %02x\n",
1345				    seg_id, PCI_BUS_NUM(e->devid),
1346				    PCI_SLOT(e->devid),
1347				    PCI_FUNC(e->devid),
1348				    e->flags);
1349
1350			devid = e->devid;
1351			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1352			break;
1353		case IVHD_DEV_SELECT_RANGE_START:
1354
1355			DUMP_printk("  DEV_SELECT_RANGE_START\t "
1356				    "devid: %04x:%02x:%02x.%x flags: %02x\n",
1357				    seg_id, PCI_BUS_NUM(e->devid),
1358				    PCI_SLOT(e->devid),
1359				    PCI_FUNC(e->devid),
1360				    e->flags);
1361
1362			devid_start = e->devid;
1363			flags = e->flags;
1364			ext_flags = 0;
1365			alias = false;
1366			break;
1367		case IVHD_DEV_ALIAS:
1368
1369			DUMP_printk("  DEV_ALIAS\t\t\t devid: %04x:%02x:%02x.%x "
1370				    "flags: %02x devid_to: %02x:%02x.%x\n",
1371				    seg_id, PCI_BUS_NUM(e->devid),
1372				    PCI_SLOT(e->devid),
1373				    PCI_FUNC(e->devid),
1374				    e->flags,
1375				    PCI_BUS_NUM(e->ext >> 8),
1376				    PCI_SLOT(e->ext >> 8),
1377				    PCI_FUNC(e->ext >> 8));
1378
1379			devid = e->devid;
1380			devid_to = e->ext >> 8;
1381			set_dev_entry_from_acpi(iommu, devid   , e->flags, 0);
1382			set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
1383			pci_seg->alias_table[devid] = devid_to;
1384			break;
1385		case IVHD_DEV_ALIAS_RANGE:
1386
1387			DUMP_printk("  DEV_ALIAS_RANGE\t\t "
1388				    "devid: %04x:%02x:%02x.%x flags: %02x "
1389				    "devid_to: %04x:%02x:%02x.%x\n",
1390				    seg_id, PCI_BUS_NUM(e->devid),
1391				    PCI_SLOT(e->devid),
1392				    PCI_FUNC(e->devid),
1393				    e->flags,
1394				    seg_id, PCI_BUS_NUM(e->ext >> 8),
1395				    PCI_SLOT(e->ext >> 8),
1396				    PCI_FUNC(e->ext >> 8));
1397
1398			devid_start = e->devid;
1399			flags = e->flags;
1400			devid_to = e->ext >> 8;
1401			ext_flags = 0;
1402			alias = true;
1403			break;
1404		case IVHD_DEV_EXT_SELECT:
1405
1406			DUMP_printk("  DEV_EXT_SELECT\t\t devid: %04x:%02x:%02x.%x "
1407				    "flags: %02x ext: %08x\n",
1408				    seg_id, PCI_BUS_NUM(e->devid),
1409				    PCI_SLOT(e->devid),
1410				    PCI_FUNC(e->devid),
1411				    e->flags, e->ext);
1412
1413			devid = e->devid;
1414			set_dev_entry_from_acpi(iommu, devid, e->flags,
1415						e->ext);
1416			break;
1417		case IVHD_DEV_EXT_SELECT_RANGE:
1418
1419			DUMP_printk("  DEV_EXT_SELECT_RANGE\t devid: "
1420				    "%04x:%02x:%02x.%x flags: %02x ext: %08x\n",
1421				    seg_id, PCI_BUS_NUM(e->devid),
1422				    PCI_SLOT(e->devid),
1423				    PCI_FUNC(e->devid),
1424				    e->flags, e->ext);
1425
1426			devid_start = e->devid;
1427			flags = e->flags;
1428			ext_flags = e->ext;
1429			alias = false;
1430			break;
1431		case IVHD_DEV_RANGE_END:
1432
1433			DUMP_printk("  DEV_RANGE_END\t\t devid: %04x:%02x:%02x.%x\n",
1434				    seg_id, PCI_BUS_NUM(e->devid),
1435				    PCI_SLOT(e->devid),
1436				    PCI_FUNC(e->devid));
1437
1438			devid = e->devid;
1439			for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
1440				if (alias) {
1441					pci_seg->alias_table[dev_i] = devid_to;
1442					set_dev_entry_from_acpi(iommu,
1443						devid_to, flags, ext_flags);
1444				}
1445				set_dev_entry_from_acpi(iommu, dev_i,
1446							flags, ext_flags);
1447			}
1448			break;
1449		case IVHD_DEV_SPECIAL: {
1450			u8 handle, type;
1451			const char *var;
1452			u32 devid;
1453			int ret;
1454
1455			handle = e->ext & 0xff;
1456			devid = PCI_SEG_DEVID_TO_SBDF(seg_id, (e->ext >> 8));
1457			type   = (e->ext >> 24) & 0xff;
1458
1459			if (type == IVHD_SPECIAL_IOAPIC)
1460				var = "IOAPIC";
1461			else if (type == IVHD_SPECIAL_HPET)
1462				var = "HPET";
1463			else
1464				var = "UNKNOWN";
1465
1466			DUMP_printk("  DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x\n",
1467				    var, (int)handle,
1468				    seg_id, PCI_BUS_NUM(devid),
1469				    PCI_SLOT(devid),
1470				    PCI_FUNC(devid));
1471
1472			ret = add_special_device(type, handle, &devid, false);
1473			if (ret)
1474				return ret;
1475
1476			/*
1477			 * add_special_device might update the devid in case a
1478			 * command-line override is present. So call
1479			 * set_dev_entry_from_acpi after add_special_device.
1480			 */
1481			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1482
1483			break;
1484		}
1485		case IVHD_DEV_ACPI_HID: {
1486			u32 devid;
1487			u8 hid[ACPIHID_HID_LEN];
1488			u8 uid[ACPIHID_UID_LEN];
1489			int ret;
1490
1491			if (h->type != 0x40) {
1492				pr_err(FW_BUG "Invalid IVHD device type %#x\n",
1493				       e->type);
1494				break;
1495			}
1496
1497			BUILD_BUG_ON(sizeof(e->ext_hid) != ACPIHID_HID_LEN - 1);
1498			memcpy(hid, &e->ext_hid, ACPIHID_HID_LEN - 1);
1499			hid[ACPIHID_HID_LEN - 1] = '\0';
1500
1501			if (!(*hid)) {
1502				pr_err(FW_BUG "Invalid HID.\n");
1503				break;
1504			}
1505
1506			uid[0] = '\0';
1507			switch (e->uidf) {
1508			case UID_NOT_PRESENT:
1509
1510				if (e->uidl != 0)
1511					pr_warn(FW_BUG "Invalid UID length.\n");
1512
1513				break;
1514			case UID_IS_INTEGER:
1515
1516				sprintf(uid, "%d", e->uid);
1517
1518				break;
1519			case UID_IS_CHARACTER:
1520
1521				memcpy(uid, &e->uid, e->uidl);
1522				uid[e->uidl] = '\0';
1523
1524				break;
1525			default:
1526				break;
1527			}
1528
1529			devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid);
1530			DUMP_printk("  DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x\n",
1531				    hid, uid, seg_id,
1532				    PCI_BUS_NUM(devid),
1533				    PCI_SLOT(devid),
1534				    PCI_FUNC(devid));
1535
1536			flags = e->flags;
1537
1538			ret = add_acpi_hid_device(hid, uid, &devid, false);
1539			if (ret)
1540				return ret;
1541
1542			/*
1543			 * add_special_device might update the devid in case a
1544			 * command-line override is present. So call
1545			 * set_dev_entry_from_acpi after add_special_device.
1546			 */
1547			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1548
1549			break;
1550		}
1551		default:
1552			break;
1553		}
1554
1555		p += ivhd_entry_length(p);
1556	}
1557
1558	return 0;
1559}
1560
1561/* Allocate PCI segment data structure */
1562static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id,
1563					  struct acpi_table_header *ivrs_base)
1564{
1565	struct amd_iommu_pci_seg *pci_seg;
1566	int last_bdf;
1567
1568	/*
1569	 * First parse ACPI tables to find the largest Bus/Dev/Func we need to
1570	 * handle in this PCI segment. Upon this information the shared data
1571	 * structures for the PCI segments in the system will be allocated.
1572	 */
1573	last_bdf = find_last_devid_acpi(ivrs_base, id);
1574	if (last_bdf < 0)
1575		return NULL;
1576
1577	pci_seg = kzalloc(sizeof(struct amd_iommu_pci_seg), GFP_KERNEL);
1578	if (pci_seg == NULL)
1579		return NULL;
1580
1581	pci_seg->last_bdf = last_bdf;
1582	DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf);
1583	pci_seg->dev_table_size     = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf);
1584	pci_seg->alias_table_size   = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf);
1585	pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf);
1586
1587	pci_seg->id = id;
1588	init_llist_head(&pci_seg->dev_data_list);
1589	INIT_LIST_HEAD(&pci_seg->unity_map);
1590	list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list);
1591
1592	if (alloc_dev_table(pci_seg))
1593		return NULL;
1594	if (alloc_alias_table(pci_seg))
1595		return NULL;
1596	if (alloc_rlookup_table(pci_seg))
1597		return NULL;
1598
1599	return pci_seg;
1600}
1601
1602static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id,
1603					struct acpi_table_header *ivrs_base)
1604{
1605	struct amd_iommu_pci_seg *pci_seg;
1606
1607	for_each_pci_segment(pci_seg) {
1608		if (pci_seg->id == id)
1609			return pci_seg;
1610	}
1611
1612	return alloc_pci_segment(id, ivrs_base);
1613}
1614
1615static void __init free_pci_segments(void)
1616{
1617	struct amd_iommu_pci_seg *pci_seg, *next;
1618
1619	for_each_pci_segment_safe(pci_seg, next) {
1620		list_del(&pci_seg->list);
1621		free_irq_lookup_table(pci_seg);
1622		free_rlookup_table(pci_seg);
1623		free_alias_table(pci_seg);
1624		free_dev_table(pci_seg);
1625		kfree(pci_seg);
1626	}
1627}
1628
1629static void __init free_sysfs(struct amd_iommu *iommu)
1630{
1631	if (iommu->iommu.dev) {
1632		iommu_device_unregister(&iommu->iommu);
1633		iommu_device_sysfs_remove(&iommu->iommu);
1634	}
1635}
1636
1637static void __init free_iommu_one(struct amd_iommu *iommu)
1638{
1639	free_sysfs(iommu);
1640	free_cwwb_sem(iommu);
1641	free_command_buffer(iommu);
1642	free_event_buffer(iommu);
1643	amd_iommu_free_ppr_log(iommu);
1644	free_ga_log(iommu);
1645	iommu_unmap_mmio_space(iommu);
1646	amd_iommu_iopf_uninit(iommu);
1647}
1648
1649static void __init free_iommu_all(void)
1650{
1651	struct amd_iommu *iommu, *next;
1652
1653	for_each_iommu_safe(iommu, next) {
1654		list_del(&iommu->list);
1655		free_iommu_one(iommu);
1656		kfree(iommu);
1657	}
1658}
1659
1660/*
1661 * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations)
1662 * Workaround:
1663 *     BIOS should disable L2B micellaneous clock gating by setting
1664 *     L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b
1665 */
1666static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu)
1667{
1668	u32 value;
1669
1670	if ((boot_cpu_data.x86 != 0x15) ||
1671	    (boot_cpu_data.x86_model < 0x10) ||
1672	    (boot_cpu_data.x86_model > 0x1f))
1673		return;
1674
1675	pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1676	pci_read_config_dword(iommu->dev, 0xf4, &value);
1677
1678	if (value & BIT(2))
1679		return;
1680
1681	/* Select NB indirect register 0x90 and enable writing */
1682	pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8));
1683
1684	pci_write_config_dword(iommu->dev, 0xf4, value | 0x4);
1685	pci_info(iommu->dev, "Applying erratum 746 workaround\n");
1686
1687	/* Clear the enable writing bit */
1688	pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1689}
1690
1691/*
1692 * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission)
1693 * Workaround:
1694 *     BIOS should enable ATS write permission check by setting
1695 *     L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b
1696 */
1697static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
1698{
1699	u32 value;
1700
1701	if ((boot_cpu_data.x86 != 0x15) ||
1702	    (boot_cpu_data.x86_model < 0x30) ||
1703	    (boot_cpu_data.x86_model > 0x3f))
1704		return;
1705
1706	/* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */
1707	value = iommu_read_l2(iommu, 0x47);
1708
1709	if (value & BIT(0))
1710		return;
1711
1712	/* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
1713	iommu_write_l2(iommu, 0x47, value | BIT(0));
1714
1715	pci_info(iommu->dev, "Applying ATS write check workaround\n");
1716}
1717
1718/*
1719 * This function glues the initialization function for one IOMMU
1720 * together and also allocates the command buffer and programs the
1721 * hardware. It does NOT enable the IOMMU. This is done afterwards.
1722 */
1723static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
1724				 struct acpi_table_header *ivrs_base)
1725{
1726	struct amd_iommu_pci_seg *pci_seg;
1727
1728	pci_seg = get_pci_segment(h->pci_seg, ivrs_base);
1729	if (pci_seg == NULL)
1730		return -ENOMEM;
1731	iommu->pci_seg = pci_seg;
1732
1733	raw_spin_lock_init(&iommu->lock);
1734	atomic64_set(&iommu->cmd_sem_val, 0);
1735
1736	/* Add IOMMU to internal data structures */
1737	list_add_tail(&iommu->list, &amd_iommu_list);
1738	iommu->index = amd_iommus_present++;
1739
1740	if (unlikely(iommu->index >= MAX_IOMMUS)) {
1741		WARN(1, "System has more IOMMUs than supported by this driver\n");
1742		return -ENOSYS;
1743	}
1744
1745	/* Index is fine - add IOMMU to the array */
1746	amd_iommus[iommu->index] = iommu;
1747
1748	/*
1749	 * Copy data from ACPI table entry to the iommu struct
1750	 */
1751	iommu->devid   = h->devid;
1752	iommu->cap_ptr = h->cap_ptr;
1753	iommu->mmio_phys = h->mmio_phys;
1754
1755	switch (h->type) {
1756	case 0x10:
1757		/* Check if IVHD EFR contains proper max banks/counters */
1758		if ((h->efr_attr != 0) &&
1759		    ((h->efr_attr & (0xF << 13)) != 0) &&
1760		    ((h->efr_attr & (0x3F << 17)) != 0))
1761			iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1762		else
1763			iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1764
1765		/*
1766		 * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports.
1767		 * GAM also requires GA mode. Therefore, we need to
1768		 * check cmpxchg16b support before enabling it.
1769		 */
1770		if (!boot_cpu_has(X86_FEATURE_CX16) ||
1771		    ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0))
1772			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1773		break;
1774	case 0x11:
1775	case 0x40:
1776		if (h->efr_reg & (1 << 9))
1777			iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1778		else
1779			iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1780
1781		/*
1782		 * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports.
1783		 * XT, GAM also requires GA mode. Therefore, we need to
1784		 * check cmpxchg16b support before enabling them.
1785		 */
1786		if (!boot_cpu_has(X86_FEATURE_CX16) ||
1787		    ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) {
1788			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1789			break;
1790		}
1791
1792		if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
1793			amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
1794
1795		early_iommu_features_init(iommu, h);
1796
1797		break;
1798	default:
1799		return -EINVAL;
1800	}
1801
1802	iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys,
1803						iommu->mmio_phys_end);
1804	if (!iommu->mmio_base)
1805		return -ENOMEM;
1806
1807	return init_iommu_from_acpi(iommu, h);
1808}
1809
1810static int __init init_iommu_one_late(struct amd_iommu *iommu)
1811{
1812	int ret;
1813
1814	if (alloc_cwwb_sem(iommu))
1815		return -ENOMEM;
1816
1817	if (alloc_command_buffer(iommu))
1818		return -ENOMEM;
1819
1820	if (alloc_event_buffer(iommu))
1821		return -ENOMEM;
1822
1823	iommu->int_enabled = false;
1824
1825	init_translation_status(iommu);
1826	if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
1827		iommu_disable(iommu);
1828		clear_translation_pre_enabled(iommu);
1829		pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n",
1830			iommu->index);
1831	}
1832	if (amd_iommu_pre_enabled)
1833		amd_iommu_pre_enabled = translation_pre_enabled(iommu);
1834
1835	if (amd_iommu_irq_remap) {
1836		ret = amd_iommu_create_irq_domain(iommu);
1837		if (ret)
1838			return ret;
1839	}
1840
1841	/*
1842	 * Make sure IOMMU is not considered to translate itself. The IVRS
1843	 * table tells us so, but this is a lie!
1844	 */
1845	iommu->pci_seg->rlookup_table[iommu->devid] = NULL;
1846
1847	return 0;
1848}
1849
1850/**
1851 * get_highest_supported_ivhd_type - Look up the appropriate IVHD type
1852 * @ivrs: Pointer to the IVRS header
1853 *
1854 * This function search through all IVDB of the maximum supported IVHD
1855 */
1856static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
1857{
1858	u8 *base = (u8 *)ivrs;
1859	struct ivhd_header *ivhd = (struct ivhd_header *)
1860					(base + IVRS_HEADER_LENGTH);
1861	u8 last_type = ivhd->type;
1862	u16 devid = ivhd->devid;
1863
1864	while (((u8 *)ivhd - base < ivrs->length) &&
1865	       (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
1866		u8 *p = (u8 *) ivhd;
1867
1868		if (ivhd->devid == devid)
1869			last_type = ivhd->type;
1870		ivhd = (struct ivhd_header *)(p + ivhd->length);
1871	}
1872
1873	return last_type;
1874}
1875
1876/*
1877 * Iterates over all IOMMU entries in the ACPI table, allocates the
1878 * IOMMU structure and initializes it with init_iommu_one()
1879 */
1880static int __init init_iommu_all(struct acpi_table_header *table)
1881{
1882	u8 *p = (u8 *)table, *end = (u8 *)table;
1883	struct ivhd_header *h;
1884	struct amd_iommu *iommu;
1885	int ret;
1886
1887	end += table->length;
1888	p += IVRS_HEADER_LENGTH;
1889
1890	/* Phase 1: Process all IVHD blocks */
1891	while (p < end) {
1892		h = (struct ivhd_header *)p;
1893		if (*p == amd_iommu_target_ivhd_type) {
1894
1895			DUMP_printk("device: %04x:%02x:%02x.%01x cap: %04x "
1896				    "flags: %01x info %04x\n",
1897				    h->pci_seg, PCI_BUS_NUM(h->devid),
1898				    PCI_SLOT(h->devid), PCI_FUNC(h->devid),
1899				    h->cap_ptr, h->flags, h->info);
1900			DUMP_printk("       mmio-addr: %016llx\n",
1901				    h->mmio_phys);
1902
1903			iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
1904			if (iommu == NULL)
1905				return -ENOMEM;
1906
1907			ret = init_iommu_one(iommu, h, table);
1908			if (ret)
1909				return ret;
1910		}
1911		p += h->length;
1912
1913	}
1914	WARN_ON(p != end);
1915
1916	/* Phase 2 : Early feature support check */
1917	get_global_efr();
1918
1919	/* Phase 3 : Enabling IOMMU features */
1920	for_each_iommu(iommu) {
1921		ret = init_iommu_one_late(iommu);
1922		if (ret)
1923			return ret;
1924	}
1925
1926	return 0;
1927}
1928
1929static void init_iommu_perf_ctr(struct amd_iommu *iommu)
1930{
1931	u64 val;
1932	struct pci_dev *pdev = iommu->dev;
1933
1934	if (!check_feature(FEATURE_PC))
1935		return;
1936
1937	amd_iommu_pc_present = true;
1938
1939	pci_info(pdev, "IOMMU performance counters supported\n");
1940
1941	val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
1942	iommu->max_banks = (u8) ((val >> 12) & 0x3f);
1943	iommu->max_counters = (u8) ((val >> 7) & 0xf);
1944
1945	return;
1946}
1947
1948static ssize_t amd_iommu_show_cap(struct device *dev,
1949				  struct device_attribute *attr,
1950				  char *buf)
1951{
1952	struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1953	return sysfs_emit(buf, "%x\n", iommu->cap);
1954}
1955static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL);
1956
1957static ssize_t amd_iommu_show_features(struct device *dev,
1958				       struct device_attribute *attr,
1959				       char *buf)
1960{
1961	return sysfs_emit(buf, "%llx:%llx\n", amd_iommu_efr, amd_iommu_efr2);
1962}
1963static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
1964
1965static struct attribute *amd_iommu_attrs[] = {
1966	&dev_attr_cap.attr,
1967	&dev_attr_features.attr,
1968	NULL,
1969};
1970
1971static struct attribute_group amd_iommu_group = {
1972	.name = "amd-iommu",
1973	.attrs = amd_iommu_attrs,
1974};
1975
1976static const struct attribute_group *amd_iommu_groups[] = {
1977	&amd_iommu_group,
1978	NULL,
1979};
1980
1981/*
1982 * Note: IVHD types 0x11 and 0x40 also contain an exact copy
1983 * of the IOMMU Extended Feature Register [MMIO Offset 0030h].
1984 * Default to EFR in IVHD since it is available sooner (i.e. before PCI init).
1985 */
1986static void __init late_iommu_features_init(struct amd_iommu *iommu)
1987{
1988	u64 features, features2;
1989
1990	if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
1991		return;
1992
1993	/* read extended feature bits */
1994	features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
1995	features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2);
1996
1997	if (!amd_iommu_efr) {
1998		amd_iommu_efr = features;
1999		amd_iommu_efr2 = features2;
2000		return;
2001	}
2002
2003	/*
2004	 * Sanity check and warn if EFR values from
2005	 * IVHD and MMIO conflict.
2006	 */
2007	if (features != amd_iommu_efr ||
2008	    features2 != amd_iommu_efr2) {
2009		pr_warn(FW_WARN
2010			"EFR mismatch. Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n",
2011			features, amd_iommu_efr,
2012			features2, amd_iommu_efr2);
2013	}
2014}
2015
2016static int __init iommu_init_pci(struct amd_iommu *iommu)
2017{
2018	int cap_ptr = iommu->cap_ptr;
2019	int ret;
2020
2021	iommu->dev = pci_get_domain_bus_and_slot(iommu->pci_seg->id,
2022						 PCI_BUS_NUM(iommu->devid),
2023						 iommu->devid & 0xff);
2024	if (!iommu->dev)
2025		return -ENODEV;
2026
2027	/* Prevent binding other PCI device drivers to IOMMU devices */
2028	iommu->dev->match_driver = false;
2029
2030	/* ACPI _PRT won't have an IRQ for IOMMU */
2031	iommu->dev->irq_managed = 1;
2032
2033	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
2034			      &iommu->cap);
2035
2036	if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
2037		amd_iommu_iotlb_sup = false;
2038
2039	late_iommu_features_init(iommu);
2040
2041	if (check_feature(FEATURE_GT)) {
2042		int glxval;
2043		u64 pasmax;
2044
2045		pasmax = amd_iommu_efr & FEATURE_PASID_MASK;
2046		pasmax >>= FEATURE_PASID_SHIFT;
2047		iommu->iommu.max_pasids = (1 << (pasmax + 1)) - 1;
2048
2049		BUG_ON(iommu->iommu.max_pasids & ~PASID_MASK);
2050
2051		glxval   = amd_iommu_efr & FEATURE_GLXVAL_MASK;
2052		glxval >>= FEATURE_GLXVAL_SHIFT;
2053
2054		if (amd_iommu_max_glx_val == -1)
2055			amd_iommu_max_glx_val = glxval;
2056		else
2057			amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
2058
2059		iommu_enable_gt(iommu);
2060	}
2061
2062	if (check_feature(FEATURE_PPR) && amd_iommu_alloc_ppr_log(iommu))
2063		return -ENOMEM;
2064
2065	if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) {
2066		pr_info("Using strict mode due to virtualization\n");
2067		iommu_set_dma_strict();
2068		amd_iommu_np_cache = true;
2069	}
2070
2071	init_iommu_perf_ctr(iommu);
2072
2073	if (amd_iommu_pgtable == AMD_IOMMU_V2) {
2074		if (!check_feature(FEATURE_GIOSUP) ||
2075		    !check_feature(FEATURE_GT)) {
2076			pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n");
2077			amd_iommu_pgtable = AMD_IOMMU_V1;
2078		}
2079	}
2080
2081	if (is_rd890_iommu(iommu->dev)) {
2082		int i, j;
2083
2084		iommu->root_pdev =
2085			pci_get_domain_bus_and_slot(iommu->pci_seg->id,
2086						    iommu->dev->bus->number,
2087						    PCI_DEVFN(0, 0));
2088
2089		/*
2090		 * Some rd890 systems may not be fully reconfigured by the
2091		 * BIOS, so it's necessary for us to store this information so
2092		 * it can be reprogrammed on resume
2093		 */
2094		pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
2095				&iommu->stored_addr_lo);
2096		pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
2097				&iommu->stored_addr_hi);
2098
2099		/* Low bit locks writes to configuration space */
2100		iommu->stored_addr_lo &= ~1;
2101
2102		for (i = 0; i < 6; i++)
2103			for (j = 0; j < 0x12; j++)
2104				iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
2105
2106		for (i = 0; i < 0x83; i++)
2107			iommu->stored_l2[i] = iommu_read_l2(iommu, i);
2108	}
2109
2110	amd_iommu_erratum_746_workaround(iommu);
2111	amd_iommu_ats_write_check_workaround(iommu);
2112
2113	ret = iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev,
2114			       amd_iommu_groups, "ivhd%d", iommu->index);
2115	if (ret)
2116		return ret;
2117
2118	/*
2119	 * Allocate the per-IOMMU IOPF queue here so that PRI-capable
2120	 * devices can be added to it in the device attach path.
2121	 */
2122	if (amd_iommu_gt_ppr_supported()) {
2123		ret = amd_iommu_iopf_init(iommu);
2124		if (ret)
2125			return ret;
2126	}
2127
2128	iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL);
2129
2130	return pci_enable_device(iommu->dev);
2131}
2132
2133static void print_iommu_info(void)
2134{
2135	int i;
2136	static const char * const feat_str[] = {
2137		"PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
2138		"IA", "GA", "HE", "PC"
2139	};
2140
2141	if (amd_iommu_efr) {
2142		pr_info("Extended features (%#llx, %#llx):", amd_iommu_efr, amd_iommu_efr2);
2143
2144		for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
2145			if (check_feature(1ULL << i))
2146				pr_cont(" %s", feat_str[i]);
2147		}
2148
2149		if (check_feature(FEATURE_GAM_VAPIC))
2150			pr_cont(" GA_vAPIC");
2151
2152		if (check_feature(FEATURE_SNP))
2153			pr_cont(" SNP");
2154
2155		pr_cont("\n");
2156	}
2157
2158	if (irq_remapping_enabled) {
2159		pr_info("Interrupt remapping enabled\n");
2160		if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2161			pr_info("X2APIC enabled\n");
2162	}
2163	if (amd_iommu_pgtable == AMD_IOMMU_V2) {
2164		pr_info("V2 page table enabled (Paging mode : %d level)\n",
2165			amd_iommu_gpt_level);
2166	}
2167}
2168
2169static int __init amd_iommu_init_pci(void)
2170{
2171	struct amd_iommu *iommu;
2172	struct amd_iommu_pci_seg *pci_seg;
2173	int ret;
2174
2175	for_each_iommu(iommu) {
2176		ret = iommu_init_pci(iommu);
2177		if (ret) {
2178			pr_err("IOMMU%d: Failed to initialize IOMMU Hardware (error=%d)!\n",
2179			       iommu->index, ret);
2180			goto out;
2181		}
2182		/* Need to setup range after PCI init */
2183		iommu_set_cwwb_range(iommu);
2184	}
2185
2186	/*
2187	 * Order is important here to make sure any unity map requirements are
2188	 * fulfilled. The unity mappings are created and written to the device
2189	 * table during the iommu_init_pci() call.
2190	 *
2191	 * After that we call init_device_table_dma() to make sure any
2192	 * uninitialized DTE will block DMA, and in the end we flush the caches
2193	 * of all IOMMUs to make sure the changes to the device table are
2194	 * active.
2195	 */
2196	for_each_pci_segment(pci_seg)
2197		init_device_table_dma(pci_seg);
2198
2199	for_each_iommu(iommu)
2200		amd_iommu_flush_all_caches(iommu);
2201
2202	print_iommu_info();
2203
2204out:
2205	return ret;
2206}
2207
2208/****************************************************************************
2209 *
2210 * The following functions initialize the MSI interrupts for all IOMMUs
2211 * in the system. It's a bit challenging because there could be multiple
2212 * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
2213 * pci_dev.
2214 *
2215 ****************************************************************************/
2216
2217static int iommu_setup_msi(struct amd_iommu *iommu)
2218{
2219	int r;
2220
2221	r = pci_enable_msi(iommu->dev);
2222	if (r)
2223		return r;
2224
2225	r = request_threaded_irq(iommu->dev->irq,
2226				 amd_iommu_int_handler,
2227				 amd_iommu_int_thread,
2228				 0, "AMD-Vi",
2229				 iommu);
2230
2231	if (r) {
2232		pci_disable_msi(iommu->dev);
2233		return r;
2234	}
2235
2236	return 0;
2237}
2238
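/*
 * Layout of the IOMMU "IntCapXT" interrupt routing registers used when
 * x2APIC mode is enabled (see iommu_init_irq()). A 32-bit APIC destination
 * ID is split across destid_0_23 and destid_24_31; intcapxt_unmask_irq()
 * shows how the fields are filled in from the vector/APIC configuration.
 */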
2239union intcapxt {
2240	u64	capxt;
2241	struct {
2242		u64	reserved_0		:  2,
2243			dest_mode_logical	:  1,
2244			reserved_1		:  5,
2245			destid_0_23		: 24,
2246			vector			:  8,
2247			reserved_2		: 16,
2248			destid_24_31		:  8;
2249	};
2250} __attribute__ ((packed));
2251
2252
2253static struct irq_chip intcapxt_controller;
2254
2255static int intcapxt_irqdomain_activate(struct irq_domain *domain,
2256				       struct irq_data *irqd, bool reserve)
2257{
2258	return 0;
2259}
2260
2261static void intcapxt_irqdomain_deactivate(struct irq_domain *domain,
2262					  struct irq_data *irqd)
2263{
2264}
2265
2266
2267static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
2268				    unsigned int nr_irqs, void *arg)
2269{
2270	struct irq_alloc_info *info = arg;
2271	int i, ret;
2272
2273	if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI)
2274		return -EINVAL;
2275
2276	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
2277	if (ret < 0)
2278		return ret;
2279
2280	for (i = virq; i < virq + nr_irqs; i++) {
2281		struct irq_data *irqd = irq_domain_get_irq_data(domain, i);
2282
2283		irqd->chip = &intcapxt_controller;
2284		irqd->hwirq = info->hwirq;
2285		irqd->chip_data = info->data;
2286		__irq_set_handler(i, handle_edge_irq, 0, "edge");
2287	}
2288
2289	return ret;
2290}
2291
2292static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq,
2293				    unsigned int nr_irqs)
2294{
2295	irq_domain_free_irqs_top(domain, virq, nr_irqs);
2296}
2297
2298
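/*
 * For the IntCapXT irqchip, irqd->hwirq holds the MMIO register offset of
 * the event, PPR or GA log IntCapXT register (set up in
 * iommu_setup_intcapxt()). Unmasking writes the routing data to that
 * register, masking clears it.
 */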
2299static void intcapxt_unmask_irq(struct irq_data *irqd)
2300{
2301	struct amd_iommu *iommu = irqd->chip_data;
2302	struct irq_cfg *cfg = irqd_cfg(irqd);
2303	union intcapxt xt;
2304
2305	xt.capxt = 0ULL;
2306	xt.dest_mode_logical = apic->dest_mode_logical;
2307	xt.vector = cfg->vector;
2308	xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0);
2309	xt.destid_24_31 = cfg->dest_apicid >> 24;
2310
2311	writeq(xt.capxt, iommu->mmio_base + irqd->hwirq);
2312}
2313
2314static void intcapxt_mask_irq(struct irq_data *irqd)
2315{
2316	struct amd_iommu *iommu = irqd->chip_data;
2317
2318	writeq(0, iommu->mmio_base + irqd->hwirq);
2319}
2320
2321
2322static int intcapxt_set_affinity(struct irq_data *irqd,
2323				 const struct cpumask *mask, bool force)
2324{
2325	struct irq_data *parent = irqd->parent_data;
2326	int ret;
2327
2328	ret = parent->chip->irq_set_affinity(parent, mask, force);
2329	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
2330		return ret;
2331	return 0;
2332}
2333
2334static int intcapxt_set_wake(struct irq_data *irqd, unsigned int on)
2335{
2336	return on ? -EOPNOTSUPP : 0;
2337}
2338
2339static struct irq_chip intcapxt_controller = {
2340	.name			= "IOMMU-MSI",
2341	.irq_unmask		= intcapxt_unmask_irq,
2342	.irq_mask		= intcapxt_mask_irq,
2343	.irq_ack		= irq_chip_ack_parent,
2344	.irq_retrigger		= irq_chip_retrigger_hierarchy,
2345	.irq_set_affinity       = intcapxt_set_affinity,
2346	.irq_set_wake		= intcapxt_set_wake,
2347	.flags			= IRQCHIP_MASK_ON_SUSPEND,
2348};
2349
2350static const struct irq_domain_ops intcapxt_domain_ops = {
2351	.alloc			= intcapxt_irqdomain_alloc,
2352	.free			= intcapxt_irqdomain_free,
2353	.activate		= intcapxt_irqdomain_activate,
2354	.deactivate		= intcapxt_irqdomain_deactivate,
2355};
2356
2357
2358static struct irq_domain *iommu_irqdomain;
2359
2360static struct irq_domain *iommu_get_irqdomain(void)
2361{
2362	struct fwnode_handle *fn;
2363
2364	/* No need for locking here (yet) as the init is single-threaded */
2365	if (iommu_irqdomain)
2366		return iommu_irqdomain;
2367
2368	fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI");
2369	if (!fn)
2370		return NULL;
2371
2372	iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0,
2373						      fn, &intcapxt_domain_ops,
2374						      NULL);
2375	if (!iommu_irqdomain)
2376		irq_domain_free_fwnode(fn);
2377
2378	return iommu_irqdomain;
2379}
2380
2381static int __iommu_setup_intcapxt(struct amd_iommu *iommu, const char *devname,
2382				  int hwirq, irq_handler_t thread_fn)
2383{
2384	struct irq_domain *domain;
2385	struct irq_alloc_info info;
2386	int irq, ret;
2387	int node = dev_to_node(&iommu->dev->dev);
2388
2389	domain = iommu_get_irqdomain();
2390	if (!domain)
2391		return -ENXIO;
2392
2393	init_irq_alloc_info(&info, NULL);
2394	info.type = X86_IRQ_ALLOC_TYPE_AMDVI;
2395	info.data = iommu;
2396	info.hwirq = hwirq;
2397
2398	irq = irq_domain_alloc_irqs(domain, 1, node, &info);
2399	if (irq < 0) {
2400		irq_domain_remove(domain);
2401		return irq;
2402	}
2403
2404	ret = request_threaded_irq(irq, amd_iommu_int_handler,
2405				   thread_fn, 0, devname, iommu);
2406	if (ret) {
2407		irq_domain_free_irqs(irq, 1);
2408		irq_domain_remove(domain);
2409		return ret;
2410	}
2411
2412	return 0;
2413}
2414
2415static int iommu_setup_intcapxt(struct amd_iommu *iommu)
2416{
2417	int ret;
2418
2419	snprintf(iommu->evt_irq_name, sizeof(iommu->evt_irq_name),
2420		 "AMD-Vi%d-Evt", iommu->index);
2421	ret = __iommu_setup_intcapxt(iommu, iommu->evt_irq_name,
2422				     MMIO_INTCAPXT_EVT_OFFSET,
2423				     amd_iommu_int_thread_evtlog);
2424	if (ret)
2425		return ret;
2426
2427	snprintf(iommu->ppr_irq_name, sizeof(iommu->ppr_irq_name),
2428		 "AMD-Vi%d-PPR", iommu->index);
2429	ret = __iommu_setup_intcapxt(iommu, iommu->ppr_irq_name,
2430				     MMIO_INTCAPXT_PPR_OFFSET,
2431				     amd_iommu_int_thread_pprlog);
2432	if (ret)
2433		return ret;
2434
2435#ifdef CONFIG_IRQ_REMAP
2436	snprintf(iommu->ga_irq_name, sizeof(iommu->ga_irq_name),
2437		 "AMD-Vi%d-GA", iommu->index);
2438	ret = __iommu_setup_intcapxt(iommu, iommu->ga_irq_name,
2439				     MMIO_INTCAPXT_GALOG_OFFSET,
2440				     amd_iommu_int_thread_galog);
2441#endif
2442
2443	return ret;
2444}
2445
2446static int iommu_init_irq(struct amd_iommu *iommu)
2447{
2448	int ret;
2449
2450	if (iommu->int_enabled)
2451		goto enable_faults;
2452
2453	if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2454		ret = iommu_setup_intcapxt(iommu);
2455	else if (iommu->dev->msi_cap)
2456		ret = iommu_setup_msi(iommu);
2457	else
2458		ret = -ENODEV;
2459
2460	if (ret)
2461		return ret;
2462
2463	iommu->int_enabled = true;
2464enable_faults:
2465
2466	if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2467		iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
2468
2469	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
2470
2471	return 0;
2472}
2473
2474/****************************************************************************
2475 *
2476 * The next functions belong to the third pass of parsing the ACPI
2477 * table. In this last pass the memory mapping requirements are
2478 * gathered (like exclusion and unity mapping ranges).
2479 *
2480 ****************************************************************************/
2481
2482static void __init free_unity_maps(void)
2483{
2484	struct unity_map_entry *entry, *next;
2485	struct amd_iommu_pci_seg *p, *pci_seg;
2486
2487	for_each_pci_segment_safe(pci_seg, p) {
2488		list_for_each_entry_safe(entry, next, &pci_seg->unity_map, list) {
2489			list_del(&entry->list);
2490			kfree(entry);
2491		}
2492	}
2493}
2494
2495/* called for unity map ACPI definition */
2496static int __init init_unity_map_range(struct ivmd_header *m,
2497				       struct acpi_table_header *ivrs_base)
2498{
2499	struct unity_map_entry *e = NULL;
2500	struct amd_iommu_pci_seg *pci_seg;
2501	char *s;
2502
2503	pci_seg = get_pci_segment(m->pci_seg, ivrs_base);
2504	if (pci_seg == NULL)
2505		return -ENOMEM;
2506
2507	e = kzalloc(sizeof(*e), GFP_KERNEL);
2508	if (e == NULL)
2509		return -ENOMEM;
2510
2511	switch (m->type) {
2512	default:
2513		kfree(e);
2514		return 0;
2515	case ACPI_IVMD_TYPE:
2516		s = "IVMD_TYPE\t\t\t";
2517		e->devid_start = e->devid_end = m->devid;
2518		break;
2519	case ACPI_IVMD_TYPE_ALL:
2520		s = "IVMD_TYPE_ALL\t\t";
2521		e->devid_start = 0;
2522		e->devid_end = pci_seg->last_bdf;
2523		break;
2524	case ACPI_IVMD_TYPE_RANGE:
2525		s = "IVMD_TYPE_RANGE\t\t";
2526		e->devid_start = m->devid;
2527		e->devid_end = m->aux;
2528		break;
2529	}
2530	e->address_start = PAGE_ALIGN(m->range_start);
2531	e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
2532	e->prot = m->flags >> 1;
2533
2534	/*
2535	 * Treat per-device exclusion ranges as r/w unity-mapped regions
2536	 * since some buggy BIOSes may overwrite the exclusion range
2537	 * (exclusion_start and exclusion_length members). This happens
2538	 * when there are multiple exclusion ranges (IVMD entries)
2539	 * defined in the ACPI table.
2540	 */
2541	if (m->flags & IVMD_FLAG_EXCL_RANGE)
2542		e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1;
2543
2544	DUMP_printk("%s devid_start: %04x:%02x:%02x.%x devid_end: "
2545		    "%04x:%02x:%02x.%x range_start: %016llx range_end: %016llx"
2546		    " flags: %x\n", s, m->pci_seg,
2547		    PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start),
2548		    PCI_FUNC(e->devid_start), m->pci_seg,
2549		    PCI_BUS_NUM(e->devid_end),
2550		    PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
2551		    e->address_start, e->address_end, m->flags);
2552
2553	list_add_tail(&e->list, &pci_seg->unity_map);
2554
2555	return 0;
2556}
2557
2558/* iterates over all memory definitions we find in the ACPI table */
2559static int __init init_memory_definitions(struct acpi_table_header *table)
2560{
2561	u8 *p = (u8 *)table, *end = (u8 *)table;
2562	struct ivmd_header *m;
2563
2564	end += table->length;
2565	p += IVRS_HEADER_LENGTH;
2566
2567	while (p < end) {
2568		m = (struct ivmd_header *)p;
2569		if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE))
2570			init_unity_map_range(m, table);
2571
2572		p += m->length;
2573	}
2574
2575	return 0;
2576}
2577
2578/*
2579 * Init the device table to not allow DMA access for devices
2580 */
2581static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
2582{
2583	u32 devid;
2584	struct dev_table_entry *dev_table = pci_seg->dev_table;
2585
2586	if (dev_table == NULL)
2587		return;
2588
2589	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
2590		__set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID);
2591		if (!amd_iommu_snp_en)
2592			__set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION);
2593	}
2594}
2595
2596static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
2597{
2598	u32 devid;
2599	struct dev_table_entry *dev_table = pci_seg->dev_table;
2600
2601	if (dev_table == NULL)
2602		return;
2603
2604	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
2605		dev_table[devid].data[0] = 0ULL;
2606		dev_table[devid].data[1] = 0ULL;
2607	}
2608}
2609
2610static void init_device_table(void)
2611{
2612	struct amd_iommu_pci_seg *pci_seg;
2613	u32 devid;
2614
2615	if (!amd_iommu_irq_remap)
2616		return;
2617
2618	for_each_pci_segment(pci_seg) {
2619		for (devid = 0; devid <= pci_seg->last_bdf; ++devid)
2620			__set_dev_entry_bit(pci_seg->dev_table,
2621					    devid, DEV_ENTRY_IRQ_TBL_EN);
2622	}
2623}
2624
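/*
 * Propagate the IVHD ACPI flags parsed from the IVRS table (HtTunEn,
 * PassPW, ResPassPW, Isoc) into the corresponding IOMMU control bits.
 */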
2625static void iommu_init_flags(struct amd_iommu *iommu)
2626{
2627	iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
2628		iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
2629		iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
2630
2631	iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
2632		iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
2633		iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
2634
2635	iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
2636		iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
2637		iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
2638
2639	iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
2640		iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
2641		iommu_feature_disable(iommu, CONTROL_ISOC_EN);
2642
2643	/*
2644	 * make IOMMU memory accesses cache coherent
2645	 */
2646	iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
2647
2648	/* Set IOTLB invalidation timeout to 1s */
2649	iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
2650}
2651
2652static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
2653{
2654	int i, j;
2655	u32 ioc_feature_control;
2656	struct pci_dev *pdev = iommu->root_pdev;
2657
2658	/* RD890 BIOSes may not have completely reconfigured the iommu */
2659	if (!is_rd890_iommu(iommu->dev) || !pdev)
2660		return;
2661
2662	/*
2663	 * First, we need to ensure that the iommu is enabled. This is
2664	 * controlled by a register in the northbridge
2665	 */
2666
2667	/* Select Northbridge indirect register 0x75 and enable writing */
2668	pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
2669	pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
2670
2671	/* Enable the iommu */
2672	if (!(ioc_feature_control & 0x1))
2673		pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
2674
2675	/* Restore the iommu BAR */
2676	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2677			       iommu->stored_addr_lo);
2678	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
2679			       iommu->stored_addr_hi);
2680
2681	/* Restore the l1 indirect regs for each of the 6 l1s */
2682	for (i = 0; i < 6; i++)
2683		for (j = 0; j < 0x12; j++)
2684			iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
2685
2686	/* Restore the l2 indirect regs */
2687	for (i = 0; i < 0x83; i++)
2688		iommu_write_l2(iommu, i, iommu->stored_l2[i]);
2689
2690	/* Lock PCI setup registers */
2691	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2692			       iommu->stored_addr_lo | 1);
2693}
2694
2695static void iommu_enable_ga(struct amd_iommu *iommu)
2696{
2697#ifdef CONFIG_IRQ_REMAP
2698	switch (amd_iommu_guest_ir) {
2699	case AMD_IOMMU_GUEST_IR_VAPIC:
2700	case AMD_IOMMU_GUEST_IR_LEGACY_GA:
2701		iommu_feature_enable(iommu, CONTROL_GA_EN);
2702		iommu->irte_ops = &irte_128_ops;
2703		break;
2704	default:
2705		iommu->irte_ops = &irte_32_ops;
2706		break;
2707	}
2708#endif
2709}
2710
2711static void iommu_disable_irtcachedis(struct amd_iommu *iommu)
2712{
2713	iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
2714}
2715
2716static void iommu_enable_irtcachedis(struct amd_iommu *iommu)
2717{
2718	u64 ctrl;
2719
2720	if (!amd_iommu_irtcachedis)
2721		return;
2722
2723	/*
2724	 * Note:
2725	 * Support for the IRTCacheDis feature is determined by
2726	 * checking whether the bit is writable.
2727	 */
2728	iommu_feature_enable(iommu, CONTROL_IRTCACHEDIS);
2729	ctrl = readq(iommu->mmio_base +  MMIO_CONTROL_OFFSET);
2730	ctrl &= (1ULL << CONTROL_IRTCACHEDIS);
2731	if (ctrl)
2732		iommu->irtcachedis_enabled = true;
2733	pr_info("iommu%d (%#06x) : IRT cache is %s\n",
2734		iommu->index, iommu->devid,
2735		iommu->irtcachedis_enabled ? "disabled" : "enabled");
2736}
2737
2738static void early_enable_iommu(struct amd_iommu *iommu)
2739{
2740	iommu_disable(iommu);
2741	iommu_init_flags(iommu);
2742	iommu_set_device_table(iommu);
2743	iommu_enable_command_buffer(iommu);
2744	iommu_enable_event_buffer(iommu);
2745	iommu_set_exclusion_range(iommu);
2746	iommu_enable_ga(iommu);
2747	iommu_enable_xt(iommu);
2748	iommu_enable_irtcachedis(iommu);
2749	iommu_enable(iommu);
2750	amd_iommu_flush_all_caches(iommu);
2751}
2752
2753/*
2754 * This function finally enables all IOMMUs found in the system after
2755 * they have been initialized.
2756 *
2757 * If running in a kdump kernel with all IOMMUs pre-enabled, try to copy
2758 * the old content of the device table entries. If that is not the case or
2759 * the copy fails, just continue as a normal kernel would.
2760 */
2761static void early_enable_iommus(void)
2762{
2763	struct amd_iommu *iommu;
2764	struct amd_iommu_pci_seg *pci_seg;
2765
2766	if (!copy_device_table()) {
2767		/*
2768		 * If we get here, copying the device table from the old kernel
2769		 * (with all IOMMUs enabled) failed; print an error message and
2770		 * free the allocated old_dev_tbl_cpy.
2771		 */
2772		if (amd_iommu_pre_enabled)
2773			pr_err("Failed to copy DEV table from previous kernel.\n");
2774
2775		for_each_pci_segment(pci_seg) {
2776			if (pci_seg->old_dev_tbl_cpy != NULL) {
2777				iommu_free_pages(pci_seg->old_dev_tbl_cpy,
2778						 get_order(pci_seg->dev_table_size));
2779				pci_seg->old_dev_tbl_cpy = NULL;
2780			}
2781		}
2782
2783		for_each_iommu(iommu) {
2784			clear_translation_pre_enabled(iommu);
2785			early_enable_iommu(iommu);
2786		}
2787	} else {
2788		pr_info("Copied DEV table from previous kernel.\n");
2789
2790		for_each_pci_segment(pci_seg) {
2791			iommu_free_pages(pci_seg->dev_table,
2792					 get_order(pci_seg->dev_table_size));
2793			pci_seg->dev_table = pci_seg->old_dev_tbl_cpy;
2794		}
2795
2796		for_each_iommu(iommu) {
2797			iommu_disable_command_buffer(iommu);
2798			iommu_disable_event_buffer(iommu);
2799			iommu_disable_irtcachedis(iommu);
2800			iommu_enable_command_buffer(iommu);
2801			iommu_enable_event_buffer(iommu);
2802			iommu_enable_ga(iommu);
2803			iommu_enable_xt(iommu);
2804			iommu_enable_irtcachedis(iommu);
2805			iommu_set_device_table(iommu);
2806			amd_iommu_flush_all_caches(iommu);
2807		}
2808	}
2809}
2810
2811static void enable_iommus_ppr(void)
2812{
2813	struct amd_iommu *iommu;
2814
2815	if (!amd_iommu_gt_ppr_supported())
2816		return;
2817
2818	for_each_iommu(iommu)
2819		amd_iommu_enable_ppr_log(iommu);
2820}
2821
2822static void enable_iommus_vapic(void)
2823{
2824#ifdef CONFIG_IRQ_REMAP
2825	u32 status, i;
2826	struct amd_iommu *iommu;
2827
2828	for_each_iommu(iommu) {
2829		/*
2830		 * Disable GALog if already running. It could have been enabled
2831		 * in the previous boot before kdump.
2832		 */
2833		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
2834		if (!(status & MMIO_STATUS_GALOG_RUN_MASK))
2835			continue;
2836
2837		iommu_feature_disable(iommu, CONTROL_GALOG_EN);
2838		iommu_feature_disable(iommu, CONTROL_GAINT_EN);
2839
2840		/*
2841		 * Need to poll and wait for the GALOGRun bit to clear before
2842		 * we can set/modify the GA Log registers safely.
2843		 */
2844		for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) {
2845			status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
2846			if (!(status & MMIO_STATUS_GALOG_RUN_MASK))
2847				break;
2848			udelay(10);
2849		}
2850
2851		if (WARN_ON(i >= MMIO_STATUS_TIMEOUT))
2852			return;
2853	}
2854
2855	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
2856	    !check_feature(FEATURE_GAM_VAPIC)) {
2857		amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
2858		return;
2859	}
2860
2861	if (amd_iommu_snp_en &&
2862	    !FEATURE_SNPAVICSUP_GAM(amd_iommu_efr2)) {
2863		pr_warn("Force to disable Virtual APIC due to SNP\n");
2864		amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
2865		return;
2866	}
2867
2868	/* Enabling GAM and SNPAVIC support */
2869	for_each_iommu(iommu) {
2870		if (iommu_init_ga_log(iommu) ||
2871		    iommu_ga_log_enable(iommu))
2872			return;
2873
2874		iommu_feature_enable(iommu, CONTROL_GAM_EN);
2875		if (amd_iommu_snp_en)
2876			iommu_feature_enable(iommu, CONTROL_SNPAVIC_EN);
2877	}
2878
2879	amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP);
2880	pr_info("Virtual APIC enabled\n");
2881#endif
2882}
2883
2884static void enable_iommus(void)
2885{
2886	early_enable_iommus();
2887}
2888
2889static void disable_iommus(void)
2890{
2891	struct amd_iommu *iommu;
2892
2893	for_each_iommu(iommu)
2894		iommu_disable(iommu);
2895
2896#ifdef CONFIG_IRQ_REMAP
2897	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
2898		amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP);
2899#endif
2900}
2901
2902/*
2903 * Suspend/Resume support
2904 * disable suspend until real resume implemented
2905 */
2906
2907static void amd_iommu_resume(void)
2908{
2909	struct amd_iommu *iommu;
2910
2911	for_each_iommu(iommu)
2912		iommu_apply_resume_quirks(iommu);
2913
2914	/* re-load the hardware */
2915	enable_iommus();
2916
2917	amd_iommu_enable_interrupts();
2918}
2919
2920static int amd_iommu_suspend(void)
2921{
2922	/* disable IOMMUs to go out of the way for BIOS */
2923	disable_iommus();
2924
2925	return 0;
2926}
2927
2928static struct syscore_ops amd_iommu_syscore_ops = {
2929	.suspend = amd_iommu_suspend,
2930	.resume = amd_iommu_resume,
2931};
2932
2933static void __init free_iommu_resources(void)
2934{
2935	kmem_cache_destroy(amd_iommu_irq_cache);
2936	amd_iommu_irq_cache = NULL;
2937
2938	free_iommu_all();
2939	free_pci_segments();
2940}
2941
2942/* SB IOAPIC is always on this device in AMD systems */
2943#define IOAPIC_SB_DEVID		((0x00 << 8) | PCI_DEVFN(0x14, 0))
2944
2945static bool __init check_ioapic_information(void)
2946{
2947	const char *fw_bug = FW_BUG;
2948	bool ret, has_sb_ioapic;
2949	int idx;
2950
2951	has_sb_ioapic = false;
2952	ret           = false;
2953
2954	/*
2955	 * If we have map overrides on the kernel command line the
2956	 * messages in this function might not describe firmware bugs
2957	 * anymore - so be careful
2958	 */
2959	if (cmdline_maps)
2960		fw_bug = "";
2961
2962	for (idx = 0; idx < nr_ioapics; idx++) {
2963		int devid, id = mpc_ioapic_id(idx);
2964
2965		devid = get_ioapic_devid(id);
2966		if (devid < 0) {
2967			pr_err("%s: IOAPIC[%d] not in IVRS table\n",
2968				fw_bug, id);
2969			ret = false;
2970		} else if (devid == IOAPIC_SB_DEVID) {
2971			has_sb_ioapic = true;
2972			ret           = true;
2973		}
2974	}
2975
2976	if (!has_sb_ioapic) {
2977		/*
2978		 * We expect the SB IOAPIC to be listed in the IVRS
2979		 * table. The system timer is connected to the SB IOAPIC
2980		 * and if we don't have it in the list the system will
2981		 * panic at boot time.  This situation usually happens
2982		 * when the BIOS is buggy and provides us the wrong
2983		 * device id for the IOAPIC in the system.
2984		 */
2985		pr_err("%s: No southbridge IOAPIC found\n", fw_bug);
2986	}
2987
2988	if (!ret)
2989		pr_err("Disabling interrupt remapping\n");
2990
2991	return ret;
2992}
2993
2994static void __init free_dma_resources(void)
2995{
2996	iommu_free_pages(amd_iommu_pd_alloc_bitmap,
2997			 get_order(MAX_DOMAIN_ID / 8));
2998	amd_iommu_pd_alloc_bitmap = NULL;
2999
3000	free_unity_maps();
3001}
3002
3003static void __init ivinfo_init(void *ivrs)
3004{
3005	amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET));
3006}
3007
3008/*
3009 * This is the hardware init function for AMD IOMMU in the system.
3010 * This function is called either from amd_iommu_init or from the interrupt
3011 * remapping setup code.
3012 *
3013 * This function parses the ACPI table for the AMD IOMMU (IVRS)
3014 * in four passes:
3015 *
3016 *	1 pass) Discover the most comprehensive IVHD type to use.
3017 *
3018 *	2 pass) Find the highest PCI device id the driver has to handle.
3019 *		Upon this information the size of the data structures is
3020 *		determined that needs to be allocated.
3021 *
3022 *	3 pass) Initialize the data structures just allocated with the
3023 *		information in the ACPI table about available AMD IOMMUs
3024 *		in the system. It also maps the PCI devices in the
3025 *		system to specific IOMMUs
3026 *
3027 *	4 pass) After the basic data structures are allocated and
3028 *		initialized we update them with information about memory
3029 *		remapping requirements parsed out of the ACPI table in
3030 *		this last pass.
3031 *
3032 * After everything is set up the IOMMUs are enabled and the necessary
3033 * hotplug and suspend notifiers are registered.
3034 */
3035static int __init early_amd_iommu_init(void)
3036{
3037	struct acpi_table_header *ivrs_base;
3038	int remap_cache_sz, ret;
3039	acpi_status status;
3040
3041	if (!amd_iommu_detected)
3042		return -ENODEV;
3043
3044	status = acpi_get_table("IVRS", 0, &ivrs_base);
3045	if (status == AE_NOT_FOUND)
3046		return -ENODEV;
3047	else if (ACPI_FAILURE(status)) {
3048		const char *err = acpi_format_exception(status);
3049		pr_err("IVRS table error: %s\n", err);
3050		return -EINVAL;
3051	}
3052
3053	/*
3054	 * Validate checksum here so we don't need to do it when
3055	 * we actually parse the table
3056	 */
3057	ret = check_ivrs_checksum(ivrs_base);
3058	if (ret)
3059		goto out;
3060
3061	ivinfo_init(ivrs_base);
3062
3063	amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
3064	DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
3065
3066	/* Device table - directly used by all IOMMUs */
3067	ret = -ENOMEM;
3068
3069	amd_iommu_pd_alloc_bitmap = iommu_alloc_pages(GFP_KERNEL,
3070						      get_order(MAX_DOMAIN_ID / 8));
3071	if (amd_iommu_pd_alloc_bitmap == NULL)
3072		goto out;
3073
3074	/*
3075	 * Never allocate domain 0 because it's used as the non-allocated and
3076	 * error value placeholder
3077	 */
3078	__set_bit(0, amd_iommu_pd_alloc_bitmap);
3079
3080	/*
3081	 * now the data structures are allocated and basically initialized
3082	 * start the real acpi table scan
3083	 */
3084	ret = init_iommu_all(ivrs_base);
3085	if (ret)
3086		goto out;
3087
3088	/* 5 level guest page table */
3089	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
3090	    check_feature_gpt_level() == GUEST_PGTABLE_5_LEVEL)
3091		amd_iommu_gpt_level = PAGE_MODE_5_LEVEL;
3092
3093	/* Disable any previously enabled IOMMUs */
3094	if (!is_kdump_kernel() || amd_iommu_disabled)
3095		disable_iommus();
3096
3097	if (amd_iommu_irq_remap)
3098		amd_iommu_irq_remap = check_ioapic_information();
3099
3100	if (amd_iommu_irq_remap) {
3101		struct amd_iommu_pci_seg *pci_seg;
3102		/*
3103		 * Interrupt remapping enabled, create kmem_cache for the
3104		 * remapping tables.
3105		 */
3106		ret = -ENOMEM;
3107		if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
3108			remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32);
3109		else
3110			remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
3111		amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
3112							remap_cache_sz,
3113							DTE_INTTAB_ALIGNMENT,
3114							0, NULL);
3115		if (!amd_iommu_irq_cache)
3116			goto out;
3117
3118		for_each_pci_segment(pci_seg) {
3119			if (alloc_irq_lookup_table(pci_seg))
3120				goto out;
3121		}
3122	}
3123
3124	ret = init_memory_definitions(ivrs_base);
3125	if (ret)
3126		goto out;
3127
3128	/* init the device table */
3129	init_device_table();
3130
3131out:
3132	/* Don't leak any ACPI memory */
3133	acpi_put_table(ivrs_base);
3134
3135	return ret;
3136}
3137
3138static int amd_iommu_enable_interrupts(void)
3139{
3140	struct amd_iommu *iommu;
3141	int ret = 0;
3142
3143	for_each_iommu(iommu) {
3144		ret = iommu_init_irq(iommu);
3145		if (ret)
3146			goto out;
3147	}
3148
3149	/*
3150	 * Interrupt handler is ready to process interrupts. Enable
3151	 * PPR and GA log interrupt for all IOMMUs.
3152	 */
3153	enable_iommus_vapic();
3154	enable_iommus_ppr();
3155
3156out:
3157	return ret;
3158}
3159
3160static bool __init detect_ivrs(void)
3161{
3162	struct acpi_table_header *ivrs_base;
3163	acpi_status status;
3164	int i;
3165
3166	status = acpi_get_table("IVRS", 0, &ivrs_base);
3167	if (status == AE_NOT_FOUND)
3168		return false;
3169	else if (ACPI_FAILURE(status)) {
3170		const char *err = acpi_format_exception(status);
3171		pr_err("IVRS table error: %s\n", err);
3172		return false;
3173	}
3174
3175	acpi_put_table(ivrs_base);
3176
3177	if (amd_iommu_force_enable)
3178		goto out;
3179
3180	/* Don't use IOMMU if there is Stoney Ridge graphics */
3181	for (i = 0; i < 32; i++) {
3182		u32 pci_id;
3183
3184		pci_id = read_pci_config(0, i, 0, 0);
3185		if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) {
3186			pr_info("Disable IOMMU on Stoney Ridge\n");
3187			return false;
3188		}
3189	}
3190
3191out:
3192	/* Make sure ACS will be enabled during PCI probe */
3193	pci_request_acs();
3194
3195	return true;
3196}
3197
3198static void iommu_snp_enable(void)
3199{
3200#ifdef CONFIG_KVM_AMD_SEV
3201	if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
3202		return;
3203	/*
3204	 * SNP support requires that the IOMMU is enabled and configured with
3205	 * the V1 page table (DTE[Mode] = 0 is not supported).
3206	 */
3207	if (no_iommu || iommu_default_passthrough()) {
3208		pr_warn("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n");
3209		goto disable_snp;
3210	}
3211
3212	if (amd_iommu_pgtable != AMD_IOMMU_V1) {
3213		pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n");
3214		goto disable_snp;
3215	}
3216
3217	amd_iommu_snp_en = check_feature(FEATURE_SNP);
3218	if (!amd_iommu_snp_en) {
3219		pr_warn("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n");
3220		goto disable_snp;
3221	}
3222
3223	pr_info("IOMMU SNP support enabled.\n");
3224	return;
3225
3226disable_snp:
3227	cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
3228#endif
3229}
3230
3231/****************************************************************************
3232 *
3233 * AMD IOMMU Initialization State Machine
3234 *
3235 ****************************************************************************/
3236
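/*
 * Normal forward progression of the init state machine, driven one step at
 * a time by iommu_go_to_state():
 *
 *   IOMMU_START_STATE -> IOMMU_IVRS_DETECTED -> IOMMU_ACPI_FINISHED ->
 *   IOMMU_ENABLED -> IOMMU_PCI_INIT -> IOMMU_INTERRUPTS_EN ->
 *   IOMMU_INITIALIZED
 *
 * IOMMU_NOT_FOUND, IOMMU_CMDLINE_DISABLED and IOMMU_INIT_ERROR are
 * terminal error states.
 */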
3237static int __init state_next(void)
3238{
3239	int ret = 0;
3240
3241	switch (init_state) {
3242	case IOMMU_START_STATE:
3243		if (!detect_ivrs()) {
3244			init_state	= IOMMU_NOT_FOUND;
3245			ret		= -ENODEV;
3246		} else {
3247			init_state	= IOMMU_IVRS_DETECTED;
3248		}
3249		break;
3250	case IOMMU_IVRS_DETECTED:
3251		if (amd_iommu_disabled) {
3252			init_state = IOMMU_CMDLINE_DISABLED;
3253			ret = -EINVAL;
3254		} else {
3255			ret = early_amd_iommu_init();
3256			init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
3257		}
3258		break;
3259	case IOMMU_ACPI_FINISHED:
3260		early_enable_iommus();
3261		x86_platform.iommu_shutdown = disable_iommus;
3262		init_state = IOMMU_ENABLED;
3263		break;
3264	case IOMMU_ENABLED:
3265		register_syscore_ops(&amd_iommu_syscore_ops);
3266		iommu_snp_enable();
3267		ret = amd_iommu_init_pci();
3268		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
3269		break;
3270	case IOMMU_PCI_INIT:
3271		ret = amd_iommu_enable_interrupts();
3272		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
3273		break;
3274	case IOMMU_INTERRUPTS_EN:
3275		init_state = IOMMU_INITIALIZED;
3276		break;
3277	case IOMMU_INITIALIZED:
3278		/* Nothing to do */
3279		break;
3280	case IOMMU_NOT_FOUND:
3281	case IOMMU_INIT_ERROR:
3282	case IOMMU_CMDLINE_DISABLED:
3283		/* Error states => do nothing */
3284		ret = -EINVAL;
3285		break;
3286	default:
3287		/* Unknown state */
3288		BUG();
3289	}
3290
3291	if (ret) {
3292		free_dma_resources();
3293		if (!irq_remapping_enabled) {
3294			disable_iommus();
3295			free_iommu_resources();
3296		} else {
3297			struct amd_iommu *iommu;
3298			struct amd_iommu_pci_seg *pci_seg;
3299
3300			for_each_pci_segment(pci_seg)
3301				uninit_device_table_dma(pci_seg);
3302
3303			for_each_iommu(iommu)
3304				amd_iommu_flush_all_caches(iommu);
3305		}
3306	}
3307	return ret;
3308}
3309
3310static int __init iommu_go_to_state(enum iommu_init_state state)
3311{
3312	int ret = -EINVAL;
3313
3314	while (init_state != state) {
3315		if (init_state == IOMMU_NOT_FOUND         ||
3316		    init_state == IOMMU_INIT_ERROR        ||
3317		    init_state == IOMMU_CMDLINE_DISABLED)
3318			break;
3319		ret = state_next();
3320	}
3321
3322	return ret;
3323}
3324
3325#ifdef CONFIG_IRQ_REMAP
3326int __init amd_iommu_prepare(void)
3327{
3328	int ret;
3329
3330	amd_iommu_irq_remap = true;
3331
3332	ret = iommu_go_to_state(IOMMU_ACPI_FINISHED);
3333	if (ret) {
3334		amd_iommu_irq_remap = false;
3335		return ret;
3336	}
3337
3338	return amd_iommu_irq_remap ? 0 : -ENODEV;
3339}
3340
3341int __init amd_iommu_enable(void)
3342{
3343	int ret;
3344
3345	ret = iommu_go_to_state(IOMMU_ENABLED);
3346	if (ret)
3347		return ret;
3348
3349	irq_remapping_enabled = 1;
3350	return amd_iommu_xt_mode;
3351}
3352
3353void amd_iommu_disable(void)
3354{
3355	amd_iommu_suspend();
3356}
3357
3358int amd_iommu_reenable(int mode)
3359{
3360	amd_iommu_resume();
3361
3362	return 0;
3363}
3364
3365int __init amd_iommu_enable_faulting(unsigned int cpu)
3366{
3367	/* We enable MSI later when PCI is initialized */
3368	return 0;
3369}
3370#endif
3371
3372/*
3373 * This is the core init function for AMD IOMMU hardware in the system.
3374 * This function is called from the generic x86 DMA layer initialization
3375 * code.
3376 */
3377static int __init amd_iommu_init(void)
3378{
3379	struct amd_iommu *iommu;
3380	int ret;
3381
3382	ret = iommu_go_to_state(IOMMU_INITIALIZED);
3383#ifdef CONFIG_GART_IOMMU
3384	if (ret && list_empty(&amd_iommu_list)) {
3385		/*
3386		 * We failed to initialize the AMD IOMMU - try fallback
3387		 * to GART if possible.
3388		 */
3389		gart_iommu_init();
3390	}
3391#endif
3392
3393	for_each_iommu(iommu)
3394		amd_iommu_debugfs_setup(iommu);
3395
3396	return ret;
3397}
3398
3399static bool amd_iommu_sme_check(void)
3400{
3401	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) ||
3402	    (boot_cpu_data.x86 != 0x17))
3403		return true;
3404
3405	/* For Fam17h, a specific level of support is required */
3406	if (boot_cpu_data.microcode >= 0x08001205)
3407		return true;
3408
3409	if ((boot_cpu_data.microcode >= 0x08001126) &&
3410	    (boot_cpu_data.microcode <= 0x080011ff))
3411		return true;
3412
3413	pr_notice("IOMMU not currently supported when SME is active\n");
3414
3415	return false;
3416}
3417
3418/****************************************************************************
3419 *
3420 * Early detect code. This code runs at IOMMU detection time in the DMA
3421 * layer. It just looks if there is an IVRS ACPI table to detect AMD
3422 * IOMMUs
3423 *
3424 ****************************************************************************/
3425int __init amd_iommu_detect(void)
3426{
3427	int ret;
3428
3429	if (no_iommu || (iommu_detected && !gart_iommu_aperture))
3430		return -ENODEV;
3431
3432	if (!amd_iommu_sme_check())
3433		return -ENODEV;
3434
3435	ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
3436	if (ret)
3437		return ret;
3438
3439	amd_iommu_detected = true;
3440	iommu_detected = 1;
3441	x86_init.iommu.iommu_init = amd_iommu_init;
3442
3443	return 1;
3444}
3445
3446/****************************************************************************
3447 *
3448 * Parsing functions for the AMD IOMMU specific kernel command line
3449 * options.
3450 *
3451 ****************************************************************************/
3452
3453static int __init parse_amd_iommu_dump(char *str)
3454{
3455	amd_iommu_dump = true;
3456
3457	return 1;
3458}
3459
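/* amd_iommu_intr= selects the guest interrupt mode: "legacy" or "vapic" */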
3460static int __init parse_amd_iommu_intr(char *str)
3461{
3462	for (; *str; ++str) {
3463		if (strncmp(str, "legacy", 6) == 0) {
3464			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
3465			break;
3466		}
3467		if (strncmp(str, "vapic", 5) == 0) {
3468			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
3469			break;
3470		}
3471	}
3472	return 1;
3473}
3474
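/*
 * amd_iommu= accepts a comma-separated list of the options handled below,
 * e.g. "amd_iommu=force_isolation,pgtbl_v2".
 */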
3475static int __init parse_amd_iommu_options(char *str)
3476{
3477	if (!str)
3478		return -EINVAL;
3479
3480	while (*str) {
3481		if (strncmp(str, "fullflush", 9) == 0) {
3482			pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n");
3483			iommu_set_dma_strict();
3484		} else if (strncmp(str, "force_enable", 12) == 0) {
3485			amd_iommu_force_enable = true;
3486		} else if (strncmp(str, "off", 3) == 0) {
3487			amd_iommu_disabled = true;
3488		} else if (strncmp(str, "force_isolation", 15) == 0) {
3489			amd_iommu_force_isolation = true;
3490		} else if (strncmp(str, "pgtbl_v1", 8) == 0) {
3491			amd_iommu_pgtable = AMD_IOMMU_V1;
3492		} else if (strncmp(str, "pgtbl_v2", 8) == 0) {
3493			amd_iommu_pgtable = AMD_IOMMU_V2;
3494		} else if (strncmp(str, "irtcachedis", 11) == 0) {
3495			amd_iommu_irtcachedis = true;
3496		} else {
3497			pr_notice("Unknown option - '%s'\n", str);
3498		}
3499
3500		str += strcspn(str, ",");
3501		while (*str == ',')
3502			str++;
3503	}
3504
3505	return 1;
3506}
3507
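/*
 * Accepted formats (the bracketed form is deprecated):
 *   ivrs_ioapic=<id>@<bus>:<dev>.<fn>
 *   ivrs_ioapic=<id>@<seg>:<bus>:<dev>.<fn>
 *   ivrs_ioapic[<id>]=<bus>:<dev>.<fn>		(deprecated)
 */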
3508static int __init parse_ivrs_ioapic(char *str)
3509{
3510	u32 seg = 0, bus, dev, fn;
3511	int id, i;
3512	u32 devid;
3513
3514	if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3515	    sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
3516		goto found;
3517
3518	if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3519	    sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
3520		pr_warn("ivrs_ioapic%s option format deprecated; use ivrs_ioapic=%d@%04x:%02x:%02x.%d instead\n",
3521			str, id, seg, bus, dev, fn);
3522		goto found;
3523	}
3524
3525	pr_err("Invalid command line: ivrs_ioapic%s\n", str);
3526	return 1;
3527
3528found:
3529	if (early_ioapic_map_size == EARLY_MAP_SIZE) {
3530		pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
3531			str);
3532		return 1;
3533	}
3534
3535	devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3536
3537	cmdline_maps			= true;
3538	i				= early_ioapic_map_size++;
3539	early_ioapic_map[i].id		= id;
3540	early_ioapic_map[i].devid	= devid;
3541	early_ioapic_map[i].cmd_line	= true;
3542
3543	return 1;
3544}
3545
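/* Same formats as ivrs_ioapic above, e.g. ivrs_hpet=<id>@<seg>:<bus>:<dev>.<fn> */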
3546static int __init parse_ivrs_hpet(char *str)
3547{
3548	u32 seg = 0, bus, dev, fn;
3549	int id, i;
3550	u32 devid;
3551
3552	if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3553	    sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
3554		goto found;
3555
3556	if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3557	    sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
3558		pr_warn("ivrs_hpet%s option format deprecated; use ivrs_hpet=%d@%04x:%02x:%02x.%d instead\n",
3559			str, id, seg, bus, dev, fn);
3560		goto found;
3561	}
3562
3563	pr_err("Invalid command line: ivrs_hpet%s\n", str);
3564	return 1;
3565
3566found:
3567	if (early_hpet_map_size == EARLY_MAP_SIZE) {
3568		pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n",
3569			str);
3570		return 1;
3571	}
3572
3573	devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3574
3575	cmdline_maps			= true;
3576	i				= early_hpet_map_size++;
3577	early_hpet_map[i].id		= id;
3578	early_hpet_map[i].devid		= devid;
3579	early_hpet_map[i].cmd_line	= true;
3580
3581	return 1;
3582}
3583
3584#define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN)
3585
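/*
 * Accepted formats (the bracketed form is deprecated):
 *   ivrs_acpihid=<hid>:<uid>@<bus>:<dev>.<fn>
 *   ivrs_acpihid=<hid>:<uid>@<seg>:<bus>:<dev>.<fn>
 *   ivrs_acpihid[<bus>:<dev>.<fn>]=<hid>:<uid>	(deprecated)
 */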
3586static int __init parse_ivrs_acpihid(char *str)
3587{
3588	u32 seg = 0, bus, dev, fn;
3589	char *hid, *uid, *p, *addr;
3590	char acpiid[ACPIID_LEN] = {0};
3591	int i;
3592
3593	addr = strchr(str, '@');
3594	if (!addr) {
3595		addr = strchr(str, '=');
3596		if (!addr)
3597			goto not_found;
3598
3599		++addr;
3600
3601		if (strlen(addr) > ACPIID_LEN)
3602			goto not_found;
3603
3604		if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
3605		    sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
3606			pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
3607				str, acpiid, seg, bus, dev, fn);
3608			goto found;
3609		}
3610		goto not_found;
3611	}
3612
3613	/* We have the '@', make it the terminator to get just the acpiid */
3614	*addr++ = 0;
3615
3616	if (strlen(str) > ACPIID_LEN + 1)
3617		goto not_found;
3618
3619	if (sscanf(str, "=%s", acpiid) != 1)
3620		goto not_found;
3621
3622	if (sscanf(addr, "%x:%x.%x", &bus, &dev, &fn) == 3 ||
3623	    sscanf(addr, "%x:%x:%x.%x", &seg, &bus, &dev, &fn) == 4)
3624		goto found;
3625
3626not_found:
3627	pr_err("Invalid command line: ivrs_acpihid%s\n", str);
3628	return 1;
3629
3630found:
3631	p = acpiid;
3632	hid = strsep(&p, ":");
3633	uid = p;
3634
3635	if (!hid || !(*hid) || !uid) {
3636		pr_err("Invalid command line: hid or uid\n");
3637		return 1;
3638	}
3639
3640	/*
3641	 * Ignore leading zeroes after ':', so e.g., AMDI0095:00
3642	 * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match
3643	 */
3644	while (*uid == '0' && *(uid + 1))
3645		uid++;
3646
3647	i = early_acpihid_map_size++;
3648	memcpy(early_acpihid_map[i].hid, hid, strlen(hid));
3649	memcpy(early_acpihid_map[i].uid, uid, strlen(uid));
3650	early_acpihid_map[i].devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3651	early_acpihid_map[i].cmd_line	= true;
3652
3653	return 1;
3654}
3655
3656__setup("amd_iommu_dump",	parse_amd_iommu_dump);
3657__setup("amd_iommu=",		parse_amd_iommu_options);
3658__setup("amd_iommu_intr=",	parse_amd_iommu_intr);
3659__setup("ivrs_ioapic",		parse_ivrs_ioapic);
3660__setup("ivrs_hpet",		parse_ivrs_hpet);
3661__setup("ivrs_acpihid",		parse_ivrs_acpihid);
3662
3663bool amd_iommu_pasid_supported(void)
3664{
3665	/* CPU page table size should match IOMMU guest page table size */
3666	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
3667	    amd_iommu_gpt_level != PAGE_MODE_5_LEVEL)
3668		return false;
3669
3670	/*
3671	 * Since DTE[Mode]=0 is prohibited on SNP-enabled systems
3672	 * (i.e. EFR[SNPSup]=1), the IOMMUv2 page table cannot be used without
3673	 * setting up the IOMMUv1 page table.
3674	 */
3675	return amd_iommu_gt_ppr_supported() && !amd_iommu_snp_en;
3676}
3677
3678struct amd_iommu *get_amd_iommu(unsigned int idx)
3679{
3680	unsigned int i = 0;
3681	struct amd_iommu *iommu;
3682
3683	for_each_iommu(iommu)
3684		if (i++ == idx)
3685			return iommu;
3686	return NULL;
3687}
3688
3689/****************************************************************************
3690 *
3691 * IOMMU EFR Performance Counter support functionality. This code allows
3692 * access to the IOMMU PC functionality.
3693 *
3694 ****************************************************************************/
3695
3696u8 amd_iommu_pc_get_max_banks(unsigned int idx)
3697{
3698	struct amd_iommu *iommu = get_amd_iommu(idx);
3699
3700	if (iommu)
3701		return iommu->max_banks;
3702
3703	return 0;
3704}
3705
3706bool amd_iommu_pc_supported(void)
3707{
3708	return amd_iommu_pc_present;
3709}
3710
3711u8 amd_iommu_pc_get_max_counters(unsigned int idx)
3712{
3713	struct amd_iommu *iommu = get_amd_iommu(idx);
3714
3715	if (iommu)
3716		return iommu->max_counters;
3717
3718	return 0;
3719}
3720
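/*
 * Performance counter registers live in the MMIO aperture at
 *   offset = ((0x40 | bank) << 12) | (cntr << 8) | fxn
 * e.g. bank 0, counter 0, function 0 maps to offset 0x40000. Counter values
 * are 48 bits wide and are accessed as two 32-bit MMIO words.
 */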
3721static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
3722				u8 fxn, u64 *value, bool is_write)
3723{
3724	u32 offset;
3725	u32 max_offset_lim;
3726
3727	/* Make sure the IOMMU PC resource is available */
3728	if (!amd_iommu_pc_present)
3729		return -ENODEV;
3730
3731	/* Check for valid iommu and pc register indexing */
3732	if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7)))
3733		return -ENODEV;
3734
3735	offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn);
3736
3737	/* Limit the offset to the hw defined mmio region aperture */
3738	max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) |
3739				(iommu->max_counters << 8) | 0x28);
3740	if ((offset < MMIO_CNTR_REG_OFFSET) ||
3741	    (offset > max_offset_lim))
3742		return -EINVAL;
3743
3744	if (is_write) {
3745		u64 val = *value & GENMASK_ULL(47, 0);
3746
3747		writel((u32)val, iommu->mmio_base + offset);
3748		writel((val >> 32), iommu->mmio_base + offset + 4);
3749	} else {
3750		*value = readl(iommu->mmio_base + offset + 4);
3751		*value <<= 32;
3752		*value |= readl(iommu->mmio_base + offset);
3753		*value &= GENMASK_ULL(47, 0);
3754	}
3755
3756	return 0;
3757}
3758
3759int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3760{
3761	if (!iommu)
3762		return -EINVAL;
3763
3764	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false);
3765}
3766
3767int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3768{
3769	if (!iommu)
3770		return -EINVAL;
3771
3772	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
3773}
3774
3775#ifdef CONFIG_KVM_AMD_SEV
3776static int iommu_page_make_shared(void *page)
3777{
3778	unsigned long paddr, pfn;
3779
3780	paddr = iommu_virt_to_phys(page);
3781	/* The C-bit may be set in the paddr */
3782	pfn = __sme_clr(paddr) >> PAGE_SHIFT;
3783
3784	if (!(pfn % PTRS_PER_PMD)) {
3785		int ret, level;
3786		bool assigned;
3787
3788		ret = snp_lookup_rmpentry(pfn, &assigned, &level);
3789		if (ret) {
3790			pr_warn("IOMMU PFN %lx RMP lookup failed, ret %d\n", pfn, ret);
3791			return ret;
3792		}
3793
3794		if (!assigned) {
3795			pr_warn("IOMMU PFN %lx not assigned in RMP table\n", pfn);
3796			return -EINVAL;
3797		}
3798
3799		if (level > PG_LEVEL_4K) {
3800			ret = psmash(pfn);
3801			if (!ret)
3802				goto done;
3803
3804			pr_warn("PSMASH failed for IOMMU PFN %lx huge RMP entry, ret: %d, level: %d\n",
3805				pfn, ret, level);
3806			return ret;
3807		}
3808	}
3809
3810done:
3811	return rmp_make_shared(pfn, PG_LEVEL_4K);
3812}
3813
3814static int iommu_make_shared(void *va, size_t size)
3815{
3816	void *page;
3817	int ret;
3818
3819	if (!va)
3820		return 0;
3821
3822	for (page = va; page < (va + size); page += PAGE_SIZE) {
3823		ret = iommu_page_make_shared(page);
3824		if (ret)
3825			return ret;
3826	}
3827
3828	return 0;
3829}
3830
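/*
 * Convert the IOMMU buffers that were assigned in the RMP table while SNP
 * was enabled (event log, PPR log, command completion semaphore) back to
 * shared state, smashing any huge RMP entries first, so the memory can be
 * safely reused by the host.
 */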
3831int amd_iommu_snp_disable(void)
3832{
3833	struct amd_iommu *iommu;
3834	int ret;
3835
3836	if (!amd_iommu_snp_en)
3837		return 0;
3838
3839	for_each_iommu(iommu) {
3840		ret = iommu_make_shared(iommu->evt_buf, EVT_BUFFER_SIZE);
3841		if (ret)
3842			return ret;
3843
3844		ret = iommu_make_shared(iommu->ppr_log, PPR_LOG_SIZE);
3845		if (ret)
3846			return ret;
3847
3848		ret = iommu_make_shared((void *)iommu->cmd_sem, PAGE_SIZE);
3849		if (ret)
3850			return ret;
3851	}
3852
3853	return 0;
3854}
3855EXPORT_SYMBOL_GPL(amd_iommu_snp_disable);
3856#endif
3857