1// SPDX-License-Identifier: MIT
2/*
 * Copyright © 2022 Intel Corporation
4 */
5
6#include "xe_gt_topology.h"
7
8#include <linux/bitmap.h>
9
10#include "regs/xe_gt_regs.h"
11#include "xe_assert.h"
12#include "xe_gt.h"
13#include "xe_mmio.h"
14
15static void
16load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
17{
18	va_list argp;
19	u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {};
20	int i;
21
22	if (drm_WARN_ON(&gt_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS))
23		numregs = XE_MAX_DSS_FUSE_REGS;
24
25	va_start(argp, numregs);
26	for (i = 0; i < numregs; i++)
27		fuse_val[i] = xe_mmio_read32(gt, va_arg(argp, struct xe_reg));
28	va_end(argp);
29
30	bitmap_from_arr32(mask, fuse_val, numregs * 32);
31}
32
33static void
34load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
35{
36	struct xe_device *xe = gt_to_xe(gt);
37	u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE);
38	u32 val = 0;
39	int i;
40
41	BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1);
42
43	/*
44	 * Pre-Xe_HP platforms inverted the bit meaning (disable instead
45	 * of enable).
46	 */
47	if (GRAPHICS_VERx100(xe) < 1250)
48		reg_val = ~reg_val & XELP_EU_MASK;
49
50	/* On PVC, one bit = one EU */
51	if (GRAPHICS_VERx100(xe) == 1260) {
52		val = reg_val;
53	} else {
54		/* All other platforms, one bit = 2 EU */
55		for (i = 0; i < fls(reg_val); i++)
56			if (reg_val & BIT(i))
57				val |= 0x3 << 2 * i;
58	}
59
60	bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
61}
62
63/**
64 * gen_l3_mask_from_pattern - Replicate a bit pattern according to a mask
65 *
66 * It is used to compute the L3 bank masks in a generic format on
67 * various platforms where the internal representation of L3 node
68 * and masks from registers are different.
69 *
70 * @xe: device
71 * @dst: destination
72 * @pattern: pattern to replicate
73 * @patternbits: size of the pattern, in bits
74 * @mask: mask describing where to replicate the pattern
75 *
76 * Example 1:
77 * ----------
78 * @pattern =    0b1111
79 *                 ������������
80 * @patternbits =   4 (bits)
81 * @mask = 0b0101
82 *           ������������
83 *           ������������������������������������������������������������������ 0b1111 (=1��0b1111)
84 *           ��������������������������������������������� 0b0000    ���   (=0��0b1111)
85 *           ������������������������ 0b1111    ���      ���   (=1��0b1111)
86 *           ��� 0b0000    ���      ���      ���   (=0��0b1111)
87 *                ���      ���      ���      ���
88 * @dst =      0b0000 0b1111 0b0000 0b1111
89 *
90 * Example 2:
91 * ----------
92 * @pattern =    0b11111111
93 *                 ������������������������
94 * @patternbits =   8 (bits)
95 * @mask = 0b10
96 *           ������
97 *           ������
98 *           ������
99 *           ������������������������������������ 0b00000000 (=0��0b11111111)
100 *           ��� 0b11111111      ���     (=1��0b11111111)
101 *                  ���          ���
102 * @dst =      0b11111111 0b00000000
103 */
104static void
105gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst,
106			 xe_l3_bank_mask_t pattern, int patternbits,
107			 unsigned long mask)
108{
109	unsigned long bit;
110
111	xe_assert(xe, fls(mask) <= patternbits);
112	for_each_set_bit(bit, &mask, 32) {
113		xe_l3_bank_mask_t shifted_pattern = {};
114
115		bitmap_shift_left(shifted_pattern, pattern, bit * patternbits,
116				  XE_MAX_L3_BANK_MASK_BITS);
117		bitmap_or(dst, dst, shifted_pattern, XE_MAX_L3_BANK_MASK_BITS);
118	}
119}
120
/*
 * Decode the platform-specific L3 fuse register layout into the generic
 * l3_bank_mask bitmap.  Each branch builds a per-node pattern and
 * replicates it once for every enabled node/group.
 */
static void
load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 fuse3 = xe_mmio_read32(gt, MIRROR_FUSE3);

	if (GRAPHICS_VER(xe) >= 20) {
		/*
		 * Xe2+: the fuse provides a bank mask directly; replicate it
		 * into a 4-bit-wide slot for every enabled node.
		 */
		xe_l3_bank_mask_t per_node = {};
		u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
		u32 bank_val = REG_FIELD_GET(XE2_GT_L3_MODE_MASK, fuse3);

		bitmap_from_arr32(per_node, &bank_val, 32);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
					 meml3_en);
	} else if (GRAPHICS_VERx100(xe) >= 1270) {
		/*
		 * Graphics 12.70+: each bit of the FUSE4 exclusion-derived
		 * value enables a pair of banks (0x3 pattern, 2-bit slots);
		 * the resulting 4-bank per-node mask is then replicated for
		 * every enabled node.
		 */
		xe_l3_bank_mask_t per_node = {};
		xe_l3_bank_mask_t per_mask_bit = {};
		u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
		u32 fuse4 = xe_mmio_read32(gt, XEHP_FUSE4);
		u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4);

		bitmap_set_value8(per_mask_bit, 0x3, 0);
		gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 2, bank_val);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
					 meml3_en);
	} else if (xe->info.platform == XE_PVC) {
		/*
		 * PVC: each mode bit enables four banks (0xf pattern, 4-bit
		 * slots); each enabled node covers a 16-bank-wide slot.
		 */
		xe_l3_bank_mask_t per_node = {};
		xe_l3_bank_mask_t per_mask_bit = {};
		u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
		u32 bank_val = REG_FIELD_GET(XEHPC_GT_L3_MODE_MASK, fuse3);

		bitmap_set_value8(per_mask_bit, 0xf, 0);
		gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 4,
					 bank_val);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 16,
					 meml3_en);
	} else if (xe->info.platform == XE_DG2) {
		/* DG2: each enabled node contributes a fixed 8 banks. */
		xe_l3_bank_mask_t per_node = {};
		u32 mask = REG_FIELD_GET(MEML3_EN_MASK, fuse3);

		bitmap_set_value8(per_node, 0xff, 0);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 8, mask);
	} else {
		/* 1:1 register bit to mask bit (inverted register bits) */
		u32 mask = REG_FIELD_GET(XELP_GT_L3_MODE_MASK, ~fuse3);

		bitmap_from_arr32(l3_bank_mask, &mask, 32);
	}
}
170
/*
 * get_num_dss_regs - Number of 32-bit fuse registers holding the DSS masks
 * @xe: device
 * @geometry_regs: output; number of geometry DSS fuse registers
 * @compute_regs: output; number of compute DSS fuse registers
 */
static void
get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
{
	/*
	 * Xe2 and later read three registers each (see the XE2_GT_*_DSS_*
	 * registers passed by xe_gt_topology_init); the check must therefore
	 * include version 20 itself, not just versions above it.
	 */
	if (GRAPHICS_VER(xe) >= 20) {
		*geometry_regs = 3;
		*compute_regs = 3;
	} else if (GRAPHICS_VERx100(xe) == 1260) {
		/* PVC: compute-only; no geometry DSS registers. */
		*geometry_regs = 0;
		*compute_regs = 2;
	} else if (GRAPHICS_VERx100(xe) >= 1250) {
		*geometry_regs = 1;
		*compute_regs = 1;
	} else {
		/* Pre-Xe_HP: geometry only. */
		*geometry_regs = 1;
		*compute_regs = 0;
	}
}
188
/**
 * xe_gt_topology_init - Read this GT's topology fuses
 * @gt: the GT to initialize
 *
 * Populates gt->fuse_topo (geometry/compute DSS masks, per-DSS EU mask and
 * L3 bank mask) from the fuse registers, then dumps the result through a
 * DRM debug printer.
 */
void
xe_gt_topology_init(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct drm_printer p;
	int num_geometry_regs, num_compute_regs;

	get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs);

	/*
	 * Register counts returned shouldn't exceed the number of registers
	 * passed as parameters below.
	 */
	drm_WARN_ON(&xe->drm, num_geometry_regs > 3);
	drm_WARN_ON(&xe->drm, num_compute_regs > 3);

	/* Only the first num_*_regs of the registers listed below are read. */
	load_dss_mask(gt, gt->fuse_topo.g_dss_mask,
		      num_geometry_regs,
		      XELP_GT_GEOMETRY_DSS_ENABLE,
		      XE2_GT_GEOMETRY_DSS_1,
		      XE2_GT_GEOMETRY_DSS_2);
	load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
		      XEHP_GT_COMPUTE_DSS_ENABLE,
		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
		      XE2_GT_COMPUTE_DSS_2);
	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
	load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);

	p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");

	xe_gt_topology_dump(gt, &p);
}
221
222void
223xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
224{
225	drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS,
226		   gt->fuse_topo.g_dss_mask);
227	drm_printf(p, "dss mask (compute):  %*pb\n", XE_MAX_DSS_FUSE_BITS,
228		   gt->fuse_topo.c_dss_mask);
229
230	drm_printf(p, "EU mask per DSS:     %*pb\n", XE_MAX_EU_FUSE_BITS,
231		   gt->fuse_topo.eu_mask_per_dss);
232
233	drm_printf(p, "L3 bank mask:        %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
234		   gt->fuse_topo.l3_bank_mask);
235}
236
237/*
238 * Used to obtain the index of the first DSS.  Can start searching from the
239 * beginning of a specific dss group (e.g., gslice, cslice, etc.) if
240 * groupsize and groupnum are non-zero.
241 */
242unsigned int
243xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum)
244{
245	return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
246}
247
/* Return true if no DSS at all are set in @mask. */
bool xe_dss_mask_empty(const xe_dss_mask_t mask)
{
	return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS);
}
252
253/**
254 * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant
255 * @gt: GT to check
256 * @quad: Which quadrant of the DSS space to check
257 *
258 * Since Xe_HP platforms can have up to four CCS engines, those engines
259 * are each logically associated with a quarter of the possible DSS.  If there
260 * are no DSS present in one of the four quadrants of the DSS space, the
261 * corresponding CCS engine is also not available for use.
262 *
263 * Returns false if all DSS in a quadrant of the GT are fused off, else true.
264 */
265bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad)
266{
267	struct xe_device *xe = gt_to_xe(gt);
268	xe_dss_mask_t all_dss;
269	int g_dss_regs, c_dss_regs, dss_per_quad, quad_first;
270
271	bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
272		  XE_MAX_DSS_FUSE_BITS);
273
274	get_num_dss_regs(xe, &g_dss_regs, &c_dss_regs);
275	dss_per_quad = 32 * max(g_dss_regs, c_dss_regs) / 4;
276
277	quad_first = xe_dss_mask_group_ffs(all_dss, dss_per_quad, quad);
278
279	return quad_first < (quad + 1) * dss_per_quad;
280}
281