1/*
2 * Copyright 2011 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24
25#include <sys/cdefs.h>
26__FBSDID("$FreeBSD$");
27
28#include <dev/drm2/drmP.h>
29#include "radeon.h"
30#include "radeon_asic.h"
31#include <dev/drm2/radeon/radeon_drm.h>
32#include "sid.h"
33#include "atom.h"
34#include "si_blit_shaders.h"
35
36#define SI_PFP_UCODE_SIZE 2144
37#define SI_PM4_UCODE_SIZE 2144
38#define SI_CE_UCODE_SIZE 2144
39#define SI_RLC_UCODE_SIZE 2048
40#define SI_MC_UCODE_SIZE 7769
41
42/* get temperature in millidegrees */
43int si_get_temp(struct radeon_device *rdev)
44{
45	u32 temp;
46	int actual_temp = 0;
47
48	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
49		CTF_TEMP_SHIFT;
50
51	if (temp & 0x200)
52		actual_temp = 255;
53	else
54		actual_temp = temp & 0x1ff;
55
56	actual_temp = (actual_temp * 1000);
57
58	return actual_temp;
59}
60
61#define TAHITI_IO_MC_REGS_SIZE 36
62
63static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
64	{0x0000006f, 0x03044000},
65	{0x00000070, 0x0480c018},
66	{0x00000071, 0x00000040},
67	{0x00000072, 0x01000000},
68	{0x00000074, 0x000000ff},
69	{0x00000075, 0x00143400},
70	{0x00000076, 0x08ec0800},
71	{0x00000077, 0x040000cc},
72	{0x00000079, 0x00000000},
73	{0x0000007a, 0x21000409},
74	{0x0000007c, 0x00000000},
75	{0x0000007d, 0xe8000000},
76	{0x0000007e, 0x044408a8},
77	{0x0000007f, 0x00000003},
78	{0x00000080, 0x00000000},
79	{0x00000081, 0x01000000},
80	{0x00000082, 0x02000000},
81	{0x00000083, 0x00000000},
82	{0x00000084, 0xe3f3e4f4},
83	{0x00000085, 0x00052024},
84	{0x00000087, 0x00000000},
85	{0x00000088, 0x66036603},
86	{0x00000089, 0x01000000},
87	{0x0000008b, 0x1c0a0000},
88	{0x0000008c, 0xff010000},
89	{0x0000008e, 0xffffefff},
90	{0x0000008f, 0xfff3efff},
91	{0x00000090, 0xfff3efbf},
92	{0x00000094, 0x00101101},
93	{0x00000095, 0x00000fff},
94	{0x00000096, 0x00116fff},
95	{0x00000097, 0x60010000},
96	{0x00000098, 0x10010000},
97	{0x00000099, 0x00006000},
98	{0x0000009a, 0x00001000},
99	{0x0000009f, 0x00a77400}
100};
101
102static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
103	{0x0000006f, 0x03044000},
104	{0x00000070, 0x0480c018},
105	{0x00000071, 0x00000040},
106	{0x00000072, 0x01000000},
107	{0x00000074, 0x000000ff},
108	{0x00000075, 0x00143400},
109	{0x00000076, 0x08ec0800},
110	{0x00000077, 0x040000cc},
111	{0x00000079, 0x00000000},
112	{0x0000007a, 0x21000409},
113	{0x0000007c, 0x00000000},
114	{0x0000007d, 0xe8000000},
115	{0x0000007e, 0x044408a8},
116	{0x0000007f, 0x00000003},
117	{0x00000080, 0x00000000},
118	{0x00000081, 0x01000000},
119	{0x00000082, 0x02000000},
120	{0x00000083, 0x00000000},
121	{0x00000084, 0xe3f3e4f4},
122	{0x00000085, 0x00052024},
123	{0x00000087, 0x00000000},
124	{0x00000088, 0x66036603},
125	{0x00000089, 0x01000000},
126	{0x0000008b, 0x1c0a0000},
127	{0x0000008c, 0xff010000},
128	{0x0000008e, 0xffffefff},
129	{0x0000008f, 0xfff3efff},
130	{0x00000090, 0xfff3efbf},
131	{0x00000094, 0x00101101},
132	{0x00000095, 0x00000fff},
133	{0x00000096, 0x00116fff},
134	{0x00000097, 0x60010000},
135	{0x00000098, 0x10010000},
136	{0x00000099, 0x00006000},
137	{0x0000009a, 0x00001000},
138	{0x0000009f, 0x00a47400}
139};
140
141static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
142	{0x0000006f, 0x03044000},
143	{0x00000070, 0x0480c018},
144	{0x00000071, 0x00000040},
145	{0x00000072, 0x01000000},
146	{0x00000074, 0x000000ff},
147	{0x00000075, 0x00143400},
148	{0x00000076, 0x08ec0800},
149	{0x00000077, 0x040000cc},
150	{0x00000079, 0x00000000},
151	{0x0000007a, 0x21000409},
152	{0x0000007c, 0x00000000},
153	{0x0000007d, 0xe8000000},
154	{0x0000007e, 0x044408a8},
155	{0x0000007f, 0x00000003},
156	{0x00000080, 0x00000000},
157	{0x00000081, 0x01000000},
158	{0x00000082, 0x02000000},
159	{0x00000083, 0x00000000},
160	{0x00000084, 0xe3f3e4f4},
161	{0x00000085, 0x00052024},
162	{0x00000087, 0x00000000},
163	{0x00000088, 0x66036603},
164	{0x00000089, 0x01000000},
165	{0x0000008b, 0x1c0a0000},
166	{0x0000008c, 0xff010000},
167	{0x0000008e, 0xffffefff},
168	{0x0000008f, 0xfff3efff},
169	{0x00000090, 0xfff3efbf},
170	{0x00000094, 0x00101101},
171	{0x00000095, 0x00000fff},
172	{0x00000096, 0x00116fff},
173	{0x00000097, 0x60010000},
174	{0x00000098, 0x10010000},
175	{0x00000099, 0x00006000},
176	{0x0000009a, 0x00001000},
177	{0x0000009f, 0x00a37400}
178};
179
180/* ucode loading */
181static int si_mc_load_microcode(struct radeon_device *rdev)
182{
183	const __be32 *fw_data;
184	u32 running, blackout = 0;
185	u32 *io_mc_regs;
186	int i, ucode_size, regs_size;
187
188	if (!rdev->mc_fw)
189		return -EINVAL;
190
191	switch (rdev->family) {
192	case CHIP_TAHITI:
193		io_mc_regs = (u32 *)&tahiti_io_mc_regs;
194		ucode_size = SI_MC_UCODE_SIZE;
195		regs_size = TAHITI_IO_MC_REGS_SIZE;
196		break;
197	case CHIP_PITCAIRN:
198		io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
199		ucode_size = SI_MC_UCODE_SIZE;
200		regs_size = TAHITI_IO_MC_REGS_SIZE;
201		break;
202	case CHIP_VERDE:
203	default:
204		io_mc_regs = (u32 *)&verde_io_mc_regs;
205		ucode_size = SI_MC_UCODE_SIZE;
206		regs_size = TAHITI_IO_MC_REGS_SIZE;
207		break;
208	}
209
210	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
211
212	if (running == 0) {
213		if (running) {
214			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
215			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
216		}
217
218		/* reset the engine and set to writable */
219		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
220		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
221
222		/* load mc io regs */
223		for (i = 0; i < regs_size; i++) {
224			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
225			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
226		}
227		/* load the MC ucode */
228		fw_data = (const __be32 *)rdev->mc_fw->data;
229		for (i = 0; i < ucode_size; i++)
230			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
231
232		/* put the engine back into the active state */
233		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
234		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
235		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
236
237		/* wait for training to complete */
238		for (i = 0; i < rdev->usec_timeout; i++) {
239			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
240				break;
241			DRM_UDELAY(1);
242		}
243		for (i = 0; i < rdev->usec_timeout; i++) {
244			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
245				break;
246			DRM_UDELAY(1);
247		}
248
249		if (running)
250			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
251	}
252
253	return 0;
254}
255
256static int si_init_microcode(struct radeon_device *rdev)
257{
258	const char *chip_name;
259	const char *rlc_chip_name;
260	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
261	char fw_name[30];
262	int err;
263
264	DRM_DEBUG("\n");
265
266	switch (rdev->family) {
267	case CHIP_TAHITI:
268		chip_name = "TAHITI";
269		rlc_chip_name = "TAHITI";
270		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
271		me_req_size = SI_PM4_UCODE_SIZE * 4;
272		ce_req_size = SI_CE_UCODE_SIZE * 4;
273		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
274		mc_req_size = SI_MC_UCODE_SIZE * 4;
275		break;
276	case CHIP_PITCAIRN:
277		chip_name = "PITCAIRN";
278		rlc_chip_name = "PITCAIRN";
279		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
280		me_req_size = SI_PM4_UCODE_SIZE * 4;
281		ce_req_size = SI_CE_UCODE_SIZE * 4;
282		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
283		mc_req_size = SI_MC_UCODE_SIZE * 4;
284		break;
285	case CHIP_VERDE:
286		chip_name = "VERDE";
287		rlc_chip_name = "VERDE";
288		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
289		me_req_size = SI_PM4_UCODE_SIZE * 4;
290		ce_req_size = SI_CE_UCODE_SIZE * 4;
291		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
292		mc_req_size = SI_MC_UCODE_SIZE * 4;
293		break;
294	default: panic("%s: Unsupported family %d", __func__, rdev->family);
295	}
296
297	DRM_INFO("Loading %s Microcode\n", chip_name);
298	err = 0;
299
300	snprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
301	rdev->pfp_fw = firmware_get(fw_name);
302	if (rdev->pfp_fw == NULL) {
303		err = -ENOENT;
304		goto out;
305	}
306	if (rdev->pfp_fw->datasize != pfp_req_size) {
307		DRM_ERROR(
308		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
309		       rdev->pfp_fw->datasize, fw_name);
310		err = -EINVAL;
311		goto out;
312	}
313
314	snprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
315	rdev->me_fw = firmware_get(fw_name);
316	if (rdev->me_fw == NULL) {
317		err = -ENOENT;
318		goto out;
319	}
320	if (rdev->me_fw->datasize != me_req_size) {
321		DRM_ERROR(
322		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
323		       rdev->me_fw->datasize, fw_name);
324		err = -EINVAL;
325	}
326
327	snprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
328	rdev->ce_fw = firmware_get(fw_name);
329	if (rdev->ce_fw == NULL) {
330		err = -ENOENT;
331		goto out;
332	}
333	if (rdev->ce_fw->datasize != ce_req_size) {
334		DRM_ERROR(
335		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
336		       rdev->ce_fw->datasize, fw_name);
337		err = -EINVAL;
338	}
339
340	snprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", rlc_chip_name);
341	rdev->rlc_fw = firmware_get(fw_name);
342	if (rdev->rlc_fw == NULL) {
343		err = -ENOENT;
344		goto out;
345	}
346	if (rdev->rlc_fw->datasize != rlc_req_size) {
347		DRM_ERROR(
348		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
349		       rdev->rlc_fw->datasize, fw_name);
350		err = -EINVAL;
351	}
352
353	snprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
354	rdev->mc_fw = firmware_get(fw_name);
355	if (rdev->mc_fw == NULL) {
356		err = -ENOENT;
357		goto out;
358	}
359	if (rdev->mc_fw->datasize != mc_req_size) {
360		DRM_ERROR(
361		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
362		       rdev->mc_fw->datasize, fw_name);
363		err = -EINVAL;
364	}
365
366out:
367	if (err) {
368		if (err != -EINVAL)
369			DRM_ERROR(
370			       "si_cp: Failed to load firmware \"%s\"\n",
371			       fw_name);
372		if (rdev->pfp_fw != NULL) {
373			firmware_put(rdev->pfp_fw, FIRMWARE_UNLOAD);
374			rdev->pfp_fw = NULL;
375		}
376		if (rdev->me_fw != NULL) {
377			firmware_put(rdev->me_fw, FIRMWARE_UNLOAD);
378			rdev->me_fw = NULL;
379		}
380		if (rdev->ce_fw != NULL) {
381			firmware_put(rdev->ce_fw, FIRMWARE_UNLOAD);
382			rdev->ce_fw = NULL;
383		}
384		if (rdev->rlc_fw != NULL) {
385			firmware_put(rdev->rlc_fw, FIRMWARE_UNLOAD);
386			rdev->rlc_fw = NULL;
387		}
388		if (rdev->mc_fw != NULL) {
389			firmware_put(rdev->mc_fw, FIRMWARE_UNLOAD);
390			rdev->mc_fw = NULL;
391		}
392	}
393	return err;
394}
395
396/**
397 * si_fini_microcode - drop the firmwares image references
398 *
399 * @rdev: radeon_device pointer
400 *
401 * Drop the pfp, me, rlc, mc and ce firmware image references.
402 * Called at driver shutdown.
403 */
404static void si_fini_microcode(struct radeon_device *rdev)
405{
406
407	if (rdev->pfp_fw != NULL) {
408		firmware_put(rdev->pfp_fw, FIRMWARE_UNLOAD);
409		rdev->pfp_fw = NULL;
410	}
411
412	if (rdev->me_fw != NULL) {
413		firmware_put(rdev->me_fw, FIRMWARE_UNLOAD);
414		rdev->me_fw = NULL;
415	}
416
417	if (rdev->rlc_fw != NULL) {
418		firmware_put(rdev->rlc_fw, FIRMWARE_UNLOAD);
419		rdev->rlc_fw = NULL;
420	}
421
422	if (rdev->mc_fw != NULL) {
423		firmware_put(rdev->mc_fw, FIRMWARE_UNLOAD);
424		rdev->mc_fw = NULL;
425	}
426
427	if (rdev->ce_fw != NULL) {
428		firmware_put(rdev->ce_fw, FIRMWARE_UNLOAD);
429		rdev->ce_fw = NULL;
430	}
431}
432
433/* watermark setup */
434static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
435				   struct radeon_crtc *radeon_crtc,
436				   struct drm_display_mode *mode,
437				   struct drm_display_mode *other_mode)
438{
439	u32 tmp;
440	/*
441	 * Line Buffer Setup
442	 * There are 3 line buffers, each one shared by 2 display controllers.
443	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
444	 * the display controllers.  The paritioning is done via one of four
445	 * preset allocations specified in bits 21:20:
446	 *  0 - half lb
447	 *  2 - whole lb, other crtc must be disabled
448	 */
449	/* this can get tricky if we have two large displays on a paired group
450	 * of crtcs.  Ideally for multiple large displays we'd assign them to
451	 * non-linked crtcs for maximum line buffer allocation.
452	 */
453	if (radeon_crtc->base.enabled && mode) {
454		if (other_mode)
455			tmp = 0; /* 1/2 */
456		else
457			tmp = 2; /* whole */
458	} else
459		tmp = 0;
460
461	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
462	       DC_LB_MEMORY_CONFIG(tmp));
463
464	if (radeon_crtc->base.enabled && mode) {
465		switch (tmp) {
466		case 0:
467		default:
468			return 4096 * 2;
469		case 2:
470			return 8192 * 2;
471		}
472	}
473
474	/* controller not enabled, so no lb used */
475	return 0;
476}
477
478static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
479{
480	u32 tmp = RREG32(MC_SHARED_CHMAP);
481
482	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
483	case 0:
484	default:
485		return 1;
486	case 1:
487		return 2;
488	case 2:
489		return 4;
490	case 3:
491		return 8;
492	case 4:
493		return 3;
494	case 5:
495		return 6;
496	case 6:
497		return 10;
498	case 7:
499		return 12;
500	case 8:
501		return 16;
502	}
503}
504
505struct dce6_wm_params {
506	u32 dram_channels; /* number of dram channels */
507	u32 yclk;          /* bandwidth per dram data pin in kHz */
508	u32 sclk;          /* engine clock in kHz */
509	u32 disp_clk;      /* display clock in kHz */
510	u32 src_width;     /* viewport width */
511	u32 active_time;   /* active display time in ns */
512	u32 blank_time;    /* blank time in ns */
513	bool interlaced;    /* mode is interlaced */
514	fixed20_12 vsc;    /* vertical scale ratio */
515	u32 num_heads;     /* number of active crtcs */
516	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
517	u32 lb_size;       /* line buffer allocated to pipe */
518	u32 vtaps;         /* vertical scaler taps */
519};
520
521static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
522{
523	/* Calculate raw DRAM Bandwidth */
524	fixed20_12 dram_efficiency; /* 0.7 */
525	fixed20_12 yclk, dram_channels, bandwidth;
526	fixed20_12 a;
527
528	a.full = dfixed_const(1000);
529	yclk.full = dfixed_const(wm->yclk);
530	yclk.full = dfixed_div(yclk, a);
531	dram_channels.full = dfixed_const(wm->dram_channels * 4);
532	a.full = dfixed_const(10);
533	dram_efficiency.full = dfixed_const(7);
534	dram_efficiency.full = dfixed_div(dram_efficiency, a);
535	bandwidth.full = dfixed_mul(dram_channels, yclk);
536	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
537
538	return dfixed_trunc(bandwidth);
539}
540
541static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
542{
543	/* Calculate DRAM Bandwidth and the part allocated to display. */
544	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
545	fixed20_12 yclk, dram_channels, bandwidth;
546	fixed20_12 a;
547
548	a.full = dfixed_const(1000);
549	yclk.full = dfixed_const(wm->yclk);
550	yclk.full = dfixed_div(yclk, a);
551	dram_channels.full = dfixed_const(wm->dram_channels * 4);
552	a.full = dfixed_const(10);
553	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
554	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
555	bandwidth.full = dfixed_mul(dram_channels, yclk);
556	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
557
558	return dfixed_trunc(bandwidth);
559}
560
561static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
562{
563	/* Calculate the display Data return Bandwidth */
564	fixed20_12 return_efficiency; /* 0.8 */
565	fixed20_12 sclk, bandwidth;
566	fixed20_12 a;
567
568	a.full = dfixed_const(1000);
569	sclk.full = dfixed_const(wm->sclk);
570	sclk.full = dfixed_div(sclk, a);
571	a.full = dfixed_const(10);
572	return_efficiency.full = dfixed_const(8);
573	return_efficiency.full = dfixed_div(return_efficiency, a);
574	a.full = dfixed_const(32);
575	bandwidth.full = dfixed_mul(a, sclk);
576	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
577
578	return dfixed_trunc(bandwidth);
579}
580
581static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
582{
583	return 32;
584}
585
586static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
587{
588	/* Calculate the DMIF Request Bandwidth */
589	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
590	fixed20_12 disp_clk, sclk, bandwidth;
591	fixed20_12 a, b1, b2;
592	u32 min_bandwidth;
593
594	a.full = dfixed_const(1000);
595	disp_clk.full = dfixed_const(wm->disp_clk);
596	disp_clk.full = dfixed_div(disp_clk, a);
597	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
598	b1.full = dfixed_mul(a, disp_clk);
599
600	a.full = dfixed_const(1000);
601	sclk.full = dfixed_const(wm->sclk);
602	sclk.full = dfixed_div(sclk, a);
603	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
604	b2.full = dfixed_mul(a, sclk);
605
606	a.full = dfixed_const(10);
607	disp_clk_request_efficiency.full = dfixed_const(8);
608	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
609
610	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
611
612	a.full = dfixed_const(min_bandwidth);
613	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
614
615	return dfixed_trunc(bandwidth);
616}
617
618static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
619{
620	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
621	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
622	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
623	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
624
625	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
626}
627
628static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
629{
630	/* Calculate the display mode Average Bandwidth
631	 * DisplayMode should contain the source and destination dimensions,
632	 * timing, etc.
633	 */
634	fixed20_12 bpp;
635	fixed20_12 line_time;
636	fixed20_12 src_width;
637	fixed20_12 bandwidth;
638	fixed20_12 a;
639
640	a.full = dfixed_const(1000);
641	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
642	line_time.full = dfixed_div(line_time, a);
643	bpp.full = dfixed_const(wm->bytes_per_pixel);
644	src_width.full = dfixed_const(wm->src_width);
645	bandwidth.full = dfixed_mul(src_width, bpp);
646	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
647	bandwidth.full = dfixed_div(bandwidth, line_time);
648
649	return dfixed_trunc(bandwidth);
650}
651
652static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
653{
654	/* First calcualte the latency in ns */
655	u32 mc_latency = 2000; /* 2000 ns. */
656	u32 available_bandwidth = dce6_available_bandwidth(wm);
657	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
658	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
659	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
660	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
661		(wm->num_heads * cursor_line_pair_return_time);
662	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
663	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
664	u32 tmp, dmif_size = 12288;
665	fixed20_12 a, b, c;
666
667	if (wm->num_heads == 0)
668		return 0;
669
670	a.full = dfixed_const(2);
671	b.full = dfixed_const(1);
672	if ((wm->vsc.full > a.full) ||
673	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
674	    (wm->vtaps >= 5) ||
675	    ((wm->vsc.full >= a.full) && wm->interlaced))
676		max_src_lines_per_dst_line = 4;
677	else
678		max_src_lines_per_dst_line = 2;
679
680	a.full = dfixed_const(available_bandwidth);
681	b.full = dfixed_const(wm->num_heads);
682	a.full = dfixed_div(a, b);
683
684	b.full = dfixed_const(mc_latency + 512);
685	c.full = dfixed_const(wm->disp_clk);
686	b.full = dfixed_div(b, c);
687
688	c.full = dfixed_const(dmif_size);
689	b.full = dfixed_div(c, b);
690
691	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
692
693	b.full = dfixed_const(1000);
694	c.full = dfixed_const(wm->disp_clk);
695	b.full = dfixed_div(c, b);
696	c.full = dfixed_const(wm->bytes_per_pixel);
697	b.full = dfixed_mul(b, c);
698
699	lb_fill_bw = min(tmp, dfixed_trunc(b));
700
701	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
702	b.full = dfixed_const(1000);
703	c.full = dfixed_const(lb_fill_bw);
704	b.full = dfixed_div(c, b);
705	a.full = dfixed_div(a, b);
706	line_fill_time = dfixed_trunc(a);
707
708	if (line_fill_time < wm->active_time)
709		return latency;
710	else
711		return latency + (line_fill_time - wm->active_time);
712
713}
714
715static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
716{
717	if (dce6_average_bandwidth(wm) <=
718	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
719		return true;
720	else
721		return false;
722};
723
724static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
725{
726	if (dce6_average_bandwidth(wm) <=
727	    (dce6_available_bandwidth(wm) / wm->num_heads))
728		return true;
729	else
730		return false;
731};
732
733static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
734{
735	u32 lb_partitions = wm->lb_size / wm->src_width;
736	u32 line_time = wm->active_time + wm->blank_time;
737	u32 latency_tolerant_lines;
738	u32 latency_hiding;
739	fixed20_12 a;
740
741	a.full = dfixed_const(1);
742	if (wm->vsc.full > a.full)
743		latency_tolerant_lines = 1;
744	else {
745		if (lb_partitions <= (wm->vtaps + 1))
746			latency_tolerant_lines = 1;
747		else
748			latency_tolerant_lines = 2;
749	}
750
751	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
752
753	if (dce6_latency_watermark(wm) <= latency_hiding)
754		return true;
755	else
756		return false;
757}
758
759static void dce6_program_watermarks(struct radeon_device *rdev,
760					 struct radeon_crtc *radeon_crtc,
761					 u32 lb_size, u32 num_heads)
762{
763	struct drm_display_mode *mode = &radeon_crtc->base.mode;
764	struct dce6_wm_params wm;
765	u32 pixel_period;
766	u32 line_time = 0;
767	u32 latency_watermark_a = 0, latency_watermark_b = 0;
768	u32 priority_a_mark = 0, priority_b_mark = 0;
769	u32 priority_a_cnt = PRIORITY_OFF;
770	u32 priority_b_cnt = PRIORITY_OFF;
771	u32 tmp, arb_control3;
772	fixed20_12 a, b, c;
773
774	if (radeon_crtc->base.enabled && num_heads && mode) {
775		pixel_period = 1000000 / (u32)mode->clock;
776		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
777		priority_a_cnt = 0;
778		priority_b_cnt = 0;
779
780		wm.yclk = rdev->pm.current_mclk * 10;
781		wm.sclk = rdev->pm.current_sclk * 10;
782		wm.disp_clk = mode->clock;
783		wm.src_width = mode->crtc_hdisplay;
784		wm.active_time = mode->crtc_hdisplay * pixel_period;
785		wm.blank_time = line_time - wm.active_time;
786		wm.interlaced = false;
787		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
788			wm.interlaced = true;
789		wm.vsc = radeon_crtc->vsc;
790		wm.vtaps = 1;
791		if (radeon_crtc->rmx_type != RMX_OFF)
792			wm.vtaps = 2;
793		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
794		wm.lb_size = lb_size;
795		if (rdev->family == CHIP_ARUBA)
796			wm.dram_channels = evergreen_get_number_of_dram_channels(rdev);
797		else
798			wm.dram_channels = si_get_number_of_dram_channels(rdev);
799		wm.num_heads = num_heads;
800
801		/* set for high clocks */
802		latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
803		/* set for low clocks */
804		/* wm.yclk = low clk; wm.sclk = low clk */
805		latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);
806
807		/* possibly force display priority to high */
808		/* should really do this at mode validation time... */
809		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
810		    !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
811		    !dce6_check_latency_hiding(&wm) ||
812		    (rdev->disp_priority == 2)) {
813			DRM_DEBUG_KMS("force priority to high\n");
814			priority_a_cnt |= PRIORITY_ALWAYS_ON;
815			priority_b_cnt |= PRIORITY_ALWAYS_ON;
816		}
817
818		a.full = dfixed_const(1000);
819		b.full = dfixed_const(mode->clock);
820		b.full = dfixed_div(b, a);
821		c.full = dfixed_const(latency_watermark_a);
822		c.full = dfixed_mul(c, b);
823		c.full = dfixed_mul(c, radeon_crtc->hsc);
824		c.full = dfixed_div(c, a);
825		a.full = dfixed_const(16);
826		c.full = dfixed_div(c, a);
827		priority_a_mark = dfixed_trunc(c);
828		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
829
830		a.full = dfixed_const(1000);
831		b.full = dfixed_const(mode->clock);
832		b.full = dfixed_div(b, a);
833		c.full = dfixed_const(latency_watermark_b);
834		c.full = dfixed_mul(c, b);
835		c.full = dfixed_mul(c, radeon_crtc->hsc);
836		c.full = dfixed_div(c, a);
837		a.full = dfixed_const(16);
838		c.full = dfixed_div(c, a);
839		priority_b_mark = dfixed_trunc(c);
840		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
841	}
842
843	/* select wm A */
844	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
845	tmp = arb_control3;
846	tmp &= ~LATENCY_WATERMARK_MASK(3);
847	tmp |= LATENCY_WATERMARK_MASK(1);
848	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
849	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
850	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
851		LATENCY_HIGH_WATERMARK(line_time)));
852	/* select wm B */
853	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
854	tmp &= ~LATENCY_WATERMARK_MASK(3);
855	tmp |= LATENCY_WATERMARK_MASK(2);
856	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
857	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
858	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
859		LATENCY_HIGH_WATERMARK(line_time)));
860	/* restore original selection */
861	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
862
863	/* write the priority marks */
864	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
865	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
866
867}
868
869void dce6_bandwidth_update(struct radeon_device *rdev)
870{
871	struct drm_display_mode *mode0 = NULL;
872	struct drm_display_mode *mode1 = NULL;
873	u32 num_heads = 0, lb_size;
874	int i;
875
876	radeon_update_display_priority(rdev);
877
878	for (i = 0; i < rdev->num_crtc; i++) {
879		if (rdev->mode_info.crtcs[i]->base.enabled)
880			num_heads++;
881	}
882	for (i = 0; i < rdev->num_crtc; i += 2) {
883		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
884		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
885		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
886		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
887		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
888		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
889	}
890}
891
892/*
893 * Core functions
894 */
895static void si_tiling_mode_table_init(struct radeon_device *rdev)
896{
897	const u32 num_tile_mode_states = 32;
898	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
899
900	switch (rdev->config.si.mem_row_size_in_kb) {
901	case 1:
902		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
903		break;
904	case 2:
905	default:
906		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
907		break;
908	case 4:
909		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
910		break;
911	}
912
913	if ((rdev->family == CHIP_TAHITI) ||
914	    (rdev->family == CHIP_PITCAIRN)) {
915		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
916			switch (reg_offset) {
917			case 0:  /* non-AA compressed depth or any compressed stencil */
918				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
919						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
920						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
921						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
922						 NUM_BANKS(ADDR_SURF_16_BANK) |
923						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
924						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
925						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
926				break;
927			case 1:  /* 2xAA/4xAA compressed depth only */
928				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
929						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
930						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
931						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
932						 NUM_BANKS(ADDR_SURF_16_BANK) |
933						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
934						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
935						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
936				break;
937			case 2:  /* 8xAA compressed depth only */
938				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
939						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
940						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
941						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
942						 NUM_BANKS(ADDR_SURF_16_BANK) |
943						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
944						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
945						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
946				break;
947			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
948				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
949						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
950						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
951						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
952						 NUM_BANKS(ADDR_SURF_16_BANK) |
953						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
954						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
955						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
956				break;
957			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
958				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
959						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
960						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
961						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
962						 NUM_BANKS(ADDR_SURF_16_BANK) |
963						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
964						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
965						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
966				break;
967			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
968				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
969						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
970						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
971						 TILE_SPLIT(split_equal_to_row_size) |
972						 NUM_BANKS(ADDR_SURF_16_BANK) |
973						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
974						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
975						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
976				break;
977			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
978				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
979						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
980						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
981						 TILE_SPLIT(split_equal_to_row_size) |
982						 NUM_BANKS(ADDR_SURF_16_BANK) |
983						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
984						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
985						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
986				break;
987			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
988				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
989						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
990						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
991						 TILE_SPLIT(split_equal_to_row_size) |
992						 NUM_BANKS(ADDR_SURF_16_BANK) |
993						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
994						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
995						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
996				break;
997			case 8:  /* 1D and 1D Array Surfaces */
998				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
999						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1000						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1001						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1002						 NUM_BANKS(ADDR_SURF_16_BANK) |
1003						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1004						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1005						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1006				break;
1007			case 9:  /* Displayable maps. */
1008				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1009						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1010						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1011						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1012						 NUM_BANKS(ADDR_SURF_16_BANK) |
1013						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1014						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1015						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1016				break;
1017			case 10:  /* Display 8bpp. */
1018				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1019						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1020						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1021						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1022						 NUM_BANKS(ADDR_SURF_16_BANK) |
1023						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1024						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1025						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1026				break;
1027			case 11:  /* Display 16bpp. */
1028				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1029						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1030						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1031						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1032						 NUM_BANKS(ADDR_SURF_16_BANK) |
1033						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1034						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1035						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1036				break;
1037			case 12:  /* Display 32bpp. */
1038				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1039						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1040						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1041						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1042						 NUM_BANKS(ADDR_SURF_16_BANK) |
1043						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1044						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1045						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1046				break;
1047			case 13:  /* Thin. */
1048				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1049						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1050						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1051						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1052						 NUM_BANKS(ADDR_SURF_16_BANK) |
1053						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1054						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1055						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1056				break;
1057			case 14:  /* Thin 8 bpp. */
1058				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1059						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1060						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1061						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1062						 NUM_BANKS(ADDR_SURF_16_BANK) |
1063						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1064						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1065						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1066				break;
1067			case 15:  /* Thin 16 bpp. */
1068				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1069						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1070						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1071						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1072						 NUM_BANKS(ADDR_SURF_16_BANK) |
1073						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1074						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1075						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1076				break;
1077			case 16:  /* Thin 32 bpp. */
1078				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1079						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1080						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1081						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1082						 NUM_BANKS(ADDR_SURF_16_BANK) |
1083						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1084						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1085						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1086				break;
1087			case 17:  /* Thin 64 bpp. */
1088				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1089						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1090						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1091						 TILE_SPLIT(split_equal_to_row_size) |
1092						 NUM_BANKS(ADDR_SURF_16_BANK) |
1093						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1094						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1095						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1096				break;
1097			case 21:  /* 8 bpp PRT. */
1098				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1099						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1100						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1101						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1102						 NUM_BANKS(ADDR_SURF_16_BANK) |
1103						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1104						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1105						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1106				break;
1107			case 22:  /* 16 bpp PRT */
1108				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1109						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1110						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1111						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1112						 NUM_BANKS(ADDR_SURF_16_BANK) |
1113						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1114						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1115						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1116				break;
1117			case 23:  /* 32 bpp PRT */
1118				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1119						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1120						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1121						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1122						 NUM_BANKS(ADDR_SURF_16_BANK) |
1123						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1124						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1125						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1126				break;
1127			case 24:  /* 64 bpp PRT */
1128				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1129						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1130						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1131						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1132						 NUM_BANKS(ADDR_SURF_16_BANK) |
1133						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1134						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1135						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1136				break;
1137			case 25:  /* 128 bpp PRT */
1138				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1139						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1140						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1141						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1142						 NUM_BANKS(ADDR_SURF_8_BANK) |
1143						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1144						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1145						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1146				break;
1147			default:
1148				gb_tile_moden = 0;
1149				break;
1150			}
1151			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1152		}
1153	} else if (rdev->family == CHIP_VERDE) {
1154		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1155			switch (reg_offset) {
1156			case 0:  /* non-AA compressed depth or any compressed stencil */
1157				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1158						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1159						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1160						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1161						 NUM_BANKS(ADDR_SURF_16_BANK) |
1162						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1163						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1164						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1165				break;
1166			case 1:  /* 2xAA/4xAA compressed depth only */
1167				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1168						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1169						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1170						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1171						 NUM_BANKS(ADDR_SURF_16_BANK) |
1172						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1173						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1174						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1175				break;
1176			case 2:  /* 8xAA compressed depth only */
1177				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1178						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1179						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1180						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1181						 NUM_BANKS(ADDR_SURF_16_BANK) |
1182						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1183						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1184						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1185				break;
1186			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
1187				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1188						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1189						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1190						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1191						 NUM_BANKS(ADDR_SURF_16_BANK) |
1192						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1193						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1194						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1195				break;
1196			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
1197				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1198						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1199						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1200						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1201						 NUM_BANKS(ADDR_SURF_16_BANK) |
1202						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1203						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1204						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1205				break;
1206			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
1207				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1208						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1209						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1210						 TILE_SPLIT(split_equal_to_row_size) |
1211						 NUM_BANKS(ADDR_SURF_16_BANK) |
1212						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1213						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1214						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1215				break;
1216			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
1217				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1218						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1219						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1220						 TILE_SPLIT(split_equal_to_row_size) |
1221						 NUM_BANKS(ADDR_SURF_16_BANK) |
1222						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1223						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1224						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1225				break;
1226			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
1227				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1228						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1229						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1230						 TILE_SPLIT(split_equal_to_row_size) |
1231						 NUM_BANKS(ADDR_SURF_16_BANK) |
1232						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1233						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1234						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1235				break;
1236			case 8:  /* 1D and 1D Array Surfaces */
1237				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1238						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1239						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1240						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1241						 NUM_BANKS(ADDR_SURF_16_BANK) |
1242						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1243						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1244						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1245				break;
1246			case 9:  /* Displayable maps. */
1247				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1248						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1249						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1250						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1251						 NUM_BANKS(ADDR_SURF_16_BANK) |
1252						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1253						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1254						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1255				break;
1256			case 10:  /* Display 8bpp. */
1257				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1258						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1259						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1260						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1261						 NUM_BANKS(ADDR_SURF_16_BANK) |
1262						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1263						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1264						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1265				break;
1266			case 11:  /* Display 16bpp. */
1267				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1268						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1269						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1270						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1271						 NUM_BANKS(ADDR_SURF_16_BANK) |
1272						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1273						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1274						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1275				break;
1276			case 12:  /* Display 32bpp. */
1277				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1278						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1279						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1280						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1281						 NUM_BANKS(ADDR_SURF_16_BANK) |
1282						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1283						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1284						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1285				break;
1286			case 13:  /* Thin. */
1287				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1288						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1289						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1290						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1291						 NUM_BANKS(ADDR_SURF_16_BANK) |
1292						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1293						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1294						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1295				break;
1296			case 14:  /* Thin 8 bpp. */
1297				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1298						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1299						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1300						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1301						 NUM_BANKS(ADDR_SURF_16_BANK) |
1302						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1303						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1304						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1305				break;
1306			case 15:  /* Thin 16 bpp. */
1307				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1308						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1309						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1310						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1311						 NUM_BANKS(ADDR_SURF_16_BANK) |
1312						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1313						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1314						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1315				break;
1316			case 16:  /* Thin 32 bpp. */
1317				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1318						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1319						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1320						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1321						 NUM_BANKS(ADDR_SURF_16_BANK) |
1322						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1323						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1324						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1325				break;
1326			case 17:  /* Thin 64 bpp. */
1327				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1328						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1329						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1330						 TILE_SPLIT(split_equal_to_row_size) |
1331						 NUM_BANKS(ADDR_SURF_16_BANK) |
1332						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1333						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1334						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1335				break;
1336			case 21:  /* 8 bpp PRT. */
1337				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1338						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1339						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1340						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1341						 NUM_BANKS(ADDR_SURF_16_BANK) |
1342						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1343						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1344						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1345				break;
1346			case 22:  /* 16 bpp PRT */
1347				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1348						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1349						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1350						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1351						 NUM_BANKS(ADDR_SURF_16_BANK) |
1352						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1353						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1354						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1355				break;
1356			case 23:  /* 32 bpp PRT */
1357				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1358						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1359						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1360						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1361						 NUM_BANKS(ADDR_SURF_16_BANK) |
1362						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1363						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1364						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1365				break;
1366			case 24:  /* 64 bpp PRT */
1367				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1368						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1369						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1370						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1371						 NUM_BANKS(ADDR_SURF_16_BANK) |
1372						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1373						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1374						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1375				break;
1376			case 25:  /* 128 bpp PRT */
1377				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1378						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1379						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1380						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1381						 NUM_BANKS(ADDR_SURF_8_BANK) |
1382						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1383						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1384						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1385				break;
1386			default:
1387				gb_tile_moden = 0;
1388				break;
1389			}
1390			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1391		}
1392	} else
1393		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
1394}
1395
1396static void si_select_se_sh(struct radeon_device *rdev,
1397			    u32 se_num, u32 sh_num)
1398{
1399	u32 data = INSTANCE_BROADCAST_WRITES;
1400
1401	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1402		data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1403	else if (se_num == 0xffffffff)
1404		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1405	else if (sh_num == 0xffffffff)
1406		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1407	else
1408		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1409	WREG32(GRBM_GFX_INDEX, data);
1410}
1411
1412static u32 si_create_bitmask(u32 bit_width)
1413{
1414	u32 i, mask = 0;
1415
1416	for (i = 0; i < bit_width; i++) {
1417		mask <<= 1;
1418		mask |= 1;
1419	}
1420	return mask;
1421}
1422
1423static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
1424{
1425	u32 data, mask;
1426
1427	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
1428	if (data & 1)
1429		data &= INACTIVE_CUS_MASK;
1430	else
1431		data = 0;
1432	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
1433
1434	data >>= INACTIVE_CUS_SHIFT;
1435
1436	mask = si_create_bitmask(cu_per_sh);
1437
1438	return ~data & mask;
1439}
1440
1441static void si_setup_spi(struct radeon_device *rdev,
1442			 u32 se_num, u32 sh_per_se,
1443			 u32 cu_per_sh)
1444{
1445	int i, j, k;
1446	u32 data, mask, active_cu;
1447
1448	for (i = 0; i < se_num; i++) {
1449		for (j = 0; j < sh_per_se; j++) {
1450			si_select_se_sh(rdev, i, j);
1451			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
1452			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
1453
1454			mask = 1;
1455			for (k = 0; k < 16; k++) {
1456				mask <<= k;
1457				if (active_cu & mask) {
1458					data &= ~mask;
1459					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
1460					break;
1461				}
1462			}
1463		}
1464	}
1465	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1466}
1467
1468static u32 si_get_rb_disabled(struct radeon_device *rdev,
1469			      u32 max_rb_num, u32 se_num,
1470			      u32 sh_per_se)
1471{
1472	u32 data, mask;
1473
1474	data = RREG32(CC_RB_BACKEND_DISABLE);
1475	if (data & 1)
1476		data &= BACKEND_DISABLE_MASK;
1477	else
1478		data = 0;
1479	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1480
1481	data >>= BACKEND_DISABLE_SHIFT;
1482
1483	mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
1484
1485	return data & mask;
1486}
1487
1488static void si_setup_rb(struct radeon_device *rdev,
1489			u32 se_num, u32 sh_per_se,
1490			u32 max_rb_num)
1491{
1492	int i, j;
1493	u32 data, mask;
1494	u32 disabled_rbs = 0;
1495	u32 enabled_rbs = 0;
1496
1497	for (i = 0; i < se_num; i++) {
1498		for (j = 0; j < sh_per_se; j++) {
1499			si_select_se_sh(rdev, i, j);
1500			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1501			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
1502		}
1503	}
1504	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1505
1506	mask = 1;
1507	for (i = 0; i < max_rb_num; i++) {
1508		if (!(disabled_rbs & mask))
1509			enabled_rbs |= mask;
1510		mask <<= 1;
1511	}
1512
1513	for (i = 0; i < se_num; i++) {
1514		si_select_se_sh(rdev, i, 0xffffffff);
1515		data = 0;
1516		for (j = 0; j < sh_per_se; j++) {
1517			switch (enabled_rbs & 3) {
1518			case 1:
1519				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1520				break;
1521			case 2:
1522				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1523				break;
1524			case 3:
1525			default:
1526				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1527				break;
1528			}
1529			enabled_rbs >>= 2;
1530		}
1531		WREG32(PA_SC_RASTER_CONFIG, data);
1532	}
1533	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1534}
1535
1536static void si_gpu_init(struct radeon_device *rdev)
1537{
1538	u32 gb_addr_config = 0;
1539	u32 mc_shared_chmap, mc_arb_ramcfg;
1540	u32 sx_debug_1;
1541	u32 hdp_host_path_cntl;
1542	u32 tmp;
1543	int i, j;
1544
1545	switch (rdev->family) {
1546	case CHIP_TAHITI:
1547		rdev->config.si.max_shader_engines = 2;
1548		rdev->config.si.max_tile_pipes = 12;
1549		rdev->config.si.max_cu_per_sh = 8;
1550		rdev->config.si.max_sh_per_se = 2;
1551		rdev->config.si.max_backends_per_se = 4;
1552		rdev->config.si.max_texture_channel_caches = 12;
1553		rdev->config.si.max_gprs = 256;
1554		rdev->config.si.max_gs_threads = 32;
1555		rdev->config.si.max_hw_contexts = 8;
1556
1557		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1558		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
1559		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1560		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1561		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
1562		break;
1563	case CHIP_PITCAIRN:
1564		rdev->config.si.max_shader_engines = 2;
1565		rdev->config.si.max_tile_pipes = 8;
1566		rdev->config.si.max_cu_per_sh = 5;
1567		rdev->config.si.max_sh_per_se = 2;
1568		rdev->config.si.max_backends_per_se = 4;
1569		rdev->config.si.max_texture_channel_caches = 8;
1570		rdev->config.si.max_gprs = 256;
1571		rdev->config.si.max_gs_threads = 32;
1572		rdev->config.si.max_hw_contexts = 8;
1573
1574		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1575		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
1576		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1577		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1578		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
1579		break;
1580	case CHIP_VERDE:
1581	default:
1582		rdev->config.si.max_shader_engines = 1;
1583		rdev->config.si.max_tile_pipes = 4;
1584		rdev->config.si.max_cu_per_sh = 2;
1585		rdev->config.si.max_sh_per_se = 2;
1586		rdev->config.si.max_backends_per_se = 4;
1587		rdev->config.si.max_texture_channel_caches = 4;
1588		rdev->config.si.max_gprs = 256;
1589		rdev->config.si.max_gs_threads = 32;
1590		rdev->config.si.max_hw_contexts = 8;
1591
1592		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1593		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
1594		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1595		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1596		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
1597		break;
1598	}
1599
1600	/* Initialize HDP */
1601	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1602		WREG32((0x2c14 + j), 0x00000000);
1603		WREG32((0x2c18 + j), 0x00000000);
1604		WREG32((0x2c1c + j), 0x00000000);
1605		WREG32((0x2c20 + j), 0x00000000);
1606		WREG32((0x2c24 + j), 0x00000000);
1607	}
1608
1609	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1610
1611	evergreen_fix_pci_max_read_req_size(rdev);
1612
1613	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1614
1615	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1616	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1617
1618	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
1619	rdev->config.si.mem_max_burst_length_bytes = 256;
1620	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1621	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1622	if (rdev->config.si.mem_row_size_in_kb > 4)
1623		rdev->config.si.mem_row_size_in_kb = 4;
1624	/* XXX use MC settings? */
1625	rdev->config.si.shader_engine_tile_size = 32;
1626	rdev->config.si.num_gpus = 1;
1627	rdev->config.si.multi_gpu_tile_size = 64;
1628
1629	/* fix up row size */
1630	gb_addr_config &= ~ROW_SIZE_MASK;
1631	switch (rdev->config.si.mem_row_size_in_kb) {
1632	case 1:
1633	default:
1634		gb_addr_config |= ROW_SIZE(0);
1635		break;
1636	case 2:
1637		gb_addr_config |= ROW_SIZE(1);
1638		break;
1639	case 4:
1640		gb_addr_config |= ROW_SIZE(2);
1641		break;
1642	}
1643
1644	/* setup tiling info dword.  gb_addr_config is not adequate since it does
1645	 * not have bank info, so create a custom tiling dword.
1646	 * bits 3:0   num_pipes
1647	 * bits 7:4   num_banks
1648	 * bits 11:8  group_size
1649	 * bits 15:12 row_size
1650	 */
1651	rdev->config.si.tile_config = 0;
1652	switch (rdev->config.si.num_tile_pipes) {
1653	case 1:
1654		rdev->config.si.tile_config |= (0 << 0);
1655		break;
1656	case 2:
1657		rdev->config.si.tile_config |= (1 << 0);
1658		break;
1659	case 4:
1660		rdev->config.si.tile_config |= (2 << 0);
1661		break;
1662	case 8:
1663	default:
1664		/* XXX what about 12? */
1665		rdev->config.si.tile_config |= (3 << 0);
1666		break;
1667	}
1668	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
1669	case 0: /* four banks */
1670		rdev->config.si.tile_config |= 0 << 4;
1671		break;
1672	case 1: /* eight banks */
1673		rdev->config.si.tile_config |= 1 << 4;
1674		break;
1675	case 2: /* sixteen banks */
1676	default:
1677		rdev->config.si.tile_config |= 2 << 4;
1678		break;
1679	}
1680	rdev->config.si.tile_config |=
1681		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1682	rdev->config.si.tile_config |=
1683		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1684
1685	WREG32(GB_ADDR_CONFIG, gb_addr_config);
1686	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
1687	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1688	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
1689	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
1690
1691	si_tiling_mode_table_init(rdev);
1692
1693	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
1694		    rdev->config.si.max_sh_per_se,
1695		    rdev->config.si.max_backends_per_se);
1696
1697	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
1698		     rdev->config.si.max_sh_per_se,
1699		     rdev->config.si.max_cu_per_sh);
1700
1701
1702	/* set HW defaults for 3D engine */
1703	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
1704				     ROQ_IB2_START(0x2b)));
1705	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1706
1707	sx_debug_1 = RREG32(SX_DEBUG_1);
1708	WREG32(SX_DEBUG_1, sx_debug_1);
1709
1710	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
1711
1712	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
1713				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
1714				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
1715				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
1716
1717	WREG32(VGT_NUM_INSTANCES, 1);
1718
1719	WREG32(CP_PERFMON_CNTL, 0);
1720
1721	WREG32(SQ_CONFIG, 0);
1722
1723	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1724					  FORCE_EOV_MAX_REZ_CNT(255)));
1725
1726	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1727	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
1728
1729	WREG32(VGT_GS_VERTEX_REUSE, 16);
1730	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1731
1732	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
1733	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
1734	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
1735	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
1736	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
1737	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
1738	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
1739	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
1740
1741	tmp = RREG32(HDP_MISC_CNTL);
1742	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1743	WREG32(HDP_MISC_CNTL, tmp);
1744
1745	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1746	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1747
1748	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1749
1750	DRM_UDELAY(50);
1751}
1752
1753/*
 * GPU scratch register helper functions.
1755 */
1756static void si_scratch_init(struct radeon_device *rdev)
1757{
1758	int i;
1759
1760	rdev->scratch.num_reg = 7;
1761	rdev->scratch.reg_base = SCRATCH_REG0;
1762	for (i = 0; i < rdev->scratch.num_reg; i++) {
1763		rdev->scratch.free[i] = true;
1764		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
1765	}
1766}
1767
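/**
 * si_fence_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: fence to emit
 *
 * Flushes the read caches over the GART, then emits an EVENT_WRITE_EOP
 * packet that writes the fence sequence number to the fence address and
 * raises an interrupt.
 */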
1768void si_fence_ring_emit(struct radeon_device *rdev,
1769			struct radeon_fence *fence)
1770{
1771	struct radeon_ring *ring = &rdev->ring[fence->ring];
1772	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
1773
1774	/* flush read cache over gart */
1775	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1776	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
1777	radeon_ring_write(ring, 0);
1778	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
1779	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
1780			  PACKET3_TC_ACTION_ENA |
1781			  PACKET3_SH_KCACHE_ACTION_ENA |
1782			  PACKET3_SH_ICACHE_ACTION_ENA);
1783	radeon_ring_write(ring, 0xFFFFFFFF);
1784	radeon_ring_write(ring, 0);
1785	radeon_ring_write(ring, 10); /* poll interval */
1786	/* EVENT_WRITE_EOP - flush caches, send int */
1787	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
1788	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
1789	radeon_ring_write(ring, addr & 0xffffffff);
1790	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
1791	radeon_ring_write(ring, fence->seq);
1792	radeon_ring_write(ring, 0);
1793}
1794
1795/*
1796 * IB stuff
1797 */
1798void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1799{
1800	struct radeon_ring *ring = &rdev->ring[ib->ring];
1801	u32 header;
1802
1803	if (ib->is_const_ib) {
1804		/* set switch buffer packet before const IB */
1805		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
1806		radeon_ring_write(ring, 0);
1807
1808		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
1809	} else {
1810		u32 next_rptr;
1811		if (ring->rptr_save_reg) {
1812			next_rptr = ring->wptr + 3 + 4 + 8;
1813			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1814			radeon_ring_write(ring, ((ring->rptr_save_reg -
1815						  PACKET3_SET_CONFIG_REG_START) >> 2));
1816			radeon_ring_write(ring, next_rptr);
1817		} else if (rdev->wb.enabled) {
1818			next_rptr = ring->wptr + 5 + 4 + 8;
1819			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1820			radeon_ring_write(ring, (1 << 8));
1821			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1822			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
1823			radeon_ring_write(ring, next_rptr);
1824		}
1825
1826		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
1827	}
1828
1829	radeon_ring_write(ring, header);
1830	radeon_ring_write(ring,
1831#ifdef __BIG_ENDIAN
1832			  (2 << 0) |
1833#endif
1834			  (ib->gpu_addr & 0xFFFFFFFC));
1835	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
1836	radeon_ring_write(ring, ib->length_dw |
1837			  (ib->vm ? (ib->vm->id << 24) : 0));
1838
1839	if (!ib->is_const_ib) {
1840		/* flush read cache over gart for this vmid */
1841		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1842		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
1843		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
1844		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
1845		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
1846				  PACKET3_TC_ACTION_ENA |
1847				  PACKET3_SH_KCACHE_ACTION_ENA |
1848				  PACKET3_SH_ICACHE_ACTION_ENA);
1849		radeon_ring_write(ring, 0xFFFFFFFF);
1850		radeon_ring_write(ring, 0);
1851		radeon_ring_write(ring, 10); /* poll interval */
1852	}
1853}
1854
1855/*
1856 * CP.
1857 */
1858static void si_cp_enable(struct radeon_device *rdev, bool enable)
1859{
1860	if (enable)
1861		WREG32(CP_ME_CNTL, 0);
1862	else {
1863		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1864		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1865		WREG32(SCRATCH_UMSK, 0);
1866		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1867		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
1868		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
1869	}
1870	DRM_UDELAY(50);
1871}
1872
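/**
 * si_cp_load_microcode - load the PFP, CE and ME microcode
 *
 * @rdev: radeon_device pointer
 *
 * Halts the CP and writes the big-endian firmware images into the PFP,
 * CE and ME ucode memories.  Returns 0 on success, -EINVAL if the
 * required firmware has not been loaded.
 */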
1873static int si_cp_load_microcode(struct radeon_device *rdev)
1874{
1875	const __be32 *fw_data;
1876	int i;
1877
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
		return -EINVAL;
1880
1881	si_cp_enable(rdev, false);
1882
1883	/* PFP */
1884	fw_data = (const __be32 *)rdev->pfp_fw->data;
1885	WREG32(CP_PFP_UCODE_ADDR, 0);
1886	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
1887		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1888	WREG32(CP_PFP_UCODE_ADDR, 0);
1889
1890	/* CE */
1891	fw_data = (const __be32 *)rdev->ce_fw->data;
1892	WREG32(CP_CE_UCODE_ADDR, 0);
1893	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
1894		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
1895	WREG32(CP_CE_UCODE_ADDR, 0);
1896
1897	/* ME */
1898	fw_data = (const __be32 *)rdev->me_fw->data;
1899	WREG32(CP_ME_RAM_WADDR, 0);
1900	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
1901		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1902	WREG32(CP_ME_RAM_WADDR, 0);
1903
1904	WREG32(CP_PFP_UCODE_ADDR, 0);
1905	WREG32(CP_CE_UCODE_ADDR, 0);
1906	WREG32(CP_ME_RAM_WADDR, 0);
1907	WREG32(CP_ME_RAM_RADDR, 0);
1908	return 0;
1909}
1910
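/**
 * si_cp_start - initialize the command processor
 *
 * @rdev: radeon_device pointer
 *
 * Emits the ME_INITIALIZE packet, programs the CE RAM partitions,
 * enables the CP and loads the default clear state, then clears the
 * compute context state on the two compute rings.
 */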
1911static int si_cp_start(struct radeon_device *rdev)
1912{
1913	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1914	int r, i;
1915
1916	r = radeon_ring_lock(rdev, ring, 7 + 4);
1917	if (r) {
1918		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1919		return r;
1920	}
1921	/* init the CP */
1922	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
1923	radeon_ring_write(ring, 0x1);
1924	radeon_ring_write(ring, 0x0);
1925	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
1926	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
1927	radeon_ring_write(ring, 0);
1928	radeon_ring_write(ring, 0);
1929
1930	/* init the CE partitions */
1931	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
1932	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
1933	radeon_ring_write(ring, 0xc000);
1934	radeon_ring_write(ring, 0xe000);
1935	radeon_ring_unlock_commit(rdev, ring);
1936
1937	si_cp_enable(rdev, true);
1938
1939	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
1940	if (r) {
1941		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1942		return r;
1943	}
1944
1945	/* setup clear context state */
1946	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1947	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1948
1949	for (i = 0; i < si_default_size; i++)
1950		radeon_ring_write(ring, si_default_state[i]);
1951
1952	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1953	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
1954
1955	/* set clear context state */
1956	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
1957	radeon_ring_write(ring, 0);
1958
1959	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1960	radeon_ring_write(ring, 0x00000316);
1961	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
1962	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
1963
1964	radeon_ring_unlock_commit(rdev, ring);
1965
	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
		ring = &rdev->ring[i];
		r = radeon_ring_lock(rdev, ring, 2);
		if (r) {
			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
			return r;
		}
1969
1970		/* clear the compute context state */
1971		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
1972		radeon_ring_write(ring, 0);
1973
1974		radeon_ring_unlock_commit(rdev, ring);
1975	}
1976
1977	return 0;
1978}
1979
1980static void si_cp_fini(struct radeon_device *rdev)
1981{
1982	struct radeon_ring *ring;
1983	si_cp_enable(rdev, false);
1984
1985	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1986	radeon_ring_fini(rdev, ring);
1987	radeon_scratch_free(rdev, ring->rptr_save_reg);
1988
1989	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
1990	radeon_ring_fini(rdev, ring);
1991	radeon_scratch_free(rdev, ring->rptr_save_reg);
1992
1993	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
1994	radeon_ring_fini(rdev, ring);
1995	radeon_scratch_free(rdev, ring->rptr_save_reg);
1996}
1997
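/**
 * si_cp_resume - start up the CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Soft-resets the CP and the associated gfx blocks, then programs the
 * gfx ring and the two compute rings (size, read/write pointers,
 * writeback addresses and ring base), starts the CP and ring-tests each
 * ring.
 */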
1998static int si_cp_resume(struct radeon_device *rdev)
1999{
2000	struct radeon_ring *ring;
2001	u32 tmp;
2002	u32 rb_bufsz;
2003	int r;
2004
2005	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
2006	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
2007				 SOFT_RESET_PA |
2008				 SOFT_RESET_VGT |
2009				 SOFT_RESET_SPI |
2010				 SOFT_RESET_SX));
2011	RREG32(GRBM_SOFT_RESET);
2012	DRM_MDELAY(15);
2013	WREG32(GRBM_SOFT_RESET, 0);
2014	RREG32(GRBM_SOFT_RESET);
2015
2016	WREG32(CP_SEM_WAIT_TIMER, 0x0);
2017	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2018
2019	/* Set the write pointer delay */
2020	WREG32(CP_RB_WPTR_DELAY, 0);
2021
2022	WREG32(CP_DEBUG, 0);
2023	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2024
2025	/* ring 0 - compute and gfx */
2026	/* Set ring buffer size */
2027	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2028	rb_bufsz = drm_order(ring->ring_size / 8);
2029	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2030#ifdef __BIG_ENDIAN
2031	tmp |= BUF_SWAP_32BIT;
2032#endif
2033	WREG32(CP_RB0_CNTL, tmp);
2034
2035	/* Initialize the ring buffer's read and write pointers */
2036	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2037	ring->wptr = 0;
2038	WREG32(CP_RB0_WPTR, ring->wptr);
2039
2040	/* set the wb address whether it's enabled or not */
2041	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2042	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2043
2044	if (rdev->wb.enabled)
2045		WREG32(SCRATCH_UMSK, 0xff);
2046	else {
2047		tmp |= RB_NO_UPDATE;
2048		WREG32(SCRATCH_UMSK, 0);
2049	}
2050
2051	DRM_MDELAY(1);
2052	WREG32(CP_RB0_CNTL, tmp);
2053
2054	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
2055
2056	ring->rptr = RREG32(CP_RB0_RPTR);
2057
2058	/* ring1  - compute only */
2059	/* Set ring buffer size */
2060	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
2061	rb_bufsz = drm_order(ring->ring_size / 8);
2062	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2063#ifdef __BIG_ENDIAN
2064	tmp |= BUF_SWAP_32BIT;
2065#endif
2066	WREG32(CP_RB1_CNTL, tmp);
2067
2068	/* Initialize the ring buffer's read and write pointers */
2069	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
2070	ring->wptr = 0;
2071	WREG32(CP_RB1_WPTR, ring->wptr);
2072
2073	/* set the wb address whether it's enabled or not */
2074	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
2075	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
2076
2077	DRM_MDELAY(1);
2078	WREG32(CP_RB1_CNTL, tmp);
2079
2080	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
2081
2082	ring->rptr = RREG32(CP_RB1_RPTR);
2083
2084	/* ring2 - compute only */
2085	/* Set ring buffer size */
2086	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
2087	rb_bufsz = drm_order(ring->ring_size / 8);
2088	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2089#ifdef __BIG_ENDIAN
2090	tmp |= BUF_SWAP_32BIT;
2091#endif
2092	WREG32(CP_RB2_CNTL, tmp);
2093
2094	/* Initialize the ring buffer's read and write pointers */
2095	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
2096	ring->wptr = 0;
2097	WREG32(CP_RB2_WPTR, ring->wptr);
2098
2099	/* set the wb address whether it's enabled or not */
2100	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
2101	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
2102
2103	DRM_MDELAY(1);
2104	WREG32(CP_RB2_CNTL, tmp);
2105
2106	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
2107
2108	ring->rptr = RREG32(CP_RB2_RPTR);
2109
2110	/* start the rings */
2111	si_cp_start(rdev);
2112	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2113	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
2114	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
2115	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2116	if (r) {
2117		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2118		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2119		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2120		return r;
2121	}
2122	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
2123	if (r) {
2124		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2125	}
2126	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
2127	if (r) {
2128		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2129	}
2130
2131	return 0;
2132}
2133
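/**
 * si_gpu_is_lockup - check whether the GPU is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: ring to check
 *
 * Samples the GRBM/SRBM status registers; if the GUI is idle the lockup
 * tracking is updated and false is returned, otherwise CP activity is
 * forced and the ring is tested for a lockup.
 */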
2134bool si_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2135{
2136	u32 srbm_status;
2137	u32 grbm_status, grbm_status2;
2138	u32 grbm_status_se0, grbm_status_se1;
2139
2140	srbm_status = RREG32(SRBM_STATUS);
2141	grbm_status = RREG32(GRBM_STATUS);
2142	grbm_status2 = RREG32(GRBM_STATUS2);
2143	grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
2144	grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
2145	if (!(grbm_status & GUI_ACTIVE)) {
2146		radeon_ring_lockup_update(ring);
2147		return false;
2148	}
2149	/* force CP activities */
2150	radeon_ring_force_activity(rdev, ring);
2151	return radeon_ring_test_lockup(rdev, ring);
2152}
2153
2154static void si_gpu_soft_reset_gfx(struct radeon_device *rdev)
2155{
2156	u32 grbm_reset = 0;
2157
2158	if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
2159		return;
2160
2161	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
2162		RREG32(GRBM_STATUS));
2163	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
2164		RREG32(GRBM_STATUS2));
2165	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
2166		RREG32(GRBM_STATUS_SE0));
2167	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
2168		RREG32(GRBM_STATUS_SE1));
2169	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
2170		RREG32(SRBM_STATUS));
2171
2172	/* Disable CP parsing/prefetching */
2173	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
2174
2175	/* reset all the gfx blocks */
2176	grbm_reset = (SOFT_RESET_CP |
2177		      SOFT_RESET_CB |
2178		      SOFT_RESET_DB |
2179		      SOFT_RESET_GDS |
2180		      SOFT_RESET_PA |
2181		      SOFT_RESET_SC |
2182		      SOFT_RESET_BCI |
2183		      SOFT_RESET_SPI |
2184		      SOFT_RESET_SX |
2185		      SOFT_RESET_TC |
2186		      SOFT_RESET_TA |
2187		      SOFT_RESET_VGT |
2188		      SOFT_RESET_IA);
2189
2190	dev_info(rdev->dev, "  GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
2191	WREG32(GRBM_SOFT_RESET, grbm_reset);
2192	(void)RREG32(GRBM_SOFT_RESET);
2193	DRM_UDELAY(50);
2194	WREG32(GRBM_SOFT_RESET, 0);
2195	(void)RREG32(GRBM_SOFT_RESET);
2196
2197	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
2198		RREG32(GRBM_STATUS));
2199	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
2200		RREG32(GRBM_STATUS2));
2201	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
2202		RREG32(GRBM_STATUS_SE0));
2203	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
2204		RREG32(GRBM_STATUS_SE1));
2205	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
2206		RREG32(SRBM_STATUS));
2207}
2208
2209static void si_gpu_soft_reset_dma(struct radeon_device *rdev)
2210{
2211	u32 tmp;
2212
2213	if (RREG32(DMA_STATUS_REG) & DMA_IDLE)
2214		return;
2215
2216	dev_info(rdev->dev, "  DMA_STATUS_REG   = 0x%08X\n",
2217		RREG32(DMA_STATUS_REG));
2218
2219	/* dma0 */
2220	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
2221	tmp &= ~DMA_RB_ENABLE;
2222	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
2223
2224	/* dma1 */
2225	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
2226	tmp &= ~DMA_RB_ENABLE;
2227	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
2228
2229	/* Reset dma */
2230	WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
2231	RREG32(SRBM_SOFT_RESET);
2232	DRM_UDELAY(50);
2233	WREG32(SRBM_SOFT_RESET, 0);
2234
2235	dev_info(rdev->dev, "  DMA_STATUS_REG   = 0x%08X\n",
2236		RREG32(DMA_STATUS_REG));
2237}
2238
2239static int si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
2240{
2241	struct evergreen_mc_save save;
2242
2243	if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
2244		reset_mask &= ~(RADEON_RESET_GFX | RADEON_RESET_COMPUTE);
2245
2246	if (RREG32(DMA_STATUS_REG) & DMA_IDLE)
2247		reset_mask &= ~RADEON_RESET_DMA;
2248
2249	if (reset_mask == 0)
2250		return 0;
2251
2252	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
2253
2254	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
2255		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
2256	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
2257		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
2258
2259	evergreen_mc_stop(rdev, &save);
2260	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
2262	}
2263
2264	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE))
2265		si_gpu_soft_reset_gfx(rdev);
2266
2267	if (reset_mask & RADEON_RESET_DMA)
2268		si_gpu_soft_reset_dma(rdev);
2269
2270	/* Wait a little for things to settle down */
2271	DRM_UDELAY(50);
2272
2273	evergreen_mc_resume(rdev, &save);
2274	return 0;
2275}
2276
2277int si_asic_reset(struct radeon_device *rdev)
2278{
2279	return si_gpu_soft_reset(rdev, (RADEON_RESET_GFX |
2280					RADEON_RESET_COMPUTE |
2281					RADEON_RESET_DMA));
2282}
2283
2284/* MC */
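/**
 * si_mc_program - program the memory controller apertures
 *
 * @rdev: radeon_device pointer
 *
 * Initializes HDP, stops the MC clients, blocks access through the VGA
 * aperture, programs the system aperture, the frame buffer location and
 * the HDP non-surface range, resumes the MC and finally disables the
 * VGA renderer.
 */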
2285static void si_mc_program(struct radeon_device *rdev)
2286{
2287	struct evergreen_mc_save save;
2288	u32 tmp;
2289	int i, j;
2290
2291	/* Initialize HDP */
2292	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2293		WREG32((0x2c14 + j), 0x00000000);
2294		WREG32((0x2c18 + j), 0x00000000);
2295		WREG32((0x2c1c + j), 0x00000000);
2296		WREG32((0x2c20 + j), 0x00000000);
2297		WREG32((0x2c24 + j), 0x00000000);
2298	}
2299	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
2300
2301	evergreen_mc_stop(rdev, &save);
2302	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
2304	}
	/* Lock out access through the VGA aperture */
2306	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
2307	/* Update configuration */
2308	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
2309	       rdev->mc.vram_start >> 12);
2310	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
2311	       rdev->mc.vram_end >> 12);
2312	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
2313	       rdev->vram_scratch.gpu_addr >> 12);
2314	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
2315	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
2316	WREG32(MC_VM_FB_LOCATION, tmp);
2317	/* XXX double check these! */
2318	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
2319	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
2320	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
2321	WREG32(MC_VM_AGP_BASE, 0);
2322	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
2323	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
2324	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
2326	}
2327	evergreen_mc_resume(rdev, &save);
2328	/* we need to own VRAM, so turn off the VGA renderer here
2329	 * to stop it overwriting our objects */
2330	rv515_vga_render_disable(rdev);
2331}
2332
2333/* SI MC address space is 40 bits */
2334static void si_vram_location(struct radeon_device *rdev,
2335			     struct radeon_mc *mc, u64 base)
2336{
2337	mc->vram_start = base;
2338	if (mc->mc_vram_size > (0xFFFFFFFFFFULL - base + 1)) {
2339		dev_warn(rdev->dev, "limiting VRAM to PCI aperture size\n");
2340		mc->real_vram_size = mc->aper_size;
2341		mc->mc_vram_size = mc->aper_size;
2342	}
2343	mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
2344	dev_info(rdev->dev, "VRAM: %juM 0x%016jX - 0x%016jX (%juM used)\n",
2345			(uintmax_t)mc->mc_vram_size >> 20, (uintmax_t)mc->vram_start,
2346			(uintmax_t)mc->vram_end, (uintmax_t)mc->real_vram_size >> 20);
2347}
2348
2349static void si_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
2350{
2351	u64 size_af, size_bf;
2352
2353	size_af = ((0xFFFFFFFFFFULL - mc->vram_end) + mc->gtt_base_align) & ~mc->gtt_base_align;
2354	size_bf = mc->vram_start & ~mc->gtt_base_align;
2355	if (size_bf > size_af) {
2356		if (mc->gtt_size > size_bf) {
2357			dev_warn(rdev->dev, "limiting GTT\n");
2358			mc->gtt_size = size_bf;
2359		}
2360		mc->gtt_start = (mc->vram_start & ~mc->gtt_base_align) - mc->gtt_size;
2361	} else {
2362		if (mc->gtt_size > size_af) {
2363			dev_warn(rdev->dev, "limiting GTT\n");
2364			mc->gtt_size = size_af;
2365		}
2366		mc->gtt_start = (mc->vram_end + 1 + mc->gtt_base_align) & ~mc->gtt_base_align;
2367	}
2368	mc->gtt_end = mc->gtt_start + mc->gtt_size - 1;
2369	dev_info(rdev->dev, "GTT: %juM 0x%016jX - 0x%016jX\n",
2370			(uintmax_t)mc->gtt_size >> 20, (uintmax_t)mc->gtt_start, (uintmax_t)mc->gtt_end);
2371}
2372
2373static void si_vram_gtt_location(struct radeon_device *rdev,
2374				 struct radeon_mc *mc)
2375{
2376	if (mc->mc_vram_size > 0xFFC0000000ULL) {
2377		/* leave room for at least 1024M GTT */
2378		dev_warn(rdev->dev, "limiting VRAM\n");
2379		mc->real_vram_size = 0xFFC0000000ULL;
2380		mc->mc_vram_size = 0xFFC0000000ULL;
2381	}
2382	si_vram_location(rdev, &rdev->mc, 0);
2383	rdev->mc.gtt_base_align = 0;
2384	si_gtt_location(rdev, mc);
2385}
2386
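/**
 * si_mc_init - gather memory controller information
 *
 * @rdev: radeon_device pointer
 *
 * Determines the VRAM width from the channel size and channel count,
 * reads the VRAM size from CONFIG_MEMSIZE and places the VRAM and GTT
 * apertures in the 40-bit MC address space.
 */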
2387static int si_mc_init(struct radeon_device *rdev)
2388{
2389	u32 tmp;
2390	int chansize, numchan;
2391
	/* Get VRAM information */
2393	rdev->mc.vram_is_ddr = true;
2394	tmp = RREG32(MC_ARB_RAMCFG);
2395	if (tmp & CHANSIZE_OVERRIDE) {
2396		chansize = 16;
2397	} else if (tmp & CHANSIZE_MASK) {
2398		chansize = 64;
2399	} else {
2400		chansize = 32;
2401	}
2402	tmp = RREG32(MC_SHARED_CHMAP);
2403	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2404	case 0:
2405	default:
2406		numchan = 1;
2407		break;
2408	case 1:
2409		numchan = 2;
2410		break;
2411	case 2:
2412		numchan = 4;
2413		break;
2414	case 3:
2415		numchan = 8;
2416		break;
2417	case 4:
2418		numchan = 3;
2419		break;
2420	case 5:
2421		numchan = 6;
2422		break;
2423	case 6:
2424		numchan = 10;
2425		break;
2426	case 7:
2427		numchan = 12;
2428		break;
2429	case 8:
2430		numchan = 16;
2431		break;
2432	}
2433	rdev->mc.vram_width = numchan * chansize;
	/* Could the aperture size report 0? */
2435	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
2436	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
2437	/* size in MB on si */
2438	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2439	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2440	rdev->mc.visible_vram_size = rdev->mc.aper_size;
2441	si_vram_gtt_location(rdev, &rdev->mc);
2442	radeon_update_bandwidth_info(rdev);
2443
2444	return 0;
2445}
2446
2447/*
2448 * GART
2449 */
2450void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
2451{
2452	/* flush hdp cache */
2453	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2454
	/* bits 0-15 are the VM contexts 0-15 */
2456	WREG32(VM_INVALIDATE_REQUEST, 1);
2457}
2458
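/**
 * si_pcie_gart_enable - set up the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table, programs the TLB and L2 cache controls,
 * points VM context 0 at the GTT aperture, gives contexts 1-15 a valid
 * page table base with protection faults enabled, and flushes the TLB.
 */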
2459static int si_pcie_gart_enable(struct radeon_device *rdev)
2460{
2461	int r, i;
2462
2463	if (rdev->gart.robj == NULL) {
2464		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
2465		return -EINVAL;
2466	}
2467	r = radeon_gart_table_vram_pin(rdev);
2468	if (r)
2469		return r;
2470	radeon_gart_restore(rdev);
2471	/* Setup TLB control */
2472	WREG32(MC_VM_MX_L1_TLB_CNTL,
2473	       (0xA << 7) |
2474	       ENABLE_L1_TLB |
2475	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
2476	       ENABLE_ADVANCED_DRIVER_MODEL |
2477	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
2478	/* Setup L2 cache */
2479	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
2480	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
2481	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
2482	       EFFECTIVE_L2_QUEUE_SIZE(7) |
2483	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
2484	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
2485	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
2486	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
2487	/* setup context0 */
2488	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
2489	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
2490	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
2491	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
2492			(u32)(rdev->dummy_page.addr >> 12));
2493	WREG32(VM_CONTEXT0_CNTL2, 0);
2494	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
2495				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
2496
2497	WREG32(0x15D4, 0);
2498	WREG32(0x15D8, 0);
2499	WREG32(0x15DC, 0);
2500
2501	/* empty context1-15 */
2502	/* set vm size, must be a multiple of 4 */
2503	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
2504	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
2505	/* Assign the pt base to something valid for now; the pts used for
2506	 * the VMs are determined by the application and setup and assigned
2507	 * on the fly in the vm part of radeon_gart.c
2508	 */
2509	for (i = 1; i < 16; i++) {
2510		if (i < 8)
2511			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
2512			       rdev->gart.table_addr >> 12);
2513		else
2514			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
2515			       rdev->gart.table_addr >> 12);
2516	}
2517
2518	/* enable context1-15 */
2519	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
2520	       (u32)(rdev->dummy_page.addr >> 12));
2521	WREG32(VM_CONTEXT1_CNTL2, 4);
2522	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
2523				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
2524				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
2525				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
2526				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
2527				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
2528				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
2529				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
2530				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
2531				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
2532				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
2533				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
2534				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
2535
2536	si_pcie_gart_tlb_flush(rdev);
2537	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
2538		 (unsigned)(rdev->mc.gtt_size >> 20),
2539		 (unsigned long long)rdev->gart.table_addr);
2540	rdev->gart.ready = true;
2541	return 0;
2542}
2543
2544static void si_pcie_gart_disable(struct radeon_device *rdev)
2545{
2546	/* Disable all tables */
2547	WREG32(VM_CONTEXT0_CNTL, 0);
2548	WREG32(VM_CONTEXT1_CNTL, 0);
2549	/* Setup TLB control */
2550	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
2551	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
2552	/* Setup L2 cache */
2553	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
2554	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
2555	       EFFECTIVE_L2_QUEUE_SIZE(7) |
2556	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
2557	WREG32(VM_L2_CNTL2, 0);
2558	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
2559	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
2560	radeon_gart_table_vram_unpin(rdev);
2561}
2562
2563static void si_pcie_gart_fini(struct radeon_device *rdev)
2564{
2565	si_pcie_gart_disable(rdev);
2566	radeon_gart_table_vram_free(rdev);
2567	radeon_gart_fini(rdev);
2568}
2569
2570/* vm parser */
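/*
 * si_vm_reg_valid - check whether a register may be written from a VM IB.
 * Context registers (0x28000 and up) are always allowed; config registers
 * are checked against a whitelist.
 */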
2571static bool si_vm_reg_valid(u32 reg)
2572{
2573	/* context regs are fine */
2574	if (reg >= 0x28000)
2575		return true;
2576
2577	/* check config regs */
2578	switch (reg) {
2579	case GRBM_GFX_INDEX:
2580	case CP_STRMOUT_CNTL:
2581	case VGT_VTX_VECT_EJECT_REG:
2582	case VGT_CACHE_INVALIDATION:
2583	case VGT_ESGS_RING_SIZE:
2584	case VGT_GSVS_RING_SIZE:
2585	case VGT_GS_VERTEX_REUSE:
2586	case VGT_PRIMITIVE_TYPE:
2587	case VGT_INDEX_TYPE:
2588	case VGT_NUM_INDICES:
2589	case VGT_NUM_INSTANCES:
2590	case VGT_TF_RING_SIZE:
2591	case VGT_HS_OFFCHIP_PARAM:
2592	case VGT_TF_MEMORY_BASE:
2593	case PA_CL_ENHANCE:
2594	case PA_SU_LINE_STIPPLE_VALUE:
2595	case PA_SC_LINE_STIPPLE_STATE:
2596	case PA_SC_ENHANCE:
2597	case SQC_CACHES:
2598	case SPI_STATIC_THREAD_MGMT_1:
2599	case SPI_STATIC_THREAD_MGMT_2:
2600	case SPI_STATIC_THREAD_MGMT_3:
2601	case SPI_PS_MAX_WAVE_ID:
2602	case SPI_CONFIG_CNTL:
2603	case SPI_CONFIG_CNTL_1:
2604	case TA_CNTL_AUX:
2605		return true;
2606	default:
2607		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
2608		return false;
2609	}
2610}
2611
2612static int si_vm_packet3_ce_check(struct radeon_device *rdev,
2613				  u32 *ib, struct radeon_cs_packet *pkt)
2614{
2615	switch (pkt->opcode) {
2616	case PACKET3_NOP:
2617	case PACKET3_SET_BASE:
2618	case PACKET3_SET_CE_DE_COUNTERS:
2619	case PACKET3_LOAD_CONST_RAM:
2620	case PACKET3_WRITE_CONST_RAM:
2621	case PACKET3_WRITE_CONST_RAM_OFFSET:
2622	case PACKET3_DUMP_CONST_RAM:
2623	case PACKET3_INCREMENT_CE_COUNTER:
2624	case PACKET3_WAIT_ON_DE_COUNTER:
2625	case PACKET3_CE_WRITE:
2626		break;
2627	default:
2628		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
2629		return -EINVAL;
2630	}
2631	return 0;
2632}
2633
2634static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
2635				   u32 *ib, struct radeon_cs_packet *pkt)
2636{
2637	u32 idx = pkt->idx + 1;
2638	u32 idx_value = ib[idx];
2639	u32 start_reg, end_reg, reg, i;
2640	u32 command, info;
2641
2642	switch (pkt->opcode) {
2643	case PACKET3_NOP:
2644	case PACKET3_SET_BASE:
2645	case PACKET3_CLEAR_STATE:
2646	case PACKET3_INDEX_BUFFER_SIZE:
2647	case PACKET3_DISPATCH_DIRECT:
2648	case PACKET3_DISPATCH_INDIRECT:
2649	case PACKET3_ALLOC_GDS:
2650	case PACKET3_WRITE_GDS_RAM:
2651	case PACKET3_ATOMIC_GDS:
2652	case PACKET3_ATOMIC:
2653	case PACKET3_OCCLUSION_QUERY:
2654	case PACKET3_SET_PREDICATION:
2655	case PACKET3_COND_EXEC:
2656	case PACKET3_PRED_EXEC:
2657	case PACKET3_DRAW_INDIRECT:
2658	case PACKET3_DRAW_INDEX_INDIRECT:
2659	case PACKET3_INDEX_BASE:
2660	case PACKET3_DRAW_INDEX_2:
2661	case PACKET3_CONTEXT_CONTROL:
2662	case PACKET3_INDEX_TYPE:
2663	case PACKET3_DRAW_INDIRECT_MULTI:
2664	case PACKET3_DRAW_INDEX_AUTO:
2665	case PACKET3_DRAW_INDEX_IMMD:
2666	case PACKET3_NUM_INSTANCES:
2667	case PACKET3_DRAW_INDEX_MULTI_AUTO:
2668	case PACKET3_STRMOUT_BUFFER_UPDATE:
2669	case PACKET3_DRAW_INDEX_OFFSET_2:
2670	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
2671	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
2672	case PACKET3_MPEG_INDEX:
2673	case PACKET3_WAIT_REG_MEM:
2674	case PACKET3_MEM_WRITE:
2675	case PACKET3_PFP_SYNC_ME:
2676	case PACKET3_SURFACE_SYNC:
2677	case PACKET3_EVENT_WRITE:
2678	case PACKET3_EVENT_WRITE_EOP:
2679	case PACKET3_EVENT_WRITE_EOS:
2680	case PACKET3_SET_CONTEXT_REG:
2681	case PACKET3_SET_CONTEXT_REG_INDIRECT:
2682	case PACKET3_SET_SH_REG:
2683	case PACKET3_SET_SH_REG_OFFSET:
2684	case PACKET3_INCREMENT_DE_COUNTER:
2685	case PACKET3_WAIT_ON_CE_COUNTER:
2686	case PACKET3_WAIT_ON_AVAIL_BUFFER:
2687	case PACKET3_ME_WRITE:
2688		break;
2689	case PACKET3_COPY_DATA:
2690		if ((idx_value & 0xf00) == 0) {
2691			reg = ib[idx + 3] * 4;
2692			if (!si_vm_reg_valid(reg))
2693				return -EINVAL;
2694		}
2695		break;
2696	case PACKET3_WRITE_DATA:
2697		if ((idx_value & 0xf00) == 0) {
2698			start_reg = ib[idx + 1] * 4;
2699			if (idx_value & 0x10000) {
2700				if (!si_vm_reg_valid(start_reg))
2701					return -EINVAL;
2702			} else {
2703				for (i = 0; i < (pkt->count - 2); i++) {
2704					reg = start_reg + (4 * i);
2705					if (!si_vm_reg_valid(reg))
2706						return -EINVAL;
2707				}
2708			}
2709		}
2710		break;
2711	case PACKET3_COND_WRITE:
2712		if (idx_value & 0x100) {
2713			reg = ib[idx + 5] * 4;
2714			if (!si_vm_reg_valid(reg))
2715				return -EINVAL;
2716		}
2717		break;
2718	case PACKET3_COPY_DW:
2719		if (idx_value & 0x2) {
2720			reg = ib[idx + 3] * 4;
2721			if (!si_vm_reg_valid(reg))
2722				return -EINVAL;
2723		}
2724		break;
2725	case PACKET3_SET_CONFIG_REG:
2726		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2727		end_reg = 4 * pkt->count + start_reg - 4;
2728		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2729		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2730		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2731			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2732			return -EINVAL;
2733		}
2734		for (i = 0; i < pkt->count; i++) {
2735			reg = start_reg + (4 * i);
2736			if (!si_vm_reg_valid(reg))
2737				return -EINVAL;
2738		}
2739		break;
2740	case PACKET3_CP_DMA:
2741		command = ib[idx + 4];
2742		info = ib[idx + 1];
2743		if (command & PACKET3_CP_DMA_CMD_SAS) {
2744			/* src address space is register */
2745			if (((info & 0x60000000) >> 29) == 0) {
2746				start_reg = idx_value << 2;
2747				if (command & PACKET3_CP_DMA_CMD_SAIC) {
2748					reg = start_reg;
2749					if (!si_vm_reg_valid(reg)) {
2750						DRM_ERROR("CP DMA Bad SRC register\n");
2751						return -EINVAL;
2752					}
2753				} else {
2754					for (i = 0; i < (command & 0x1fffff); i++) {
2755						reg = start_reg + (4 * i);
2756						if (!si_vm_reg_valid(reg)) {
2757							DRM_ERROR("CP DMA Bad SRC register\n");
2758							return -EINVAL;
2759						}
2760					}
2761				}
2762			}
2763		}
2764		if (command & PACKET3_CP_DMA_CMD_DAS) {
2765			/* dst address space is register */
2766			if (((info & 0x00300000) >> 20) == 0) {
2767				start_reg = ib[idx + 2];
2768				if (command & PACKET3_CP_DMA_CMD_DAIC) {
2769					reg = start_reg;
2770					if (!si_vm_reg_valid(reg)) {
2771						DRM_ERROR("CP DMA Bad DST register\n");
2772						return -EINVAL;
2773					}
2774				} else {
2775					for (i = 0; i < (command & 0x1fffff); i++) {
2776						reg = start_reg + (4 * i);
2777						if (!si_vm_reg_valid(reg)) {
2778							DRM_ERROR("CP DMA Bad DST register\n");
2779							return -EINVAL;
2780						}
2781					}
2782				}
2783			}
2784		}
2785		break;
2786	default:
2787		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
2788		return -EINVAL;
2789	}
2790	return 0;
2791}
2792
2793static int si_vm_packet3_compute_check(struct radeon_device *rdev,
2794				       u32 *ib, struct radeon_cs_packet *pkt)
2795{
2796	u32 idx = pkt->idx + 1;
2797	u32 idx_value = ib[idx];
2798	u32 start_reg, reg, i;
2799
2800	switch (pkt->opcode) {
2801	case PACKET3_NOP:
2802	case PACKET3_SET_BASE:
2803	case PACKET3_CLEAR_STATE:
2804	case PACKET3_DISPATCH_DIRECT:
2805	case PACKET3_DISPATCH_INDIRECT:
2806	case PACKET3_ALLOC_GDS:
2807	case PACKET3_WRITE_GDS_RAM:
2808	case PACKET3_ATOMIC_GDS:
2809	case PACKET3_ATOMIC:
2810	case PACKET3_OCCLUSION_QUERY:
2811	case PACKET3_SET_PREDICATION:
2812	case PACKET3_COND_EXEC:
2813	case PACKET3_PRED_EXEC:
2814	case PACKET3_CONTEXT_CONTROL:
2815	case PACKET3_STRMOUT_BUFFER_UPDATE:
2816	case PACKET3_WAIT_REG_MEM:
2817	case PACKET3_MEM_WRITE:
2818	case PACKET3_PFP_SYNC_ME:
2819	case PACKET3_SURFACE_SYNC:
2820	case PACKET3_EVENT_WRITE:
2821	case PACKET3_EVENT_WRITE_EOP:
2822	case PACKET3_EVENT_WRITE_EOS:
2823	case PACKET3_SET_CONTEXT_REG:
2824	case PACKET3_SET_CONTEXT_REG_INDIRECT:
2825	case PACKET3_SET_SH_REG:
2826	case PACKET3_SET_SH_REG_OFFSET:
2827	case PACKET3_INCREMENT_DE_COUNTER:
2828	case PACKET3_WAIT_ON_CE_COUNTER:
2829	case PACKET3_WAIT_ON_AVAIL_BUFFER:
2830	case PACKET3_ME_WRITE:
2831		break;
2832	case PACKET3_COPY_DATA:
2833		if ((idx_value & 0xf00) == 0) {
2834			reg = ib[idx + 3] * 4;
2835			if (!si_vm_reg_valid(reg))
2836				return -EINVAL;
2837		}
2838		break;
2839	case PACKET3_WRITE_DATA:
2840		if ((idx_value & 0xf00) == 0) {
2841			start_reg = ib[idx + 1] * 4;
2842			if (idx_value & 0x10000) {
2843				if (!si_vm_reg_valid(start_reg))
2844					return -EINVAL;
2845			} else {
2846				for (i = 0; i < (pkt->count - 2); i++) {
2847					reg = start_reg + (4 * i);
2848					if (!si_vm_reg_valid(reg))
2849						return -EINVAL;
2850				}
2851			}
2852		}
2853		break;
2854	case PACKET3_COND_WRITE:
2855		if (idx_value & 0x100) {
2856			reg = ib[idx + 5] * 4;
2857			if (!si_vm_reg_valid(reg))
2858				return -EINVAL;
2859		}
2860		break;
2861	case PACKET3_COPY_DW:
2862		if (idx_value & 0x2) {
2863			reg = ib[idx + 3] * 4;
2864			if (!si_vm_reg_valid(reg))
2865				return -EINVAL;
2866		}
2867		break;
2868	default:
2869		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
2870		return -EINVAL;
2871	}
2872	return 0;
2873}
2874
2875int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
2876{
2877	int ret = 0;
2878	u32 idx = 0;
2879	struct radeon_cs_packet pkt;
2880
2881	do {
2882		pkt.idx = idx;
2883		pkt.type = CP_PACKET_GET_TYPE(ib->ptr[idx]);
2884		pkt.count = CP_PACKET_GET_COUNT(ib->ptr[idx]);
2885		pkt.one_reg_wr = 0;
2886		switch (pkt.type) {
2887		case PACKET_TYPE0:
2888			dev_err(rdev->dev, "Packet0 not allowed!\n");
2889			ret = -EINVAL;
2890			break;
2891		case PACKET_TYPE2:
2892			idx += 1;
2893			break;
2894		case PACKET_TYPE3:
2895			pkt.opcode = CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
2896			if (ib->is_const_ib)
2897				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
2898			else {
2899				switch (ib->ring) {
2900				case RADEON_RING_TYPE_GFX_INDEX:
2901					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
2902					break;
2903				case CAYMAN_RING_TYPE_CP1_INDEX:
2904				case CAYMAN_RING_TYPE_CP2_INDEX:
2905					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
2906					break;
2907				default:
					dev_err(rdev->dev, "Non-PM4 ring %d!\n", ib->ring);
2909					ret = -EINVAL;
2910					break;
2911				}
2912			}
2913			idx += pkt.count + 2;
2914			break;
2915		default:
			dev_err(rdev->dev, "Unknown packet type %d!\n", pkt.type);
2917			ret = -EINVAL;
2918			break;
2919		}
2920		if (ret)
2921			break;
2922	} while (idx < ib->length_dw);
2923
2924	return ret;
2925}
2926
2927/*
2928 * vm
2929 */
2930int si_vm_init(struct radeon_device *rdev)
2931{
2932	/* number of VMs */
2933	rdev->vm_manager.nvm = 16;
2934	/* base offset of vram pages */
2935	rdev->vm_manager.vram_base_offset = 0;
2936
2937	return 0;
2938}
2939
2940void si_vm_fini(struct radeon_device *rdev)
2941{
2942}
2943
2944/**
2945 * si_vm_set_page - update the page tables using the CP
2946 *
2947 * @rdev: radeon_device pointer
2948 * @pe: addr of the page entry
2949 * @addr: dst addr to write into pe
2950 * @count: number of page entries to update
2951 * @incr: increase next addr by incr bytes
2952 * @flags: access flags
2953 *
2954 * Update the page tables using the CP (cayman-si).
2955 */
2956void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
2957		    uint64_t addr, unsigned count,
2958		    uint32_t incr, uint32_t flags)
2959{
2960	struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
2961	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
2962	uint64_t value;
2963	unsigned ndw;
2964
2965	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
2966		while (count) {
2967			ndw = 2 + count * 2;
2968			if (ndw > 0x3FFE)
2969				ndw = 0x3FFE;
2970
2971			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw));
2972			radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2973						 WRITE_DATA_DST_SEL(1)));
2974			radeon_ring_write(ring, pe);
2975			radeon_ring_write(ring, upper_32_bits(pe));
2976			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
2977				if (flags & RADEON_VM_PAGE_SYSTEM) {
2978					value = radeon_vm_map_gart(rdev, addr);
2979					value &= 0xFFFFFFFFFFFFF000ULL;
2980				} else if (flags & RADEON_VM_PAGE_VALID) {
2981					value = addr;
2982				} else {
2983					value = 0;
2984				}
2985				addr += incr;
2986				value |= r600_flags;
2987				radeon_ring_write(ring, value);
2988				radeon_ring_write(ring, upper_32_bits(value));
2989			}
2990		}
2991	} else {
2992		/* DMA */
2993		if (flags & RADEON_VM_PAGE_SYSTEM) {
2994			while (count) {
2995				ndw = count * 2;
2996				if (ndw > 0xFFFFE)
2997					ndw = 0xFFFFE;
2998
2999				/* for non-physically contiguous pages (system) */
3000				radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw));
3001				radeon_ring_write(ring, pe);
3002				radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
3003				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
3004					if (flags & RADEON_VM_PAGE_SYSTEM) {
3005						value = radeon_vm_map_gart(rdev, addr);
3006						value &= 0xFFFFFFFFFFFFF000ULL;
3007					} else if (flags & RADEON_VM_PAGE_VALID) {
3008						value = addr;
3009					} else {
3010						value = 0;
3011					}
3012					addr += incr;
3013					value |= r600_flags;
3014					radeon_ring_write(ring, value);
3015					radeon_ring_write(ring, upper_32_bits(value));
3016				}
3017			}
3018		} else {
3019			while (count) {
3020				ndw = count * 2;
3021				if (ndw > 0xFFFFE)
3022					ndw = 0xFFFFE;
3023
3024				if (flags & RADEON_VM_PAGE_VALID)
3025					value = addr;
3026				else
3027					value = 0;
3028				/* for physically contiguous pages (vram) */
3029				radeon_ring_write(ring, DMA_PTE_PDE_PACKET(ndw));
3030				radeon_ring_write(ring, pe); /* dst addr */
3031				radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
3032				radeon_ring_write(ring, r600_flags); /* mask */
3033				radeon_ring_write(ring, 0);
3034				radeon_ring_write(ring, value); /* value */
3035				radeon_ring_write(ring, upper_32_bits(value));
3036				radeon_ring_write(ring, incr); /* increment size */
3037				radeon_ring_write(ring, 0);
3038				pe += ndw * 4;
3039				addr += (ndw / 2) * incr;
3040				count -= ndw / 2;
3041			}
3042		}
3043	}
3044}
3045
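/**
 * si_vm_flush - flush the TLB for a VM using the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index
 * @vm: VM to flush
 *
 * Writes the new page directory base for the VM, flushes the HDP cache,
 * requests a TLB invalidate for the VM id and synchronizes the PFP with
 * the ME.
 */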
3046void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
3047{
3048	struct radeon_ring *ring = &rdev->ring[ridx];
3049
3050	if (vm == NULL)
3051		return;
3052
3053	/* write new base address */
3054	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3055	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3056				 WRITE_DATA_DST_SEL(0)));
3057
3058	if (vm->id < 8) {
3059		radeon_ring_write(ring,
3060				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
3061	} else {
3062		radeon_ring_write(ring,
3063				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
3064	}
3065	radeon_ring_write(ring, 0);
3066	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
3067
3068	/* flush hdp cache */
3069	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3070	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3071				 WRITE_DATA_DST_SEL(0)));
3072	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3073	radeon_ring_write(ring, 0);
3074	radeon_ring_write(ring, 0x1);
3075
	/* bits 0-15 are the VM contexts 0-15 */
3077	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3078	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3079				 WRITE_DATA_DST_SEL(0)));
3080	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
3081	radeon_ring_write(ring, 0);
3082	radeon_ring_write(ring, 1 << vm->id);
3083
3084	/* sync PFP to ME, otherwise we might get invalid PFP reads */
3085	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3086	radeon_ring_write(ring, 0x0);
3087}
3088
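/**
 * si_dma_vm_flush - flush the TLB for a VM using a DMA ring
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index
 * @vm: VM to flush
 *
 * Same as si_vm_flush() but implemented with DMA SRBM write packets:
 * updates the page directory base, flushes the HDP cache and requests a
 * TLB invalidate for the VM id.
 */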
3089void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
3090{
3091	struct radeon_ring *ring = &rdev->ring[ridx];
3092
3093	if (vm == NULL)
3094		return;
3095
3096	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
3097	if (vm->id < 8) {
3098		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
3099	} else {
3100		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
3101	}
3102	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
3103
3104	/* flush hdp cache */
3105	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
3106	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
3107	radeon_ring_write(ring, 1);
3108
	/* bits 0-7 are the VM contexts 0-7 */
3110	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
3111	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
3112	radeon_ring_write(ring, 1 << vm->id);
3113}
3114
3115/*
3116 * RLC
3117 */
3118void si_rlc_fini(struct radeon_device *rdev)
3119{
3120	int r;
3121
3122	/* save restore block */
3123	if (rdev->rlc.save_restore_obj) {
3124		r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
3125		if (unlikely(r != 0))
3126			dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
3127		radeon_bo_unpin(rdev->rlc.save_restore_obj);
3128		radeon_bo_unreserve(rdev->rlc.save_restore_obj);
3129
3130		radeon_bo_unref(&rdev->rlc.save_restore_obj);
3131		rdev->rlc.save_restore_obj = NULL;
3132	}
3133
3134	/* clear state block */
3135	if (rdev->rlc.clear_state_obj) {
3136		r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
3137		if (unlikely(r != 0))
3138			dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
3139		radeon_bo_unpin(rdev->rlc.clear_state_obj);
3140		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
3141
3142		radeon_bo_unref(&rdev->rlc.clear_state_obj);
3143		rdev->rlc.clear_state_obj = NULL;
3144	}
3145}
3146
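/*
 * si_rlc_init - allocate and pin the RLC save/restore and clear state
 * buffer objects in VRAM, tearing everything down again on failure.
 */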
3147int si_rlc_init(struct radeon_device *rdev)
3148{
3149	int r;
3150
3151	/* save restore block */
3152	if (rdev->rlc.save_restore_obj == NULL) {
3153		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
3154				     RADEON_GEM_DOMAIN_VRAM, NULL,
3155				     &rdev->rlc.save_restore_obj);
3156		if (r) {
3157			dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
3158			return r;
3159		}
3160	}
3161
3162	r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
3163	if (unlikely(r != 0)) {
3164		si_rlc_fini(rdev);
3165		return r;
3166	}
3167	r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
3168			  &rdev->rlc.save_restore_gpu_addr);
3169	radeon_bo_unreserve(rdev->rlc.save_restore_obj);
3170	if (r) {
3171		dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
3172		si_rlc_fini(rdev);
3173		return r;
3174	}
3175
3176	/* clear state block */
3177	if (rdev->rlc.clear_state_obj == NULL) {
3178		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
3179				     RADEON_GEM_DOMAIN_VRAM, NULL,
3180				     &rdev->rlc.clear_state_obj);
3181		if (r) {
3182			dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
3183			si_rlc_fini(rdev);
3184			return r;
3185		}
3186	}
3187	r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
3188	if (unlikely(r != 0)) {
3189		si_rlc_fini(rdev);
3190		return r;
3191	}
3192	r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
3193			  &rdev->rlc.clear_state_gpu_addr);
3194	radeon_bo_unreserve(rdev->rlc.clear_state_obj);
3195	if (r) {
3196		dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
3197		si_rlc_fini(rdev);
3198		return r;
3199	}
3200
3201	return 0;
3202}
3203
3204static void si_rlc_stop(struct radeon_device *rdev)
3205{
3206	WREG32(RLC_CNTL, 0);
3207}
3208
3209static void si_rlc_start(struct radeon_device *rdev)
3210{
3211	WREG32(RLC_CNTL, RLC_ENABLE);
3212}
3213
3214static int si_rlc_resume(struct radeon_device *rdev)
3215{
3216	u32 i;
3217	const __be32 *fw_data;
3218
3219	if (!rdev->rlc_fw)
3220		return -EINVAL;
3221
3222	si_rlc_stop(rdev);
3223
3224	WREG32(RLC_RL_BASE, 0);
3225	WREG32(RLC_RL_SIZE, 0);
3226	WREG32(RLC_LB_CNTL, 0);
3227	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
3228	WREG32(RLC_LB_CNTR_INIT, 0);
3229
3230	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
3231	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
3232
3233	WREG32(RLC_MC_CNTL, 0);
3234	WREG32(RLC_UCODE_CNTL, 0);
3235
3236	fw_data = (const __be32 *)rdev->rlc_fw->data;
3237	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
3238		WREG32(RLC_UCODE_ADDR, i);
3239		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
3240	}
3241	WREG32(RLC_UCODE_ADDR, 0);
3242
3243	si_rlc_start(rdev);
3244
3245	return 0;
3246}
3247
3248static void si_enable_interrupts(struct radeon_device *rdev)
3249{
3250	u32 ih_cntl = RREG32(IH_CNTL);
3251	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3252
3253	ih_cntl |= ENABLE_INTR;
3254	ih_rb_cntl |= IH_RB_ENABLE;
3255	WREG32(IH_CNTL, ih_cntl);
3256	WREG32(IH_RB_CNTL, ih_rb_cntl);
3257	rdev->ih.enabled = true;
3258}
3259
3260static void si_disable_interrupts(struct radeon_device *rdev)
3261{
3262	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3263	u32 ih_cntl = RREG32(IH_CNTL);
3264
3265	ih_rb_cntl &= ~IH_RB_ENABLE;
3266	ih_cntl &= ~ENABLE_INTR;
3267	WREG32(IH_RB_CNTL, ih_rb_cntl);
3268	WREG32(IH_CNTL, ih_cntl);
3269	/* set rptr, wptr to 0 */
3270	WREG32(IH_RB_RPTR, 0);
3271	WREG32(IH_RB_WPTR, 0);
3272	rdev->ih.enabled = false;
3273	rdev->ih.rptr = 0;
3274}
3275
3276static void si_disable_interrupt_state(struct radeon_device *rdev)
3277{
3278	u32 tmp;
3279
3280	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3281	WREG32(CP_INT_CNTL_RING1, 0);
3282	WREG32(CP_INT_CNTL_RING2, 0);
3283	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
3284	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
3285	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
3286	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
3287	WREG32(GRBM_INT_CNTL, 0);
3288	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
3289	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
3290	if (rdev->num_crtc >= 4) {
3291		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
3292		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
3293	}
3294	if (rdev->num_crtc >= 6) {
3295		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
3296		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
3297	}
3298
3299	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
3300	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
3301	if (rdev->num_crtc >= 4) {
3302		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
3303		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
3304	}
3305	if (rdev->num_crtc >= 6) {
3306		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
3307		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
3308	}
3309
3310	WREG32(DACA_AUTODETECT_INT_CONTROL, 0);
3311
3312	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3313	WREG32(DC_HPD1_INT_CONTROL, tmp);
3314	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3315	WREG32(DC_HPD2_INT_CONTROL, tmp);
3316	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3317	WREG32(DC_HPD3_INT_CONTROL, tmp);
3318	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3319	WREG32(DC_HPD4_INT_CONTROL, tmp);
3320	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3321	WREG32(DC_HPD5_INT_CONTROL, tmp);
3322	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3323	WREG32(DC_HPD6_INT_CONTROL, tmp);
3324
3325}
3326
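/**
 * si_irq_init - initialize interrupt handling
 *
 * @rdev: radeon_device pointer
 *
 * Allocates the IH ring, resumes the RLC, programs the interrupt
 * controller and the IH ring buffer (including optional writeback),
 * enables bus mastering and turns interrupts on with all sources
 * masked.
 */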
3327static int si_irq_init(struct radeon_device *rdev)
3328{
3329	int ret = 0;
3330	int rb_bufsz;
3331	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
3332
3333	/* allocate ring */
3334	ret = r600_ih_ring_alloc(rdev);
3335	if (ret)
3336		return ret;
3337
3338	/* disable irqs */
3339	si_disable_interrupts(rdev);
3340
3341	/* init rlc */
3342	ret = si_rlc_resume(rdev);
3343	if (ret) {
3344		r600_ih_ring_fini(rdev);
3345		return ret;
3346	}
3347
3348	/* setup interrupt control */
3349	/* set dummy read address to ring address */
3350	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
3351	interrupt_cntl = RREG32(INTERRUPT_CNTL);
3352	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
3353	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
3354	 */
3355	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
3356	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
3357	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
3358	WREG32(INTERRUPT_CNTL, interrupt_cntl);
3359
3360	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
3361	rb_bufsz = drm_order(rdev->ih.ring_size / 4);
3362
3363	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
3364		      IH_WPTR_OVERFLOW_CLEAR |
3365		      (rb_bufsz << 1));
3366
3367	if (rdev->wb.enabled)
3368		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
3369
3370	/* set the writeback address whether it's enabled or not */
3371	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
3372	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
3373
3374	WREG32(IH_RB_CNTL, ih_rb_cntl);
3375
3376	/* set rptr, wptr to 0 */
3377	WREG32(IH_RB_RPTR, 0);
3378	WREG32(IH_RB_WPTR, 0);
3379
3380	/* Default settings for IH_CNTL (disabled at first) */
3381	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
3382	/* RPTR_REARM only works if msi's are enabled */
3383	if (rdev->msi_enabled)
3384		ih_cntl |= RPTR_REARM;
3385	WREG32(IH_CNTL, ih_cntl);
3386
3387	/* force the active interrupt state to all disabled */
3388	si_disable_interrupt_state(rdev);
3389
3390	pci_enable_busmaster(rdev->dev);
3391
3392	/* enable irqs */
3393	si_enable_interrupts(rdev);
3394
3395	return ret;
3396}
3397
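/**
 * si_irq_set - program the active interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Enable or disable interrupt sources on the GPU (CP and DMA rings,
 * vblank/pageflip and HPD) according to the current rdev->irq state
 * (SI).
 * Returns 0 for success, -EINVAL if no handler is installed.
 */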
3398int si_irq_set(struct radeon_device *rdev)
3399{
3400	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
3401	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
3402	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
3403	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
3404	u32 grbm_int_cntl = 0;
3405	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
3406	u32 dma_cntl, dma_cntl1;
3407
3408	if (!rdev->irq.installed) {
3409		DRM_ERROR("Can't enable IRQ/MSI because no handler is installed\n");
3410		return -EINVAL;
3411	}
3412	/* don't enable anything if the ih is disabled */
3413	if (!rdev->ih.enabled) {
3414		si_disable_interrupts(rdev);
3415		/* force the active interrupt state to all disabled */
3416		si_disable_interrupt_state(rdev);
3417		return 0;
3418	}
3419
3420	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
3421	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
3422	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
3423	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
3424	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
3425	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
3426
3427	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
3428	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
3429
3430	/* enable CP interrupts on all rings */
3431	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
3432		DRM_DEBUG("si_irq_set: sw int gfx\n");
3433		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
3434	}
3435	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
3436		DRM_DEBUG("si_irq_set: sw int cp1\n");
3437		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
3438	}
3439	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
3440		DRM_DEBUG("si_irq_set: sw int cp2\n");
3441		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
3442	}
3443	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
3444		DRM_DEBUG("si_irq_set: sw int dma\n");
3445		dma_cntl |= TRAP_ENABLE;
3446	}
3447
3448	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
3449		DRM_DEBUG("si_irq_set: sw int dma1\n");
3450		dma_cntl1 |= TRAP_ENABLE;
3451	}
3452	if (rdev->irq.crtc_vblank_int[0] ||
3453	    atomic_read(&rdev->irq.pflip[0])) {
3454		DRM_DEBUG("si_irq_set: vblank 0\n");
3455		crtc1 |= VBLANK_INT_MASK;
3456	}
3457	if (rdev->irq.crtc_vblank_int[1] ||
3458	    atomic_read(&rdev->irq.pflip[1])) {
3459		DRM_DEBUG("si_irq_set: vblank 1\n");
3460		crtc2 |= VBLANK_INT_MASK;
3461	}
3462	if (rdev->irq.crtc_vblank_int[2] ||
3463	    atomic_read(&rdev->irq.pflip[2])) {
3464		DRM_DEBUG("si_irq_set: vblank 2\n");
3465		crtc3 |= VBLANK_INT_MASK;
3466	}
3467	if (rdev->irq.crtc_vblank_int[3] ||
3468	    atomic_read(&rdev->irq.pflip[3])) {
3469		DRM_DEBUG("si_irq_set: vblank 3\n");
3470		crtc4 |= VBLANK_INT_MASK;
3471	}
3472	if (rdev->irq.crtc_vblank_int[4] ||
3473	    atomic_read(&rdev->irq.pflip[4])) {
3474		DRM_DEBUG("si_irq_set: vblank 4\n");
3475		crtc5 |= VBLANK_INT_MASK;
3476	}
3477	if (rdev->irq.crtc_vblank_int[5] ||
3478	    atomic_read(&rdev->irq.pflip[5])) {
3479		DRM_DEBUG("si_irq_set: vblank 5\n");
3480		crtc6 |= VBLANK_INT_MASK;
3481	}
3482	if (rdev->irq.hpd[0]) {
3483		DRM_DEBUG("si_irq_set: hpd 1\n");
3484		hpd1 |= DC_HPDx_INT_EN;
3485	}
3486	if (rdev->irq.hpd[1]) {
3487		DRM_DEBUG("si_irq_set: hpd 2\n");
3488		hpd2 |= DC_HPDx_INT_EN;
3489	}
3490	if (rdev->irq.hpd[2]) {
3491		DRM_DEBUG("si_irq_set: hpd 3\n");
3492		hpd3 |= DC_HPDx_INT_EN;
3493	}
3494	if (rdev->irq.hpd[3]) {
3495		DRM_DEBUG("si_irq_set: hpd 4\n");
3496		hpd4 |= DC_HPDx_INT_EN;
3497	}
3498	if (rdev->irq.hpd[4]) {
3499		DRM_DEBUG("si_irq_set: hpd 5\n");
3500		hpd5 |= DC_HPDx_INT_EN;
3501	}
3502	if (rdev->irq.hpd[5]) {
3503		DRM_DEBUG("si_irq_set: hpd 6\n");
3504		hpd6 |= DC_HPDx_INT_EN;
3505	}
3506
3507	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
3508	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
3509	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
3510
3511	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
3512	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
3513
3514	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
3515
3516	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
3517	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
3518	if (rdev->num_crtc >= 4) {
3519		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
3520		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
3521	}
3522	if (rdev->num_crtc >= 6) {
3523		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
3524		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
3525	}
3526
3527	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
3528	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
3529	if (rdev->num_crtc >= 4) {
3530		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
3531		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
3532	}
3533	if (rdev->num_crtc >= 6) {
3534		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
3535		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
3536	}
3537
3538	WREG32(DC_HPD1_INT_CONTROL, hpd1);
3539	WREG32(DC_HPD2_INT_CONTROL, hpd2);
3540	WREG32(DC_HPD3_INT_CONTROL, hpd3);
3541	WREG32(DC_HPD4_INT_CONTROL, hpd4);
3542	WREG32(DC_HPD5_INT_CONTROL, hpd5);
3543	WREG32(DC_HPD6_INT_CONTROL, hpd6);
3544
3545	return 0;
3546}
3547
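/**
 * si_irq_ack - ack the display interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Read the display interrupt status registers, cache them in
 * rdev->irq.stat_regs for si_irq_process(), and ack any pending
 * pageflip, vblank, vline and HPD interrupts (SI).
 */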
3548static inline void si_irq_ack(struct radeon_device *rdev)
3549{
3550	u32 tmp;
3551
3552	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
3553	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
3554	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
3555	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
3556	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
3557	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
3558	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
3559	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
3560	if (rdev->num_crtc >= 4) {
3561		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
3562		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
3563	}
3564	if (rdev->num_crtc >= 6) {
3565		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
3566		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
3567	}
3568
3569	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
3570		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3571	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
3572		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3573	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
3574		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
3575	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
3576		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
3577	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
3578		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
3579	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
3580		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
3581
3582	if (rdev->num_crtc >= 4) {
3583		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
3584			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3585		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
3586			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3587		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
3588			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
3589		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
3590			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
3591		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
3592			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
3593		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
3594			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
3595	}
3596
3597	if (rdev->num_crtc >= 6) {
3598		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
3599			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3600		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
3601			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3602		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
3603			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
3604		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
3605			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
3606		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
3607			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
3608		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
3609			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
3610	}
3611
3612	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
3613		tmp = RREG32(DC_HPD1_INT_CONTROL);
3614		tmp |= DC_HPDx_INT_ACK;
3615		WREG32(DC_HPD1_INT_CONTROL, tmp);
3616	}
3617	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
3618		tmp = RREG32(DC_HPD2_INT_CONTROL);
3619		tmp |= DC_HPDx_INT_ACK;
3620		WREG32(DC_HPD2_INT_CONTROL, tmp);
3621	}
3622	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
3623		tmp = RREG32(DC_HPD3_INT_CONTROL);
3624		tmp |= DC_HPDx_INT_ACK;
3625		WREG32(DC_HPD3_INT_CONTROL, tmp);
3626	}
3627	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
3628		tmp = RREG32(DC_HPD4_INT_CONTROL);
3629		tmp |= DC_HPDx_INT_ACK;
3630		WREG32(DC_HPD4_INT_CONTROL, tmp);
3631	}
3632	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
3633		tmp = RREG32(DC_HPD5_INT_CONTROL);
3634		tmp |= DC_HPDx_INT_ACK;
3635		WREG32(DC_HPD5_INT_CONTROL, tmp);
3636	}
3637	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
3639		tmp |= DC_HPDx_INT_ACK;
3640		WREG32(DC_HPD6_INT_CONTROL, tmp);
3641	}
3642}
3643
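/**
 * si_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable the interrupt ring buffer, wait briefly and ack anything
 * still pending, then clear all interrupt source enables (SI).
 */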
3644static void si_irq_disable(struct radeon_device *rdev)
3645{
3646	si_disable_interrupts(rdev);
3647	/* Wait and acknowledge irq */
3648	DRM_MDELAY(1);
3649	si_irq_ack(rdev);
3650	si_disable_interrupt_state(rdev);
3651}
3652
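/**
 * si_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (SI).
 * Used for suspend.
 */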
3653static void si_irq_suspend(struct radeon_device *rdev)
3654{
3655	si_irq_disable(rdev);
3656	si_rlc_stop(rdev);
3657}
3658
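/**
 * si_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and free the IH ring buffer (SI).
 * Used for driver unload.
 */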
3659static void si_irq_fini(struct radeon_device *rdev)
3660{
3661	si_irq_suspend(rdev);
3662	r600_ih_ring_fini(rdev);
3663}
3664
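/**
 * si_get_ih_wptr - get the IH ring buffer wptr
 *
 * @rdev: radeon_device pointer
 *
 * Get the IH ring buffer wptr from either the register or the
 * writeback page (SI).  Also check for ring buffer overflow and
 * adjust the rptr accordingly.
 * Returns the masked value of the wptr.
 */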
3665static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
3666{
3667	u32 wptr, tmp;
3668
3669	if (rdev->wb.enabled)
3670		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
3671	else
3672		wptr = RREG32(IH_RB_WPTR);
3673
3674	if (wptr & RB_OVERFLOW) {
		/* When a ring buffer overflow happens, start parsing interrupts
		 * from the last vector that was not overwritten (wptr + 16).
		 * Hopefully this allows us to catch up.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
3681		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
3682		tmp = RREG32(IH_RB_CNTL);
3683		tmp |= IH_WPTR_OVERFLOW_CLEAR;
3684		WREG32(IH_RB_CNTL, tmp);
3685	}
3686	return (wptr & rdev->ih.ptr_mask);
3687}
3688
3689/*        SI IV Ring
3690 * Each IV ring entry is 128 bits:
3691 * [7:0]    - interrupt source id
3692 * [31:8]   - reserved
3693 * [59:32]  - interrupt source data
3694 * [63:60]  - reserved
3695 * [71:64]  - RINGID
3696 * [79:72]  - VMID
3697 * [127:80] - reserved
3698 */
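/**
 * si_irq_process - interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Walk the IH ring and dispatch each vector: vblank/vline, hotplug,
 * VM faults, CP and DMA ring events (SI).  Hotplug events are
 * deferred to the hotplug work handler.
 * Returns IRQ_NONE if interrupts are disabled or another thread is
 * already processing the ring, IRQ_HANDLED otherwise.
 */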
3699irqreturn_t si_irq_process(struct radeon_device *rdev)
3700{
3701	u32 wptr;
3702	u32 rptr;
3703	u32 src_id, src_data, ring_id;
3704	u32 ring_index;
3705	bool queue_hotplug = false;
3706
3707	if (!rdev->ih.enabled || rdev->shutdown)
3708		return IRQ_NONE;
3709
3710	wptr = si_get_ih_wptr(rdev);
3711
3712restart_ih:
3713	/* is somebody else already processing irqs? */
3714	if (atomic_xchg(&rdev->ih.lock, 1))
3715		return IRQ_NONE;
3716
3717	rptr = rdev->ih.rptr;
3718	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
3719
3720	/* Order reading of wptr vs. reading of IH ring data */
3721	rmb();
3722
3723	/* display interrupts */
3724	si_irq_ack(rdev);
3725
3726	while (rptr != wptr) {
3727		/* wptr/rptr are in bytes! */
3728		ring_index = rptr / 4;
3729		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
3730		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
3731		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
3732
3733		switch (src_id) {
3734		case 1: /* D1 vblank/vline */
3735			switch (src_data) {
3736			case 0: /* D1 vblank */
3737				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
3738					if (rdev->irq.crtc_vblank_int[0]) {
3739						drm_handle_vblank(rdev->ddev, 0);
3740						rdev->pm.vblank_sync = true;
3741						DRM_WAKEUP(&rdev->irq.vblank_queue);
3742					}
3743					if (atomic_read(&rdev->irq.pflip[0]))
3744						radeon_crtc_handle_flip(rdev, 0);
3745					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
3746					DRM_DEBUG("IH: D1 vblank\n");
3747				}
3748				break;
3749			case 1: /* D1 vline */
3750				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
3751					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
3752					DRM_DEBUG("IH: D1 vline\n");
3753				}
3754				break;
3755			default:
3756				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3757				break;
3758			}
3759			break;
3760		case 2: /* D2 vblank/vline */
3761			switch (src_data) {
3762			case 0: /* D2 vblank */
3763				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
3764					if (rdev->irq.crtc_vblank_int[1]) {
3765						drm_handle_vblank(rdev->ddev, 1);
3766						rdev->pm.vblank_sync = true;
3767						DRM_WAKEUP(&rdev->irq.vblank_queue);
3768					}
3769					if (atomic_read(&rdev->irq.pflip[1]))
3770						radeon_crtc_handle_flip(rdev, 1);
3771					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
3772					DRM_DEBUG("IH: D2 vblank\n");
3773				}
3774				break;
3775			case 1: /* D2 vline */
3776				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
3777					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
3778					DRM_DEBUG("IH: D2 vline\n");
3779				}
3780				break;
3781			default:
3782				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3783				break;
3784			}
3785			break;
3786		case 3: /* D3 vblank/vline */
3787			switch (src_data) {
3788			case 0: /* D3 vblank */
3789				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
3790					if (rdev->irq.crtc_vblank_int[2]) {
3791						drm_handle_vblank(rdev->ddev, 2);
3792						rdev->pm.vblank_sync = true;
3793						DRM_WAKEUP(&rdev->irq.vblank_queue);
3794					}
3795					if (atomic_read(&rdev->irq.pflip[2]))
3796						radeon_crtc_handle_flip(rdev, 2);
3797					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
3798					DRM_DEBUG("IH: D3 vblank\n");
3799				}
3800				break;
3801			case 1: /* D3 vline */
3802				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
3803					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
3804					DRM_DEBUG("IH: D3 vline\n");
3805				}
3806				break;
3807			default:
3808				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3809				break;
3810			}
3811			break;
3812		case 4: /* D4 vblank/vline */
3813			switch (src_data) {
3814			case 0: /* D4 vblank */
3815				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
3816					if (rdev->irq.crtc_vblank_int[3]) {
3817						drm_handle_vblank(rdev->ddev, 3);
3818						rdev->pm.vblank_sync = true;
3819						DRM_WAKEUP(&rdev->irq.vblank_queue);
3820					}
3821					if (atomic_read(&rdev->irq.pflip[3]))
3822						radeon_crtc_handle_flip(rdev, 3);
3823					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
3824					DRM_DEBUG("IH: D4 vblank\n");
3825				}
3826				break;
3827			case 1: /* D4 vline */
3828				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
3829					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
3830					DRM_DEBUG("IH: D4 vline\n");
3831				}
3832				break;
3833			default:
3834				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3835				break;
3836			}
3837			break;
3838		case 5: /* D5 vblank/vline */
3839			switch (src_data) {
3840			case 0: /* D5 vblank */
3841				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
3842					if (rdev->irq.crtc_vblank_int[4]) {
3843						drm_handle_vblank(rdev->ddev, 4);
3844						rdev->pm.vblank_sync = true;
3845						DRM_WAKEUP(&rdev->irq.vblank_queue);
3846					}
3847					if (atomic_read(&rdev->irq.pflip[4]))
3848						radeon_crtc_handle_flip(rdev, 4);
3849					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
3850					DRM_DEBUG("IH: D5 vblank\n");
3851				}
3852				break;
3853			case 1: /* D5 vline */
3854				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
3855					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
3856					DRM_DEBUG("IH: D5 vline\n");
3857				}
3858				break;
3859			default:
3860				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3861				break;
3862			}
3863			break;
3864		case 6: /* D6 vblank/vline */
3865			switch (src_data) {
3866			case 0: /* D6 vblank */
3867				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
3868					if (rdev->irq.crtc_vblank_int[5]) {
3869						drm_handle_vblank(rdev->ddev, 5);
3870						rdev->pm.vblank_sync = true;
3871						DRM_WAKEUP(&rdev->irq.vblank_queue);
3872					}
3873					if (atomic_read(&rdev->irq.pflip[5]))
3874						radeon_crtc_handle_flip(rdev, 5);
3875					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
3876					DRM_DEBUG("IH: D6 vblank\n");
3877				}
3878				break;
3879			case 1: /* D6 vline */
3880				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
3881					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
3882					DRM_DEBUG("IH: D6 vline\n");
3883				}
3884				break;
3885			default:
3886				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3887				break;
3888			}
3889			break;
3890		case 42: /* HPD hotplug */
3891			switch (src_data) {
3892			case 0:
3893				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
3894					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
3895					queue_hotplug = true;
3896					DRM_DEBUG("IH: HPD1\n");
3897				}
3898				break;
3899			case 1:
3900				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
3901					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
3902					queue_hotplug = true;
3903					DRM_DEBUG("IH: HPD2\n");
3904				}
3905				break;
3906			case 2:
3907				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
3908					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
3909					queue_hotplug = true;
3910					DRM_DEBUG("IH: HPD3\n");
3911				}
3912				break;
3913			case 3:
3914				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
3915					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
3916					queue_hotplug = true;
3917					DRM_DEBUG("IH: HPD4\n");
3918				}
3919				break;
3920			case 4:
3921				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
3922					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
3923					queue_hotplug = true;
3924					DRM_DEBUG("IH: HPD5\n");
3925				}
3926				break;
3927			case 5:
3928				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
3929					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
3930					queue_hotplug = true;
3931					DRM_DEBUG("IH: HPD6\n");
3932				}
3933				break;
3934			default:
3935				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3936				break;
3937			}
3938			break;
3939		case 146:
3940		case 147:
3941			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
3942			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3943				RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3944			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3945				RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3946			/* reset addr and status */
3947			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
3948			break;
3949		case 176: /* RINGID0 CP_INT */
3950			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
3951			break;
3952		case 177: /* RINGID1 CP_INT */
3953			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
3954			break;
3955		case 178: /* RINGID2 CP_INT */
3956			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
3957			break;
3958		case 181: /* CP EOP event */
3959			DRM_DEBUG("IH: CP EOP\n");
3960			switch (ring_id) {
3961			case 0:
3962				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
3963				break;
3964			case 1:
3965				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
3966				break;
3967			case 2:
3968				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
3969				break;
3970			}
3971			break;
3972		case 224: /* DMA trap event */
3973			DRM_DEBUG("IH: DMA trap\n");
3974			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
3975			break;
3976		case 233: /* GUI IDLE */
3977			DRM_DEBUG("IH: GUI idle\n");
3978			break;
		case 244: /* DMA1 trap event */
3980			DRM_DEBUG("IH: DMA1 trap\n");
3981			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
3982			break;
3983		default:
3984			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3985			break;
3986		}
3987
3988		/* wptr/rptr are in bytes! */
3989		rptr += 16;
3990		rptr &= rdev->ih.ptr_mask;
3991	}
3992	if (queue_hotplug)
3993		taskqueue_enqueue(rdev->tq, &rdev->hotplug_work);
3994	rdev->ih.rptr = rptr;
3995	WREG32(IH_RB_RPTR, rdev->ih.rptr);
3996	atomic_set(&rdev->ih.lock, 0);
3997
3998	/* make sure wptr hasn't changed while processing */
3999	wptr = si_get_ih_wptr(rdev);
4000	if (wptr != rptr)
4001		goto restart_ih;
4002
4003	return IRQ_HANDLED;
4004}
4005
4006/**
4007 * si_copy_dma - copy pages using the DMA engine
4008 *
4009 * @rdev: radeon_device pointer
4010 * @src_offset: src GPU address
4011 * @dst_offset: dst GPU address
4012 * @num_gpu_pages: number of GPU pages to xfer
4013 * @fence: radeon fence object
4014 *
 * Copy GPU pages using the DMA engine (SI).
4016 * Used by the radeon ttm implementation to move pages if
4017 * registered as the asic copy callback.
4018 */
4019int si_copy_dma(struct radeon_device *rdev,
4020		uint64_t src_offset, uint64_t dst_offset,
4021		unsigned num_gpu_pages,
4022		struct radeon_fence **fence)
4023{
4024	struct radeon_semaphore *sem = NULL;
4025	int ring_index = rdev->asic->copy.dma_ring_index;
4026	struct radeon_ring *ring = &rdev->ring[ring_index];
4027	u32 size_in_bytes, cur_size_in_bytes;
4028	int i, num_loops;
4029	int r = 0;
4030
4031	r = radeon_semaphore_create(rdev, &sem);
4032	if (r) {
4033		DRM_ERROR("radeon: moving bo (%d).\n", r);
4034		return r;
4035	}
4036
4037	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4038	num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
4039	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
4040	if (r) {
4041		DRM_ERROR("radeon: moving bo (%d).\n", r);
4042		radeon_semaphore_free(rdev, &sem, NULL);
4043		return r;
4044	}
4045
4046	if (radeon_fence_need_sync(*fence, ring->idx)) {
4047		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
4048					    ring->idx);
4049		radeon_fence_note_sync(*fence, ring->idx);
4050	} else {
4051		radeon_semaphore_free(rdev, &sem, NULL);
4052	}
4053
4054	for (i = 0; i < num_loops; i++) {
4055		cur_size_in_bytes = size_in_bytes;
4056		if (cur_size_in_bytes > 0xFFFFF)
4057			cur_size_in_bytes = 0xFFFFF;
4058		size_in_bytes -= cur_size_in_bytes;
4059		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
4060		radeon_ring_write(ring, dst_offset & 0xffffffff);
4061		radeon_ring_write(ring, src_offset & 0xffffffff);
4062		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
4063		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
4064		src_offset += cur_size_in_bytes;
4065		dst_offset += cur_size_in_bytes;
4066	}
4067
4068	r = radeon_fence_emit(rdev, fence, ring->idx);
4069	if (r) {
4070		radeon_ring_unlock_undo(rdev, ring);
4071		return r;
4072	}
4073
4074	radeon_ring_unlock_commit(rdev, ring);
4075	radeon_semaphore_free(rdev, &sem, *fence);
4076
4077	return r;
4078}
4079
4080/*
4081 * startup/shutdown callbacks
4082 */
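/**
 * si_startup - program the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Load the microcode, program the MC and GART, allocate the RLC and
 * writeback buffers, start the fence rings, enable interrupts, and
 * bring up the CP and DMA rings, the IB pool and the VM manager (SI).
 * Called at init and resume.
 * Returns 0 for success, errors for failure.
 */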
4083static int si_startup(struct radeon_device *rdev)
4084{
4085	struct radeon_ring *ring;
4086	int r;
4087
4088	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
4089	    !rdev->rlc_fw || !rdev->mc_fw) {
4090		r = si_init_microcode(rdev);
4091		if (r) {
4092			DRM_ERROR("Failed to load firmware!\n");
4093			return r;
4094		}
4095	}
4096
4097	r = si_mc_load_microcode(rdev);
4098	if (r) {
4099		DRM_ERROR("Failed to load MC firmware!\n");
4100		return r;
4101	}
4102
4103	r = r600_vram_scratch_init(rdev);
4104	if (r)
4105		return r;
4106
4107	si_mc_program(rdev);
4108	r = si_pcie_gart_enable(rdev);
4109	if (r)
4110		return r;
4111	si_gpu_init(rdev);
4112
4113#if 0
4114	r = evergreen_blit_init(rdev);
4115	if (r) {
4116		r600_blit_fini(rdev);
4117		rdev->asic->copy = NULL;
4118		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
4119	}
4120#endif
4121	/* allocate rlc buffers */
4122	r = si_rlc_init(rdev);
4123	if (r) {
4124		DRM_ERROR("Failed to init rlc BOs!\n");
4125		return r;
4126	}
4127
4128	/* allocate wb buffer */
4129	r = radeon_wb_init(rdev);
4130	if (r)
4131		return r;
4132
4133	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
4134	if (r) {
4135		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
4136		return r;
4137	}
4138
4139	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
4140	if (r) {
4141		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
4142		return r;
4143	}
4144
4145	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
4146	if (r) {
4147		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
4148		return r;
4149	}
4150
4151	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
4152	if (r) {
4153		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4154		return r;
4155	}
4156
4157	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4158	if (r) {
4159		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4160		return r;
4161	}
4162
4163	/* Enable IRQ */
4164	r = si_irq_init(rdev);
4165	if (r) {
4166		DRM_ERROR("radeon: IH init failed (%d).\n", r);
4167		radeon_irq_kms_fini(rdev);
4168		return r;
4169	}
4170	si_irq_set(rdev);
4171
4172	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4173	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
4174			     CP_RB0_RPTR, CP_RB0_WPTR,
4175			     0, 0xfffff, RADEON_CP_PACKET2);
4176	if (r)
4177		return r;
4178
4179	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
4180	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
4181			     CP_RB1_RPTR, CP_RB1_WPTR,
4182			     0, 0xfffff, RADEON_CP_PACKET2);
4183	if (r)
4184		return r;
4185
4186	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
4187	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
4188			     CP_RB2_RPTR, CP_RB2_WPTR,
4189			     0, 0xfffff, RADEON_CP_PACKET2);
4190	if (r)
4191		return r;
4192
4193	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4194	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
4195			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
4196			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
4197			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
4198	if (r)
4199		return r;
4200
4201	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4202	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
4203			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
4204			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
4205			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
4206	if (r)
4207		return r;
4208
4209	r = si_cp_load_microcode(rdev);
4210	if (r)
4211		return r;
4212	r = si_cp_resume(rdev);
4213	if (r)
4214		return r;
4215
4216	r = cayman_dma_resume(rdev);
4217	if (r)
4218		return r;
4219
4220	r = radeon_ib_pool_init(rdev);
4221	if (r) {
4222		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
4223		return r;
4224	}
4225
4226	r = radeon_vm_manager_init(rdev);
4227	if (r) {
4228		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
4229		return r;
4230	}
4231
4232	return 0;
4233}
4234
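/**
 * si_resume - resume the asic
 *
 * @rdev: radeon_device pointer
 *
 * Re-post the card via atombios and bring the hw back up with
 * si_startup() (SI).
 * Returns 0 for success, errors for failure.
 */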
4235int si_resume(struct radeon_device *rdev)
4236{
4237	int r;
4238
	/* Do not reset the GPU before posting; on rv770 hw, unlike on r500 hw,
	 * posting will perform the tasks necessary to bring the GPU back
	 * into good shape.
	 */
4243	/* post card */
4244	atom_asic_init(rdev->mode_info.atom_context);
4245
4246	rdev->accel_working = true;
4247	r = si_startup(rdev);
4248	if (r) {
4249		DRM_ERROR("si startup failed on resume\n");
4250		rdev->accel_working = false;
4251		return r;
4252	}
4253
4254	return r;
4255
4256}
4257
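/**
 * si_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Stop the CP and DMA engines, suspend interrupts, disable writeback
 * and disable the GART (SI).
 * Returns 0 for success.
 */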
4258int si_suspend(struct radeon_device *rdev)
4259{
4260	si_cp_enable(rdev, false);
4261	cayman_dma_stop(rdev);
4262	si_irq_suspend(rdev);
4263	radeon_wb_disable(rdev);
4264	si_pcie_gart_disable(rdev);
4265	return 0;
4266}
4267
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does little more
 * than call the asic-specific functions. This should also allow
 * us to remove a bunch of callbacks such as vram_info.
 */
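/**
 * si_init - asic specific driver and hw init
 *
 * @rdev: radeon_device pointer
 *
 * Read the BIOS, set up the memory controller, rings and interrupt
 * handling, then bring the hw to a functional state via si_startup()
 * (SI).
 * Called at driver load.
 * Returns 0 for success, errors for failure.
 */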
4274int si_init(struct radeon_device *rdev)
4275{
4276	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4277	int r;
4278
4279	/* Read BIOS */
4280	if (!radeon_get_bios(rdev)) {
4281		if (ASIC_IS_AVIVO(rdev))
4282			return -EINVAL;
4283	}
4284	/* Must be an ATOMBIOS */
4285	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
4287		return -EINVAL;
4288	}
4289	r = radeon_atombios_init(rdev);
4290	if (r)
4291		return r;
4292
4293	/* Post card if necessary */
4294	if (!radeon_card_posted(rdev)) {
4295		if (!rdev->bios) {
4296			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
4297			return -EINVAL;
4298		}
4299		DRM_INFO("GPU not posted. posting now...\n");
4300		atom_asic_init(rdev->mode_info.atom_context);
4301	}
4302	/* Initialize scratch registers */
4303	si_scratch_init(rdev);
4304	/* Initialize surface registers */
4305	radeon_surface_init(rdev);
4306	/* Initialize clocks */
4307	radeon_get_clock_info(rdev->ddev);
4308
4309	/* Fence driver */
4310	r = radeon_fence_driver_init(rdev);
4311	if (r)
4312		return r;
4313
4314	/* initialize memory controller */
4315	r = si_mc_init(rdev);
4316	if (r)
4317		return r;
4318	/* Memory manager */
4319	r = radeon_bo_init(rdev);
4320	if (r)
4321		return r;
4322
4323	r = radeon_irq_kms_init(rdev);
4324	if (r)
4325		return r;
4326
4327	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4328	ring->ring_obj = NULL;
4329	r600_ring_init(rdev, ring, 1024 * 1024);
4330
4331	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
4332	ring->ring_obj = NULL;
4333	r600_ring_init(rdev, ring, 1024 * 1024);
4334
4335	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
4336	ring->ring_obj = NULL;
4337	r600_ring_init(rdev, ring, 1024 * 1024);
4338
4339	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4340	ring->ring_obj = NULL;
4341	r600_ring_init(rdev, ring, 64 * 1024);
4342
4343	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4344	ring->ring_obj = NULL;
4345	r600_ring_init(rdev, ring, 64 * 1024);
4346
4347	rdev->ih.ring_obj = NULL;
4348	r600_ih_ring_init(rdev, 64 * 1024);
4349
4350	r = r600_pcie_gart_init(rdev);
4351	if (r)
4352		return r;
4353
4354	rdev->accel_working = true;
4355	r = si_startup(rdev);
4356	if (r) {
4357		dev_err(rdev->dev, "disabling GPU acceleration\n");
4358		si_cp_fini(rdev);
4359		cayman_dma_fini(rdev);
4360		si_irq_fini(rdev);
4361		si_rlc_fini(rdev);
4362		radeon_wb_fini(rdev);
4363		radeon_ib_pool_fini(rdev);
4364		radeon_vm_manager_fini(rdev);
4365		radeon_irq_kms_fini(rdev);
4366		si_pcie_gart_fini(rdev);
4367		rdev->accel_working = false;
4368	}
4369
4370	/* Don't start up if the MC ucode is missing.
4371	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
4373	 */
4374	if (!rdev->mc_fw) {
4375		DRM_ERROR("radeon: MC ucode required for NI+.\n");
4376		return -EINVAL;
4377	}
4378
4379	return 0;
4380}
4381
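/**
 * si_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the CP and DMA engines, interrupts, GART, memory manager
 * and the rest of the asic specific driver state (SI).
 * Called at driver unload.
 */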
4382void si_fini(struct radeon_device *rdev)
4383{
4384#if 0
4385	r600_blit_fini(rdev);
4386#endif
4387	si_cp_fini(rdev);
4388	cayman_dma_fini(rdev);
4389	si_irq_fini(rdev);
4390	si_rlc_fini(rdev);
4391	radeon_wb_fini(rdev);
4392	radeon_vm_manager_fini(rdev);
4393	radeon_ib_pool_fini(rdev);
4394	radeon_irq_kms_fini(rdev);
4395	si_pcie_gart_fini(rdev);
4396	r600_vram_scratch_fini(rdev);
4397	radeon_gem_fini(rdev);
4398	radeon_fence_driver_fini(rdev);
4399	radeon_bo_fini(rdev);
4400	radeon_atombios_fini(rdev);
4401	si_fini_microcode(rdev);
4402	free(rdev->bios, DRM_MEM_DRIVER);
4403	rdev->bios = NULL;
4404}
4405
4406/**
4407 * si_get_gpu_clock - return GPU clock counter snapshot
4408 *
4409 * @rdev: radeon_device pointer
4410 *
4411 * Fetches a GPU clock counter snapshot (SI).
4412 * Returns the 64 bit clock counter snapshot.
4413 */
4414uint64_t si_get_gpu_clock(struct radeon_device *rdev)
4415{
4416	uint64_t clock;
4417
4418	sx_xlock(&rdev->gpu_clock_mutex);
4419	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4420	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
4421	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4422	sx_xunlock(&rdev->gpu_clock_mutex);
4423	return clock;
4424}
4425