rv770.c revision 282199
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 *          Alex Deucher
26 *          Jerome Glisse
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/sys/dev/drm2/radeon/rv770.c 282199 2015-04-28 19:35:05Z dumbbell $");
31
32#include <dev/drm2/drmP.h>
33#include "radeon.h"
34#include "radeon_asic.h"
35#include <dev/drm2/radeon/radeon_drm.h>
36#include "rv770d.h"
37#include "atom.h"
38#include "avivod.h"
39
40#define R700_PFP_UCODE_SIZE 848
41#define R700_PM4_UCODE_SIZE 1360
42
43static void rv770_gpu_init(struct radeon_device *rdev);
44#ifdef FREEBSD_WIP /* FreeBSD: to please GCC 4.2. */
45void rv770_fini(struct radeon_device *rdev);
46#endif
47static void rv770_pcie_gen2_enable(struct radeon_device *rdev);
48
49u32 rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
50{
51	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
52	u32 tmp = RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset);
53	int i;
54
55	/* Lock the graphics update lock */
56	tmp |= AVIVO_D1GRPH_UPDATE_LOCK;
57	WREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset, tmp);
58
59	/* update the scanout addresses */
60	if (radeon_crtc->crtc_id) {
61		WREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base));
62		WREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base));
63	} else {
64		WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base));
65		WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base));
66	}
67	WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
68	       (u32)crtc_base);
69	WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
70	       (u32)crtc_base);
71
72	/* Wait for update_pending to go high. */
73	for (i = 0; i < rdev->usec_timeout; i++) {
74		if (RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset) & AVIVO_D1GRPH_SURFACE_UPDATE_PENDING)
75			break;
76		udelay(1);
77	}
78	DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
79
80	/* Unlock the lock, so double-buffering can take place inside vblank */
81	tmp &= ~AVIVO_D1GRPH_UPDATE_LOCK;
82	WREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset, tmp);
83
84	/* Return current update_pending status: */
85	return RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset) & AVIVO_D1GRPH_SURFACE_UPDATE_PENDING;
86}
87
88/* get temperature in millidegrees */
89int rv770_get_temp(struct radeon_device *rdev)
90{
91	u32 temp = (RREG32(CG_MULT_THERMAL_STATUS) & ASIC_T_MASK) >>
92		ASIC_T_SHIFT;
93	int actual_temp;
94
95	if (temp & 0x400)
96		actual_temp = -256;
97	else if (temp & 0x200)
98		actual_temp = 255;
99	else if (temp & 0x100) {
100		actual_temp = temp & 0x1ff;
101		actual_temp |= ~0x1ff;
102	} else
103		actual_temp = temp & 0xff;
104
105	return (actual_temp * 1000) / 2;
106}
107
108void rv770_pm_misc(struct radeon_device *rdev)
109{
110	int req_ps_idx = rdev->pm.requested_power_state_index;
111	int req_cm_idx = rdev->pm.requested_clock_mode_index;
112	struct radeon_power_state *ps = &rdev->pm.power_state[req_ps_idx];
113	struct radeon_voltage *voltage = &ps->clock_info[req_cm_idx].voltage;
114
115	if ((voltage->type == VOLTAGE_SW) && voltage->voltage) {
116		/* 0xff01 is a flag rather then an actual voltage */
117		if (voltage->voltage == 0xff01)
118			return;
119		if (voltage->voltage != rdev->pm.current_vddc) {
120			radeon_atom_set_voltage(rdev, voltage->voltage, SET_VOLTAGE_TYPE_ASIC_VDDC);
121			rdev->pm.current_vddc = voltage->voltage;
122			DRM_DEBUG("Setting: v: %d\n", voltage->voltage);
123		}
124	}
125}
126
127/*
128 * GART
129 */
130static int rv770_pcie_gart_enable(struct radeon_device *rdev)
131{
132	u32 tmp;
133	int r, i;
134
135	if (rdev->gart.robj == NULL) {
136		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
137		return -EINVAL;
138	}
139	r = radeon_gart_table_vram_pin(rdev);
140	if (r)
141		return r;
142	radeon_gart_restore(rdev);
143	/* Setup L2 cache */
144	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
145				ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
146				EFFECTIVE_L2_QUEUE_SIZE(7));
147	WREG32(VM_L2_CNTL2, 0);
148	WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
149	/* Setup TLB control */
150	tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING |
151		SYSTEM_ACCESS_MODE_NOT_IN_SYS |
152		SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
153		EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
154	WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
155	WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
156	WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
157	if (rdev->family == CHIP_RV740)
158		WREG32(MC_VM_MD_L1_TLB3_CNTL, tmp);
159	WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
160	WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
161	WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
162	WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
163	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
164	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
165	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
166	WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
167				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
168	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
169			(u32)(rdev->dummy_page.addr >> 12));
170	for (i = 1; i < 7; i++)
171		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
172
173	r600_pcie_gart_tlb_flush(rdev);
174	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
175		 (unsigned)(rdev->mc.gtt_size >> 20),
176		 (unsigned long long)rdev->gart.table_addr);
177	rdev->gart.ready = true;
178	return 0;
179}
180
181static void rv770_pcie_gart_disable(struct radeon_device *rdev)
182{
183	u32 tmp;
184	int i;
185
186	/* Disable all tables */
187	for (i = 0; i < 7; i++)
188		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
189
190	/* Setup L2 cache */
191	WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING |
192				EFFECTIVE_L2_QUEUE_SIZE(7));
193	WREG32(VM_L2_CNTL2, 0);
194	WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
195	/* Setup TLB control */
196	tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
197	WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
198	WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
199	WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
200	WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
201	WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
202	WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
203	WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
204	radeon_gart_table_vram_unpin(rdev);
205}
206
/*
 * Tear down the PCIE GART: common GART teardown first, then disable the
 * hardware translation, then free the table object.
 */
static void rv770_pcie_gart_fini(struct radeon_device *rdev)
{
	radeon_gart_fini(rdev);
	rv770_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
}
213
214
215static void rv770_agp_enable(struct radeon_device *rdev)
216{
217	u32 tmp;
218	int i;
219
220	/* Setup L2 cache */
221	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
222				ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
223				EFFECTIVE_L2_QUEUE_SIZE(7));
224	WREG32(VM_L2_CNTL2, 0);
225	WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
226	/* Setup TLB control */
227	tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING |
228		SYSTEM_ACCESS_MODE_NOT_IN_SYS |
229		SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
230		EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
231	WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
232	WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
233	WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
234	WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
235	WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
236	WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
237	WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
238	for (i = 0; i < 7; i++)
239		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
240}
241
242static void rv770_mc_program(struct radeon_device *rdev)
243{
244	struct rv515_mc_save save;
245	u32 tmp;
246	int i, j;
247
248	/* Initialize HDP */
249	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
250		WREG32((0x2c14 + j), 0x00000000);
251		WREG32((0x2c18 + j), 0x00000000);
252		WREG32((0x2c1c + j), 0x00000000);
253		WREG32((0x2c20 + j), 0x00000000);
254		WREG32((0x2c24 + j), 0x00000000);
255	}
256	/* r7xx hw bug.  Read from HDP_DEBUG1 rather
257	 * than writing to HDP_REG_COHERENCY_FLUSH_CNTL
258	 */
259	tmp = RREG32(HDP_DEBUG1);
260
261	rv515_mc_stop(rdev, &save);
262	if (r600_mc_wait_for_idle(rdev)) {
263		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
264	}
265	/* Lockout access through VGA aperture*/
266	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
267	/* Update configuration */
268	if (rdev->flags & RADEON_IS_AGP) {
269		if (rdev->mc.vram_start < rdev->mc.gtt_start) {
270			/* VRAM before AGP */
271			WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
272				rdev->mc.vram_start >> 12);
273			WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
274				rdev->mc.gtt_end >> 12);
275		} else {
276			/* VRAM after AGP */
277			WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
278				rdev->mc.gtt_start >> 12);
279			WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
280				rdev->mc.vram_end >> 12);
281		}
282	} else {
283		WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
284			rdev->mc.vram_start >> 12);
285		WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
286			rdev->mc.vram_end >> 12);
287	}
288	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, rdev->vram_scratch.gpu_addr >> 12);
289	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
290	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
291	WREG32(MC_VM_FB_LOCATION, tmp);
292	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
293	WREG32(HDP_NONSURFACE_INFO, (2 << 7));
294	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
295	if (rdev->flags & RADEON_IS_AGP) {
296		WREG32(MC_VM_AGP_TOP, rdev->mc.gtt_end >> 16);
297		WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 16);
298		WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22);
299	} else {
300		WREG32(MC_VM_AGP_BASE, 0);
301		WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
302		WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
303	}
304	if (r600_mc_wait_for_idle(rdev)) {
305		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
306	}
307	rv515_mc_resume(rdev, &save);
308	/* we need to own VRAM, so turn off the VGA renderer here
309	 * to stop it overwriting our objects */
310	rv515_vga_render_disable(rdev);
311}
312
313
314/*
315 * CP.
316 */
317void r700_cp_stop(struct radeon_device *rdev)
318{
319	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
320	WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
321	WREG32(SCRATCH_UMSK, 0);
322	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
323}
324
325static int rv770_cp_load_microcode(struct radeon_device *rdev)
326{
327	const __be32 *fw_data;
328	int i;
329
330	if (!rdev->me_fw || !rdev->pfp_fw)
331		return -EINVAL;
332
333	r700_cp_stop(rdev);
334	WREG32(CP_RB_CNTL,
335#ifdef __BIG_ENDIAN
336	       BUF_SWAP_32BIT |
337#endif
338	       RB_NO_UPDATE | RB_BLKSZ(15) | RB_BUFSZ(3));
339
340	/* Reset cp */
341	WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
342	RREG32(GRBM_SOFT_RESET);
343	mdelay(15);
344	WREG32(GRBM_SOFT_RESET, 0);
345
346	fw_data = (const __be32 *)rdev->pfp_fw->data;
347	WREG32(CP_PFP_UCODE_ADDR, 0);
348	for (i = 0; i < R700_PFP_UCODE_SIZE; i++)
349		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
350	WREG32(CP_PFP_UCODE_ADDR, 0);
351
352	fw_data = (const __be32 *)rdev->me_fw->data;
353	WREG32(CP_ME_RAM_WADDR, 0);
354	for (i = 0; i < R700_PM4_UCODE_SIZE; i++)
355		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
356
357	WREG32(CP_PFP_UCODE_ADDR, 0);
358	WREG32(CP_ME_RAM_WADDR, 0);
359	WREG32(CP_ME_RAM_RADDR, 0);
360	return 0;
361}
362
363void r700_cp_fini(struct radeon_device *rdev)
364{
365	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
366	r700_cp_stop(rdev);
367	radeon_ring_fini(rdev, ring);
368	radeon_scratch_free(rdev, ring->rptr_save_reg);
369}
370
371/*
372 * Core functions
373 */
/*
 * Fill in the per-family limits in rdev->config.rv770 and program the
 * default state of the 3D engine.
 *
 * The function proceeds in this order:
 *   1. select the chip limits from rdev->family;
 *   2. clear the HDP register groups and set the GRBM read timeout;
 *   3. reduce max_backends / max_pipes / max_simds according to the
 *      disable masks read from CC_RB_BACKEND_DISABLE and
 *      CC_GC_SHADER_PIPE_CONFIG;
 *   4. build the tiling configuration word and write it to the GB, DCP,
 *      HDP and DMA tiling registers;
 *   5. write default values for the SQ, SX, VGT, PA, SPI and CB blocks.
 *
 * The register writes are order-sensitive hardware programming; do not
 * reorder them without consulting the hardware documentation.
 */
static void rv770_gpu_init(struct radeon_device *rdev)
{
	int i, j, num_qd_pipes;
	u32 ta_aux_cntl;
	u32 sx_debug_1;
	u32 smx_dc_ctl0;
	u32 db_debug3;
	u32 num_gs_verts_per_thread;
	u32 vgt_gs_per_es;
	u32 gs_prim_buffer_depth = 0;
	u32 sq_ms_fifo_sizes;
	u32 sq_config;
	u32 sq_thread_resource_mgmt;
	u32 hdp_host_path_cntl;
	u32 sq_dyn_gpr_size_simd_ab_0;
	u32 gb_tiling_config = 0;
	u32 cc_rb_backend_disable = 0;
	u32 cc_gc_shader_pipe_config = 0;
	u32 mc_arb_ramcfg;
	u32 db_debug4, tmp;
	u32 inactive_pipes, shader_pipe_config;
	u32 disabled_rb_mask;
	unsigned active_number;

	/* setup chip specs */
	rdev->config.rv770.tiling_group_size = 256;
	switch (rdev->family) {
	case CHIP_RV770:
		rdev->config.rv770.max_pipes = 4;
		rdev->config.rv770.max_tile_pipes = 8;
		rdev->config.rv770.max_simds = 10;
		rdev->config.rv770.max_backends = 4;
		rdev->config.rv770.max_gprs = 256;
		rdev->config.rv770.max_threads = 248;
		rdev->config.rv770.max_stack_entries = 512;
		rdev->config.rv770.max_hw_contexts = 8;
		rdev->config.rv770.max_gs_threads = 16 * 2;
		rdev->config.rv770.sx_max_export_size = 128;
		rdev->config.rv770.sx_max_export_pos_size = 16;
		rdev->config.rv770.sx_max_export_smx_size = 112;
		rdev->config.rv770.sq_num_cf_insts = 2;

		rdev->config.rv770.sx_num_of_sets = 7;
		rdev->config.rv770.sc_prim_fifo_size = 0xF9;
		rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
		break;
	case CHIP_RV730:
		rdev->config.rv770.max_pipes = 2;
		rdev->config.rv770.max_tile_pipes = 4;
		rdev->config.rv770.max_simds = 8;
		rdev->config.rv770.max_backends = 2;
		rdev->config.rv770.max_gprs = 128;
		rdev->config.rv770.max_threads = 248;
		rdev->config.rv770.max_stack_entries = 256;
		rdev->config.rv770.max_hw_contexts = 8;
		rdev->config.rv770.max_gs_threads = 16 * 2;
		rdev->config.rv770.sx_max_export_size = 256;
		rdev->config.rv770.sx_max_export_pos_size = 32;
		rdev->config.rv770.sx_max_export_smx_size = 224;
		rdev->config.rv770.sq_num_cf_insts = 2;

		rdev->config.rv770.sx_num_of_sets = 7;
		rdev->config.rv770.sc_prim_fifo_size = 0xf9;
		rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
		/*
		 * With the values assigned just above (32 / 224) this always
		 * holds, leaving pos_size = 16 and smx_size = 240.
		 */
		if (rdev->config.rv770.sx_max_export_pos_size > 16) {
			rdev->config.rv770.sx_max_export_pos_size -= 16;
			rdev->config.rv770.sx_max_export_smx_size += 16;
		}
		break;
	case CHIP_RV710:
		rdev->config.rv770.max_pipes = 2;
		rdev->config.rv770.max_tile_pipes = 2;
		rdev->config.rv770.max_simds = 2;
		rdev->config.rv770.max_backends = 1;
		rdev->config.rv770.max_gprs = 256;
		rdev->config.rv770.max_threads = 192;
		rdev->config.rv770.max_stack_entries = 256;
		rdev->config.rv770.max_hw_contexts = 4;
		rdev->config.rv770.max_gs_threads = 8 * 2;
		rdev->config.rv770.sx_max_export_size = 128;
		rdev->config.rv770.sx_max_export_pos_size = 16;
		rdev->config.rv770.sx_max_export_smx_size = 112;
		rdev->config.rv770.sq_num_cf_insts = 1;

		rdev->config.rv770.sx_num_of_sets = 7;
		rdev->config.rv770.sc_prim_fifo_size = 0x40;
		rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
		break;
	case CHIP_RV740:
		rdev->config.rv770.max_pipes = 4;
		rdev->config.rv770.max_tile_pipes = 4;
		rdev->config.rv770.max_simds = 8;
		rdev->config.rv770.max_backends = 4;
		rdev->config.rv770.max_gprs = 256;
		rdev->config.rv770.max_threads = 248;
		rdev->config.rv770.max_stack_entries = 512;
		rdev->config.rv770.max_hw_contexts = 8;
		rdev->config.rv770.max_gs_threads = 16 * 2;
		rdev->config.rv770.sx_max_export_size = 256;
		rdev->config.rv770.sx_max_export_pos_size = 32;
		rdev->config.rv770.sx_max_export_smx_size = 224;
		rdev->config.rv770.sq_num_cf_insts = 2;

		rdev->config.rv770.sx_num_of_sets = 7;
		rdev->config.rv770.sc_prim_fifo_size = 0x100;
		rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;

		/* Same adjustment as for RV730: ends up as 16 / 240. */
		if (rdev->config.rv770.sx_max_export_pos_size > 16) {
			rdev->config.rv770.sx_max_export_pos_size -= 16;
			rdev->config.rv770.sx_max_export_smx_size += 16;
		}
		break;
	default:
		/* Unknown family: the limits are left as they were. */
		break;
	}

	/* Initialize HDP: zero 32 groups of five registers, 0x18 apart */
	j = 0;
	for (i = 0; i < 32; i++) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
		j += 0x18;
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	/* setup tiling, simd, pipe config */
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	/* Count the quad pipes that are not flagged inactive. */
	shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG);
	inactive_pipes = (shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> INACTIVE_QD_PIPES_SHIFT;
	for (i = 0, tmp = 1, active_number = 0; i < R7XX_MAX_PIPES; i++) {
		if (!(inactive_pipes & tmp)) {
			active_number++;
		}
		tmp <<= 1;
	}
	/* With a single active pipe, DISABLE_INTERP_1 is set. */
	if (active_number == 1) {
		WREG32(SPI_CONFIG_CNTL, DISABLE_INTERP_1);
	} else {
		WREG32(SPI_CONFIG_CNTL, 0);
	}

	/* Lower the family limits to what the disable masks leave enabled. */
	cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000;
	tmp = R7XX_MAX_BACKENDS - r600_count_pipe_bits(cc_rb_backend_disable >> 16);
	if (tmp < rdev->config.rv770.max_backends) {
		rdev->config.rv770.max_backends = tmp;
	}

	cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;
	tmp = R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config >> 8) & R7XX_MAX_PIPES_MASK);
	if (tmp < rdev->config.rv770.max_pipes) {
		rdev->config.rv770.max_pipes = tmp;
	}
	tmp = R7XX_MAX_SIMDS - r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R7XX_MAX_SIMDS_MASK);
	if (tmp < rdev->config.rv770.max_simds) {
		rdev->config.rv770.max_simds = tmp;
	}

	/* PIPE_TILING field: 0/1/2/3 for 1/2/4/8 tile pipes. */
	switch (rdev->config.rv770.max_tile_pipes) {
	case 1:
	default:
		gb_tiling_config = PIPE_TILING(0);
		break;
	case 2:
		gb_tiling_config = PIPE_TILING(1);
		break;
	case 4:
		gb_tiling_config = PIPE_TILING(2);
		break;
	case 8:
		gb_tiling_config = PIPE_TILING(3);
		break;
	}
	rdev->config.rv770.tiling_npipes = rdev->config.rv770.max_tile_pipes;

	/* The backend map goes into bits 16 and up of the tiling word. */
	disabled_rb_mask = (RREG32(CC_RB_BACKEND_DISABLE) >> 16) & R7XX_MAX_BACKENDS_MASK;
	tmp = (gb_tiling_config & PIPE_TILING__MASK) >> PIPE_TILING__SHIFT;
	tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.rv770.max_backends,
					R7XX_MAX_BACKENDS, disabled_rb_mask);
	gb_tiling_config |= tmp << 16;
	rdev->config.rv770.backend_map = tmp;

	/* RV770 always uses BANK_TILING(1); other chips follow NOOFBANK. */
	if (rdev->family == CHIP_RV770)
		gb_tiling_config |= BANK_TILING(1);
	else {
		if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
			gb_tiling_config |= BANK_TILING(1);
		else
			gb_tiling_config |= BANK_TILING(0);
	}
	rdev->config.rv770.tiling_nbanks = 4 << ((gb_tiling_config >> 4) & 0x3);
	gb_tiling_config |= GROUP_SIZE((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT);
	/* ROW_TILING and SAMPLE_SPLIT take the NOOFROWS field, capped at 3. */
	if (((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT) > 3) {
		gb_tiling_config |= ROW_TILING(3);
		gb_tiling_config |= SAMPLE_SPLIT(3);
	} else {
		gb_tiling_config |=
			ROW_TILING(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT));
		gb_tiling_config |=
			SAMPLE_SPLIT(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT));
	}

	gb_tiling_config |= BANK_SWAPS(1);
	rdev->config.rv770.tile_config = gb_tiling_config;

	/* Only the low 16 bits are passed to the DCP/HDP/DMA copies. */
	WREG32(GB_TILING_CONFIG, gb_tiling_config);
	WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
	WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
	WREG32(DMA_TILING_CONFIG, (gb_tiling_config & 0xffff));
	WREG32(DMA_TILING_CONFIG2, (gb_tiling_config & 0xffff));

	WREG32(CGTS_SYS_TCC_DISABLE, 0);
	WREG32(CGTS_TCC_DISABLE, 0);
	WREG32(CGTS_USER_SYS_TCC_DISABLE, 0);
	WREG32(CGTS_USER_TCC_DISABLE, 0);


	num_qd_pipes = R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8);
	WREG32(VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & DEALLOC_DIST_MASK);
	WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & VTX_REUSE_DEPTH_MASK);

	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));

	WREG32(CP_MEQ_THRESHOLDS, STQ_SPLIT(0x30));

	ta_aux_cntl = RREG32(TA_CNTL_AUX);
	WREG32(TA_CNTL_AUX, ta_aux_cntl | DISABLE_CUBE_ANISO);

	sx_debug_1 = RREG32(SX_DEBUG_1);
	sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
	WREG32(SX_DEBUG_1, sx_debug_1);

	/* Replace the CACHE_DEPTH field with sx_num_of_sets * 64 - 1. */
	smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
	smx_dc_ctl0 &= ~CACHE_DEPTH(0x1ff);
	smx_dc_ctl0 |= CACHE_DEPTH((rdev->config.rv770.sx_num_of_sets * 64) - 1);
	WREG32(SMX_DC_CTL0, smx_dc_ctl0);

	if (rdev->family != CHIP_RV740)
		WREG32(SMX_EVENT_CTL, (ES_FLUSH_CTL(4) |
				       GS_FLUSH_CTL(4) |
				       ACK_FLUSH_CTL(3) |
				       SYNC_FLUSH_CTL));

	if (rdev->family != CHIP_RV770)
		WREG32(SMX_SAR_CTL0, 0x00003f3f);

	/* DB_CLK_OFF_DELAY: 0x1f on RV770/RV740, 2 on everything else. */
	db_debug3 = RREG32(DB_DEBUG3);
	db_debug3 &= ~DB_CLK_OFF_DELAY(0x1f);
	switch (rdev->family) {
	case CHIP_RV770:
	case CHIP_RV740:
		db_debug3 |= DB_CLK_OFF_DELAY(0x1f);
		break;
	case CHIP_RV710:
	case CHIP_RV730:
	default:
		db_debug3 |= DB_CLK_OFF_DELAY(2);
		break;
	}
	WREG32(DB_DEBUG3, db_debug3);

	if (rdev->family != CHIP_RV770) {
		db_debug4 = RREG32(DB_DEBUG4);
		db_debug4 |= DISABLE_TILE_COVERED_FOR_PS_ITER;
		WREG32(DB_DEBUG4, db_debug4);
	}

	/* Each export buffer size is programmed as (size / 4) - 1. */
	WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.rv770.sx_max_export_size / 4) - 1) |
					POSITION_BUFFER_SIZE((rdev->config.rv770.sx_max_export_pos_size / 4) - 1) |
					SMX_BUFFER_SIZE((rdev->config.rv770.sx_max_export_smx_size / 4) - 1)));

	WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.rv770.sc_prim_fifo_size) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_earlyz_tile_fifo_fize)));

	WREG32(PA_SC_MULTI_CHIP_CNTL, 0);

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(CP_PERFMON_CNTL, 0);

	/* FETCH_FIFO_HIWATER: 0x1 on RV770/RV730/RV710, 0x4 otherwise. */
	sq_ms_fifo_sizes = (CACHE_FIFO_SIZE(16 * rdev->config.rv770.sq_num_cf_insts) |
			    DONE_FIFO_HIWATER(0xe0) |
			    ALU_UPDATE_FIFO_HIWATER(0x8));
	switch (rdev->family) {
	case CHIP_RV770:
	case CHIP_RV730:
	case CHIP_RV710:
		sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x1);
		break;
	case CHIP_RV740:
	default:
		sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x4);
		break;
	}
	WREG32(SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);

	/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
	 * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
	 */
	sq_config = RREG32(SQ_CONFIG);
	sq_config &= ~(PS_PRIO(3) |
		       VS_PRIO(3) |
		       GS_PRIO(3) |
		       ES_PRIO(3));
	sq_config |= (DX9_CONSTS |
		      VC_ENABLE |
		      EXPORT_SRC_C |
		      PS_PRIO(0) |
		      VS_PRIO(1) |
		      GS_PRIO(2) |
		      ES_PRIO(3));
	if (rdev->family == CHIP_RV710)
		/* no vertex cache */
		sq_config &= ~VC_ENABLE;

	WREG32(SQ_CONFIG, sq_config);

	/* GPR split: 24/64 of max_gprs each for PS and VS, 7/64 for GS and ES. */
	WREG32(SQ_GPR_RESOURCE_MGMT_1,  (NUM_PS_GPRS((rdev->config.rv770.max_gprs * 24)/64) |
					 NUM_VS_GPRS((rdev->config.rv770.max_gprs * 24)/64) |
					 NUM_CLAUSE_TEMP_GPRS(((rdev->config.rv770.max_gprs * 24)/64)/2)));

	WREG32(SQ_GPR_RESOURCE_MGMT_2,  (NUM_GS_GPRS((rdev->config.rv770.max_gprs * 7)/64) |
					 NUM_ES_GPRS((rdev->config.rv770.max_gprs * 7)/64)));

	/* Thread split: 4/8 of max_threads for PS, 2/8 for VS, 1/8 for ES. */
	sq_thread_resource_mgmt = (NUM_PS_THREADS((rdev->config.rv770.max_threads * 4)/8) |
				   NUM_VS_THREADS((rdev->config.rv770.max_threads * 2)/8) |
				   NUM_ES_THREADS((rdev->config.rv770.max_threads * 1)/8));
	/*
	 * NOTE(review): the else branch divides max_gs_threads by 8, whereas
	 * the condition compares max_threads / 8 against max_gs_threads.
	 * This looks like it may have been meant to use max_threads; confirm
	 * against the hardware documentation before changing it.
	 */
	if (((rdev->config.rv770.max_threads * 1) / 8) > rdev->config.rv770.max_gs_threads)
		sq_thread_resource_mgmt |= NUM_GS_THREADS(rdev->config.rv770.max_gs_threads);
	else
		sq_thread_resource_mgmt |= NUM_GS_THREADS((rdev->config.rv770.max_gs_threads * 1)/8);
	WREG32(SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);

	/* Stack entries: a quarter of max_stack_entries per shader type. */
	WREG32(SQ_STACK_RESOURCE_MGMT_1, (NUM_PS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) |
						     NUM_VS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4)));

	WREG32(SQ_STACK_RESOURCE_MGMT_2, (NUM_GS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) |
						     NUM_ES_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4)));

	/* The same dynamic GPR size word is written to all eight registers. */
	sq_dyn_gpr_size_simd_ab_0 = (SIMDA_RING0((rdev->config.rv770.max_gprs * 38)/64) |
				     SIMDA_RING1((rdev->config.rv770.max_gprs * 38)/64) |
				     SIMDB_RING0((rdev->config.rv770.max_gprs * 38)/64) |
				     SIMDB_RING1((rdev->config.rv770.max_gprs * 38)/64));

	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0);
	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0);
	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0);
	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0);
	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0);
	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0);
	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0);
	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	/* RV710 uses TC_ONLY here (its VC_ENABLE bit is cleared above). */
	if (rdev->family == CHIP_RV710)
		WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(TC_ONLY) |
						AUTO_INVLD_EN(ES_AND_GS_AUTO)));
	else
		WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(VC_AND_TC) |
						AUTO_INVLD_EN(ES_AND_GS_AUTO)));

	/* Stays 0 (its initializer) for families not listed here. */
	switch (rdev->family) {
	case CHIP_RV770:
	case CHIP_RV730:
	case CHIP_RV740:
		gs_prim_buffer_depth = 384;
		break;
	case CHIP_RV710:
		gs_prim_buffer_depth = 128;
		break;
	default:
		break;
	}

	num_gs_verts_per_thread = rdev->config.rv770.max_pipes * 16;
	vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
	/* Max value for this is 256 */
	if (vgt_gs_per_es > 256)
		vgt_gs_per_es = 256;

	WREG32(VGT_ES_PER_GS, 128);
	WREG32(VGT_GS_PER_ES, vgt_gs_per_es);
	WREG32(VGT_GS_PER_VS, 2);

	/* more default values. 2D/3D driver should adjust as needed */
	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
	WREG32(VGT_STRMOUT_EN, 0);
	WREG32(SX_MISC, 0);
	WREG32(PA_SC_MODE_CNTL, 0);
	WREG32(PA_SC_EDGERULE, 0xaaaaaaaa);
	WREG32(PA_SC_AA_CONFIG, 0);
	WREG32(PA_SC_CLIPRECT_RULE, 0xffff);
	WREG32(PA_SC_LINE_STIPPLE, 0);
	WREG32(SPI_INPUT_Z, 0);
	WREG32(SPI_PS_IN_CONTROL_0, NUM_INTERP(2));
	WREG32(CB_COLOR7_FRAG, 0);

	/* clear render buffer base addresses */
	WREG32(CB_COLOR0_BASE, 0);
	WREG32(CB_COLOR1_BASE, 0);
	WREG32(CB_COLOR2_BASE, 0);
	WREG32(CB_COLOR3_BASE, 0);
	WREG32(CB_COLOR4_BASE, 0);
	WREG32(CB_COLOR5_BASE, 0);
	WREG32(CB_COLOR6_BASE, 0);
	WREG32(CB_COLOR7_BASE, 0);

	WREG32(TCP_CNTL, 0);

	/* The value read is written back unmodified. */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	/* Second write of 0 to this register (it is also cleared earlier). */
	WREG32(PA_SC_MULTI_CHIP_CNTL, 0);

	WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA |
					  NUM_CLIP_SEQ(3)));
	WREG32(VC_ENHANCE, 0);
}
808
809void r700_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
810{
811	u64 size_bf, size_af;
812
813	if (mc->mc_vram_size > 0xE0000000) {
814		/* leave room for at least 512M GTT */
815		dev_warn(rdev->dev, "limiting VRAM\n");
816		mc->real_vram_size = 0xE0000000;
817		mc->mc_vram_size = 0xE0000000;
818	}
819	if (rdev->flags & RADEON_IS_AGP) {
820		size_bf = mc->gtt_start;
821		size_af = 0xFFFFFFFF - mc->gtt_end;
822		if (size_bf > size_af) {
823			if (mc->mc_vram_size > size_bf) {
824				dev_warn(rdev->dev, "limiting VRAM\n");
825				mc->real_vram_size = size_bf;
826				mc->mc_vram_size = size_bf;
827			}
828			mc->vram_start = mc->gtt_start - mc->mc_vram_size;
829		} else {
830			if (mc->mc_vram_size > size_af) {
831				dev_warn(rdev->dev, "limiting VRAM\n");
832				mc->real_vram_size = size_af;
833				mc->mc_vram_size = size_af;
834			}
835			mc->vram_start = mc->gtt_end + 1;
836		}
837		mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
838		dev_info(rdev->dev, "VRAM: %juM 0x%08jX - 0x%08jX (%juM used)\n",
839				(uintmax_t)mc->mc_vram_size >> 20, (uintmax_t)mc->vram_start,
840				(uintmax_t)mc->vram_end, (uintmax_t)mc->real_vram_size >> 20);
841	} else {
842		radeon_vram_location(rdev, &rdev->mc, 0);
843		rdev->mc.gtt_base_align = 0;
844		radeon_gtt_location(rdev, mc);
845	}
846}
847
848static int rv770_mc_init(struct radeon_device *rdev)
849{
850	u32 tmp;
851	int chansize, numchan;
852
853	/* Get VRAM informations */
854	rdev->mc.vram_is_ddr = true;
855	tmp = RREG32(MC_ARB_RAMCFG);
856	if (tmp & CHANSIZE_OVERRIDE) {
857		chansize = 16;
858	} else if (tmp & CHANSIZE_MASK) {
859		chansize = 64;
860	} else {
861		chansize = 32;
862	}
863	tmp = RREG32(MC_SHARED_CHMAP);
864	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
865	case 0:
866	default:
867		numchan = 1;
868		break;
869	case 1:
870		numchan = 2;
871		break;
872	case 2:
873		numchan = 4;
874		break;
875	case 3:
876		numchan = 8;
877		break;
878	}
879	rdev->mc.vram_width = numchan * chansize;
880	/* Could aper size report 0 ? */
881	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
882	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
883	/* Setup GPU memory space */
884	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
885	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
886	rdev->mc.visible_vram_size = rdev->mc.aper_size;
887	r700_vram_gtt_location(rdev, &rdev->mc);
888	radeon_update_bandwidth_info(rdev);
889
890	return 0;
891}
892
893/**
894 * rv770_copy_dma - copy pages using the DMA engine
895 *
896 * @rdev: radeon_device pointer
897 * @src_offset: src GPU address
898 * @dst_offset: dst GPU address
899 * @num_gpu_pages: number of GPU pages to xfer
900 * @fence: radeon fence object
901 *
902 * Copy GPU paging using the DMA engine (r7xx).
903 * Used by the radeon ttm implementation to move pages if
904 * registered as the asic copy callback.
905 */
906int rv770_copy_dma(struct radeon_device *rdev,
907		  uint64_t src_offset, uint64_t dst_offset,
908		  unsigned num_gpu_pages,
909		  struct radeon_fence **fence)
910{
911	struct radeon_semaphore *sem = NULL;
912	int ring_index = rdev->asic->copy.dma_ring_index;
913	struct radeon_ring *ring = &rdev->ring[ring_index];
914	u32 size_in_dw, cur_size_in_dw;
915	int i, num_loops;
916	int r = 0;
917
918	r = radeon_semaphore_create(rdev, &sem);
919	if (r) {
920		DRM_ERROR("radeon: moving bo (%d).\n", r);
921		return r;
922	}
923
924	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
925	num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF);
926	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8);
927	if (r) {
928		DRM_ERROR("radeon: moving bo (%d).\n", r);
929		radeon_semaphore_free(rdev, &sem, NULL);
930		return r;
931	}
932
933	if (radeon_fence_need_sync(*fence, ring->idx)) {
934		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
935					    ring->idx);
936		radeon_fence_note_sync(*fence, ring->idx);
937	} else {
938		radeon_semaphore_free(rdev, &sem, NULL);
939	}
940
941	for (i = 0; i < num_loops; i++) {
942		cur_size_in_dw = size_in_dw;
943		if (cur_size_in_dw > 0xFFFF)
944			cur_size_in_dw = 0xFFFF;
945		size_in_dw -= cur_size_in_dw;
946		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
947		radeon_ring_write(ring, dst_offset & 0xfffffffc);
948		radeon_ring_write(ring, src_offset & 0xfffffffc);
949		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
950		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
951		src_offset += cur_size_in_dw * 4;
952		dst_offset += cur_size_in_dw * 4;
953	}
954
955	r = radeon_fence_emit(rdev, fence, ring->idx);
956	if (r) {
957		radeon_ring_unlock_undo(rdev, ring);
958		return r;
959	}
960
961	radeon_ring_unlock_commit(rdev, ring);
962	radeon_semaphore_free(rdev, &sem, *fence);
963
964	return r;
965}
966
967static int rv770_startup(struct radeon_device *rdev)
968{
969	struct radeon_ring *ring;
970	int r;
971
972	/* enable pcie gen2 link */
973	rv770_pcie_gen2_enable(rdev);
974
975	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
976		r = r600_init_microcode(rdev);
977		if (r) {
978			DRM_ERROR("Failed to load firmware!\n");
979			return r;
980		}
981	}
982
983	r = r600_vram_scratch_init(rdev);
984	if (r)
985		return r;
986
987	rv770_mc_program(rdev);
988	if (rdev->flags & RADEON_IS_AGP) {
989		rv770_agp_enable(rdev);
990	} else {
991		r = rv770_pcie_gart_enable(rdev);
992		if (r)
993			return r;
994	}
995
996	rv770_gpu_init(rdev);
997	r = r600_blit_init(rdev);
998	if (r) {
999		r600_blit_fini(rdev);
1000		rdev->asic->copy.copy = NULL;
1001		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
1002	}
1003
1004	/* allocate wb buffer */
1005	r = radeon_wb_init(rdev);
1006	if (r)
1007		return r;
1008
1009	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
1010	if (r) {
1011		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
1012		return r;
1013	}
1014
1015	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
1016	if (r) {
1017		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
1018		return r;
1019	}
1020
1021	/* Enable IRQ */
1022	r = r600_irq_init(rdev);
1023	if (r) {
1024		DRM_ERROR("radeon: IH init failed (%d).\n", r);
1025		radeon_irq_kms_fini(rdev);
1026		return r;
1027	}
1028	r600_irq_set(rdev);
1029
1030	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1031	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
1032			     R600_CP_RB_RPTR, R600_CP_RB_WPTR,
1033			     0, 0xfffff, RADEON_CP_PACKET2);
1034	if (r)
1035		return r;
1036
1037	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
1038	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
1039			     DMA_RB_RPTR, DMA_RB_WPTR,
1040			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
1041	if (r)
1042		return r;
1043
1044	r = rv770_cp_load_microcode(rdev);
1045	if (r)
1046		return r;
1047	r = r600_cp_resume(rdev);
1048	if (r)
1049		return r;
1050
1051	r = r600_dma_resume(rdev);
1052	if (r)
1053		return r;
1054
1055	r = radeon_ib_pool_init(rdev);
1056	if (r) {
1057		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
1058		return r;
1059	}
1060
1061	r = r600_audio_init(rdev);
1062	if (r) {
1063		DRM_ERROR("radeon: audio init failed\n");
1064		return r;
1065	}
1066
1067	return 0;
1068}
1069
1070int rv770_resume(struct radeon_device *rdev)
1071{
1072	int r;
1073
1074	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
1075	 * posting will perform necessary task to bring back GPU into good
1076	 * shape.
1077	 */
1078	/* post card */
1079	atom_asic_init(rdev->mode_info.atom_context);
1080
1081	rdev->accel_working = true;
1082	r = rv770_startup(rdev);
1083	if (r) {
1084		DRM_ERROR("r600 startup failed on resume\n");
1085		rdev->accel_working = false;
1086		return r;
1087	}
1088
1089	return r;
1090
1091}
1092
/*
 * Suspend entry point: quiesce the hardware blocks in a fixed order.
 * Nothing is freed here.  Always returns 0.
 */
int rv770_suspend(struct radeon_device *rdev)
{
	/* Audio first, then the command processor and the DMA engine. */
	r600_audio_fini(rdev);
	r700_cp_stop(rdev);
	r600_dma_stop(rdev);

	/* Interrupts, writeback and finally the PCIE GART. */
	r600_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	rv770_pcie_gart_disable(rdev);

	return 0;
}
1104
1105/* Plan is to move initialization in that function and use
1106 * helper function so that radeon_device_init pretty much
1107 * do nothing more than calling asic specific function. This
1108 * should also allow to remove a bunch of callback function
1109 * like vram_info.
1110 */
1111int rv770_init(struct radeon_device *rdev)
1112{
1113	int r;
1114
1115	/* Read BIOS */
1116	if (!radeon_get_bios(rdev)) {
1117		if (ASIC_IS_AVIVO(rdev))
1118			return -EINVAL;
1119	}
1120	/* Must be an ATOMBIOS */
1121	if (!rdev->is_atom_bios) {
1122		dev_err(rdev->dev, "Expecting atombios for R600 GPU\n");
1123		return -EINVAL;
1124	}
1125	r = radeon_atombios_init(rdev);
1126	if (r)
1127		return r;
1128	/* Post card if necessary */
1129	if (!radeon_card_posted(rdev)) {
1130		if (!rdev->bios) {
1131			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
1132			return -EINVAL;
1133		}
1134		DRM_INFO("GPU not posted. posting now...\n");
1135		atom_asic_init(rdev->mode_info.atom_context);
1136	}
1137	/* Initialize scratch registers */
1138	r600_scratch_init(rdev);
1139	/* Initialize surface registers */
1140	radeon_surface_init(rdev);
1141	/* Initialize clocks */
1142	radeon_get_clock_info(rdev->ddev);
1143	/* Fence driver */
1144	r = radeon_fence_driver_init(rdev);
1145	if (r)
1146		return r;
1147	/* initialize AGP */
1148	if (rdev->flags & RADEON_IS_AGP) {
1149		r = radeon_agp_init(rdev);
1150		if (r)
1151			radeon_agp_disable(rdev);
1152	}
1153	r = rv770_mc_init(rdev);
1154	if (r)
1155		return r;
1156	/* Memory manager */
1157	r = radeon_bo_init(rdev);
1158	if (r)
1159		return r;
1160
1161	r = radeon_irq_kms_init(rdev);
1162	if (r)
1163		return r;
1164
1165	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
1166	r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
1167
1168	rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
1169	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
1170
1171	rdev->ih.ring_obj = NULL;
1172	r600_ih_ring_init(rdev, 64 * 1024);
1173
1174	r = r600_pcie_gart_init(rdev);
1175	if (r)
1176		return r;
1177
1178	rdev->accel_working = true;
1179	r = rv770_startup(rdev);
1180	if (r) {
1181		dev_err(rdev->dev, "disabling GPU acceleration\n");
1182		r700_cp_fini(rdev);
1183		r600_dma_fini(rdev);
1184		r600_irq_fini(rdev);
1185		radeon_wb_fini(rdev);
1186		radeon_ib_pool_fini(rdev);
1187		radeon_irq_kms_fini(rdev);
1188		rv770_pcie_gart_fini(rdev);
1189		rdev->accel_working = false;
1190	}
1191
1192	/* Don't start up if the ucode is missing. */
1193	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
1194		DRM_ERROR("radeon: ucode required for R600+.\n");
1195		return -EINVAL;
1196	}
1197
1198	return 0;
1199}
1200
1201void rv770_fini(struct radeon_device *rdev)
1202{
1203	r600_blit_fini(rdev);
1204	r700_cp_fini(rdev);
1205	r600_dma_fini(rdev);
1206	r600_irq_fini(rdev);
1207	radeon_wb_fini(rdev);
1208	radeon_ib_pool_fini(rdev);
1209	radeon_irq_kms_fini(rdev);
1210	rv770_pcie_gart_fini(rdev);
1211	r600_vram_scratch_fini(rdev);
1212	radeon_gem_fini(rdev);
1213	radeon_fence_driver_fini(rdev);
1214	radeon_agp_fini(rdev);
1215	radeon_bo_fini(rdev);
1216	radeon_atombios_fini(rdev);
1217	r600_fini_microcode(rdev);
1218	free(rdev->bios, DRM_MEM_DRIVER);
1219	rdev->bios = NULL;
1220}
1221
1222static void rv770_pcie_gen2_enable(struct radeon_device *rdev)
1223{
1224	u32 link_width_cntl, lanes, speed_cntl, tmp;
1225	u16 link_cntl2;
1226	u32 mask;
1227	int ret;
1228
1229	if (radeon_pcie_gen2 == 0)
1230		return;
1231
1232	if (rdev->flags & RADEON_IS_IGP)
1233		return;
1234
1235	if (!(rdev->flags & RADEON_IS_PCIE))
1236		return;
1237
1238	/* x2 cards have a special sequence */
1239	if (ASIC_IS_X2(rdev))
1240		return;
1241
1242	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
1243	if (ret != 0)
1244		return;
1245
1246	if (!(mask & DRM_PCIE_SPEED_50))
1247		return;
1248
1249	DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
1250
1251	/* advertise upconfig capability */
1252	link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL);
1253	link_width_cntl &= ~LC_UPCONFIGURE_DIS;
1254	WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
1255	link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL);
1256	if (link_width_cntl & LC_RENEGOTIATION_SUPPORT) {
1257		lanes = (link_width_cntl & LC_LINK_WIDTH_RD_MASK) >> LC_LINK_WIDTH_RD_SHIFT;
1258		link_width_cntl &= ~(LC_LINK_WIDTH_MASK |
1259				     LC_RECONFIG_ARC_MISSING_ESCAPE);
1260		link_width_cntl |= lanes | LC_RECONFIG_NOW |
1261			LC_RENEGOTIATE_EN | LC_UPCONFIGURE_SUPPORT;
1262		WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
1263	} else {
1264		link_width_cntl |= LC_UPCONFIGURE_DIS;
1265		WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
1266	}
1267
1268	speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
1269	if ((speed_cntl & LC_OTHER_SIDE_EVER_SENT_GEN2) &&
1270	    (speed_cntl & LC_OTHER_SIDE_SUPPORTS_GEN2)) {
1271
1272		tmp = RREG32(0x541c);
1273		WREG32(0x541c, tmp | 0x8);
1274		WREG32(MM_CFGREGS_CNTL, MM_WR_TO_CFG_EN);
1275		link_cntl2 = RREG16(0x4088);
1276		link_cntl2 &= ~TARGET_LINK_SPEED_MASK;
1277		link_cntl2 |= 0x2;
1278		WREG16(0x4088, link_cntl2);
1279		WREG32(MM_CFGREGS_CNTL, 0);
1280
1281		speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
1282		speed_cntl &= ~LC_TARGET_LINK_SPEED_OVERRIDE_EN;
1283		WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
1284
1285		speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
1286		speed_cntl |= LC_CLR_FAILED_SPD_CHANGE_CNT;
1287		WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
1288
1289		speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
1290		speed_cntl &= ~LC_CLR_FAILED_SPD_CHANGE_CNT;
1291		WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
1292
1293		speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
1294		speed_cntl |= LC_GEN2_EN_STRAP;
1295		WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
1296
1297	} else {
1298		link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL);
1299		/* XXX: only disable it if gen1 bridge vendor == 0x111d or 0x1106 */
1300		if (1)
1301			link_width_cntl |= LC_UPCONFIGURE_DIS;
1302		else
1303			link_width_cntl &= ~LC_UPCONFIGURE_DIS;
1304		WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
1305	}
1306}
1307