r600_blit.c revision 261455
1/*
2 * Copyright 2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 *     Alex Deucher <alexander.deucher@amd.com>
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: stable/10/sys/dev/drm2/radeon/r600_blit.c 261455 2014-02-04 03:36:42Z eadler $");
29
30#include <dev/drm2/drmP.h>
31#include <dev/drm2/radeon/radeon_drm.h>
32#include "radeon_drv.h"
33
34#include "r600_blit_shaders.h"
35
/* Draw-initiator values emitted by draw_auto(). */
#define DI_PT_RECTLIST        0x11
#define DI_INDEX_SIZE_16_BIT  0x0
#define DI_SRC_SEL_AUTO_INDEX 0x2

/*
 * Surface formats.  FMT_* values are handed to set_tex_resource() for the
 * source texture and COLOR_* values to set_render_target() for the
 * destination; each FMT_/COLOR_ pair carries the same numeric value.
 */
#define FMT_8                 0x1
#define FMT_5_6_5             0x8
#define FMT_8_8_8_8           0x1a
#define COLOR_8               0x1
#define COLOR_5_6_5           0x8
#define COLOR_8_8_8_8         0x1a
46
47static void
48set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
49{
50	u32 cb_color_info;
51	int pitch, slice;
52	RING_LOCALS;
53	DRM_DEBUG("\n");
54
55	h = roundup2(h, 8);
56	if (h < 8)
57		h = 8;
58
59	cb_color_info = ((format << 2) | (1 << 27));
60	pitch = (w / 8) - 1;
61	slice = ((w * h) / 64) - 1;
62
63	if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) &&
64	    ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) {
65		BEGIN_RING(21 + 2);
66		OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
67		OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
68		OUT_RING(gpu_addr >> 8);
69		OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
70		OUT_RING(2 << 0);
71	} else {
72		BEGIN_RING(21);
73		OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
74		OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
75		OUT_RING(gpu_addr >> 8);
76	}
77
78	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
79	OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
80	OUT_RING((pitch << 0) | (slice << 10));
81
82	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
83	OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2);
84	OUT_RING(0);
85
86	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
87	OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2);
88	OUT_RING(cb_color_info);
89
90	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
91	OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
92	OUT_RING(0);
93
94	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
95	OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2);
96	OUT_RING(0);
97
98	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
99	OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2);
100	OUT_RING(0);
101
102	ADVANCE_RING();
103}
104
105static void
106cp_set_surface_sync(drm_radeon_private_t *dev_priv,
107		    u32 sync_type, u32 size, u64 mc_addr)
108{
109	u32 cp_coher_size;
110	RING_LOCALS;
111	DRM_DEBUG("\n");
112
113	if (size == 0xffffffff)
114		cp_coher_size = 0xffffffff;
115	else
116		cp_coher_size = ((size + 255) >> 8);
117
118	BEGIN_RING(5);
119	OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
120	OUT_RING(sync_type);
121	OUT_RING(cp_coher_size);
122	OUT_RING((mc_addr >> 8));
123	OUT_RING(10); /* poll interval */
124	ADVANCE_RING();
125}
126
127static void
128set_shaders(struct drm_device *dev)
129{
130	drm_radeon_private_t *dev_priv = dev->dev_private;
131	u64 gpu_addr;
132	int i;
133	u32 *vs, *ps;
134	uint32_t sq_pgm_resources;
135	RING_LOCALS;
136	DRM_DEBUG("\n");
137
138	/* load shaders */
139	vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset);
140	ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256);
141
142	for (i = 0; i < r6xx_vs_size; i++)
143		vs[i] = cpu_to_le32(r6xx_vs[i]);
144	for (i = 0; i < r6xx_ps_size; i++)
145		ps[i] = cpu_to_le32(r6xx_ps[i]);
146
147	dev_priv->blit_vb->used = 512;
148
149	gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset;
150
151	/* setup shader regs */
152	sq_pgm_resources = (1 << 0);
153
154	BEGIN_RING(9 + 12);
155	/* VS */
156	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
157	OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
158	OUT_RING(gpu_addr >> 8);
159
160	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
161	OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
162	OUT_RING(sq_pgm_resources);
163
164	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
165	OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
166	OUT_RING(0);
167
168	/* PS */
169	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
170	OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
171	OUT_RING((gpu_addr + 256) >> 8);
172
173	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
174	OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
175	OUT_RING(sq_pgm_resources | (1 << 28));
176
177	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
178	OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
179	OUT_RING(2);
180
181	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
182	OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
183	OUT_RING(0);
184	ADVANCE_RING();
185
186	cp_set_surface_sync(dev_priv,
187			    R600_SH_ACTION_ENA, 512, gpu_addr);
188}
189
190static void
191set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
192{
193	uint32_t sq_vtx_constant_word2;
194	RING_LOCALS;
195	DRM_DEBUG("\n");
196
197	sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8));
198#ifdef __BIG_ENDIAN
199	sq_vtx_constant_word2 |= (2U << 30);
200#endif
201
202	BEGIN_RING(9);
203	OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
204	OUT_RING(0x460);
205	OUT_RING(gpu_addr & 0xffffffff);
206	OUT_RING(48 - 1);
207	OUT_RING(sq_vtx_constant_word2);
208	OUT_RING(1 << 0);
209	OUT_RING(0);
210	OUT_RING(0);
211	OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30);
212	ADVANCE_RING();
213
214	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
215	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
216	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
217	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
218	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
219		cp_set_surface_sync(dev_priv,
220				    R600_TC_ACTION_ENA, 48, gpu_addr);
221	else
222		cp_set_surface_sync(dev_priv,
223				    R600_VC_ACTION_ENA, 48, gpu_addr);
224}
225
226static void
227set_tex_resource(drm_radeon_private_t *dev_priv,
228		 int format, int w, int h, int pitch, u64 gpu_addr)
229{
230	uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
231	RING_LOCALS;
232	DRM_DEBUG("\n");
233
234	if (h < 1)
235		h = 1;
236
237	sq_tex_resource_word0 = (1 << 0);
238	sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
239				  ((w - 1) << 19));
240
241	sq_tex_resource_word1 = (format << 26);
242	sq_tex_resource_word1 |= ((h - 1) << 0);
243
244	sq_tex_resource_word4 = ((1 << 14) |
245				 (0 << 16) |
246				 (1 << 19) |
247				 (2 << 22) |
248				 (3 << 25));
249
250	BEGIN_RING(9);
251	OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
252	OUT_RING(0);
253	OUT_RING(sq_tex_resource_word0);
254	OUT_RING(sq_tex_resource_word1);
255	OUT_RING(gpu_addr >> 8);
256	OUT_RING(gpu_addr >> 8);
257	OUT_RING(sq_tex_resource_word4);
258	OUT_RING(0);
259	OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30);
260	ADVANCE_RING();
261
262}
263
264static void
265set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
266{
267	RING_LOCALS;
268	DRM_DEBUG("\n");
269
270	BEGIN_RING(12);
271	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
272	OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
273	OUT_RING((x1 << 0) | (y1 << 16));
274	OUT_RING((x2 << 0) | (y2 << 16));
275
276	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
277	OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
278	OUT_RING((x1 << 0) | (y1 << 16) | (1U << 31));
279	OUT_RING((x2 << 0) | (y2 << 16));
280
281	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
282	OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
283	OUT_RING((x1 << 0) | (y1 << 16) | (1U << 31));
284	OUT_RING((x2 << 0) | (y2 << 16));
285	ADVANCE_RING();
286}
287
288static void
289draw_auto(drm_radeon_private_t *dev_priv)
290{
291	RING_LOCALS;
292	DRM_DEBUG("\n");
293
294	BEGIN_RING(10);
295	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
296	OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2);
297	OUT_RING(DI_PT_RECTLIST);
298
299	OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
300#ifdef __BIG_ENDIAN
301	OUT_RING((2 << 2) | DI_INDEX_SIZE_16_BIT);
302#else
303	OUT_RING(DI_INDEX_SIZE_16_BIT);
304#endif
305
306	OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
307	OUT_RING(1);
308
309	OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
310	OUT_RING(3);
311	OUT_RING(DI_SRC_SEL_AUTO_INDEX);
312
313	ADVANCE_RING();
314	COMMIT_RING();
315}
316
317static void
318set_default_state(drm_radeon_private_t *dev_priv)
319{
320	int i;
321	u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
322	u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
323	int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
324	int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
325	int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
326	RING_LOCALS;
327
328	switch ((dev_priv->flags & RADEON_FAMILY_MASK)) {
329	case CHIP_R600:
330		num_ps_gprs = 192;
331		num_vs_gprs = 56;
332		num_temp_gprs = 4;
333		num_gs_gprs = 0;
334		num_es_gprs = 0;
335		num_ps_threads = 136;
336		num_vs_threads = 48;
337		num_gs_threads = 4;
338		num_es_threads = 4;
339		num_ps_stack_entries = 128;
340		num_vs_stack_entries = 128;
341		num_gs_stack_entries = 0;
342		num_es_stack_entries = 0;
343		break;
344	case CHIP_RV630:
345	case CHIP_RV635:
346		num_ps_gprs = 84;
347		num_vs_gprs = 36;
348		num_temp_gprs = 4;
349		num_gs_gprs = 0;
350		num_es_gprs = 0;
351		num_ps_threads = 144;
352		num_vs_threads = 40;
353		num_gs_threads = 4;
354		num_es_threads = 4;
355		num_ps_stack_entries = 40;
356		num_vs_stack_entries = 40;
357		num_gs_stack_entries = 32;
358		num_es_stack_entries = 16;
359		break;
360	case CHIP_RV610:
361	case CHIP_RV620:
362	case CHIP_RS780:
363	case CHIP_RS880:
364	default:
365		num_ps_gprs = 84;
366		num_vs_gprs = 36;
367		num_temp_gprs = 4;
368		num_gs_gprs = 0;
369		num_es_gprs = 0;
370		num_ps_threads = 136;
371		num_vs_threads = 48;
372		num_gs_threads = 4;
373		num_es_threads = 4;
374		num_ps_stack_entries = 40;
375		num_vs_stack_entries = 40;
376		num_gs_stack_entries = 32;
377		num_es_stack_entries = 16;
378		break;
379	case CHIP_RV670:
380		num_ps_gprs = 144;
381		num_vs_gprs = 40;
382		num_temp_gprs = 4;
383		num_gs_gprs = 0;
384		num_es_gprs = 0;
385		num_ps_threads = 136;
386		num_vs_threads = 48;
387		num_gs_threads = 4;
388		num_es_threads = 4;
389		num_ps_stack_entries = 40;
390		num_vs_stack_entries = 40;
391		num_gs_stack_entries = 32;
392		num_es_stack_entries = 16;
393		break;
394	case CHIP_RV770:
395		num_ps_gprs = 192;
396		num_vs_gprs = 56;
397		num_temp_gprs = 4;
398		num_gs_gprs = 0;
399		num_es_gprs = 0;
400		num_ps_threads = 188;
401		num_vs_threads = 60;
402		num_gs_threads = 0;
403		num_es_threads = 0;
404		num_ps_stack_entries = 256;
405		num_vs_stack_entries = 256;
406		num_gs_stack_entries = 0;
407		num_es_stack_entries = 0;
408		break;
409	case CHIP_RV730:
410	case CHIP_RV740:
411		num_ps_gprs = 84;
412		num_vs_gprs = 36;
413		num_temp_gprs = 4;
414		num_gs_gprs = 0;
415		num_es_gprs = 0;
416		num_ps_threads = 188;
417		num_vs_threads = 60;
418		num_gs_threads = 0;
419		num_es_threads = 0;
420		num_ps_stack_entries = 128;
421		num_vs_stack_entries = 128;
422		num_gs_stack_entries = 0;
423		num_es_stack_entries = 0;
424		break;
425	case CHIP_RV710:
426		num_ps_gprs = 192;
427		num_vs_gprs = 56;
428		num_temp_gprs = 4;
429		num_gs_gprs = 0;
430		num_es_gprs = 0;
431		num_ps_threads = 144;
432		num_vs_threads = 48;
433		num_gs_threads = 0;
434		num_es_threads = 0;
435		num_ps_stack_entries = 128;
436		num_vs_stack_entries = 128;
437		num_gs_stack_entries = 0;
438		num_es_stack_entries = 0;
439		break;
440	}
441
442	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
443	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
444	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
445	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
446	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
447		sq_config = 0;
448	else
449		sq_config = R600_VC_ENABLE;
450
451	sq_config |= (R600_DX9_CONSTS |
452		      R600_ALU_INST_PREFER_VECTOR |
453		      R600_PS_PRIO(0) |
454		      R600_VS_PRIO(1) |
455		      R600_GS_PRIO(2) |
456		      R600_ES_PRIO(3));
457
458	sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) |
459				  R600_NUM_VS_GPRS(num_vs_gprs) |
460				  R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
461	sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) |
462				  R600_NUM_ES_GPRS(num_es_gprs));
463	sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) |
464				   R600_NUM_VS_THREADS(num_vs_threads) |
465				   R600_NUM_GS_THREADS(num_gs_threads) |
466				   R600_NUM_ES_THREADS(num_es_threads));
467	sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
468				    R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
469	sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
470				    R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries));
471
472	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {
473		BEGIN_RING(r7xx_default_size + 10);
474		for (i = 0; i < r7xx_default_size; i++)
475			OUT_RING(r7xx_default_state[i]);
476	} else {
477		BEGIN_RING(r6xx_default_size + 10);
478		for (i = 0; i < r6xx_default_size; i++)
479			OUT_RING(r6xx_default_state[i]);
480	}
481	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
482	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
483	/* SQ config */
484	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6));
485	OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2);
486	OUT_RING(sq_config);
487	OUT_RING(sq_gpr_resource_mgmt_1);
488	OUT_RING(sq_gpr_resource_mgmt_2);
489	OUT_RING(sq_thread_resource_mgmt);
490	OUT_RING(sq_stack_resource_mgmt_1);
491	OUT_RING(sq_stack_resource_mgmt_2);
492	ADVANCE_RING();
493}
494
495/* 23 bits of float fractional data */
496#define I2F_FRAC_BITS  23
497#define I2F_MASK ((1 << I2F_FRAC_BITS) - 1)
498
499/*
500 * Converts unsigned integer into 32-bit IEEE floating point representation.
501 * Will be exact from 0 to 2^24.  Above that, we round towards zero
502 * as the fractional bits will not fit in a float.  (It would be better to
503 * round towards even as the fpu does, but that is slower.)
504 */
505__pure uint32_t int2float(uint32_t x)
506{
507	uint32_t msb, exponent, fraction;
508
509	/* Zero is special */
510	if (!x) return 0;
511
512	/* Get location of the most significant bit */
513	msb = fls(x);
514
515	/*
516	 * Use a rotate instead of a shift because that works both leftwards
517	 * and rightwards due to the mod(32) behaviour.  This means we don't
518	 * need to check to see if we are above 2^24 or not.
519	 */
520	fraction = ror32(x, (msb - I2F_FRAC_BITS) & 0x1f) & I2F_MASK;
521	exponent = (127 + msb) << I2F_FRAC_BITS;
522
523	return fraction + exponent;
524}
525
526static int r600_nomm_get_vb(struct drm_device *dev)
527{
528	drm_radeon_private_t *dev_priv = dev->dev_private;
529	dev_priv->blit_vb = radeon_freelist_get(dev);
530	if (!dev_priv->blit_vb) {
531		DRM_ERROR("Unable to allocate vertex buffer for blit\n");
532		return -EAGAIN;
533	}
534	return 0;
535}
536
537static void r600_nomm_put_vb(struct drm_device *dev)
538{
539	drm_radeon_private_t *dev_priv = dev->dev_private;
540
541	dev_priv->blit_vb->used = 0;
542	radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->masterp, dev_priv->blit_vb);
543}
544
545static void *r600_nomm_get_vb_ptr(struct drm_device *dev)
546{
547	drm_radeon_private_t *dev_priv = dev->dev_private;
548	return (((char *)dev->agp_buffer_map->handle +
549		 dev_priv->blit_vb->offset + dev_priv->blit_vb->used));
550}
551
552int
553r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv)
554{
555	drm_radeon_private_t *dev_priv = dev->dev_private;
556	int ret;
557	DRM_DEBUG("\n");
558
559	ret = r600_nomm_get_vb(dev);
560	if (ret)
561		return ret;
562
563	dev_priv->blit_vb->file_priv = file_priv;
564
565	set_default_state(dev_priv);
566	set_shaders(dev);
567
568	return 0;
569}
570
571
572void
573r600_done_blit_copy(struct drm_device *dev)
574{
575	drm_radeon_private_t *dev_priv = dev->dev_private;
576	RING_LOCALS;
577	DRM_DEBUG("\n");
578
579	BEGIN_RING(5);
580	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
581	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
582	/* wait for 3D idle clean */
583	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
584	OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
585	OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);
586
587	ADVANCE_RING();
588	COMMIT_RING();
589
590	r600_nomm_put_vb(dev);
591}
592
593void
594r600_blit_copy(struct drm_device *dev,
595	       uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
596	       int size_bytes)
597{
598	drm_radeon_private_t *dev_priv = dev->dev_private;
599	int max_bytes;
600	u64 vb_addr;
601	u32 *vb;
602
603	vb = r600_nomm_get_vb_ptr(dev);
604
605	if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
606		max_bytes = 8192;
607
608		while (size_bytes) {
609			int cur_size = size_bytes;
610			int src_x = src_gpu_addr & 255;
611			int dst_x = dst_gpu_addr & 255;
612			int h = 1;
613			src_gpu_addr = src_gpu_addr & ~255;
614			dst_gpu_addr = dst_gpu_addr & ~255;
615
616			if (!src_x && !dst_x) {
617				h = (cur_size / max_bytes);
618				if (h > 8192)
619					h = 8192;
620				if (h == 0)
621					h = 1;
622				else
623					cur_size = max_bytes;
624			} else {
625				if (cur_size > max_bytes)
626					cur_size = max_bytes;
627				if (cur_size > (max_bytes - dst_x))
628					cur_size = (max_bytes - dst_x);
629				if (cur_size > (max_bytes - src_x))
630					cur_size = (max_bytes - src_x);
631			}
632
633			if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
634
635				r600_nomm_put_vb(dev);
636				r600_nomm_get_vb(dev);
637				if (!dev_priv->blit_vb)
638					return;
639				set_shaders(dev);
640				vb = r600_nomm_get_vb_ptr(dev);
641			}
642
643			vb[0] = int2float(dst_x);
644			vb[1] = 0;
645			vb[2] = int2float(src_x);
646			vb[3] = 0;
647
648			vb[4] = int2float(dst_x);
649			vb[5] = int2float(h);
650			vb[6] = int2float(src_x);
651			vb[7] = int2float(h);
652
653			vb[8] = int2float(dst_x + cur_size);
654			vb[9] = int2float(h);
655			vb[10] = int2float(src_x + cur_size);
656			vb[11] = int2float(h);
657
658			/* src */
659			set_tex_resource(dev_priv, FMT_8,
660					 src_x + cur_size, h, src_x + cur_size,
661					 src_gpu_addr);
662
663			cp_set_surface_sync(dev_priv,
664					    R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
665
666			/* dst */
667			set_render_target(dev_priv, COLOR_8,
668					  dst_x + cur_size, h,
669					  dst_gpu_addr);
670
671			/* scissors */
672			set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h);
673
674			/* Vertex buffer setup */
675			vb_addr = dev_priv->gart_buffers_offset +
676				dev_priv->blit_vb->offset +
677				dev_priv->blit_vb->used;
678			set_vtx_resource(dev_priv, vb_addr);
679
680			/* draw */
681			draw_auto(dev_priv);
682
683			cp_set_surface_sync(dev_priv,
684					    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
685					    cur_size * h, dst_gpu_addr);
686
687			vb += 12;
688			dev_priv->blit_vb->used += 12 * 4;
689
690			src_gpu_addr += cur_size * h;
691			dst_gpu_addr += cur_size * h;
692			size_bytes -= cur_size * h;
693		}
694	} else {
695		max_bytes = 8192 * 4;
696
697		while (size_bytes) {
698			int cur_size = size_bytes;
699			int src_x = (src_gpu_addr & 255);
700			int dst_x = (dst_gpu_addr & 255);
701			int h = 1;
702			src_gpu_addr = src_gpu_addr & ~255;
703			dst_gpu_addr = dst_gpu_addr & ~255;
704
705			if (!src_x && !dst_x) {
706				h = (cur_size / max_bytes);
707				if (h > 8192)
708					h = 8192;
709				if (h == 0)
710					h = 1;
711				else
712					cur_size = max_bytes;
713			} else {
714				if (cur_size > max_bytes)
715					cur_size = max_bytes;
716				if (cur_size > (max_bytes - dst_x))
717					cur_size = (max_bytes - dst_x);
718				if (cur_size > (max_bytes - src_x))
719					cur_size = (max_bytes - src_x);
720			}
721
722			if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
723				r600_nomm_put_vb(dev);
724				r600_nomm_get_vb(dev);
725				if (!dev_priv->blit_vb)
726					return;
727
728				set_shaders(dev);
729				vb = r600_nomm_get_vb_ptr(dev);
730			}
731
732			vb[0] = int2float(dst_x / 4);
733			vb[1] = 0;
734			vb[2] = int2float(src_x / 4);
735			vb[3] = 0;
736
737			vb[4] = int2float(dst_x / 4);
738			vb[5] = int2float(h);
739			vb[6] = int2float(src_x / 4);
740			vb[7] = int2float(h);
741
742			vb[8] = int2float((dst_x + cur_size) / 4);
743			vb[9] = int2float(h);
744			vb[10] = int2float((src_x + cur_size) / 4);
745			vb[11] = int2float(h);
746
747			/* src */
748			set_tex_resource(dev_priv, FMT_8_8_8_8,
749					 (src_x + cur_size) / 4,
750					 h, (src_x + cur_size) / 4,
751					 src_gpu_addr);
752
753			cp_set_surface_sync(dev_priv,
754					    R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
755
756			/* dst */
757			set_render_target(dev_priv, COLOR_8_8_8_8,
758					  (dst_x + cur_size) / 4, h,
759					  dst_gpu_addr);
760
761			/* scissors */
762			set_scissors(dev_priv, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
763
764			/* Vertex buffer setup */
765			vb_addr = dev_priv->gart_buffers_offset +
766				dev_priv->blit_vb->offset +
767				dev_priv->blit_vb->used;
768			set_vtx_resource(dev_priv, vb_addr);
769
770			/* draw */
771			draw_auto(dev_priv);
772
773			cp_set_surface_sync(dev_priv,
774					    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
775					    cur_size * h, dst_gpu_addr);
776
777			vb += 12;
778			dev_priv->blit_vb->used += 12 * 4;
779
780			src_gpu_addr += cur_size * h;
781			dst_gpu_addr += cur_size * h;
782			size_bytes -= cur_size * h;
783		}
784	}
785}
786
787void
788r600_blit_swap(struct drm_device *dev,
789	       uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
790	       int sx, int sy, int dx, int dy,
791	       int w, int h, int src_pitch, int dst_pitch, int cpp)
792{
793	drm_radeon_private_t *dev_priv = dev->dev_private;
794	int cb_format, tex_format;
795	int sx2, sy2, dx2, dy2;
796	u64 vb_addr;
797	u32 *vb;
798
799	if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
800
801		r600_nomm_put_vb(dev);
802		r600_nomm_get_vb(dev);
803		if (!dev_priv->blit_vb)
804			return;
805
806		set_shaders(dev);
807	}
808	vb = r600_nomm_get_vb_ptr(dev);
809
810	sx2 = sx + w;
811	sy2 = sy + h;
812	dx2 = dx + w;
813	dy2 = dy + h;
814
815	vb[0] = int2float(dx);
816	vb[1] = int2float(dy);
817	vb[2] = int2float(sx);
818	vb[3] = int2float(sy);
819
820	vb[4] = int2float(dx);
821	vb[5] = int2float(dy2);
822	vb[6] = int2float(sx);
823	vb[7] = int2float(sy2);
824
825	vb[8] = int2float(dx2);
826	vb[9] = int2float(dy2);
827	vb[10] = int2float(sx2);
828	vb[11] = int2float(sy2);
829
830	switch(cpp) {
831	case 4:
832		cb_format = COLOR_8_8_8_8;
833		tex_format = FMT_8_8_8_8;
834		break;
835	case 2:
836		cb_format = COLOR_5_6_5;
837		tex_format = FMT_5_6_5;
838		break;
839	default:
840		cb_format = COLOR_8;
841		tex_format = FMT_8;
842		break;
843	}
844
845	/* src */
846	set_tex_resource(dev_priv, tex_format,
847			 src_pitch / cpp,
848			 sy2, src_pitch / cpp,
849			 src_gpu_addr);
850
851	cp_set_surface_sync(dev_priv,
852			    R600_TC_ACTION_ENA, src_pitch * sy2, src_gpu_addr);
853
854	/* dst */
855	set_render_target(dev_priv, cb_format,
856			  dst_pitch / cpp, dy2,
857			  dst_gpu_addr);
858
859	/* scissors */
860	set_scissors(dev_priv, dx, dy, dx2, dy2);
861
862	/* Vertex buffer setup */
863	vb_addr = dev_priv->gart_buffers_offset +
864		dev_priv->blit_vb->offset +
865		dev_priv->blit_vb->used;
866	set_vtx_resource(dev_priv, vb_addr);
867
868	/* draw */
869	draw_auto(dev_priv);
870
871	cp_set_surface_sync(dev_priv,
872			    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
873			    dst_pitch * dy2, dst_gpu_addr);
874
875	dev_priv->blit_vb->used += 12 * 4;
876}
877