1/* NeoMagic Back End Scaler functions */
2/* Written by Rudolf Cornelissen 05/2002-1/2006 */
3
4#define MODULE_BIT 0x00000200
5
6#include "nm_std.h"
7
/* Calculates the output window edges, the horizontal source end and the
 * input buffer origin for the overlay and stores them in *moi. */
static void nm_bes_calc_move_overlay(move_overlay_info *moi);
/* Programs the values in moi into the BES: directly for PCI cards, via the
 * kerneldriver for ISA cards. */
static void nm_bes_program_move_overlay(move_overlay_info moi);
10
11/* move the overlay output window in virtualscreens */
12/* Note:
13 * si->dm.h_display_start and si->dm.v_display_start determine where the new
14 * output window is located! */
15void nm_bes_move_overlay()
16{
17	move_overlay_info moi;
18
19	/* abort if overlay is not active */
20	if (!si->overlay.active) return;
21
22	nm_bes_calc_move_overlay(&moi);
23	nm_bes_program_move_overlay(moi);
24}
25
26static void nm_bes_calc_move_overlay(move_overlay_info *moi)
27{
28	/* misc used variables */
29	uint16 temp1, temp2;
30	/* visible screen window in virtual workspaces */
31	uint16 crtc_hstart, crtc_vstart, crtc_hend, crtc_vend;
32	/* horizontal source start in source buffer (clipping) */
33	uint32 hsrcstv;
34
35	/* the BES does not respect virtual_workspaces, but adheres to CRTC
36	 * constraints only */
37	crtc_hstart = si->dm.h_display_start;
38	/* horizontal end is the first position beyond the displayed range on the CRTC */
39	crtc_hend = crtc_hstart + si->dm.timing.h_display;
40	crtc_vstart = si->dm.v_display_start;
41	/* vertical end is the first position beyond the displayed range on the CRTC */
42	crtc_vend = crtc_vstart + si->dm.timing.v_display;
43
44
45	/****************************************
46	 *** setup all edges of output window ***
47	 ****************************************/
48
49	/* setup left and right edges of output window */
50	moi->hcoordv = 0;
51	/* left edge coordinate of output window, must be inside desktop */
52	/* clipping on the left side */
53	if (si->overlay.ow.h_start < crtc_hstart)
54	{
55		temp1 = 0;
56	}
57	else
58	{
59		/* clipping on the right side */
60		if (si->overlay.ow.h_start >= (crtc_hend - 1))
61		{
62			/* width < 2 is not allowed */
63			temp1 = (crtc_hend - crtc_hstart - 2);
64		}
65		else
66		/* no clipping here */
67		{
68			temp1 = (si->overlay.ow.h_start - crtc_hstart);
69		}
70	}
71	moi->hcoordv |= temp1 << 16;
72
73	/* right edge coordinate of output window, must be inside desktop */
74	/* width < 2 is not allowed */
75	if (si->overlay.ow.width < 2)
76	{
77		temp2 = (temp1 + 1);
78	}
79	else
80	{
81		/* clipping on the right side */
82		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1))
83		{
84			temp2 = (crtc_hend - crtc_hstart - 1);
85		}
86		else
87		{
88			/* clipping on the left side */
89			if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
90			{
91				/* width < 2 is not allowed */
92				temp2 = 1;
93			}
94			else
95			/* no clipping here */
96			{
97				temp2 = ((uint16)(si->overlay.ow.h_start + si->overlay.ow.width - crtc_hstart - 1));
98			}
99		}
100	}
101	moi->hcoordv |= temp2 << 0;
102	LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2));
103
104	/* setup top and bottom edges of output window */
105	moi->vcoordv = 0;
106	/* top edge coordinate of output window, must be inside desktop */
107	/* clipping on the top side */
108	if (si->overlay.ow.v_start < crtc_vstart)
109	{
110		temp1 = 0;
111	}
112	else
113	{
114		/* clipping on the bottom side */
115		if (si->overlay.ow.v_start >= (crtc_vend - 1))
116		{
117			/* height < 2 is not allowed */
118			temp1 = (crtc_vend - crtc_vstart - 2);
119		}
120		else
121		/* no clipping here */
122		{
123			temp1 = (si->overlay.ow.v_start - crtc_vstart);
124		}
125	}
126	moi->vcoordv |= temp1 << 16;
127
128	/* bottom edge coordinate of output window, must be inside desktop */
129	/* height < 2 is not allowed */
130	if (si->overlay.ow.height < 2)
131	{
132		temp2 = (temp1 + 1);
133	}
134	else
135	{
136		/* clipping on the bottom side */
137		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) > (crtc_vend - 1))
138		{
139			temp2 = (crtc_vend - crtc_vstart - 1);
140		}
141		else
142		{
143			/* clipping on the top side */
144			if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
145			{
146				/* height < 2 is not allowed */
147				temp2 = 1;
148			}
149			else
150			/* no clipping here */
151			{
152				temp2 = ((uint16)(si->overlay.ow.v_start + si->overlay.ow.height - crtc_vstart - 1));
153			}
154		}
155	}
156	moi->vcoordv |= temp2 << 0;
157	LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2));
158
159
160	/*********************************
161	 *** setup horizontal clipping ***
162	 *********************************/
163
164	/* Setup horizontal source start: first (sub)pixel contributing to output picture */
165	/* Note:
166	 * The method is to calculate, based on 1:1 scaling, based on the output window.
167	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
168	 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed.
169	 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */
170	/* Note also:
171	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
172	hsrcstv = 0;
173	/* check for destination horizontal clipping at left side */
174	if (si->overlay.ow.h_start < crtc_hstart)
175	{
176		/* check if entire destination picture is clipping left:
177		 * (2 pixels will be clamped onscreen at least) */
178		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
179		{
180			/* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */
181			hsrcstv += (si->overlay.ow.width - 2);
182		}
183		else
184		{
185			/* increase 'first contributing pixel' with actual number of dest. clipping pixels */
186			hsrcstv += (crtc_hstart - si->overlay.ow.h_start);
187		}
188		LOG(4,("Overlay: clipping left...\n"));
189
190		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
191		 * Note that this also already takes care of aligning the value to the BES register! */
192		hsrcstv *= (si->overlay.h_ifactor << 4);
193	}
194	/* take zoom into account */
195	hsrcstv += ((uint32)si->overlay.my_ov.h_start) << 16;
196	LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", hsrcstv / (float)65536));
197
198	/* Setup horizontal source end: last (sub)pixel contributing to output picture */
199	/* Note:
200	 * The method is to calculate, based on 1:1 scaling, based on the output window.
201	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
202	 * Then add the right ending position of the bitmap's view (zoom function) to get the final value needed. */
203	/* Note also:
204	 * Even if the scaling factor is clamping we instruct the BES to use the correct source end pos.! */
205
206	moi->hsrcendv = 0;
207	/* check for destination horizontal clipping at right side */
208	if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1))
209	{
210		/* check if entire destination picture is clipping right:
211		 * (2 pixels will be clamped onscreen at least) */
212		if (si->overlay.ow.h_start > (crtc_hend - 2))
213		{
214			/* increase 'number of clipping pixels' with 'fixed value': (total dest. width - 2) */
215			moi->hsrcendv += (si->overlay.ow.width - 2);
216		}
217		else
218		{
219			/* increase 'number of clipping pixels' with actual number of dest. clipping pixels */
220			moi->hsrcendv += ((si->overlay.ow.h_start + si->overlay.ow.width - 1) - (crtc_hend - 1));
221		}
222		LOG(4,("Overlay: clipping right...\n"));
223
224		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
225		 * Note that this also already takes care of aligning the value to the BES register! */
226		moi->hsrcendv *= (si->overlay.h_ifactor << 4);
227		/* now subtract this value from the last used pixel in (zoomed) inputbuffer, aligned to BES */
228		moi->hsrcendv = (((uint32)((si->overlay.my_ov.h_start + si->overlay.my_ov.width) - 1)) << 16) - moi->hsrcendv;
229	}
230	else
231	{
232		/* set last contributing pixel to last used pixel in (zoomed) inputbuffer, aligned to BES */
233		moi->hsrcendv = (((uint32)((si->overlay.my_ov.h_start + si->overlay.my_ov.width) - 1)) << 16);
234	}
235	/* AND below required by hardware */
236	moi->hsrcendv &= 0x03ffffff;
237	LOG(4,("Overlay: last horizontal (sub)pixel of input bitmap contributing %f\n", moi->hsrcendv / (float)65536));
238
239
240	/*******************************
241	 *** setup vertical clipping ***
242	 *******************************/
243
244	/* calculate inputbitmap origin adress */
245	moi->a1orgv = (uintptr_t)((vuint32 *)si->overlay.ob.buffer);
246	moi->a1orgv -= (uintptr_t)((vuint32 *)si->framebuffer);
247	LOG(4,("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n", moi->a1orgv));
248
249	/* Setup vertical source start: first (sub)pixel contributing to output picture. */
250	/* Note:
251	 * The method is to calculate, based on 1:1 scaling, based on the output window.
252	 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap.
253	 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */
254	/* Note also:
255	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
256
257	/* check for destination vertical clipping at top side */
258	if (si->overlay.ow.v_start < crtc_vstart)
259	{
260		/* check if entire destination picture is clipping at top:
261		 * (2 pixels will be clamped onscreen at least) */
262		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
263		{
264			/* increase source buffer origin with 'fixed value':
265			 * (integer part of ('total height - 2' of dest. picture in pixels * inverse scaling factor)) *
266			 * bytes per row source picture */
267			//fixme: rounding down would be better than just chopping off the fractional part...
268			moi->a1orgv +=
269				((((si->overlay.ow.height - 2) * (si->overlay.v_ifactor << 4)) >> 16) *
270				si->overlay.ob.bytes_per_row);
271		}
272		else
273		{
274			/* increase source buffer origin with:
275			 * (integer part of (number of destination picture clipping pixels * inverse scaling factor)) *
276			 * bytes per row source picture */
277			//fixme: rounding down would be better than just chopping off the fractional part...
278			moi->a1orgv +=
279				((((crtc_vstart - si->overlay.ow.v_start) * (si->overlay.v_ifactor << 4)) >> 16) *
280				si->overlay.ob.bytes_per_row);
281		}
282		LOG(4,("Overlay: clipping at top...\n"));
283	}
284	/* take zoom into account */
285	moi->a1orgv += (si->overlay.my_ov.v_start * si->overlay.ob.bytes_per_row);
286	/* now include 'pixel precise' left clipping...
287	 * (subpixel precision is not supported by NeoMagic cards) */
288	moi->a1orgv += ((hsrcstv >> 16) * 2);
289	/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
290	moi->a1orgv &= ~0x03;
291	LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", moi->a1orgv));
292}
293
294static void nm_bes_program_move_overlay(move_overlay_info moi)
295{
296	/*************************************
297	 *** sync to BES (Back End Scaler) ***
298	 *************************************/
299
300	/* Make sure reprogramming the BES completes before the next retrace occurs,
301	 * to prevent register-update glitches (double buffer feature). */
302
303	//fixme if needed...
304
305
306	/**************************************
307	 *** actually program the registers ***
308	 **************************************/
309	if (si->ps.card_type >= NM2097)
310	{
311		/* PCI card */
312		LOG(4,("Overlay: accelerant is programming BES\n"));
313		/* unlock card overlay sequencer registers (b5 = 1) */
314		PCIGRPHW(GENLOCK, (PCIGRPHR(GENLOCK) | 0x20));
315		/* destination rectangle #1 (output window position and size) */
316		PCIGRPHW(HD1COORD1L, ((moi.hcoordv >> 16) & 0xff));
317		PCIGRPHW(HD1COORD2L, (moi.hcoordv & 0xff));
318		PCIGRPHW(HD1COORD21H, (((moi.hcoordv >> 4) & 0xf0) | ((moi.hcoordv >> 24) & 0x0f)));
319		PCIGRPHW(VD1COORD1L, ((moi.vcoordv >> 16) & 0xff));
320		PCIGRPHW(VD1COORD2L, (moi.vcoordv & 0xff));
321		PCIGRPHW(VD1COORD21H, (((moi.vcoordv >> 4) & 0xf0) | ((moi.vcoordv >> 24) & 0x0f)));
322		/* inputbuffer #1 origin */
323		/* (we don't program buffer #2 as it's unused.) */
324		if (si->ps.card_type < NM2200)
325		{
326			moi.a1orgv >>= 1;
327			/* horizontal source end does not use subpixelprecision: granularity is 8 pixels */
328			/* notes:
329			 * - correctly programming horizontal source end minimizes used bandwidth;
330			 * - adding 9 below is in fact:
331			 *   - adding 1 to round-up to the nearest whole source-end value
332			       (making SURE we NEVER are a (tiny) bit too low);
333			     - adding 1 to convert 'last used position' to 'number of used pixels';
334			     - adding 7 to round-up to the nearest higher (or equal) valid register
335			       value (needed because of it's 8-pixel granularity). */
336			PCIGRPHW(0xbc, ((((moi.hsrcendv >> 16) + 9) >> 3) - 1));
337		}
338		else
339		{
340			/* horizontal source end does not use subpixelprecision: granularity is 16 pixels */
341			/* notes:
342			 * - programming this register just a tiny bit too low messes up vertical
343			 *   scaling badly (also distortion stripes and flickering are reported)!
344			 * - not programming this register correctly will mess-up the picture when
345			 *   it's partly clipping on the right side of the screen...
346			 * - make absolutely sure the engine can fetch the last pixel needed from
347			 *   the sourcebitmap even if only to generate a tiny subpixel from it!
348			 *   (see remarks for < NM2200 cards regarding programming this register) */
349			PCIGRPHW(0xbc, ((((moi.hsrcendv >> 16) + 17) >> 4) - 1));
350		}
351		PCIGRPHW(BUF1ORGL, (moi.a1orgv & 0xff));
352		PCIGRPHW(BUF1ORGM, ((moi.a1orgv >> 8) & 0xff));
353		PCIGRPHW(BUF1ORGH, ((moi.a1orgv >> 16) & 0xff));
354		/* ??? */
355		PCIGRPHW(0xbd, 0x02);
356		PCIGRPHW(0xbe, 0x00);
357		/* b2 = 0: don't use horizontal mirroring (NM2160) */
358		/* other bits do ??? */
359		PCIGRPHW(0xbf, 0x02);
360		/* ??? */
361	    PCISEQW(0x1c, 0xfb);
362    	PCISEQW(0x1d, 0x00);
363		PCISEQW(0x1e, 0xe2);
364    	PCISEQW(0x1f, 0x02);
365 		/* b1 = 0: disable alternating hardware buffers (NM2160) */
366		/* other bits do ??? */
367 		PCISEQW(0x09, 0x11);
368		/* we don't use PCMCIA Zoomed Video port capturing, set 1:1 scale just in case */
369		/* (b6-4 = Y downscale = 100%, b2-0 = X downscale = 100%;
370		 *  downscaling selectable in 12.5% steps on increasing setting by 1) */
371		PCISEQW(ZVCAP_DSCAL, 0x00);
372	}
373	else
374	{
375		/* bes setup data */
376		nm_bes_data bi;
377
378		/* ISA card. Speed required, so:
379		 * program entire sequence in kerneldriver in one context switch! */
380		LOG(4,("Overlay: kerneldriver programs BES\n"));
381
382		/* setup BES info struct... */
383		bi.moi = moi;
384		bi.card_type = si->ps.card_type;
385		bi.move_only = true;
386		/* ... and call kerneldriver to program the BES */
387		bi.magic = NM_PRIVATE_DATA_MAGIC;
388		ioctl(fd, NM_PGM_BES, &bi, sizeof(bi));
389	}
390}
391
392status_t nm_configure_bes
393	(const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset)
394{
395	/* yuy2 (4:2:2) colorspace calculations */
396	/* Note: Some calculations will have to be modified for other colorspaces if they are incorporated. */
397
398	/* Note:
399	 * in BeOS R5.0.3 and DANO:
400	 * 'ow->offset_xxx' is always 0, so not used;
401	 * 'ow->width' and 'ow->height' are the output window size: does not change
402	 * if window is clipping;
403	 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output
404	 * window. These values can be negative: this means the window is clipping
405	 * at the left or the top of the display, respectively. */
406
407	/* 'ov' is the view in the source bitmap, so which part of the bitmap is actually
408	 * displayed on screen. This is used for the 'hardware zoom' function. */
409
410	/* bes setup data */
411	nm_bes_data bi;
412	/* inverse scaling factor, used for source positioning */
413	uint32 ifactor;
414	/* copy of overlay view which has checked valid values */
415	overlay_view my_ov;
416
417
418	/**************************************************************************************
419	 *** copy, check and limit if needed the user-specified view into the intput bitmap ***
420	 **************************************************************************************/
421	my_ov = *ov;
422	/* check for valid 'coordinates' */
423	if (my_ov.width == 0) my_ov.width++;
424	if (my_ov.height == 0) my_ov.height++;
425	if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
426		my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1);
427	if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
428		my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1);
429	if (my_ov.v_start > (ob->height - 1))
430		my_ov.v_start = (ob->height - 1);
431	if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1))
432		my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1);
433
434	LOG(6,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n",
435		my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height));
436
437	/* save for nm_bes_calc_move_overlay() */
438	si->overlay.ow = *ow;
439	si->overlay.ob = *ob;
440	si->overlay.my_ov = my_ov;
441
442
443	/********************************
444	 *** setup horizontal scaling ***
445	 ********************************/
446
447	LOG(6,("Overlay: total input picture width = %d, height = %d\n",
448			(ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height));
449	LOG(6,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height));
450
451	/* calculate inverse horizontal scaling factor, taking zoom into account */
452	ifactor = ((((uint32)my_ov.width) << 12) / ow->width);
453
454	/* correct factor to prevent most-right visible 'line' from distorting */
455	ifactor -= 1;
456	bi.hiscalv = ifactor;
457	/* save for nv_bes_calc_move_overlay() */
458	si->overlay.h_ifactor = ifactor;
459	LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)4096 / ifactor));
460
461	/* check scaling factor (and modify if needed) to be within scaling limits */
462	/* the upscaling limit is 8.0 (see official Neomagic specsheets) */
463	if (bi.hiscalv < 0x00000200)
464	{
465		/* (non-inverse) factor too large, set factor to max. valid value */
466		bi.hiscalv = 0x00000200;
467		LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)4096 / bi.hiscalv));
468	}
469	/* horizontal downscaling cannot be done by NM BES hardware */
470	if (bi.hiscalv > (1 << 12))
471	{
472		/* (non-inverse) factor too small, set factor to min. valid value */
473		bi.hiscalv = 0x1000;
474		LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)4096 / bi.hiscalv));
475	}
476
477
478	/******************************
479	 *** setup vertical scaling ***
480	 ******************************/
481
482	/* calculate inverse vertical scaling factor, taking zoom into account */
483	ifactor = ((((uint32)my_ov.height) << 12) / ow->height);
484
485	/* correct factor to prevent lowest visible line from distorting */
486	ifactor -= 1;
487	LOG(4,("Overlay: vertical scaling factor is %f\n", (float)4096 / ifactor));
488
489	/* preserve ifactor for source positioning calculations later on */
490	bi.viscalv = ifactor;
491	/* save for nv_bes_calc_move_overlay() */
492	si->overlay.v_ifactor = ifactor;
493
494	/* check scaling factor (and modify if needed) to be within scaling limits */
495	/* the upscaling limit is 8.0 (see official Neomagic specsheets) */
496	if (bi.viscalv < 0x00000200)
497	{
498		/* (non-inverse) factor too large, set factor to max. valid value */
499		bi.viscalv = 0x00000200;
500		LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)4096 / bi.viscalv));
501	}
502	/* vertical downscaling cannot be done by NM BES hardware */
503	if (bi.viscalv > (1 << 12))
504	{
505		/* (non-inverse) factor too small, set factor to min. valid value */
506		bi.viscalv = 0x1000;
507		LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)4096 / bi.viscalv));
508	}
509
510
511	/********************************************************************************
512	 *** setup all edges of output window, setup horizontal and vertical clipping ***
513	 ********************************************************************************/
514	nm_bes_calc_move_overlay(&(bi.moi));
515
516
517	/*****************************
518	 *** log color keying info ***
519	 *****************************/
520
521	LOG(6,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n",
522		ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value));
523	LOG(6,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n",
524		ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask));
525
526
527	/*************************
528	 *** setup BES control ***
529	 *************************/
530
531	/* BES global control: setup functions */
532	bi.globctlv = 0;
533
534	/* enable BES */
535	bi.globctlv |= 1 << 0;
536	/* enable colorkeying if requested */
537	if (ow->flags & B_OVERLAY_COLOR_KEY) bi.globctlv |= 1 << 1;
538	/* b3 = 1: distorts right-half of overlay output. Keeping it zero. */
539	/* colorspace is YV12, I420 or YUY2 (no RV15 or RV16) */
540	bi.globctlv |= 0 << 5;
541
542	/* enable auto-alternating hardware buffers if alternating buffers is enabled (NM2160) */
543	bi.globctlv |= 1 << 8;
544	/* disable capture */
545	bi.globctlv |= 1 << 13;
546	/* capture: display one buffer (no alternating buffers) */
547	bi.globctlv |= 0 << 14;
548	/* capture: display frame (no field) */
549	bi.globctlv |= 0 << 15;
550
551	/* BTW: horizontal and vertical filtering are always turned on in NM hardware. */
552
553
554	/*************************************
555	 *** sync to BES (Back End Scaler) ***
556	 *************************************/
557
558	/* Make sure reprogramming the BES completes before the next retrace occurs,
559	 * to prevent register-update glitches (double buffer feature). */
560
561	//fixme if needed...
562
563
564	/**************************************
565	 *** actually program the registers ***
566	 **************************************/
567
568	if (si->ps.card_type >= NM2097)
569	{
570		/* helper: some cards use pixels to define buffer pitch, others use bytes */
571		uint16 buf_pitch = ob->width;
572
573		/* PCI card */
574		LOG(4,("Overlay: accelerant is programming BES\n"));
575		/* unlock card overlay sequencer registers (b5 = 1) */
576		PCIGRPHW(GENLOCK, (PCIGRPHR(GENLOCK) | 0x20));
577		/* destination rectangle #1 (output window position and size) */
578		PCIGRPHW(HD1COORD1L, ((bi.moi.hcoordv >> 16) & 0xff));
579		PCIGRPHW(HD1COORD2L, (bi.moi.hcoordv & 0xff));
580		PCIGRPHW(HD1COORD21H, (((bi.moi.hcoordv >> 4) & 0xf0) | ((bi.moi.hcoordv >> 24) & 0x0f)));
581		PCIGRPHW(VD1COORD1L, ((bi.moi.vcoordv >> 16) & 0xff));
582		PCIGRPHW(VD1COORD2L, (bi.moi.vcoordv & 0xff));
583		PCIGRPHW(VD1COORD21H, (((bi.moi.vcoordv >> 4) & 0xf0) | ((bi.moi.vcoordv >> 24) & 0x0f)));
584		/* scaling */
585		PCIGRPHW(XSCALEL, (bi.hiscalv & 0xff));
586		PCIGRPHW(XSCALEH, ((bi.hiscalv >> 8) & 0xff));
587		PCIGRPHW(YSCALEL, (bi.viscalv & 0xff));
588		PCIGRPHW(YSCALEH, ((bi.viscalv >> 8) & 0xff));
589		/* inputbuffer #1 origin */
590		/* (we don't program buffer #2 as it's unused.) */
591		if (si->ps.card_type < NM2200)
592		{
593			bi.moi.a1orgv >>= 1;
594			/* horizontal source end does not use subpixelprecision: granularity is 8 pixels */
595			/* notes:
596			 * - correctly programming horizontal source end minimizes used bandwidth;
597			 * - adding 9 below is in fact:
598			 *   - adding 1 to round-up to the nearest whole source-end value
599			       (making SURE we NEVER are a (tiny) bit too low);
600			     - adding 1 to convert 'last used position' to 'number of used pixels';
601			     - adding 7 to round-up to the nearest higher (or equal) valid register
602			       value (needed because of it's 8-pixel granularity). */
603			PCIGRPHW(0xbc, ((((bi.moi.hsrcendv >> 16) + 9) >> 3) - 1));
604		}
605		else
606		{
607			/* NM2200 and later cards use bytes to define buffer pitch */
608			buf_pitch <<= 1;
609			/* horizontal source end does not use subpixelprecision: granularity is 16 pixels */
610			/* notes:
611			 * - programming this register just a tiny bit too low messes up vertical
612			 *   scaling badly (also distortion stripes and flickering are reported)!
613			 * - not programming this register correctly will mess-up the picture when
614			 *   it's partly clipping on the right side of the screen...
615			 * - make absolutely sure the engine can fetch the last pixel needed from
616			 *   the sourcebitmap even if only to generate a tiny subpixel from it!
617			 *   (see remarks for < NM2200 cards regarding programming this register) */
618			PCIGRPHW(0xbc, ((((bi.moi.hsrcendv >> 16) + 17) >> 4) - 1));
619		}
620		PCIGRPHW(BUF1ORGL, (bi.moi.a1orgv & 0xff));
621		PCIGRPHW(BUF1ORGM, ((bi.moi.a1orgv >> 8) & 0xff));
622		PCIGRPHW(BUF1ORGH, ((bi.moi.a1orgv >> 16) & 0xff));
623		/* ??? */
624		PCIGRPHW(0xbd, 0x02);
625		PCIGRPHW(0xbe, 0x00);
626		/* b2 = 0: don't use horizontal mirroring (NM2160) */
627		/* other bits do ??? */
628		PCIGRPHW(0xbf, 0x02);
629		/* ??? */
630	    PCISEQW(0x1c, 0xfb);
631    	PCISEQW(0x1d, 0x00);
632		PCISEQW(0x1e, 0xe2);
633    	PCISEQW(0x1f, 0x02);
634 		/* b1 = 0: disable alternating hardware buffers (NM2160) */
635		/* other bits do ??? */
636 		PCISEQW(0x09, 0x11);
637		/* we don't use PCMCIA Zoomed Video port capturing, set 1:1 scale just in case */
638		/* (b6-4 = Y downscale = 100%, b2-0 = X downscale = 100%;
639		 *  downscaling selectable in 12.5% steps on increasing setting by 1) */
640		PCISEQW(ZVCAP_DSCAL, 0x00);
641		/* global BES control */
642		PCIGRPHW(BESCTRL1, (bi.globctlv & 0xff));
643		PCISEQW(BESCTRL2, ((bi.globctlv >> 8) & 0xff));
644
645
646		/**************************
647		 *** setup color keying ***
648		 **************************/
649
650		PCIGRPHW(COLKEY_R, (ow->red.value & ow->red.mask));
651		PCIGRPHW(COLKEY_G, (ow->green.value & ow->green.mask));
652		PCIGRPHW(COLKEY_B, (ow->blue.value & ow->blue.mask));
653
654
655		/*************************
656		 *** setup misc. stuff ***
657		 *************************/
658
659		/* setup brightness to be 'neutral' (two's complement number) */
660		PCIGRPHW(BRIGHTNESS, 0x00);
661
662		/* setup inputbuffer #1 pitch including slopspace */
663		/* (we don't program the pitch for inputbuffer #2 as it's unused.) */
664		PCIGRPHW(BUF1PITCHL, (buf_pitch & 0xff));
665		PCIGRPHW(BUF1PITCHH, ((buf_pitch >> 8) & 0xff));
666	}
667	else
668	{
669		/* ISA card. Speed required, so:
670		 * program entire sequence in kerneldriver in one context switch! */
671		LOG(4,("Overlay: kerneldriver programs BES\n"));
672
673		/* complete BES info struct... */
674		bi.card_type = si->ps.card_type;
675		bi.colkey_r = (ow->red.value & ow->red.mask);
676		bi.colkey_g = (ow->green.value & ow->green.mask);
677		bi.colkey_b = (ow->blue.value & ow->blue.mask);
678		bi.ob_width = ob->width;
679		bi.move_only = false;
680		/* ... and call kerneldriver to program the BES */
681		bi.magic = NM_PRIVATE_DATA_MAGIC;
682		ioctl(fd, NM_PGM_BES, &bi, sizeof(bi));
683	}
684
685	/* note that overlay is in use (for nm_bes_move_overlay()) */
686	si->overlay.active = true;
687
688	return B_OK;
689}
690
691status_t nm_release_bes()
692{
693	/* setup BES control: disable scaler */
694	if (si->ps.card_type >= NM2097)
695	{
696		/* PCI card */
697		PCIGRPHW(BESCTRL1, 0x02);
698		PCISEQW(BESCTRL2, 0xa0);
699	}
700	else
701	{
702		/* ISA card */
703		ISAGRPHW(BESCTRL1, 0x02);
704		ISASEQW(BESCTRL2, 0xa0);
705	}
706
707	/* note that overlay is not in use (for nm_bes_move_overlay()) */
708	si->overlay.active = false;
709
710	return B_OK;
711}
712