1/*	$NetBSD: raster_op.c,v 1.18 2009/04/18 15:04:26 tsutsui Exp $ */
2
3/*-
4 * Copyright (c) 1991, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to the Computer Systems
8 * Engineering Group at Lawrence Berkeley Laboratory and to the University
9 * of California at Berkeley by Jef Poskanzer.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 *	@(#)raster_op.c	8.1 (Berkeley) 6/11/93
36 */
37
38/*
39 * Bitblit routine for raster library.
40 *
41 * This raster-op is machined to exacting tolerances by skilled native
42 * craftsmen with pride in their work.
43 *
44 * The various cases are broken down like this:
45 *
46 *   src required
47 *       1-bit to 1-bit
48 *       1-bit to 2-bits
49 *       1-bit to 4-bits
50 *       1-bit to 8-bits
51 *       1-bit to 16-bits
52 *       2-bits to 2-bits
53 *       2-bits to 4-bits (not implemented)
54 *       2-bits to 8-bits (not implemented)
55 *       2-bits to 16-bits (not implemented)
56 *       4-bits to 4-bits
57 *       4-bits to 8-bits (not implemented)
58 *       4-bits to 16-bits (not implemented)
59 *       8-bits to 8-bits
60 *       8-bits to 16-bits (not implemented)
61 *       16-bits to 16-bits
62 *   no src required
63 *       1-bit no-src
64 *       2-bits no-src
65 *       8-bits no-src
66 *       16-bits no-src
67 */
68
69#include <sys/cdefs.h>
70__KERNEL_RCSID(0, "$NetBSD: raster_op.c,v 1.18 2009/04/18 15:04:26 tsutsui Exp $");
71
72#include <sys/types.h>
73#ifdef _KERNEL
74#include "opt_rcons.h"
75#include <dev/rcons/raster.h>
76#else
77#include "raster.h"
78#endif
79
80/* CONFIGURE: To save on executable size, you can configure out the seldom-used
81** logical operations.  With this variable set, the only operations implemented
82** are: RAS_SRC, RAS_CLEAR, RAS_SET, RAS_INVERT, RAS_XOR, RAS_INVERTSRC.
83*/
84#ifdef _KERNEL
85#define PARTIAL_LOGICAL_OPS
86#endif
87
88/* CONFIGURE: bcopy() is supposed to be the ultimately fastest way to move
89** bytes, overlapping or not, ignoring the startup cost.  Unfortunately
90** this is not true on some systems.  For example, on a Sun 3 running
91** SunOS 3.5, bcopy() is about five times slower than a simple for loop
92** on overlapping copies.  And on a 4.1.1 SPARC, bcopy() is about 2/3rds
93** as fast on backwards overlaps.  So, only define this if your bcopy is ok.
94*/
95#undef BCOPY_FASTER
96
97/* End of configurable definitions. */
98
99
100/* Definitions. */
101
102/* Raster-op macros.  These encapsulate the switch statements and so make
103** the source code 16 times smaller.  The pre and pst args are code
104** fragments to put before and after the assignment in each case.  They
105** can be the beginning and end of a loop.  If the pst fragment includes a
106** masked assignment, for example to handle the left or right edge cases,
107** a good optimizing compiler will simplify the boolean expressions very
108** nicely - both cc and gcc on the SPARC will do this.
109*/
110
111#ifndef PARTIAL_LOGICAL_OPS
112
/* ROP_DST - destination-only raster-op dispatcher (full operation set).
**
** Expands to a switch on op.  For RAS_CLEAR, RAS_INVERT and RAS_SET it
** emits the pre fragment, the assignment to the lvalue d, and then the
** pst fragment.  RAS_DST is a no-op and emits neither fragment.  Any
** other op makes the ENCLOSING FUNCTION return -1, so this macro may
** only be used inside a function returning int.
**
** d appears more than once in the RAS_INVERT case, so it must be free
** of side effects.  The expansion is a complete statement; call sites
** do not follow it with a semicolon.
*/
#define ROP_DST(op,pre,d,pst) \
    switch ( op ) \
	{ \
	case RAS_CLEAR: \
	pre \
	(d) = 0; \
	pst \
	break; \
	case RAS_INVERT: \
	pre \
	(d) = ~(d); \
	pst \
	break; \
	case RAS_DST: \
	/* noop */ \
	break; \
	case RAS_SET: \
	pre \
	(d) = ~0; \
	pst \
	break; \
	default: \
	return -1; \
	}
137
/* ROP_DSTCOLOR - destination-only raster-op dispatcher with a color
** (full operation set).
**
** Same shape as ROP_DST, except that RAS_SET stores the color value c
** into d instead of all-ones.  RAS_CLEAR stores 0, RAS_INVERT stores
** ~d, and RAS_DST is a no-op that emits neither pre nor pst.  Any other
** op makes the enclosing function return -1.
*/
#define ROP_DSTCOLOR(op,pre,d,c,pst) \
    switch ( op ) \
	{ \
	case RAS_CLEAR: \
	pre \
	(d) = 0; \
	pst \
	break; \
	case RAS_INVERT: \
	pre \
	(d) = ~(d); \
	pst \
	break; \
	case RAS_DST: \
	/* noop */ \
	break; \
	case RAS_SET: \
	pre \
	(d) = (c); \
	pst \
	break; \
	default: \
	return -1; \
	}
162
/* ROP_SRCDST - two-operand raster-op dispatcher (full operation set).
**
** Expands to a switch on op.  Each case emits the pre fragment, assigns
** to the lvalue d the boolean combination of s and d named by the case
** label, then emits the pst fragment.  Any op not listed makes the
** enclosing function return -1.
**
** s and d may each be evaluated more than once, so both must be free of
** side effects.
*/
#define ROP_SRCDST(op,pre,s,d,pst) \
    switch ( op ) \
	{ \
	case RAS_NOTOR: \
	pre \
	(d) = ~( (s) | (d) ); \
	pst \
	break; \
	case RAS_NOTSRC_AND_DST: \
	pre \
	(d) = ~(s) & (d); \
	pst \
	break; \
	case RAS_INVERTSRC: \
	pre \
	(d) = ~(s); \
	pst \
	break; \
	case RAS_SRC_AND_NOTDST: \
	pre \
	(d) = (s) & ~(d); \
	pst \
	break; \
	case RAS_XOR: \
	pre \
	(d) = (s) ^ (d); \
	pst \
	break; \
	case RAS_NOTAND: \
	pre \
	(d) = ~( (s) & (d) ); \
	pst \
	break; \
	case RAS_AND: \
	pre \
	(d) = (s) & (d); \
	pst \
	break; \
	case RAS_NOTXOR: \
	pre \
	(d) = ~( (s) ^ (d) ); \
	pst \
	break; \
	case RAS_NOTSRC_OR_DST: \
	pre \
	(d) = ~(s) | (d); \
	pst \
	break; \
	case RAS_SRC: \
	pre \
	(d) = (s); \
	pst \
	break; \
	case RAS_SRC_OR_NOTDST: \
	pre \
	(d) = (s) | ~(d); \
	pst \
	break; \
	case RAS_OR: \
	pre \
	(d) = (s) | (d); \
	pst \
	break; \
	default: \
	return -1; \
	}
229
/* ROP_SRCDSTCOLOR - two-operand raster-op dispatcher for a one-bit
** source expanded with a color (full operation set).
**
** s is used only as a truth value: where it is non-zero the source
** operand of the operation is the color c, and where it is zero the
** source operand is 0.  The else arms are the operation evaluated with
** a zero source; cases with no else arm (RAS_NOTSRC_AND_DST, RAS_XOR,
** RAS_OR) leave d unchanged for a zero source.
**
** Each case emits pre, the conditional assignment to d, then pst.  Any
** op not listed makes the enclosing function return -1.  s is placed
** unparenthesized inside the if condition; c and d may be evaluated
** more than once.
*/
#define ROP_SRCDSTCOLOR(op,pre,s,d,c,pst) \
    switch ( op ) \
	{ \
	case RAS_NOTOR: \
	pre \
	if ( s ) \
	    (d) = ~( (c) | (d) ); \
	else \
	    (d) = ~(d); \
	pst \
	break; \
	case RAS_NOTSRC_AND_DST: \
	pre \
	if ( s ) \
	    (d) = ~(c) & (d); \
	pst \
	break; \
	case RAS_INVERTSRC: \
	pre \
	if ( s ) \
	    (d) = ~(c); \
	else \
	    (d) = ~0; \
	pst \
	break; \
	case RAS_SRC_AND_NOTDST: \
	pre \
	if ( s ) \
	    (d) = (c) & ~(d); \
	else \
	    (d) = 0; \
	pst \
	break; \
	case RAS_XOR: \
	pre \
	if ( s ) \
	    (d) = (c) ^ (d); \
	pst \
	break; \
	case RAS_NOTAND: \
	pre \
	if ( s ) \
	    (d) = ~( (c) & (d) ); \
	else \
	    (d) = ~0; \
	pst \
	break; \
	case RAS_AND: \
	pre \
	if ( s ) \
	    (d) = (c) & (d); \
	else \
	    (d) = 0; \
	pst \
	break; \
	case RAS_NOTXOR: \
	pre \
	if ( s ) \
	    (d) = ~( (c) ^ (d) ); \
	else \
	    (d) = ~(d); \
	pst \
	break; \
	case RAS_NOTSRC_OR_DST: \
	pre \
	if ( s ) \
	    (d) = ~(c) | (d); \
	else \
	    (d) = ~0; \
	pst \
	break; \
	case RAS_SRC: \
	pre \
	if ( s ) \
	    (d) = (c); \
	else \
	    (d) = 0; \
	pst \
	break; \
	case RAS_SRC_OR_NOTDST: \
	pre \
	if ( s ) \
	    (d) = (c) | ~(d); \
	else \
	    (d) = ~(d); \
	pst \
	break; \
	case RAS_OR: \
	pre \
	if ( s ) \
	    (d) = (c) | (d); \
	pst \
	break; \
	default: \
	return -1; \
	}
326
327#else /*PARTIAL_LOGICAL_OPS*/
328
/* ROP_DST - destination-only raster-op dispatcher (reduced operation
** set, selected when PARTIAL_LOGICAL_OPS is defined).
**
** Handles RAS_CLEAR, RAS_INVERT and RAS_SET by emitting pre, the
** assignment to the lvalue d, then pst.  Every other op, including
** RAS_DST, makes the enclosing function return -1.  d must be free of
** side effects because RAS_INVERT evaluates it twice.
*/
#define ROP_DST(op,pre,d,pst) \
    switch ( op ) \
	{ \
	case RAS_CLEAR: \
	pre \
	(d) = 0; \
	pst \
	break; \
	case RAS_INVERT: \
	pre \
	(d) = ~(d); \
	pst \
	break; \
	case RAS_SET: \
	pre \
	(d) = ~0; \
	pst \
	break; \
	default: \
	return -1; \
	}
350
/* ROP_DSTCOLOR - destination-only raster-op dispatcher with a color
** (reduced operation set).
**
** RAS_CLEAR stores 0, RAS_INVERT stores ~d and RAS_SET stores the
** color c; each is bracketed by the pre and pst fragments.  Every other
** op makes the enclosing function return -1.
*/
#define ROP_DSTCOLOR(op,pre,d,c,pst) \
    switch ( op ) \
	{ \
	case RAS_CLEAR: \
	pre \
	(d) = 0; \
	pst \
	break; \
	case RAS_INVERT: \
	pre \
	(d) = ~(d); \
	pst \
	break; \
	case RAS_SET: \
	pre \
	(d) = (c); \
	pst \
	break; \
	default: \
	return -1; \
	}
372
/* ROP_SRCDST - two-operand raster-op dispatcher (reduced operation set).
**
** Only RAS_INVERTSRC, RAS_XOR and RAS_SRC are handled; each emits pre,
** the assignment to the lvalue d, then pst.  Every other op makes the
** enclosing function return -1.  s and d must be free of side effects.
*/
#define ROP_SRCDST(op,pre,s,d,pst) \
    switch ( op ) \
	{ \
	case RAS_INVERTSRC: \
	pre \
	(d) = ~(s); \
	pst \
	break; \
	case RAS_XOR: \
	pre \
	(d) = (s) ^ (d); \
	pst \
	break; \
	case RAS_SRC: \
	pre \
	(d) = (s); \
	pst \
	break; \
	default: \
	return -1; \
	}
394
/* ROP_SRCDSTCOLOR - two-operand raster-op dispatcher for a one-bit
** source expanded with a color (reduced operation set).
**
** s is used only as a truth value: non-zero selects the color c as the
** source operand, zero selects 0.  RAS_XOR has no else arm, so a zero
** source leaves d unchanged.  Only RAS_INVERTSRC, RAS_XOR and RAS_SRC
** are handled; every other op makes the enclosing function return -1.
*/
#define ROP_SRCDSTCOLOR(op,pre,s,d,c,pst) \
    switch ( op ) \
	{ \
	case RAS_INVERTSRC: \
	pre \
	if ( s ) \
	    (d) = ~(c); \
	else \
	    (d) = ~0; \
	pst \
	break; \
	case RAS_XOR: \
	pre \
	if ( s ) \
	    (d) = (c) ^ (d); \
	pst \
	break; \
	case RAS_SRC: \
	pre \
	if ( s ) \
	    (d) = (c); \
	else \
	    (d) = 0; \
	pst \
	break; \
	default: \
	return -1; \
	}
423
424#endif /*PARTIAL_LOGICAL_OPS*/
425
426
427/* Variables. */
428
/* Indexed by the operation code extracted with RAS_GETOP(): non-zero when
** the operation reads a source raster, zero for the four destination-only
** operations marked below.  The table is never written, hence const.
*/
static const int needsrc[16] = { 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0 };
/*                             CLEAR          INVERT          DST            SET */
431
#ifdef MSBIT_FIRST

/* One-bit masks, most significant bit first: entry i selects bit i of a
** longword counting from the top.
*/
u_int32_t raster_bitmask[32] =
    {
    0x80000000, 0x40000000, 0x20000000, 0x10000000, 0x08000000, 0x04000000, 0x02000000, 0x01000000,
    0x00800000, 0x00400000, 0x00200000, 0x00100000, 0x00080000, 0x00040000, 0x00020000, 0x00010000,
    0x00008000, 0x00004000, 0x00002000, 0x00001000, 0x00000800, 0x00000400, 0x00000200, 0x00000100,
    0x00000080, 0x00000040, 0x00000020, 0x00000010, 0x00000008, 0x00000004, 0x00000002, 0x00000001
    };

#ifdef MSBYTE_FIRST
/* leftmask[n]: the n highest-order bits of a longword. */
static u_int32_t leftmask[32] =
    {
    0x00000000, 0x80000000, 0xc0000000, 0xe0000000, 0xf0000000, 0xf8000000, 0xfc000000, 0xfe000000,
    0xff000000, 0xff800000, 0xffc00000, 0xffe00000, 0xfff00000, 0xfff80000, 0xfffc0000, 0xfffe0000,
    0xffff0000, 0xffff8000, 0xffffc000, 0xffffe000, 0xfffff000, 0xfffff800, 0xfffffc00, 0xfffffe00,
    0xffffff00, 0xffffff80, 0xffffffc0, 0xffffffe0, 0xfffffff0, 0xfffffff8, 0xfffffffc, 0xfffffffe
    };
/* rightmask[n]: the n lowest-order bits of a longword. */
static u_int32_t rightmask[32] =
    {
    0x00000000, 0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f, 0x0000007f,
    0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, 0x00001fff, 0x00003fff, 0x00007fff,
    0x0000ffff, 0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff,
    0x00ffffff, 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff, 0x7fffffff
    };

/* Shift operators; which C operator each names depends on the bit order. */
#define LSOP <<
#define RSOP >>
#endif /*MSBYTE_FIRST*/

#else /*MSBIT_FIRST*/

/* One-bit masks, least significant bit first: entry i is 1 << i. */
u_int32_t raster_bitmask[32] =
    {
    0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020, 0x00000040, 0x00000080,
    0x00000100, 0x00000200, 0x00000400, 0x00000800, 0x00001000, 0x00002000, 0x00004000, 0x00008000,
    0x00010000, 0x00020000, 0x00040000, 0x00080000, 0x00100000, 0x00200000, 0x00400000, 0x00800000,
    0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, 0x40000000, 0x80000000
    };

#ifndef MSBYTE_FIRST
/* leftmask[n]: the n lowest-order bits of a longword. */
static u_int32_t leftmask[32] =
    {
    0x00000000, 0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f, 0x0000007f,
    0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, 0x00001fff, 0x00003fff, 0x00007fff,
    0x0000ffff, 0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff,
    0x00ffffff, 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff, 0x7fffffff
    };
/* rightmask[n]: the n highest-order bits of a longword. */
static u_int32_t rightmask[32] =
    {
    0x00000000, 0x80000000, 0xc0000000, 0xe0000000, 0xf0000000, 0xf8000000, 0xfc000000, 0xfe000000,
    0xff000000, 0xff800000, 0xffc00000, 0xffe00000, 0xfff00000, 0xfff80000, 0xfffc0000, 0xfffe0000,
    0xffff0000, 0xffff8000, 0xffffc000, 0xffffe000, 0xfffff000, 0xfffff800, 0xfffffc00, 0xfffffe00,
    0xffffff00, 0xffffff80, 0xffffffc0, 0xffffffe0, 0xfffffff0, 0xfffffff8, 0xfffffffc, 0xfffffffe
    };
/* Shift operators; which C operator each names depends on the bit order. */
#define LSOP >>
#define RSOP <<
#endif /*not MSBYTE_FIRST*/

#endif /*MSBIT_FIRST*/
504
505/* (The odd combinations MSBIT+~MSBYTE and ~MSBIT+MSBYTE could be added.) */
506
/* Per-pixel masks within a 32-bit longword, indexed by the pixel's position
** in the longword (0 = first pixel in memory order).  One table per
** supported destination depth; the byte order selects which end of the
** longword position 0 lives at.
*/
#ifdef MSBYTE_FIRST
/* 8 bits per pixel: 4 pixels per longword. */
static u_int32_t bytemask[4] = { 0xff000000, 0x00ff0000, 0x0000ff00, 0x000000ff };
#ifdef RCONS_2BPP
/* 2 bits per pixel: 16 pixels per longword. */
static u_int32_t twobitmask[16] = {
  0xc0000000, 0x30000000, 0x0c000000, 0x03000000,
  0x00c00000, 0x00300000, 0x000c0000, 0x00030000,
  0x0000c000, 0x00003000, 0x00000c00, 0x00000300,
  0x000000c0, 0x00000030, 0x0000000c, 0x00000003 };
#endif /* RCONS_2BPP */
#ifdef RCONS_4BPP
/* 4 bits per pixel: 8 pixels per longword. */
static u_int32_t fourbitmask[8] = {
  0xf0000000, 0x0f000000,
  0x00f00000, 0x000f0000,
  0x0000f000, 0x00000f00,
  0x000000f0, 0x0000000f };
#endif /* RCONS_4BPP */
#ifdef RCONS_16BPP
/* 16 bits per pixel: 2 pixels per longword. */
static u_int32_t twobytemask[2] = { 0xffff0000, 0x0000ffff };
#endif /* RCONS_16BPP */
#else /*MSBYTE_FIRST*/
/* 8 bits per pixel: 4 pixels per longword. */
static u_int32_t bytemask[4] = { 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000 };
#ifdef RCONS_2BPP
/* 2 bits per pixel: 16 pixels per longword. */
static u_int32_t twobitmask[16] = {
  0x00000003, 0x0000000c, 0x00000030, 0x000000c0,
  0x00000300, 0x00000c00, 0x00003000, 0x0000c000,
  0x00030000, 0x000c0000, 0x00300000, 0x00c00000,
  0x03000000, 0x0c000000, 0x30000000, 0xc0000000 };
#endif /* RCONS_2BPP */
#ifdef RCONS_4BPP
/* 4 bits per pixel: 8 pixels per longword.  (This was declared with 16
** elements but only 8 initializers; sized to match the MSBYTE_FIRST table
** and the 0..7 index range used by the blit code.)
*/
static u_int32_t fourbitmask[8] = {
  0x0000000f, 0x000000f0,
  0x00000f00, 0x0000f000,
  0x000f0000, 0x00f00000,
  0x0f000000, 0xf0000000 };
#endif /* RCONS_4BPP */
#ifdef RCONS_16BPP
/* 16 bits per pixel: 2 pixels per longword. */
static u_int32_t twobytemask[2] = { 0x0000ffff, 0xffff0000 };
#endif /* RCONS_16BPP */
#endif /*MSBYTE_FIRST*/
546
547
548/* Forward routines. */
549
/* Worker for the same-depth blits.  As called from raster_op_noclip() the
** arguments are, for the source and then for the destination: the raster,
** a pointer to the first longword touched, the number of bits to ignore at
** the left edge, the number of bits to ignore at the right edge, and the
** number of longwords touched per line; followed by the height in lines
** and the operation code.  Its return value is passed straight back to
** the caller of raster_op_noclip().
*/
static int raster_blit(struct raster *, u_int32_t *, int, int, int,
			struct raster *, u_int32_t *, int, int, int,
			int, int);
553
554/* Raster operations.  */
555
556/* Performs a bitblit.  Returns 0 on success, -1 on failure. */
557int
558raster_op(struct raster* dst, int dx, int dy, int w, int h, int rop,
559    struct raster* src, int sx, int sy)
560    {
561    if ( dst == (struct raster*) 0 )
562	return -1;			/* no destination */
563
564    if ( needsrc[RAS_GETOP( rop )] )
565	{
566	/* Two-operand blit. */
567	if ( src == (struct raster*) 0 )
568	    return -1;			/* no source */
569
570	/* Clip against source. */
571	if ( sx < 0 )
572	    {
573	    w += sx;
574	    sx = 0;
575	    }
576	if ( sy < 0 )
577	    {
578	    h += sy;
579	    sy = 0;
580	    }
581	if ( sx + w > src->width )
582	    w = src->width - sx;
583	if ( sy + h > src->height )
584	    h = src->height - sy;
585
586	/* Clip against dest. */
587	if ( dx < 0 )
588	    {
589	    w += dx;
590	    sx -= dx;
591	    dx = 0;
592	    }
593	if ( dy < 0 )
594	    {
595	    h += dy;
596	    sy -= dy;
597	    dy = 0;
598	    }
599	if ( dx + w > dst->width )
600	    w = dst->width - dx;
601	if ( dy + h > dst->height )
602	    h = dst->height - dy;
603
604	if ( w <= 0 || h <= 0 )
605	    return 0;			/* nothing to do */
606
607	return raster_op_noclip( dst, dx, dy, w, h, rop, src, sx, sy );
608	}
609
610    /* No source necessary - one-operand blit. */
611    if ( src != (struct raster*) 0 )
612	return -1;			/* unwanted source */
613
614    /* Clip against dest. */
615    if ( dx < 0 )
616	{
617	w += dx;
618	dx = 0;
619	}
620    if ( dy < 0 )
621	{
622	h += dy;
623	dy = 0;
624	}
625    if ( dx + w > dst->width )
626	w = dst->width - dx;
627    if ( dy + h > dst->height )
628	h = dst->height - dy;
629
630    if ( w <= 0 || h <= 0 )
631	return 0;			/* nothing to do */
632
633    return raster_op_nosrc_noclip( dst, dx, dy, w, h, rop );
634    }
635
636/* Semi-public routine to do a bitblit without clipping.  Returns 0 on
637** success, -1 on failure.
638*/
639int
640raster_op_noclip(struct raster* dst, int dx, int dy, int w, int h, int rop,
641    struct raster* src, int sx, int sy)
642    {
643    int op;
644
645    op = RAS_GETOP( rop );
646
647    if ( src->depth == 1 )
648	{
649	/* One-bit to ? blit. */
650	if ( dst->depth == 1 )
651	    {
652	    /* One to one blit. */
653	    u_int32_t* srclin1;
654	    u_int32_t* dstlin1;
655	    int srcleftignore, srcrightignore, srclongs;
656	    int dstleftignore, dstrightignore, dstlongs;
657
658	    srclin1 = RAS_ADDR( src, sx, sy );
659	    dstlin1 = RAS_ADDR( dst, dx, dy );
660
661#ifdef BCOPY_FASTER
662	    /* Special-case full-width to full-width copies. */
663	    if ( op == RAS_SRC && src->width == w && dst->width == w &&
664		 src->linelongs == dst->linelongs && src->linelongs == w >> 5 )
665		{
666		bcopy(
667		    (char*) srclin1, (char*) dstlin1,
668		    h * src->linelongs * sizeof(u_int32_t) );
669		return 0;
670		}
671#endif /*BCOPY_FASTER*/
672
673	    srcleftignore = ( sx & 31 );
674	    srclongs = ( srcleftignore + w + 31 ) >> 5;
675	    srcrightignore = ( srclongs * 32 - w - srcleftignore ) & 31;
676	    dstleftignore = ( dx & 31 );
677	    dstlongs = ( dstleftignore + w + 31 ) >> 5;
678	    dstrightignore = ( dstlongs * 32 - w - dstleftignore ) & 31;
679
680	    return raster_blit(
681		src, srclin1, srcleftignore, srcrightignore, srclongs,
682		dst, dstlin1, dstleftignore, dstrightignore, dstlongs, h, op );
683	    }
684
685#ifdef RCONS_2BPP
686	else if ( dst->depth == 2 )
687          {
688            /* One to two, using the color in the rop.  */
689	    u_int32_t* srclin1;
690	    u_int32_t* dstlin1;
691	    u_int32_t* srclin2;
692	    u_int32_t* srclin;
693	    u_int32_t* dstlin;
694	    u_int32_t* srclong;
695	    u_int32_t* dstlong;
696	    u_int32_t color, dl;
697	    int srcbit, dstbyte, i;
698
699	    color = RAS_GETCOLOR( rop );
700	    if ( color == 0 )
701              color = 3;
702
703	    /* Make 32 bits of color so we can do the ROP without shifting. */
704	    color |= (( color << 30 ) | ( color << 28 ) | ( color << 26 )
705                      | ( color << 24 ) | ( color << 22 ) | ( color << 20 )
706                      | ( color << 18 ) | ( color << 16 ) | ( color << 14 )
707                      | ( color << 12 ) | ( color << 10 ) | ( color << 8 )
708                      | ( color << 6 ) | ( color << 4 ) | ( color << 2 ));
709
710	    /* Don't have to worry about overlapping blits here. */
711	    srclin1 = RAS_ADDR( src, sx, sy );
712	    srclin2 = srclin1 + h * src->linelongs;
713	    dstlin1 = RAS_ADDR( dst, dx, dy );
714	    srclin = srclin1;
715	    dstlin = dstlin1;
716
717	    while ( srclin != srclin2 )
718		{
719		srclong = srclin;
720		srcbit = sx & 31;
721		dstlong = dstlin;
722		dstbyte = dx & 15;
723		i = w;
724
725		/* WARNING: this code is KNOWN TO FAIL on Sun 3's / CG2's. */
726		ROP_SRCDSTCOLOR(
727		/*op*/  op,
728		/*pre*/ while ( i > 0 )
729			    {
730			    dl = *dstlong;,
731		/*s*/       *srclong & raster_bitmask[srcbit],
732		/*d*/       dl,
733		/*c*/       color,
734		/*pst*/     *dstlong = ( *dstlong & ~twobitmask[dstbyte] ) |
735				       ( dl & twobitmask[dstbyte] );
736			    if ( srcbit == 31 )
737				{
738				srcbit = 0;
739				++srclong;
740				}
741			    else
742				++srcbit;
743			    if ( dstbyte == 15 )
744				{
745				dstbyte = 0;
746				++dstlong;
747				}
748			    else
749				++dstbyte;
750			    --i;
751			    } )
752
753		srclin += src->linelongs;
754		dstlin += dst->linelongs;
755		}
756          }
757#endif /* RCONS_2BPP */
758#ifdef RCONS_4BPP
759	else if ( dst->depth == 4 )
760          {
761            /* One to four, using the color in the rop.  */
762	    u_int32_t* srclin1;
763	    u_int32_t* dstlin1;
764	    u_int32_t* srclin2;
765	    u_int32_t* srclin;
766	    u_int32_t* dstlin;
767	    u_int32_t* srclong;
768	    u_int32_t* dstlong;
769	    u_int32_t color, dl;
770	    int srcbit, dstbyte, i;
771
772	    color = RAS_GETCOLOR( rop );
773	    if ( color == 0 )
774              color = 15;
775
776	    /* Make 32 bits of color so we can do the ROP without shifting. */
777	    color |= (( color << 28 ) | ( color << 24 )
778                      | ( color << 20 ) | ( color << 16 )
779                      | ( color << 12 ) | ( color << 8 )
780                      | ( color << 4 ));
781
782	    /* Don't have to worry about overlapping blits here. */
783	    srclin1 = RAS_ADDR( src, sx, sy );
784	    srclin2 = srclin1 + h * src->linelongs;
785	    dstlin1 = RAS_ADDR( dst, dx, dy );
786	    srclin = srclin1;
787	    dstlin = dstlin1;
788
789	    while ( srclin != srclin2 )
790		{
791		srclong = srclin;
792		srcbit = sx & 31;
793		dstlong = dstlin;
794		dstbyte = dx & 7;
795		i = w;
796
797		/* WARNING: this code is KNOWN TO FAIL on Sun 3's / CG2's. */
798		ROP_SRCDSTCOLOR(
799		/*op*/  op,
800		/*pre*/ while ( i > 0 )
801			    {
802			    dl = *dstlong;,
803		/*s*/       *srclong & raster_bitmask[srcbit],
804		/*d*/       dl,
805		/*c*/       color,
806		/*pst*/     *dstlong = ( *dstlong & ~fourbitmask[dstbyte] ) |
807				       ( dl & fourbitmask[dstbyte] );
808			    if ( srcbit == 31 )
809				{
810				srcbit = 0;
811				++srclong;
812				}
813			    else
814				++srcbit;
815			    if ( dstbyte == 7 )
816				{
817				dstbyte = 0;
818				++dstlong;
819				}
820			    else
821				++dstbyte;
822			    --i;
823			    } )
824
825		srclin += src->linelongs;
826		dstlin += dst->linelongs;
827		}
828          }
829#endif /* RCONS_4BPP */
830	else if ( dst->depth == 8 )
831	    {
832	    /* One to eight, using the color in the rop.  This could
833	    ** probably be sped up by handling each four-bit source nybble
834	    ** as a group, indexing into a 16-element runtime-constructed
835	    ** table of longwords.
836	    */
837	    u_int32_t* srclin1;
838	    u_int32_t* dstlin1;
839	    u_int32_t* srclin2;
840	    u_int32_t* srclin;
841	    u_int32_t* dstlin;
842	    u_int32_t* srclong;
843	    u_int32_t* dstlong;
844	    u_int32_t color, dl;
845	    int srcbit, dstbyte, i;
846
847	    color = RAS_GETCOLOR( rop );
848	    if ( color == 0 )
849		color = 255;
850
851	    /* Make 32 bits of color so we can do the ROP without shifting. */
852	    color |= ( color << 24 ) | ( color << 16 ) | ( color << 8 );
853
854	    /* Don't have to worry about overlapping blits here. */
855	    srclin1 = RAS_ADDR( src, sx, sy );
856	    srclin2 = srclin1 + h * src->linelongs;
857	    dstlin1 = RAS_ADDR( dst, dx, dy );
858	    srclin = srclin1;
859	    dstlin = dstlin1;
860	    while ( srclin != srclin2 )
861		{
862		srclong = srclin;
863		srcbit = sx & 31;
864		dstlong = dstlin;
865		dstbyte = dx & 3;
866		i = w;
867
868		/* WARNING: this code is KNOWN TO FAIL on Sun 3's / CG2's. */
869		ROP_SRCDSTCOLOR(
870		/*op*/  op,
871		/*pre*/ while ( i > 0 )
872			    {
873			    dl = *dstlong;,
874		/*s*/       *srclong & raster_bitmask[srcbit],
875		/*d*/       dl,
876		/*c*/       color,
877		/*pst*/     *dstlong = ( *dstlong & ~bytemask[dstbyte] ) |
878				       ( dl & bytemask[dstbyte] );
879			    if ( srcbit == 31 )
880				{
881				srcbit = 0;
882				++srclong;
883				}
884			    else
885				++srcbit;
886			    if ( dstbyte == 3 )
887				{
888				dstbyte = 0;
889				++dstlong;
890				}
891			    else
892				++dstbyte;
893			    --i;
894			    } )
895
896		srclin += src->linelongs;
897		dstlin += dst->linelongs;
898		}
899	    }
900#ifdef RCONS_16BPP
901	else
902	    {
903	    /* One to sixteen, using the color in the rop.  This could
904	    ** probably be sped up by handling each four-bit source nybble
905	    ** as a group, indexing into a 16-element runtime-constructed
906	    ** table of longwords.
907	    */
908	    u_int32_t* srclin1;
909	    u_int32_t* dstlin1;
910	    u_int32_t* srclin2;
911	    u_int32_t* srclin;
912	    u_int32_t* dstlin;
913	    u_int32_t* srclong;
914	    u_int32_t* dstlong;
915	    u_int32_t color, dl;
916	    int srcbit, dstbyte, i;
917
918	    color = RAS_GETCOLOR( rop );
919	    if ( color == 0 )
920		color = 0xffff;
921
922	    /* Make 32 bits of color so we can do the ROP without shifting. */
923	    color |= ( color << 16 );
924
925	    /* Don't have to worry about overlapping blits here. */
926	    srclin1 = RAS_ADDR( src, sx, sy );
927	    srclin2 = srclin1 + h * src->linelongs;
928	    dstlin1 = RAS_ADDR( dst, dx, dy );
929	    srclin = srclin1;
930	    dstlin = dstlin1;
931	    while ( srclin != srclin2 )
932		{
933		srclong = srclin;
934		srcbit = sx & 31;
935		dstlong = dstlin;
936		dstbyte = dx & 1;
937		i = w;
938
939		/* WARNING: this code is KNOWN TO FAIL on Sun 3's / CG2's. */
940		ROP_SRCDSTCOLOR(
941		/*op*/  op,
942		/*pre*/ while ( i > 0 )
943			    {
944			    dl = *dstlong;,
945		/*s*/       *srclong & raster_bitmask[srcbit],
946		/*d*/       dl,
947		/*c*/       color,
948		/*pst*/     *dstlong = ( *dstlong & ~twobytemask[dstbyte] ) |
949				       ( dl & twobytemask[dstbyte] );
950			    if ( srcbit == 31 )
951				{
952				srcbit = 0;
953				++srclong;
954				}
955			    else
956				++srcbit;
957			    if ( dstbyte == 1 )
958				{
959				dstbyte = 0;
960				++dstlong;
961				}
962			    else
963				++dstbyte;
964			    --i;
965			    } )
966
967		srclin += src->linelongs;
968		dstlin += dst->linelongs;
969		}
970	    }
971#endif /* RCONS_16BPP */
972	}
973#ifdef RCONS_2BPP
974    else if ( src->depth == 2 )
975      {
976        /* Two to two blit. */
977	    u_int32_t* srclin1;
978	    u_int32_t* dstlin1;
979	    int srcleftignore, srcrightignore, srclongs;
980	    int dstleftignore, dstrightignore, dstlongs;
981
982	    srclin1 = RAS_ADDR( src, sx, sy );
983	    dstlin1 = RAS_ADDR( dst, dx, dy );
984
985	    srcleftignore = ( sx & 15 ) * 2;
986	    srclongs = ( srcleftignore + w * 2 + 31 ) >> 5;
987	    srcrightignore = ( srclongs * 32 - w * 2 - srcleftignore ) & 31;
988	    dstleftignore = ( dx & 15 ) * 2;
989	    dstlongs = ( dstleftignore + w * 2 + 31 ) >> 5;
990	    dstrightignore = ( dstlongs * 32 - w * 2 - dstleftignore ) & 31;
991
992	    return raster_blit(
993		src, srclin1, srcleftignore, srcrightignore, srclongs,
994		dst, dstlin1, dstleftignore, dstrightignore, dstlongs, h, op );
995	    }
996#endif /* RCONS_2BPP */
997#ifdef RCONS_4BPP
998    else if ( src->depth == 4 )
999      {
1000        /* Four to four blit. */
1001	    u_int32_t* srclin1;
1002	    u_int32_t* dstlin1;
1003	    int srcleftignore, srcrightignore, srclongs;
1004	    int dstleftignore, dstrightignore, dstlongs;
1005
1006	    srclin1 = RAS_ADDR( src, sx, sy );
1007	    dstlin1 = RAS_ADDR( dst, dx, dy );
1008
1009	    srcleftignore = ( sx & 7 ) * 4;
1010	    srclongs = ( srcleftignore + w * 4 + 31 ) >> 5;
1011	    srcrightignore = ( srclongs * 32 - w * 4 - srcleftignore ) & 31;
1012	    dstleftignore = ( dx & 7 ) * 4;
1013	    dstlongs = ( dstleftignore + w * 4 + 31 ) >> 5;
1014	    dstrightignore = ( dstlongs * 32 - w * 4 - dstleftignore ) & 31;
1015
1016	    return raster_blit(
1017		src, srclin1, srcleftignore, srcrightignore, srclongs,
1018		dst, dstlin1, dstleftignore, dstrightignore, dstlongs, h, op );
1019	    }
1020#endif /* RCONS_4BPP */
1021
1022    else if ( src->depth == 8 )
1023	{
1024	/* Eight to eight blit. */
1025	u_int32_t* srclin1;
1026	u_int32_t* dstlin1;
1027	int srcleftignore, srcrightignore, srclongs;
1028	int dstleftignore, dstrightignore, dstlongs;
1029
1030	if ( dst->depth != 8 )
1031	    return -1;		/* depth mismatch */
1032
1033	srclin1 = RAS_ADDR( src, sx, sy );
1034	dstlin1 = RAS_ADDR( dst, dx, dy );
1035
1036#ifdef BCOPY_FASTER
1037	/* Special-case full-width to full-width copies. */
1038	if ( op == RAS_SRC && src->width == w && dst->width == w &&
1039	     src->linelongs == dst->linelongs && src->linelongs == w >> 2 )
1040	    {
1041	    bcopy( (char*) srclin1, (char*) dstlin1,
1042		   h * src->linelongs * sizeof(u_int32_t) );
1043	    return 0;
1044	    }
1045#endif /*BCOPY_FASTER*/
1046
1047	srcleftignore = ( sx & 3 ) * 8;
1048	srclongs = ( srcleftignore + w * 8 + 31 ) >> 5;
1049	srcrightignore = ( srclongs * 32 - w * 8 - srcleftignore ) & 31;
1050	dstleftignore = ( dx & 3 ) * 8;
1051	dstlongs = ( dstleftignore + w * 8 + 31 ) >> 5;
1052	dstrightignore = ( dstlongs * 32 - w * 8 - dstleftignore ) & 31;
1053
1054	return raster_blit(
1055	    src, srclin1, srcleftignore, srcrightignore, srclongs,
1056	    dst, dstlin1, dstleftignore, dstrightignore, dstlongs, h, op );
1057	}
1058#ifdef RCONS_16BPP
1059    else
1060        {
1061	/* Sixteen to sixteen blit. */
1062	    u_int32_t* srclin1;
1063	    u_int32_t* dstlin1;
1064	    int srcleftignore, srcrightignore, srclongs;
1065	    int dstleftignore, dstrightignore, dstlongs;
1066
1067	    srclin1 = RAS_ADDR( src, sx, sy );
1068	    dstlin1 = RAS_ADDR( dst, dx, dy );
1069
1070	    srcleftignore = ( sx & 1 ) * 16;
1071	    srclongs = ( srcleftignore + w * 16 + 31 ) >> 5;
1072	    srcrightignore = ( srclongs * 32 - w * 16 - srcleftignore ) & 31;
1073	    dstleftignore = ( dx & 1 ) * 16;
1074	    dstlongs = ( dstleftignore + w * 16 + 31 ) >> 5;
1075	    dstrightignore = ( dstlongs * 32 - w * 16 - dstleftignore ) & 31;
1076
1077	    return raster_blit(
1078		src, srclin1, srcleftignore, srcrightignore, srclongs,
1079		dst, dstlin1, dstleftignore, dstrightignore, dstlongs, h, op );
1080	}
1081#endif /* RCONS_16BPP */
1082    return 0;
1083    }
1084
1085/* Semi-public routine to do a no-src bitblit without clipping.  Returns 0
1086** on success, -1 on failure.
1087*/
1088int
1089raster_op_nosrc_noclip(struct raster* dst,
1090    int dx, int dy, int w, int h, int rop)
1091    {
1092    int op;
1093
1094    op = RAS_GETOP( rop );
1095
1096    if ( dst->depth == 1 )
1097	{
1098	/* One-bit no-src blit. */
1099	u_int32_t* dstlin1;
1100	u_int32_t* dstlin2;
1101	u_int32_t* dstlin;
1102	int dstleftignore, dstrightignore, dstlongs;
1103	u_int32_t dl, lm, nlm, rm, nrm;
1104	u_int32_t* dstlong2;
1105	u_int32_t* dstlong;
1106
1107	dstlin1 = RAS_ADDR( dst, dx, dy );
1108
1109#ifdef BCOPY_FASTER
1110	/* Special-case full-width clears. */
1111	if ( op == RAS_CLEAR && dst->width == w && dst->linelongs == w >> 5 )
1112	    {
1113	    memset( (char*) dstlin1, 0, h * dst->linelongs * sizeof(u_int32_t) );
1114	    return 0;
1115	    }
1116#endif /*BCOPY_FASTER*/
1117
1118	dstleftignore = ( dx & 31 );
1119	dstlongs = ( dstleftignore + w + 31 ) >> 5;
1120	dstrightignore = ( dstlongs * 32 - w - dstleftignore ) & 31;
1121
1122	dstlin2 = dstlin1 + h * dst->linelongs;
1123	dstlin = dstlin1;
1124
1125	if ( dstlongs == 1 )
1126	    {
1127	    /* It fits into a single longword. */
1128	    lm = leftmask[dstleftignore] | rightmask[dstrightignore];
1129	    nlm = ~lm;
1130	    while ( dstlin != dstlin2 )
1131		{
1132		ROP_DST(
1133		/*op*/  op,
1134		/*pre*/ dl = *dstlin;,
1135		/*d*/   dl,
1136		/*pst*/ *dstlin = ( *dstlin & lm ) | ( dl & nlm ); )
1137
1138		dstlin += dst->linelongs;
1139		}
1140	    }
1141	else
1142	    {
1143	    lm = leftmask[dstleftignore];
1144	    rm = rightmask[dstrightignore];
1145	    nrm = ~rm;
1146	    nlm = ~lm;
1147
1148	    while ( dstlin != dstlin2 )
1149		{
1150		dstlong = dstlin;
1151		dstlong2 = dstlong + dstlongs;
1152		if ( dstrightignore != 0 )
1153		    --dstlong2;
1154
1155		/* Leading edge. */
1156		if ( dstleftignore != 0 )
1157		    {
1158		    ROP_DST(
1159		    /*op*/  op,
1160		    /*pre*/ dl = *dstlong;,
1161		    /*d*/   dl,
1162		    /*pst*/ *dstlong = ( *dstlong & lm ) | ( dl & nlm ); )
1163		    ++dstlong;
1164		    }
1165
1166		/* Main rop. */
1167		ROP_DST(
1168		/*op*/  op,
1169		/*pre*/ while ( dstlong != dstlong2 )
1170			    {,
1171		/*d*/       *dstlong,
1172		/*pst*/     ++dstlong;
1173			    } )
1174
1175		/* Trailing edge. */
1176		if ( dstrightignore != 0 )
1177		    {
1178		    ROP_DST(
1179		    /*op*/  op,
1180		    /*pre*/ dl = *dstlong;,
1181		    /*d*/   dl,
1182		    /*pst*/ *dstlong = ( dl & nrm ) | ( *dstlong & rm ); )
1183		    }
1184
1185		dstlin += dst->linelongs;
1186		}
1187	    }
1188	}
1189
1190#ifdef RCONS_2BPP
1191    else if ( dst->depth == 2 )
1192	{
1193	/* Two-bit no-src blit. */
1194	u_int32_t color;
1195	u_int32_t* dstlin1;
1196	u_int32_t* dstlin2;
1197	u_int32_t* dstlin;
1198	int dstleftignore, dstrightignore, dstlongs;
1199	u_int32_t dl, lm, nlm, rm, nrm;
1200	u_int32_t* dstlong2;
1201	u_int32_t* dstlong;
1202
1203	dstlin1 = RAS_ADDR( dst, dx, dy );
1204
1205#ifdef BCOPY_FASTER
1206	/* Special-case full-width clears. */
1207	if ( op == RAS_CLEAR && dst->width == w && dst->linelongs == w >> 4 )
1208	    {
1209	    memset( (char*) dstlin1, 0, h * dst->linelongs * sizeof(u_int32_t) );
1210	    return 0;
1211	    }
1212#endif /*BCOPY_FASTER*/
1213
1214	color = RAS_GETCOLOR( rop );
1215	if ( color == 0 )
1216	    color = 3;
1217
1218        /* Make 32 bits of color so we can do the ROP without shifting. */
1219        color |= (( color << 30 ) | ( color << 28 ) | ( color << 26 )
1220                  | ( color << 24 ) | ( color << 22 ) | ( color << 20 )
1221                  | ( color << 18 ) | ( color << 16 ) | ( color << 14 )
1222                  | ( color << 12 ) | ( color << 10 ) | ( color << 8 )
1223                  | ( color << 6 ) | ( color << 4 ) | ( color << 2 ));
1224
1225	dstleftignore = ( dx & 15 ) * 2;
1226	dstlongs = ( dstleftignore + w * 2 + 31 ) >> 5;
1227	dstrightignore = ( dstlongs * 32 - w * 2 - dstleftignore ) & 31;
1228
1229	dstlin2 = dstlin1 + h * dst->linelongs;
1230	dstlin = dstlin1;
1231
1232	if ( dstlongs == 1 )
1233	    {
1234	    /* It fits into a single longword. */
1235	    lm = leftmask[dstleftignore] | rightmask[dstrightignore];
1236	    nlm = ~lm;
1237	    while ( dstlin != dstlin2 )
1238		{
1239		ROP_DST(
1240		/*op*/  op,
1241		/*pre*/ dl = *dstlin;,
1242		/*d*/   dl,
1243		/*pst*/ *dstlin = ( *dstlin & lm ) | ( dl & nlm ); )
1244
1245		dstlin += dst->linelongs;
1246		}
1247	    }
1248	else
1249	    {
1250	    lm = leftmask[dstleftignore];
1251	    rm = rightmask[dstrightignore];
1252	    nrm = ~rm;
1253	    nlm = ~lm;
1254
1255	    while ( dstlin != dstlin2 )
1256		{
1257		dstlong = dstlin;
1258		dstlong2 = dstlong + dstlongs;
1259		if ( dstrightignore != 0 )
1260		    --dstlong2;
1261
1262		/* Leading edge. */
1263		if ( dstleftignore != 0 )
1264		    {
1265		    ROP_DST(
1266		    /*op*/  op,
1267		    /*pre*/ dl = *dstlong;,
1268		    /*d*/   dl,
1269		    /*pst*/ *dstlong = ( *dstlong & lm ) | ( dl & nlm ); )
1270		    ++dstlong;
1271		    }
1272
1273		/* Main rop. */
1274		ROP_DST(
1275		/*op*/  op,
1276		/*pre*/ while ( dstlong != dstlong2 )
1277			    {,
1278		/*d*/       *dstlong,
1279		/*pst*/     ++dstlong;
1280			    } )
1281
1282		/* Trailing edge. */
1283		if ( dstrightignore != 0 )
1284		    {
1285		    ROP_DST(
1286		    /*op*/  op,
1287		    /*pre*/ dl = *dstlong;,
1288		    /*d*/   dl,
1289		    /*pst*/ *dstlong = ( dl & nrm ) | ( *dstlong & rm ); )
1290		    }
1291
1292		dstlin += dst->linelongs;
1293		}
1294	    }
1295	}
1296#endif /* RCONS_2BPP */
1297#ifdef RCONS_4BPP
1298    else if ( dst->depth == 4 )
1299	{
1300	/* Two-bit no-src blit. */
1301	u_int32_t color;
1302	u_int32_t* dstlin1;
1303	u_int32_t* dstlin2;
1304	u_int32_t* dstlin;
1305	int dstleftignore, dstrightignore, dstlongs;
1306	u_int32_t dl, lm, nlm, rm, nrm;
1307	u_int32_t* dstlong2;
1308	u_int32_t* dstlong;
1309
1310	dstlin1 = RAS_ADDR( dst, dx, dy );
1311
1312#ifdef BCOPY_FASTER
1313	/* Special-case full-width clears. */
1314	if ( op == RAS_CLEAR && dst->width == w && dst->linelongs == w >> 3 )
1315	    {
1316	    memset( (char*) dstlin1, 0, h * dst->linelongs * sizeof(u_int32_t) );
1317	    return 0;
1318	    }
1319#endif /*BCOPY_FASTER*/
1320
1321	color = RAS_GETCOLOR( rop );
1322	if ( color == 0 )
1323	    color = 15;
1324
1325	/* Make 32 bits of color so we can do the ROP without shifting. */
1326	color |= (( color << 28 ) | ( color << 24 )
1327		  | ( color << 20 ) | ( color << 16 )
1328		  | ( color << 12 ) | ( color << 8 )
1329		  | ( color << 4 ));
1330
1331	dstleftignore = ( dx & 7 ) * 4;
1332	dstlongs = ( dstleftignore + w * 4 + 31 ) >> 5;
1333	dstrightignore = ( dstlongs * 32 - w * 4 - dstleftignore ) & 31;
1334
1335	dstlin2 = dstlin1 + h * dst->linelongs;
1336	dstlin = dstlin1;
1337
1338	if ( dstlongs == 1 )
1339	    {
1340	    /* It fits into a single longword. */
1341	    lm = leftmask[dstleftignore] | rightmask[dstrightignore];
1342	    nlm = ~lm;
1343	    while ( dstlin != dstlin2 )
1344		{
1345		ROP_DST(
1346		/*op*/  op,
1347		/*pre*/ dl = *dstlin;,
1348		/*d*/   dl,
1349		/*pst*/ *dstlin = ( *dstlin & lm ) | ( dl & nlm ); )
1350
1351		dstlin += dst->linelongs;
1352		}
1353	    }
1354	else
1355	    {
1356	    lm = leftmask[dstleftignore];
1357	    rm = rightmask[dstrightignore];
1358	    nrm = ~rm;
1359	    nlm = ~lm;
1360
1361	    while ( dstlin != dstlin2 )
1362		{
1363		dstlong = dstlin;
1364		dstlong2 = dstlong + dstlongs;
1365		if ( dstrightignore != 0 )
1366		    --dstlong2;
1367
1368		/* Leading edge. */
1369		if ( dstleftignore != 0 )
1370		    {
1371		    ROP_DST(
1372		    /*op*/  op,
1373		    /*pre*/ dl = *dstlong;,
1374		    /*d*/   dl,
1375		    /*pst*/ *dstlong = ( *dstlong & lm ) | ( dl & nlm ); )
1376		    ++dstlong;
1377		    }
1378
1379		/* Main rop. */
1380		ROP_DST(
1381		/*op*/  op,
1382		/*pre*/ while ( dstlong != dstlong2 )
1383			    {,
1384		/*d*/       *dstlong,
1385		/*pst*/     ++dstlong;
1386			    } )
1387
1388		/* Trailing edge. */
1389		if ( dstrightignore != 0 )
1390		    {
1391		    ROP_DST(
1392		    /*op*/  op,
1393		    /*pre*/ dl = *dstlong;,
1394		    /*d*/   dl,
1395		    /*pst*/ *dstlong = ( dl & nrm ) | ( *dstlong & rm ); )
1396		    }
1397
1398		dstlin += dst->linelongs;
1399		}
1400	    }
1401	}
1402#endif /* RCONS_4BPP */
1403    else if ( dst->depth == 8)
1404	{
1405	/* Eight-bit no-src blit. */
1406	u_int32_t color;
1407	u_int32_t* dstlin1;
1408	u_int32_t* dstlin2;
1409	u_int32_t* dstlin;
1410	int dstleftignore, dstrightignore, dstlongs;
1411	u_int32_t dl, lm, nlm, rm, nrm;
1412	u_int32_t* dstlong2;
1413	u_int32_t* dstlong;
1414
1415	dstlin1 = RAS_ADDR( dst, dx, dy );
1416
1417#ifdef BCOPY_FASTER
1418	/* Special-case full-width clears. */
1419	if ( op == RAS_CLEAR && dst->width == w && dst->linelongs == w >> 2 )
1420	    {
1421	    memset( (char*) dstlin1, 0, h * dst->linelongs * sizeof(u_int32_t) );
1422	    return 0;
1423	    }
1424#endif /*BCOPY_FASTER*/
1425
1426	color = RAS_GETCOLOR( rop );
1427	if ( color == 0 )
1428	    color = 255;
1429
1430	/* Make 32 bits of color so we can do the ROP without shifting. */
1431	color |= ( color << 24 ) | ( color << 16 ) | ( color << 8 );
1432
1433	dstleftignore = ( dx & 3 ) * 8;
1434	dstlongs = ( dstleftignore + w * 8 + 31 ) >> 5;
1435	dstrightignore = ( dstlongs * 32 - w * 8 - dstleftignore ) & 31;
1436
1437	dstlin2 = dstlin1 + h * dst->linelongs;
1438	dstlin = dstlin1;
1439
1440	if ( dstlongs == 1 )
1441	    {
1442	    /* It fits into a single longword. */
1443	    lm = leftmask[dstleftignore] | rightmask[dstrightignore];
1444	    nlm = ~lm;
1445	    while ( dstlin != dstlin2 )
1446		{
1447		ROP_DSTCOLOR(
1448		/*op*/  op,
1449		/*pre*/ dl = *dstlin;,
1450		/*d*/   dl,
1451		/*c*/	color,
1452		/*pst*/ *dstlin = ( *dstlin & lm ) | ( dl & nlm ); )
1453
1454		dstlin += dst->linelongs;
1455		}
1456	    }
1457	else
1458	    {
1459	    lm = leftmask[dstleftignore];
1460	    rm = rightmask[dstrightignore];
1461	    nrm = ~rm;
1462	    nlm = ~lm;
1463	    while ( dstlin != dstlin2 )
1464		{
1465		dstlong = dstlin;
1466		dstlong2 = dstlong + dstlongs;
1467		if ( dstrightignore != 0 )
1468		    --dstlong2;
1469
1470		/* Leading edge. */
1471		if ( dstleftignore != 0 )
1472		    {
1473		    ROP_DSTCOLOR(
1474		    /*op*/  op,
1475		    /*pre*/ dl = *dstlong;,
1476		    /*d*/   dl,
1477		    /*c*/   color,
1478		    /*pst*/ *dstlong = ( *dstlong & lm ) | ( dl & nlm ); )
1479		    ++dstlong;
1480		    }
1481
1482		/* Main rop. */
1483		ROP_DSTCOLOR(
1484		/*op*/  op,
1485		/*pre*/ while ( dstlong != dstlong2 )
1486			    {,
1487		/*d*/       *dstlong,
1488		/*c*/       color,
1489		/*pst*/     ++dstlong;
1490			    } )
1491
1492		/* Trailing edge. */
1493		if ( dstrightignore != 0 )
1494		    {
1495		    ROP_DSTCOLOR(
1496		    /*op*/  op,
1497		    /*pre*/ dl = *dstlong;,
1498		    /*d*/   dl,
1499		    /*c*/   color,
1500		    /*pst*/ *dstlong = ( dl & nrm ) | ( *dstlong & rm ); )
1501		    }
1502
1503		dstlin += dst->linelongs;
1504		}
1505	    }
1506	}
1507#ifdef RCONS_16BPP
1508    else
1509	{
1510	/* Sixteen-bit no-src blit. */
1511	u_int32_t color;
1512	u_int32_t* dstlin1;
1513	u_int32_t* dstlin2;
1514	u_int32_t* dstlin;
1515	int dstleftignore, dstrightignore, dstlongs;
1516	u_int32_t dl, lm, nlm, rm, nrm;
1517	u_int32_t* dstlong2;
1518	u_int32_t* dstlong;
1519
1520	dstlin1 = RAS_ADDR( dst, dx, dy );
1521
1522#ifdef BCOPY_FASTER
1523	/* Special-case full-width clears. */
1524	if ( op == RAS_CLEAR && dst->width == w && dst->linelongs == w >> 1 )
1525	    {
1526	    memset( (char*) dstlin1, 0, h * dst->linelongs * sizeof(u_int32_t) );
1527	    return 0;
1528	    }
1529#endif /*BCOPY_FASTER*/
1530
1531	color = RAS_GETCOLOR( rop );
1532	if ( color == 0 )
1533		color = 0xffff; /* XXX */
1534
1535	/* Make 32 bits of color so we can do the ROP without shifting. */
1536	color |= ( color << 16 );
1537
1538	dstleftignore = ( dx & 1 ) * 16;
1539	dstlongs = ( dstleftignore + w * 16 + 31 ) >> 5;
1540	dstrightignore = ( dstlongs * 32 - w * 8 - dstleftignore ) & 31;
1541
1542	dstlin2 = dstlin1 + h * dst->linelongs;
1543	dstlin = dstlin1;
1544
1545	if ( dstlongs == 1 )
1546	    {
1547	    /* It fits into a single longword. */
1548	    lm = leftmask[dstleftignore] | rightmask[dstrightignore];
1549	    nlm = ~lm;
1550	    while ( dstlin != dstlin2 )
1551		{
1552		ROP_DSTCOLOR(
1553		/*op*/  op,
1554		/*pre*/ dl = *dstlin;,
1555		/*d*/   dl,
1556		/*c*/	color,
1557		/*pst*/ *dstlin = ( *dstlin & lm ) | ( dl & nlm ); )
1558
1559		dstlin += dst->linelongs;
1560		}
1561	    }
1562	else
1563	    {
1564	    lm = leftmask[dstleftignore];
1565	    rm = rightmask[dstrightignore];
1566	    nrm = ~rm;
1567	    nlm = ~lm;
1568	    while ( dstlin != dstlin2 )
1569		{
1570		dstlong = dstlin;
1571		dstlong2 = dstlong + dstlongs;
1572		if ( dstrightignore != 0 )
1573		    --dstlong2;
1574
1575		/* Leading edge. */
1576		if ( dstleftignore != 0 )
1577		    {
1578		    ROP_DSTCOLOR(
1579		    /*op*/  op,
1580		    /*pre*/ dl = *dstlong;,
1581		    /*d*/   dl,
1582		    /*c*/   color,
1583		    /*pst*/ *dstlong = ( *dstlong & lm ) | ( dl & nlm ); )
1584		    ++dstlong;
1585		    }
1586
1587		/* Main rop. */
1588		ROP_DSTCOLOR(
1589		/*op*/  op,
1590		/*pre*/ while ( dstlong != dstlong2 )
1591			    {,
1592		/*d*/       *dstlong,
1593		/*c*/       color,
1594		/*pst*/     ++dstlong;
1595			    } )
1596
1597		/* Trailing edge. */
1598		if ( dstrightignore != 0 )
1599		    {
1600		    ROP_DSTCOLOR(
1601		    /*op*/  op,
1602		    /*pre*/ dl = *dstlong;,
1603		    /*d*/   dl,
1604		    /*c*/   color,
1605		    /*pst*/ *dstlong = ( dl & nrm ) | ( *dstlong & rm ); )
1606		    }
1607
1608		dstlin += dst->linelongs;
1609		}
1610	    }
1611	}
1612#endif /* RCONS_16BPP */
1613
1614    return 0;
1615    }
1616
1617/* This is a general bitblit routine, handling overlapping source and
1618** destination.  It's used for both the 1-to-1 and 8-to-8 cases.
1619*/
1620static int
1621raster_blit(
1622    struct raster* src, uint32_t* srclin1,
1623    int srcleftignore, int srcrightignore, int srclongs,
1624    struct raster* dst,
1625    uint32_t* dstlin1,
1626    int dstleftignore, int dstrightignore, int dstlongs,
1627    int h, int op)
1628    {
1629    u_int32_t* srclin2;
1630    u_int32_t* dstlin2;
1631    int srclininc, dstlininc;
1632    u_int32_t* srclin;
1633    u_int32_t* dstlin;
1634    int prevleftshift, currrightshift;
1635    int longinc;
1636    u_int32_t* srclong;
1637    u_int32_t* dstlong;
1638    u_int32_t* dstlong2;
1639    u_int32_t dl, lm, nlm, rm, nrm;
1640
1641    prevleftshift = ( srcleftignore - dstleftignore ) & 31;
1642
1643    srclin2 = srclin1 + h * src->linelongs;
1644    dstlin2 = dstlin1 + h * dst->linelongs;
1645    srclininc = src->linelongs;
1646    dstlininc = dst->linelongs;
1647    longinc = 1;
1648
1649    /* Check for overlaps. */
1650    if ( ( dstlin1 >= srclin1 && dstlin1 < srclin1 + srclongs ) ||
1651	 ( srclin1 >= dstlin1 && srclin1 < dstlin1 + dstlongs ) )
1652	{
1653	/* Horizontal overlap.  Should we reverse? */
1654	if ( srclin1 < dstlin1 )
1655	    {
1656	    longinc = -1;
1657	    srclin1 += srclongs - 1;
1658	    srclin2 += srclongs - 1;
1659	    dstlin1 += dstlongs - 1;
1660	    }
1661	}
1662    else if ( ( dstlin1 >= srclin1 && dstlin1 < srclin2 ) ||
1663	      ( srclin1 >= dstlin1 && srclin1 < dstlin2 ) )
1664	{
1665	/* Vertical overlap.  Should we reverse? */
1666	if ( srclin1 < dstlin1 )
1667	    {
1668	    srclin2 = srclin1 - srclininc;
1669	    srclin1 += ( h - 1 ) * srclininc;
1670	    dstlin1 += ( h - 1 ) * dstlininc;
1671	    srclininc = -srclininc;
1672	    dstlininc = -dstlininc;
1673	    }
1674	}
1675    srclin = srclin1;
1676    dstlin = dstlin1;
1677
1678    if ( prevleftshift == 0 )
1679	{
1680	/* The bits line up, no shifting necessary. */
1681	if ( dstlongs == 1 )
1682	    {
1683	    /* It all fits into a single longword. */
1684	    lm = leftmask[dstleftignore] | rightmask[dstrightignore];
1685	    nlm = ~lm;
1686	    while ( srclin != srclin2 )
1687		{
1688		ROP_SRCDST(
1689		/*op*/  op,
1690		/*pre*/ dl = *dstlin;,
1691		/*s*/   *srclin,
1692		/*d*/   dl,
1693		/*pst*/ *dstlin = ( *dstlin & lm ) | ( dl & nlm ); )
1694
1695		srclin += srclininc;
1696		dstlin += dstlininc;
1697		}
1698	    }
1699	else
1700	    {
1701	    /* Multiple longwords. */
1702	    lm = leftmask[dstleftignore];
1703	    rm = rightmask[dstrightignore];
1704	    nrm = ~rm;
1705	    nlm = ~lm;
1706	    if ( longinc == 1 )
1707		{
1708		/* Left to right. */
1709		while ( srclin != srclin2 )
1710		    {
1711		    srclong = srclin;
1712		    dstlong = dstlin;
1713		    dstlong2 = dstlong + dstlongs;
1714		    if ( dstrightignore != 0 )
1715			--dstlong2;
1716
1717		    /* Leading edge. */
1718		    if ( dstleftignore != 0 )
1719			{
1720			ROP_SRCDST(
1721			/*op*/  op,
1722			/*pre*/ dl = *dstlong;,
1723			/*s*/   *srclong,
1724			/*d*/   dl,
1725			/*pst*/ *dstlong = ( *dstlong & lm ) | ( dl & nlm ); )
1726			++srclong;
1727			++dstlong;
1728			}
1729
1730		    /* Main rop. */
1731		    ROP_SRCDST(
1732		    /*op*/  op,
1733		    /*pre*/ while ( dstlong != dstlong2 )
1734				{,
1735		    /*s*/       *srclong,
1736		    /*d*/       *dstlong,
1737		    /*pst*/     ++srclong;
1738				++dstlong;
1739				} )
1740
1741		    /* Trailing edge. */
1742		    if ( dstrightignore != 0 )
1743			{
1744			ROP_SRCDST(
1745			/*op*/  op,
1746			/*pre*/ dl = *dstlong;,
1747			/*s*/   *srclong,
1748			/*d*/   dl,
1749			/*pst*/ *dstlong = ( dl & nrm ) | ( *dstlong & rm ); )
1750			}
1751
1752		    srclin += srclininc;
1753		    dstlin += dstlininc;
1754		    }
1755		}
1756	    else
1757		{
1758		/* Right to left. */
1759		while ( srclin != srclin2 )
1760		    {
1761		    srclong = srclin;
1762		    dstlong = dstlin;
1763		    dstlong2 = dstlong - dstlongs;
1764		    if ( dstleftignore != 0 )
1765			++dstlong2;
1766
1767		    /* Leading edge. */
1768		    if ( dstrightignore != 0 )
1769			{
1770			ROP_SRCDST(
1771			/*op*/  op,
1772			/*pre*/ dl = *dstlong;,
1773			/*s*/   *srclong,
1774			/*d*/   dl,
1775			/*pst*/ *dstlong = ( dl & nrm ) | ( *dstlong & rm ); )
1776			--srclong;
1777			--dstlong;
1778			}
1779
1780		    /* Main rop. */
1781		    ROP_SRCDST(
1782		    /*op*/  op,
1783		    /*pre*/ while ( dstlong != dstlong2 )
1784				{,
1785		    /*s*/       *srclong,
1786		    /*d*/       *dstlong,
1787		    /*pst*/     --srclong;
1788				--dstlong;
1789				} )
1790
1791		    /* Trailing edge. */
1792		    if ( dstleftignore != 0 )
1793			{
1794			ROP_SRCDST(
1795			/*op*/  op,
1796			/*pre*/ dl = *dstlong;,
1797			/*s*/   *srclong,
1798			/*d*/   dl,
1799			/*pst*/ *dstlong = ( *dstlong & lm ) | ( dl & nlm ); )
1800			}
1801
1802		    srclin += srclininc;
1803		    dstlin += dstlininc;
1804		    }
1805		}
1806	    }
1807	}
1808
1809    else
1810	{
1811	/* General case, with shifting and everything. */
1812	u_int32_t sl, prevsl;
1813
1814	currrightshift = 32 - prevleftshift;
1815	if ( srclongs == 1 && dstlongs == 1 )
1816	    {
1817	    /* It fits into a single longword, with a shift. */
1818	    lm = leftmask[dstleftignore] | rightmask[dstrightignore];
1819	    nlm = ~lm;
1820	    if ( srcleftignore > dstleftignore )
1821		{
1822		while ( srclin != srclin2 )
1823		    {
1824		    ROP_SRCDST(
1825		    /*op*/  op,
1826		    /*pre*/ dl = *dstlin;,
1827		    /*s*/   *srclin LSOP prevleftshift,
1828		    /*d*/   dl,
1829		    /*pst*/ *dstlin = ( *dstlin & lm ) | ( dl & nlm ); )
1830
1831		    srclin += srclininc;
1832		    dstlin += dstlininc;
1833		    }
1834		}
1835	    else
1836		{
1837		while ( srclin != srclin2 )
1838		    {
1839		    ROP_SRCDST(
1840		    /*op*/  op,
1841		    /*pre*/ dl = *dstlin;,
1842		    /*s*/   *srclin RSOP currrightshift,
1843		    /*d*/   dl,
1844		    /*pst*/ *dstlin = ( *dstlin & lm ) | ( dl & nlm ); )
1845
1846		    srclin += srclininc;
1847		    dstlin += dstlininc;
1848		    }
1849		}
1850	    }
1851	else
1852	    {
1853	    /* Multiple longwords. */
1854	    lm = leftmask[dstleftignore];
1855	    rm = rightmask[dstrightignore];
1856	    nrm = ~rm;
1857	    nlm = ~lm;
1858	    if ( longinc == 1 )
1859		{
1860		/* Left to right. */
1861		while ( srclin != srclin2 )
1862		    {
1863		    srclong = srclin;
1864		    dstlong = dstlin;
1865		    dstlong2 = dstlong + dstlongs;
1866		    if ( srcleftignore > dstleftignore )
1867			prevsl = *srclong++ LSOP prevleftshift;
1868		    else
1869			prevsl = 0;
1870		    if ( dstrightignore != 0 )
1871			--dstlong2;
1872
1873		    /* Leading edge. */
1874		    if ( dstleftignore != 0 )
1875			{
1876			ROP_SRCDST(
1877			/*op*/  op,
1878			/*pre*/ sl = *srclong;
1879				dl = *dstlong;,
1880			/*s*/   prevsl | ( sl RSOP currrightshift ),
1881			/*d*/   dl,
1882			/*pst*/ *dstlong = ( *dstlong & lm ) | ( dl & nlm ); )
1883			prevsl = sl LSOP prevleftshift;
1884			++srclong;
1885			++dstlong;
1886			}
1887
1888		    /* Main rop. */
1889		    ROP_SRCDST(
1890		    /*op*/  op,
1891		    /*pre*/ while ( dstlong != dstlong2 )
1892				{
1893				sl = *srclong;,
1894		    /*s*/       prevsl | ( sl RSOP currrightshift ),
1895		    /*d*/       *dstlong,
1896		    /*pst*/     prevsl = sl LSOP prevleftshift;
1897				++srclong;
1898				++dstlong;
1899				} )
1900
1901		    /* Trailing edge. */
1902		    if ( dstrightignore != 0 )
1903			{
1904			ROP_SRCDST(
1905			/*op*/  op,
1906			/*pre*/ dl = *dstlong;,
1907			/*s*/   prevsl | ( *srclong RSOP currrightshift ),
1908			/*d*/   dl,
1909			/*pst*/ *dstlong = ( dl & nrm ) | ( *dstlong & rm ); )
1910			}
1911
1912		    srclin += srclininc;
1913		    dstlin += dstlininc;
1914		    }
1915		}
1916	    else
1917		{
1918		/* Right to left. */
1919		while ( srclin != srclin2 )
1920		    {
1921		    srclong = srclin;
1922		    dstlong = dstlin;
1923		    dstlong2 = dstlong - dstlongs;
1924		    if ( srcrightignore > dstrightignore )
1925			prevsl = *srclong-- RSOP currrightshift;
1926		    else
1927			prevsl = 0;
1928		    if ( dstleftignore != 0 )
1929			++dstlong2;
1930
1931		    /* Leading edge. */
1932		    if ( dstrightignore != 0 )
1933			{
1934			ROP_SRCDST(
1935			/*op*/  op,
1936			/*pre*/ sl = *srclong;
1937				dl = *dstlong;,
1938			/*s*/   prevsl | ( sl LSOP prevleftshift ),
1939			/*d*/   dl,
1940			/*pst*/ *dstlong = ( dl & nrm ) | ( *dstlong & rm ); )
1941			prevsl = sl RSOP currrightshift;
1942			--srclong;
1943			--dstlong;
1944			}
1945
1946		    /* Main rop. */
1947		    ROP_SRCDST(
1948		    /*op*/  op,
1949		    /*pre*/ while ( dstlong != dstlong2 )
1950				{
1951				sl = *srclong;,
1952		    /*s*/       prevsl | ( sl LSOP prevleftshift ),
1953		    /*d*/       *dstlong,
1954		    /*pst*/     prevsl = sl RSOP currrightshift;
1955				--srclong;
1956				--dstlong;
1957				} )
1958
1959		    /* Trailing edge. */
1960		    if ( dstleftignore != 0 )
1961			{
1962			ROP_SRCDST(
1963			/*op*/  op,
1964			/*pre*/ dl = *dstlong;,
1965			/*s*/   prevsl | ( *srclong LSOP prevleftshift ),
1966			/*d*/   dl,
1967			/*pst*/ *dstlong = ( *dstlong & lm ) | ( dl & nlm ); )
1968			}
1969
1970		    srclin += srclininc;
1971		    dstlin += dstlininc;
1972		    }
1973		}
1974	    }
1975	}
1976
1977    return 0;
1978    }
1979