/*
    SDL - Simple DirectMedia Layer
    Copyright (C) 1997, 1998  Sam Lantinga

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    Sam Lantinga
    5635-34 Springhouse Dr.
    Pleasanton, CA 94588 (USA)
    slouken@devolution.com
*/

#ifdef SAVE_RCSID
/* Revision identification string, compiled in only when SAVE_RCSID is
   defined.  It must be an array of char: a single `char' cannot be
   initialized from a string literal. */
static const char rcsid[] =
 "@(#) $Id: SDL_blit_N.c,v 1.9 1999/04/12 01:57:48 slouken Exp $";
#endif

#include <stdio.h>

#include "SDL_types.h"
#include "SDL_video.h"
#include "SDL_blit.h"

/* Function to check the CPU flags */
/* MMX_CPU is the flag a blit_table entry places in its cpu_flags field to
   require MMX support; SDL_CalculateBlitN() accepts an entry only when
   (CPU_Flags() & cpu_flags) == cpu_flags.
   NOTE(review): 0x800000 is presumably the MMX bit of the x86 CPUID
   feature flags as returned by Hermes_X86_CPU() -- confirm against Hermes. */
#define MMX_CPU		0x800000
#ifdef USE_ASMBLIT
#define CPU_Flags()	Hermes_X86_CPU()
#else
/* Without the assembly blitters no CPU feature is reported, so only table
   entries whose cpu_flags is 0 can match. */
#define CPU_Flags()	0L
#endif

/* Functions to blit from N-bit surfaces to other surfaces */

#ifdef USE_ASMBLIT

/* Heheheh, we coerce Hermes into using SDL blit information */
/* The definitions below must come before the Hermes headers are included.
   NOTE(review): presumably HeadMMX.h/HeadX86.h use these names in their
   declarations, so the converters (ConvertX86, ConvertMMX, ConvertX86p*...)
   referenced later in this file end up taking an SDL_BlitInfo pointer --
   confirm against the Hermes headers, which are not visible here. */
#define X86_ASSEMBLER
#define HermesConverterInterface	SDL_BlitInfo
#define HermesClearInterface		void
#define STACKCALL
typedef Uint32 int32;

#include "HeadMMX.h"
#include "HeadX86.h"

#else

/* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
/* Assigns to the lvalue `dst' the 8-bit value built from the top 3 bits of
   red (bits 21-23 of `src'), the top 3 bits of green (bits 13-15) and the
   top 2 bits of blue (bits 6-7).
   `dst' is evaluated once; `src' is evaluated three times and must not have
   side effects.  The do/while(0) wrapper makes the macro behave as a single
   statement, so it is safe in an unbraced if/else. */
#define RGB888_RGB332(dst, src) do { \
	(dst) = (((src)&0x00E00000)>>16)| \
	        (((src)&0x0000E000)>>11)| \
	        (((src)&0x000000C0)>>6); \
} while (0)
/*
 * Convert a rectangle of 32-bit RGB 8-8-8 pixels into 8-bit pixels.
 *
 * Each source pixel is reduced to a 3-3-2 value with RGB888_RGB332().
 * When info->table is NULL that value is stored directly; otherwise it is
 * used as an index into info->table and the table entry is stored.
 *
 * info->d_width and info->d_height give the rectangle size in pixels;
 * info->s_skip and info->d_skip are added after every row (s_skip is
 * divided by 4 because the source pointer advances in 32-bit units).
 */
static void Blit_RGB888_index8(SDL_BlitInfo *info)
{
	int n;
	int cols = info->d_width;
	int rows = info->d_height;
	int quads = cols/4;	/* complete groups of four pixels per row */
	int tail = cols%4;	/* pixels left over after those groups */
	Uint32 *s = (Uint32 *)info->s_pixels;
	int s_gap = info->s_skip/4;
	Uint8 *d = info->d_pixels;
	int d_gap = info->d_skip;
	Uint8 *lut = info->table;

	if ( lut == NULL ) {
		/* No lookup table: store the packed 3-3-2 value itself */
		for ( ; rows; --rows ) {
			for ( n=quads; n; --n ) {
				RGB888_RGB332(d[0], s[0]);
				RGB888_RGB332(d[1], s[1]);
				RGB888_RGB332(d[2], s[2]);
				RGB888_RGB332(d[3], s[3]);
				s += 4;
				d += 4;
			}
			for ( n=tail; n > 0; --n ) {
				RGB888_RGB332(*d, *s);
				++d;
				++s;
			}
			s += s_gap;
			d += d_gap;
		}
	} else {
		int rgb332;

		/* Translate every packed 3-3-2 value through the table */
		for ( ; rows; --rows ) {
			for ( n=quads; n; --n ) {
				RGB888_RGB332(rgb332, s[0]);
				d[0] = lut[rgb332];
				RGB888_RGB332(rgb332, s[1]);
				d[1] = lut[rgb332];
				RGB888_RGB332(rgb332, s[2]);
				d[2] = lut[rgb332];
				RGB888_RGB332(rgb332, s[3]);
				d[3] = lut[rgb332];
				s += 4;
				d += 4;
			}
			for ( n=tail; n > 0; --n ) {
				RGB888_RGB332(rgb332, *s);
				*d = lut[rgb332];
				++d;
				++s;
			}
			s += s_gap;
			d += d_gap;
		}
	}
}
/* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
/* Reads one 32-bit pixel through the pointer `src' and stores the 16-bit
   5-5-5 value (top 5 bits of each of red, green and blue) through the
   pointer `dst'.
   Both arguments are parenthesized so pointer expressions such as `p + 1'
   are handled correctly; `src' is evaluated three times and must not have
   side effects.  do/while(0) makes the macro a single statement. */
#define RGB888_RGB555(dst, src) do { \
	*(Uint16 *)(dst) = (((*(src))&0x00F80000)>>9)| \
	                   (((*(src))&0x0000F800)>>6)| \
	                   (((*(src))&0x000000F8)>>3); \
} while (0)
/* Converts the two 32-bit pixels (src)[0] and (src)[1] to 5-5-5 and writes
   them with one 32-bit store through `dst': (src)[0] goes into the low 16
   bits of the stored word and (src)[1] into the high 16 bits.
   NOTE(review): that layout puts (src)[0] first in memory only on a
   little-endian machine, and `dst' should be 4-byte aligned for the store.
   `src' is parenthesized so pointer expressions work; it is evaluated
   several times and must not have side effects. */
#define RGB888_RGB555_TWO(dst, src) do { \
	*(Uint32 *)(dst) = ((((((src)[1])&0x00F80000)>>9)| \
	                     ((((src)[1])&0x0000F800)>>6)| \
	                     ((((src)[1])&0x000000F8)>>3))<<16)| \
	                     ((((src)[0])&0x00F80000)>>9)| \
	                     ((((src)[0])&0x0000F800)>>6)| \
	                     ((((src)[0])&0x000000F8)>>3); \
} while (0)
/*
 * Convert a rectangle of 32-bit RGB 8-8-8 pixels to 16-bit RGB 5-5-5.
 *
 * Pixels are written two at a time with a single 32-bit store
 * (RGB888_RGB555_TWO) wherever possible.  If the destination does not
 * start on a 4-byte boundary, every row begins with one lone 16-bit pixel
 * so that the paired stores which follow are aligned.  A row remainder of
 * three pixels is written as a pair followed by a single pixel, so the
 * 32-bit store never lands on a 2-byte boundary.
 *
 * The alignment decision is made once, from the initial destination
 * pointer.  NOTE(review): this assumes every row starts with the same
 * alignment as the first one -- confirm the destination pitch is a
 * multiple of 4.
 *
 * info->s_skip and info->d_skip are converted to pixel units (divided by 4
 * and 2 respectively) before being added to the typed row pointers.
 */
static void Blit_RGB888_RGB555(SDL_BlitInfo *info)
{
	int c;
	int width, height;
	Uint32 *src;
	Uint16 *dst;
	int srcskip, dstskip;
	int lead;	/* non-zero: write one pixel first to align dst */

	/* Set up some basic variables */
	width = info->d_width;
	height = info->d_height;
	src = (Uint32 *)info->s_pixels;
	srcskip = info->s_skip/4;
	dst = (Uint16 *)info->d_pixels;
	dstskip = info->d_skip/2;

	/* Memory align at 4-byte boundary, if necessary */
	lead = ((long)dst & 0x03) ? 1 : 0;
	if ( lead ) {
		/* Don't do anything if width is 0 */
		if ( width == 0 ) {
			return;
		}
		/* The alignment pixel is accounted for separately */
		--width;
	}

	while ( height-- ) {
		if ( lead ) {
			/* Perform copy alignment */
			RGB888_RGB555(dst, src);
			++src;
			++dst;
		}

		/* Copy in 4 pixel chunks */
		for ( c=width/4; c; --c ) {
			RGB888_RGB555_TWO(dst, src);
			src += 2;
			dst += 2;
			RGB888_RGB555_TWO(dst, src);
			src += 2;
			dst += 2;
		}
		/* Get any leftovers */
		switch (width % 4) {
			case 3:
				/* Pair first: dst is still 4-byte aligned here */
				RGB888_RGB555_TWO(dst, src);
				src += 2;
				dst += 2;
				/* fall through to write the last pixel */
			case 1:
				RGB888_RGB555(dst, src);
				++src;
				++dst;
				break;
			case 2:
				RGB888_RGB555_TWO(dst, src);
				src += 2;
				dst += 2;
				break;
			default:
				break;
		}
		src += srcskip;
		dst += dstskip;
	}
}
/* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
/* Reads one 32-bit pixel through the pointer `src' and stores the 16-bit
   5-6-5 value (top 5 bits of red, top 6 bits of green, top 5 bits of blue)
   through the pointer `dst'.
   Both arguments are parenthesized so pointer expressions such as `p + 1'
   are handled correctly; `src' is evaluated three times and must not have
   side effects.  do/while(0) makes the macro a single statement. */
#define RGB888_RGB565(dst, src) do { \
	*(Uint16 *)(dst) = (((*(src))&0x00F80000)>>8)| \
	                   (((*(src))&0x0000FC00)>>5)| \
	                   (((*(src))&0x000000F8)>>3); \
} while (0)
/* Converts the two 32-bit pixels (src)[0] and (src)[1] to 5-6-5 and writes
   them with one 32-bit store through `dst': (src)[0] goes into the low 16
   bits of the stored word and (src)[1] into the high 16 bits.
   NOTE(review): that layout puts (src)[0] first in memory only on a
   little-endian machine, and `dst' should be 4-byte aligned for the store.
   `src' is parenthesized so pointer expressions work; it is evaluated
   several times and must not have side effects. */
#define RGB888_RGB565_TWO(dst, src) do { \
	*(Uint32 *)(dst) = ((((((src)[1])&0x00F80000)>>8)| \
	                     ((((src)[1])&0x0000FC00)>>5)| \
	                     ((((src)[1])&0x000000F8)>>3))<<16)| \
	                     ((((src)[0])&0x00F80000)>>8)| \
	                     ((((src)[0])&0x0000FC00)>>5)| \
	                     ((((src)[0])&0x000000F8)>>3); \
} while (0)
/*
 * Convert a rectangle of 32-bit RGB 8-8-8 pixels to 16-bit RGB 5-6-5.
 *
 * Pixels are written two at a time with a single 32-bit store
 * (RGB888_RGB565_TWO) wherever possible.  If the destination does not
 * start on a 4-byte boundary, every row begins with one lone 16-bit pixel
 * so that the paired stores which follow are aligned.  A row remainder of
 * three pixels is written as a pair followed by a single pixel, so the
 * 32-bit store never lands on a 2-byte boundary.
 *
 * The alignment decision is made once, from the initial destination
 * pointer.  NOTE(review): this assumes every row starts with the same
 * alignment as the first one -- confirm the destination pitch is a
 * multiple of 4.
 *
 * info->s_skip and info->d_skip are converted to pixel units (divided by 4
 * and 2 respectively) before being added to the typed row pointers.
 */
static void Blit_RGB888_RGB565(SDL_BlitInfo *info)
{
	int c;
	int width, height;
	Uint32 *src;
	Uint16 *dst;
	int srcskip, dstskip;
	int lead;	/* non-zero: write one pixel first to align dst */

	/* Set up some basic variables */
	width = info->d_width;
	height = info->d_height;
	src = (Uint32 *)info->s_pixels;
	srcskip = info->s_skip/4;
	dst = (Uint16 *)info->d_pixels;
	dstskip = info->d_skip/2;

	/* Memory align at 4-byte boundary, if necessary */
	lead = ((long)dst & 0x03) ? 1 : 0;
	if ( lead ) {
		/* Don't do anything if width is 0 */
		if ( width == 0 ) {
			return;
		}
		/* The alignment pixel is accounted for separately */
		--width;
	}

	while ( height-- ) {
		if ( lead ) {
			/* Perform copy alignment */
			RGB888_RGB565(dst, src);
			++src;
			++dst;
		}

		/* Copy in 4 pixel chunks */
		for ( c=width/4; c; --c ) {
			RGB888_RGB565_TWO(dst, src);
			src += 2;
			dst += 2;
			RGB888_RGB565_TWO(dst, src);
			src += 2;
			dst += 2;
		}
		/* Get any leftovers */
		switch (width % 4) {
			case 3:
				/* Pair first: dst is still 4-byte aligned here */
				RGB888_RGB565_TWO(dst, src);
				src += 2;
				dst += 2;
				/* fall through to write the last pixel */
			case 1:
				RGB888_RGB565(dst, src);
				++src;
				++dst;
				break;
			case 2:
				RGB888_RGB565_TWO(dst, src);
				src += 2;
				dst += 2;
				break;
			default:
				break;
		}
		src += srcskip;
		dst += dstskip;
	}
}
/* Special optimized blit for RGB 5-5-5 --> RGB 5-6-5 */
/* Reads a 5-5-5 pixel through `src' and stores the 5-6-5 equivalent through
   `dst': red and green (bits 5-14) move up one bit, blue (bits 0-4) stays,
   and the new lowest green bit (bit 5) is left clear.
   Arguments and the whole expansion are parenthesized so that pointer
   expressions work and the macro can be used as an expression. */
#define RGB555_RGB565(dst, src) \
	(*(dst) = ((((*(src))&0x7FE0)<<1)|((*(src))&0x001F)))
/* Special optimized blit for RGB 5-6-5 --> RGB 5-5-5 */
/* Reads a 5-6-5 pixel through `src' and stores the 5-5-5 equivalent through
   `dst': red and the top 5 bits of green (bits 6-15) move down one bit,
   blue (bits 0-4) stays, and the lowest green bit (bit 5) is dropped.
   Arguments and the whole expansion are parenthesized so that pointer
   expressions work and the macro can be used as an expression. */
#define RGB565_RGB555(dst, src) \
	(*(dst) = ((((*(src))&0xFFC0)>>1)|((*(src))&0x001F)))
/* FIXME... actually implement the blitters */
#endif /* USE_ASMBLIT */

/* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
/* Defined here only if no earlier definition exists.
   Assigns to the lvalue `dst' the 8-bit value built from the top 3 bits of
   red (bits 21-23 of `src'), the top 3 bits of green (bits 13-15) and the
   top 2 bits of blue (bits 6-7).
   `dst' is evaluated once; `src' is evaluated three times and must not have
   side effects.  The do/while(0) wrapper makes the macro behave as a single
   statement, so it is safe in an unbraced if/else. */
#ifndef RGB888_RGB332
#define RGB888_RGB332(dst, src) do { \
	(dst) = (((src)&0x00E00000)>>16)| \
	        (((src)&0x0000E000)>>11)| \
	        (((src)&0x000000C0)>>6); \
} while (0)
#endif
/*
 * Convert a rectangle of 32-bit RGB 8-8-8 pixels into 8-bit pixels through
 * a lookup table.
 *
 * Each source pixel is reduced to a 3-3-2 value with RGB888_RGB332(); that
 * value indexes info->table and the table entry is written to the
 * destination.  info->table is dereferenced unconditionally, so it must
 * not be NULL.
 *
 * info->s_skip and info->d_skip are added after every row (s_skip is
 * divided by 4 because the source pointer advances in 32-bit units).
 */
static void Blit_RGB888_index8_map(SDL_BlitInfo *info)
{
	int n, rgb332;
	int cols = info->d_width;
	int rows = info->d_height;
	int quads = cols/4;	/* complete groups of four pixels per row */
	int tail = cols%4;	/* pixels left over after those groups */
	Uint32 *s = (Uint32 *)info->s_pixels;
	int s_gap = info->s_skip/4;
	Uint8 *d = info->d_pixels;
	int d_gap = info->d_skip;
	Uint8 *lut = info->table;

	for ( ; rows; --rows ) {
		/* Four pixels per iteration */
		for ( n=quads; n; --n ) {
			RGB888_RGB332(rgb332, s[0]);
			d[0] = lut[rgb332];
			RGB888_RGB332(rgb332, s[1]);
			d[1] = lut[rgb332];
			RGB888_RGB332(rgb332, s[2]);
			d[2] = lut[rgb332];
			RGB888_RGB332(rgb332, s[3]);
			d[3] = lut[rgb332];
			s += 4;
			d += 4;
		}
		/* Then whatever is left of the row, one pixel at a time */
		for ( n=tail; n > 0; --n ) {
			RGB888_RGB332(rgb332, *s);
			*d = lut[rgb332];
			++d;
			++s;
		}
		s += s_gap;
		d += d_gap;
	}
}
/*
 * Generic blit from a source of info->src->BytesPerPixel bytes per pixel
 * to an 8-bit destination.
 *
 * Every source pixel is split into 8-bit red, green and blue components
 * with DISEMBLE_RGB(), then packed into a 3-3-2 value (top 3 bits of red,
 * top 3 bits of green, top 2 bits of blue).  With info->table == NULL the
 * packed value is stored as is; otherwise it indexes info->table and the
 * table entry is stored.
 *
 * info->s_skip and info->d_skip are byte counts added after every row.
 */
static void SDL_BlitNto1(SDL_BlitInfo *info)
{
	int c;
	int width = info->d_width;
	int height = info->d_height;
	Uint8 *src = info->s_pixels;
	int srcskip = info->s_skip;
	Uint8 *dst = info->d_pixels;
	int dstskip = info->d_skip;
	Uint8 *map = info->table;
	SDL_PixelFormat *srcfmt = info->src;
	Uint8 srcbpp = srcfmt->BytesPerPixel;
	Uint32 pixel;
	Uint8  sR, sG, sB;

	if ( map != NULL ) {
		/* Translate the packed value through the lookup table */
		for ( ; height; --height ) {
			c = width;
			while ( c-- ) {
				DISEMBLE_RGB(src, srcbpp, srcfmt, pixel,
								sR, sG, sB);
				*dst++ = map[((sR>>5)<<5)|
					     ((sG>>5)<<2)|
					     (sB>>6)];
				src += srcbpp;
			}
			src += srcskip;
			dst += dstskip;
		}
	} else {
		/* Store the packed 3-3-2 value directly */
		for ( ; height; --height ) {
			c = width;
			while ( c-- ) {
				DISEMBLE_RGB(src, srcbpp, srcfmt, pixel,
								sR, sG, sB);
				*dst++ = ((sR>>5)<<5)|
					 ((sG>>5)<<2)|
					 (sB>>6);
				src += srcbpp;
			}
			src += srcskip;
			dst += dstskip;
		}
	}
}
/*
 * Generic blit between two pixel formats, one pixel at a time.
 *
 * Every source pixel is split into 8-bit red, green and blue components
 * with DISEMBLE_RGB() using the source format (info->src), then written in
 * the destination format (info->dst) with ASSEMBLE_RGB().  Source and
 * destination pointers advance by their format's BytesPerPixel per pixel,
 * and by info->s_skip / info->d_skip bytes after every row.
 */
static void SDL_BlitNtoN(SDL_BlitInfo *info)
{
	int c;
	int width = info->d_width;
	int height = info->d_height;
	Uint8 *src = info->s_pixels;
	int srcskip = info->s_skip;
	Uint8 *dst = info->d_pixels;
	int dstskip = info->d_skip;
	SDL_PixelFormat *srcfmt = info->src;
	Uint8 srcbpp = srcfmt->BytesPerPixel;
	SDL_PixelFormat *dstfmt = info->dst;
	Uint8 dstbpp = dstfmt->BytesPerPixel;
	Uint32 pixel;
	Uint8  sR, sG, sB;

	for ( ; height; --height ) {
		c = width;
		while ( c-- ) {
			DISEMBLE_RGB(src, srcbpp, srcfmt, pixel, sR, sG, sB);
			ASSEMBLE_RGB(dst, dstbpp, dstfmt, sR, sG, sB);
			src += srcbpp;
			dst += dstbpp;
		}
		src += srcskip;
		dst += dstskip;
	}
}

/* Blitters for "complex" blits, indexed by the non-zero `complex' argument
   of SDL_CalculateBlitN().  Slot 0 is never looked up because a zero
   `complex' takes the normal path.
   NOTE(review): judging by the function names, index 1 is colorkey, 2 is
   alpha and 3 is both -- confirm against the caller that computes the
   index. */
static SDL_loblit complex_blit[] = {
	NULL,			/* 0 */
	SDL_BlitKey,		/* 1 */
	SDL_BlitAlpha,		/* 2 */
	SDL_BlitAlphaKey	/* 3 */
};

/* Normal N to N optimized blitters */
/* One candidate blitter.  SDL_CalculateBlitN() walks an array of these and
   takes the first entry whose masks, destination depth and CPU
   requirements all match; an entry with srcR == 0 ends the array and
   supplies the default.  Field order matters: the tables are written with
   positional initializers. */
struct blit_table {
	Uint32 srcR;		/* required source Rmask (0 ends the table) */
	Uint32 srcG;		/* required source Gmask */
	Uint32 srcB;		/* required source Bmask */
	int dstbpp;		/* required destination BytesPerPixel */
	Uint32 dstR;		/* required destination Rmask */
	Uint32 dstG;		/* required destination Gmask */
	Uint32 dstB;		/* required destination Bmask */
	Uint32 cpu_flags;	/* bits that must all be set in CPU_Flags() */
	void *aux_data;		/* copied to the surface's sw_data->aux_data */
	SDL_loblit blitfunc;	/* blitter returned when the entry is chosen */
};
/* Table for sources with 1 byte per pixel (normal_blit[0]).  It holds only
   the terminating entry, whose blitfunc is NULL, so SDL_CalculateBlitN()
   returns NULL for such a source when the destination is deeper than
   8 bits. */
static struct blit_table normal_blit_1[] = {
	/* Default for 8-bit RGB source, an invalid combination */
	{ 0,0,0, 0, 0,0,0, 0, NULL, NULL },
};
/* Table for sources with 2 bytes per pixel (normal_blit[1]).
   Each entry is: source R,G,B masks, destination BytesPerPixel,
   destination R,G,B masks, required CPU flags, aux_data, blit function.
   With USE_ASMBLIT every optimized entry expects a 5-6-5 source
   (masks 0xF800/0x07E0/0x001F) and passes its converter routine as
   aux_data to ConvertX86.  Entries are tried in order; the final entry
   (srcR == 0) terminates the search and provides the default blitter. */
static struct blit_table normal_blit_2[] = {
#ifdef USE_ASMBLIT
	{ 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000007E0,0x0000F800,
		 0, ConvertX86p16_16BGR565, ConvertX86 },
	{ 0x0000F800,0x000007E0,0x0000001F, 2, 0x00007C00,0x000003E0,0x0000001F,
		 0, ConvertX86p16_16RGB555, ConvertX86 },
	{ 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000003E0,0x00007C00,
		 0, ConvertX86p16_16BGR555, ConvertX86 },
	{ 0x0000F800,0x000007E0,0x0000001F, 3, 0x00FF0000,0x0000FF00,0x000000FF,
		 0, ConvertX86p16_24RGB888, ConvertX86 },
	{ 0x0000F800,0x000007E0,0x0000001F, 3, 0x000000FF,0x0000FF00,0x00FF0000,
		 0, ConvertX86p16_24BGR888, ConvertX86 },
	{ 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF,
		 0, ConvertX86p16_32RGB888, ConvertX86 },
	{ 0x0000F800,0x000007E0,0x0000001F, 4, 0x000000FF,0x0000FF00,0x00FF0000,
		 0, ConvertX86p16_32BGR888, ConvertX86 },
	{ 0x0000F800,0x000007E0,0x0000001F, 4, 0xFF000000,0x00FF0000,0x0000FF00,
		 0, ConvertX86p16_32RGBA888, ConvertX86 },
	{ 0x0000F800,0x000007E0,0x0000001F, 4, 0x0000FF00,0x00FF0000,0xFF000000,
		 0, ConvertX86p16_32BGRA888, ConvertX86 },
#endif
	/* Default for 16-bit RGB source, used if no other blitter matches */
	{ 0,0,0, 0, 0,0,0, 0, NULL, SDL_BlitNtoN },
};
/* Table for sources with 3 bytes per pixel (normal_blit[2]).  It holds only
   the terminating entry, so such sources always use SDL_BlitNtoN. */
static struct blit_table normal_blit_3[] = {
	/* Default for 24-bit RGB source, never optimized */
	{ 0,0,0, 0, 0,0,0, 0, NULL, SDL_BlitNtoN },
};
/* Table for sources with 4 bytes per pixel (normal_blit[3]).
   Each entry is: source R,G,B masks, destination BytesPerPixel,
   destination R,G,B masks, required CPU flags, aux_data, blit function.
   Every optimized entry expects an RGB 8-8-8 source
   (masks 0xFF0000/0x00FF00/0x0000FF).  Entries are tried in order, which
   is why the MMX RGB555 entry precedes the plain x86 entry with the same
   masks: the latter is used when the CPU flag test fails.  The final
   entry (srcR == 0) terminates the search and provides the default
   blitter. */
static struct blit_table normal_blit_4[] = {
#ifdef USE_ASMBLIT
	{ 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
		 0, ConvertX86p32_16RGB565, ConvertX86 },
	{ 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
		 0, ConvertX86p32_16BGR565, ConvertX86 },
	{ 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
		 MMX_CPU, ConvertMMXp32_16RGB555, ConvertMMX },
	{ 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
		 0, ConvertX86p32_16RGB555, ConvertX86 },
	{ 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
		 0, ConvertX86p32_16BGR555, ConvertX86 },
	{ 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x00FF0000,0x0000FF00,0x000000FF,
		 0, ConvertX86p32_24RGB888, ConvertX86 },
	{ 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x000000FF,0x0000FF00,0x00FF0000,
		 0, ConvertX86p32_24BGR888, ConvertX86 },
	{ 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x000000FF,0x0000FF00,0x00FF0000,
		 0, ConvertX86p32_32BGR888, ConvertX86 },
	{ 0x00FF0000,0x0000FF00,0x000000FF, 4, 0xFF000000,0x00FF0000,0x0000FF00,
		 0, ConvertX86p32_32RGBA888, ConvertX86 },
	{ 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x0000FF00,0x00FF0000,0xFF000000,
		 0, ConvertX86p32_32BGRA888, ConvertX86 },
#else
	/* Portable C blitters for RGB 8-8-8 to 5-6-5 and to 5-5-5 */
	{ 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
		 0, NULL, Blit_RGB888_RGB565 },
	{ 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
		 0, NULL, Blit_RGB888_RGB555 },
#endif
	/* Default for 32-bit RGB source, used if no other blitter matches */
	{ 0,0,0, 0, 0,0,0, 0, NULL, SDL_BlitNtoN },
};
/* Per-depth blitter tables; SDL_CalculateBlitN() indexes this array with
   the source format's BytesPerPixel minus one. */
static struct blit_table *normal_blit[] = {
	normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
};

/*
 * Choose the low-level blit function for blitting `surface' to the
 * destination recorded in surface->map.
 *
 * surface: source surface; its format, map->dst->format, map->table and
 *          map->sw_data are consulted.
 * complex: non-zero selects an entry of complex_blit[] directly; zero
 *          selects a normal blitter based on the pixel formats.
 *
 * Returns the chosen blitter, or NULL when no blitter is available
 * (destination shallower than 8 bits, `complex' outside complex_blit[],
 * source BytesPerPixel outside 1..4, or a table whose default is NULL).
 *
 * Side effect: when a blitter is picked from the normal_blit tables (or
 * the assembly RGB332 converter is used) map->sw_data->aux_data is set to
 * the data that blitter expects.
 */
SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int complex)
{
	struct private_swaccel *sdata;
	SDL_PixelFormat *srcfmt;
	SDL_PixelFormat *dstfmt;
	struct blit_table *table;
	int which;
	SDL_loblit blitfun;

	/* Set up data for choosing the blit */
	sdata = surface->map->sw_data;
	srcfmt = surface->format;
	dstfmt = surface->map->dst->format;

	/* Complex are easy to choose, but slow */
	if ( complex ) {
		/* Never index past the end of complex_blit[] */
		if ( (complex < 0) || (complex >=
		     (int)(sizeof(complex_blit)/sizeof(complex_blit[0]))) ) {
			return(NULL);
		}
		return(complex_blit[complex]);
	}

	/* We don't support destinations less than 8-bits */
	if ( dstfmt->BitsPerPixel < 8 ) {
		return(NULL);
	}

	blitfun = NULL;
	if ( dstfmt->BitsPerPixel == 8 ) {
		/* We assume 8-bit destinations are palettized */
		/* The RGB888 fast paths read the source as 32-bit words, so
		   the masks alone are not enough: a packed 24-bit source
		   with the same masks must use the generic blitter. */
		if ( (srcfmt->BytesPerPixel == 4) &&
		     (srcfmt->Rmask == 0x00FF0000) &&
		     (srcfmt->Gmask == 0x0000FF00) &&
		     (srcfmt->Bmask == 0x000000FF) ) {
			if ( surface->map->table ) {
				blitfun = Blit_RGB888_index8_map;
			} else {
#ifdef USE_ASMBLIT
				sdata->aux_data = ConvertX86p32_8RGB332;
				blitfun = ConvertX86;
#else
				blitfun = Blit_RGB888_index8;
#endif
			}
		} else {
			blitfun = SDL_BlitNto1;
		}
	} else {
		/* normal_blit[] only has tables for 1 to 4 bytes per pixel */
		if ( (srcfmt->BytesPerPixel < 1) ||
		     (srcfmt->BytesPerPixel >
		      sizeof(normal_blit)/sizeof(normal_blit[0])) ) {
			return(NULL);
		}

		/* Now the meat, choose the blitter we want */
		table = normal_blit[srcfmt->BytesPerPixel-1];
		/* Stop at the first matching entry, or at the terminating
		   entry (srcR == 0), which holds the default blitter */
		for ( which=0; table[which].srcR; ++which ) {
			if ( (srcfmt->Rmask == table[which].srcR) &&
			     (srcfmt->Gmask == table[which].srcG) &&
			     (srcfmt->Bmask == table[which].srcB) &&
			     (dstfmt->BytesPerPixel == table[which].dstbpp) &&
			     (dstfmt->Rmask == table[which].dstR) &&
			     (dstfmt->Gmask == table[which].dstG) &&
			     (dstfmt->Bmask == table[which].dstB) ) {
				/* All CPU features the entry needs must
				   be present */
				if ( (CPU_Flags()&table[which].cpu_flags) ==
						table[which].cpu_flags ) {
					break;
				}
			}
		}
		sdata->aux_data = table[which].aux_data;
		blitfun = table[which].blitfunc;
	}

#ifdef DEBUG_ASM
#ifdef USE_ASMBLIT
	if ( blitfun == ConvertMMX )
		fprintf(stderr, "Using mmx blit\n");
	else
	if ( blitfun == ConvertX86 )
		fprintf(stderr, "Using asm blit\n");
	else
#endif
	if ( (blitfun == SDL_BlitNtoN) || (blitfun == SDL_BlitNto1) )
		fprintf(stderr, "Using C blit\n");
	else
		fprintf(stderr, "Using optimized C blit\n");
#endif /* DEBUG_ASM */

	return(blitfun);
}
