/*
 * mkht.cc --
 *
 *      Huffman Table generator
 *
 * Copyright (c) 1993-2002 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * A. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * B. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * C. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from this
 *    software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Revision-control identification string embedded in the object file.
 * It is compiled out when `lint' is defined.
 */
#ifndef lint
static const char rcsid[] =
    "@(#) $Header: /usr/mash/src/repository/mash/mash-1/codec/mkht.cc,v 1.11 2002/02/03 03:13:33 lim Exp $";
#endif

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#ifdef WIN32
#include <windows.h>
#include <winsock.h>
extern "C" {
int getopt(int, char * const *, const char *);
}
#endif
#include "huffman.h"
#ifndef WIN32
// for unlink() and getopt()
#   include <unistd.h>
#endif

/*
 * Direct-map decode table produced by huffbuild().
 */
struct hufftab {
	int maxlen;	/* length in bits of the longest codeword */
	short *prefix;	/* table of (1 << maxlen) entries, allocated with new[] */
};

/*
 * One codeword parsed from the input file.  Codes are kept on a
 * singly linked list threaded through `next'.
 */
struct huffcode {
	int val;		/* packed symbol value built by form_field() */
	char* str;		/* codeword text (strdup'd), leading '*' removed */
	int decode_only;	/* 1 if the codeword was prefixed with '*' */
	int lineno;		/* input line the code was read from */
	huffcode* next;		/* next code on the list, or 0 */
};

/* Line number of the input line being parsed; parse() bumps it per line read. */
int lineno = 1;
/* Name of the input file, set by parse() and used in error messages. */
const char* inputFile;

struct field {
	int n;
	int width[32];
};
field decoder_field;
field encoder_pad;

/*
 * Symbol value stored in decode-table slots that no codeword covers.
 * Set from an input line whose codeword is the word "illegal".
 */
int illegal_symbol = 0;

/*
 * Write the "file:line: " prefix of a diagnostic to stderr.
 * The parameter intentionally hides the global of the same name so
 * callers can report a line other than the one currently being parsed.
 */
void
error_header(int lineno)
{
	const char* file = inputFile;

	fprintf(stderr, "%s:%d: ", file, lineno);
}

/*
 * Report a fatal error at the current input line and terminate
 * the program with exit status 1.  Does not return.
 */
void
syntax(const char* s)
{
	error_header(lineno);
	fputs(s, stderr);
	fputc('\n', stderr);
	exit(1);
}

/*
 * Interpret a string of '0' and '1' characters as a binary number,
 * most significant bit first.  The empty string yields 0.
 * Characters are not validated.
 */
int
btoi(const char* s)
{
	int result = 0;
	for (const char* cp = s; *cp != 0; ++cp)
		result = (result << 1) | (*cp - '0');
	return (result);
}

/*
 * Return a pointer to the first non-whitespace character at or
 * after p.  The terminating null is not whitespace, so the scan
 * always stops inside the string.
 */
char*
skipspace(char* p)
{
	/*
	 * The <ctype.h> classifiers are only defined for values
	 * representable as unsigned char (or EOF); a plain char with
	 * the high bit set may be negative, so convert first.
	 */
	while (isspace((unsigned char)*p))
		++p;
	return (p);
}

/*
 * Return a pointer to the first whitespace character at or after p,
 * or to the terminating null if the rest of the string has none.
 */
char*
nextspace(char* p)
{
	/*
	 * Convert to unsigned char before classifying: passing a
	 * negative char value to isspace() is undefined.
	 */
	while (*p != 0 && !isspace((unsigned char)*p))
		++p;
	return (p);
}

const char*const*
parseline(char* bp)
{
	static char* argv[128];
	char** ap = argv;

	for (;;) {
		bp = skipspace(bp);
		if (*bp == '#' || *bp == 0)
			break;
		char* np = nextspace(bp);
		*ap++ = bp;
		if (*np != 0) {
			*np++ = 0;
			bp = skipspace(np);
		} else
			break;
	}
	*ap = 0;
	return (argv);
}

/*
 * Count the entries of a null-terminated word vector
 * (the terminating null pointer is not counted).
 */
int
wc(const char*const* ap)
{
	int count = 0;
	for (; ap[count] != 0; ++count)
		continue;
	return (count);
}

/*
 * Return the number of nodes on the code list starting at p
 * (0 for an empty list).
 */
int
count_codes(const huffcode* p)
{
	int total = 0;
	while (p != 0) {
		++total;
		p = p->next;
	}
	return (total);
}

/*
 * Return the length of the longest codeword string on the list
 * starting at p, or 0 if the list is empty.
 */
int
code_width(const huffcode* p)
{
	int widest = 0;
	while (p != 0) {
		const int len = strlen(p->str);
		widest = (len > widest) ? len : widest;
		p = p->next;
	}
	return (widest);
}

/*
 * Fill in a field-width list from the words of a "fields" or "pad"
 * declaration.
 *
 * ap	null-terminated vector of width words (keyword already removed)
 * fp	list to fill in; it must still be empty
 *
 * A word beginning with '-' is recorded as -1; any other word is
 * converted with strtol() using base auto-detection.  Declaring the
 * same list twice, declaring an empty list, supplying more widths
 * than the list can hold, or supplying a width too large to be used
 * as a shift count on an int is a fatal syntax error.
 */
void
set_fields(const char*const* ap, field* fp)
{
	if (fp->n != 0)
		syntax("multiple bit field declarations");
	if (*ap == 0)
		syntax("empty bit field declaration");

	const int maxfields = sizeof(fp->width) / sizeof(fp->width[0]);
	const char* s;
	while ((s = *ap++) != 0) {
		/* never write past the end of width[] */
		if (fp->n >= maxfields)
			syntax("too many bit fields");
		long v = (*s != '-') ? strtol(s, 0, 0) : -1;
		/*
		 * Widths are later used as shift counts on ints,
		 * so anything beyond 30 bits cannot be represented.
		 */
		if (v > 30)
			syntax("bit field too wide");
		fp->width[fp->n++] = (int)v;
	}
}

/*
 * Pack the first fp->n words of argv into one integer.  Each word
 * is converted with strtol(), truncated to its field's width, and
 * appended below the fields packed so far, so the first field ends
 * up in the most significant position.
 *
 * NOTE(review): a field width of -1 (a '-' entry) would make the
 * shifts here undefined; presumably '-' is only meant for "pad"
 * declarations -- confirm against the input files.
 */
int
form_field(const char*const* argv, field* fp)
{
	int packed = 0;
	const int* wp = fp->width;
	for (int k = 0; k < fp->n; ++k, ++wp, ++argv) {
		const int mask = (1 << *wp) - 1;
		const int bits = strtol(*argv, 0, 0) & mask;
		packed = (packed << *wp) | bits;
	}
	return (packed);
}

/*
 * Interpret one non-empty line that has already been split into words.
 *
 *   fields w...	declare the decoder field widths
 *   pad w...		declare the encoder pad widths
 *   v... illegal	record the packed value as illegal_symbol
 *   v... [*]code	define a codeword; a leading '*' marks it
 *			decode-only
 *
 * Returns a freshly allocated huffcode for a codeword line and 0
 * for every other kind of line.  Malformed lines are fatal.
 */
huffcode*
parsewords(const char*const* argv)
{
	const char* keyword = argv[0];
	if (strcmp(keyword, "fields") == 0) {
		set_fields(argv + 1, &decoder_field);
		return (0);
	}
	if (strcmp(keyword, "pad") == 0) {
		set_fields(argv + 1, &encoder_pad);
		return (0);
	}

	/* Anything else is a symbol line: nf values then the codeword. */
	const int nf = decoder_field.n;
	if (nf == 0)
		syntax("no bit field declaration");
	const int nwords = wc(argv);
	if (nwords - 1 != nf)
		syntax("wrong number of symbols on line");

	const int val = form_field(argv, &decoder_field);
	const char* codeword = argv[nwords - 1];
	if (strcmp(codeword, "illegal") == 0) {
		illegal_symbol = val;
		return (0);
	}

	huffcode* hc = new huffcode;
	hc->decode_only = (*codeword == '*') ? 1 : 0;
	if (hc->decode_only)
		++codeword;	/* the marker is not part of the code */
	hc->val = val;
	hc->str = strdup(codeword);
	hc->next = 0;
	hc->lineno = lineno;
	return (hc);
}

/*
 * Read the table description in `infile' and return the list of
 * codewords it defines.  Each new code is pushed on the front of
 * the list, so the result is in reverse file order.  Declarations
 * ("fields", "pad", "illegal") update the corresponding globals as a
 * side effect.  Exits with status 1 if the file cannot be opened.
 */
huffcode*
parse(const char* infile)
{
	FILE* f = fopen(infile, "r");
	if (f == 0) {
		perror(infile);
		exit(1);
	}
	inputFile = infile;
	char line[512];
	huffcode* list = 0;
	while (fgets(line, sizeof(line), f) != 0) {
		const char*const* p = parseline(line);
		/* blank and comment-only lines yield an empty vector */
		if (p != 0 && *p != 0) {
			huffcode* hc = parsewords(p);
			if (hc != 0) {
				hc->next = list;
				list = hc;
			}
		}
		++lineno;
	}
	/* done with the input; don't leak the stream */
	fclose(f);
	return (list);
}

/*
 * Build a direct map huffman table from the list of codes given.
 * We build a prefix table such that we can directly index
 * table with the k-bit number at the head of the bit stream, where
 * k is the max-length code in the table.  The table tells us
 * the value and the actual length of the code we have.
 * We need the length so we can tear that many bits off
 * the input stream.  Each entry is a 16-bit short: the code length
 * sits in the low 5 bits and the symbol value in the bits above it.
 *
 * On return ht.maxlen holds k and ht.prefix points to a new[]
 * array of (1 << k) entries.  Exits with status 1 if the longest
 * codeword exceeds 16 bits.
 */
void
huffbuild(hufftab& ht, const huffcode* codes)
{
	int maxlen = code_width(codes);
	/*
	 * Reject oversized codes before computing the table size:
	 * shifting by the width of int or more is undefined, and a
	 * shift into the sign bit would slip past a size comparison.
	 */
	if (maxlen > 16) {
		fprintf(stderr, "mkht: longest codeword too big\n");
		exit(1);
	}
	int size = 1 << maxlen;

	/*
	 * Build the direct-map lookup table.
	 */
	ht.prefix = new short[size];
	ht.maxlen = maxlen;

	/*
	 * Initialize states to illegal, and arrange
	 * for max bits to be stripped off the input.
	 */
	for (int i = 0; i < size; ++i)
		ht.prefix[i] = (illegal_symbol << 5) | maxlen;

	for (const huffcode* p = codes; p != 0; p = p->next) {
		int codelen = strlen(p->str);
		int nbit = maxlen - codelen;
		int code = btoi(p->str) << nbit;
		int map = (p->val << 5) | codelen;
		/*
		 * The low nbit bits are don't cares.
		 * Spin through all possible combos.
		 */
		for (int n = 1 << nbit; --n >= 0; ) {
			if ((code | n) >= size)
				abort();
			ht.prefix[code | n] = map;
		}
	}
}

/*
 * Compute one skip-table entry for the 16-bit string bs.
 *
 * Unless `notyet' is defined at compile time this is a stub: the
 * parameters are unnamed and the function always returns 0.
 *
 * The disabled implementation takes n codes described by the
 * parallel arrays code[], len[] and symbol[], and returns
 *   nbit + 14	when a matched code's symbol is SYM_ESCAPE,
 *   0x80 | nbit	when a matched code's symbol is SYM_EOB,
 *   0x40		when no code matched at all,
 *   nbit		otherwise,
 * where nbit is the number of bits consumed by matched codes.
 */
int
#ifdef notyet
skipcode(int bs, int* code, int* len, int* symbol, int n)
#else
skipcode(int /*bs*/, int* /*code*/, int* /*len*/, int* /*symbol*/, int /*n*/)
#endif
{
/*FIXME*/
#ifdef notyet
	int nbit = 0;
	for (;;) {
		/*
		 * Find the matching huffman code that is a prefix
		 * of bs at offset off.  There is either zero
		 * or one such codes, since the huffman strings have
		 * unique prefixes.  The zero case means the given
		 * bit string is impossible.
		 */
		int pbit = nbit;
		for (int k = 0; k < n; ++k) {
			if (len[k] < 16 - nbit) {
				int v = bs >> (16 - nbit - len[k]);
				v &= (1 << len[k]) - 1;
				if (v != code[k])
					continue;
				nbit += len[k];
				if (symbol[k] == SYM_ESCAPE)
					/*
					 * This must end the prefix
					 * since it necessarily takes
					 * up more than 16 bits.
					 */
					return (nbit + 14);
				else if (symbol[k] == SYM_EOB)
					return (0x80 | nbit);
			}
		}
		if (nbit == 0)
			/*
			 * Didn't find any matches.
			 */
			return (0x40);
		/*
		 * If we didn't find a new match,
		 * return the current result.
		 */
		if (nbit == pbit)
			return (nbit);
	}
#else /* notyet */
        /* compiler silencer */
        return 0;
#endif /* notyet */
}

/*
 * FIXME this comment is out of date (it refers to the way
 * the JPEG decoder used to work)
 * Build a skip table.  The idea is to have a fast way
 * of finding the boundaries in the bit stream for a block
 * (i.e., entropy/run-length encoded set of 8x8 DCT coefficients).
 * The bit string can then be used to hash into a cache of
 * recently computed inverse DCTs.
 *
 * The table is indexed 16 bits at a time.  Each 8-bit entry
 * tells how many bits are taken up by the longest string of
 * whole codewords in the index.  The length might be greater
 * than 16 if there is an ESCAPE character (which is great
 * because we can just skip over the following 14-bits).
 * Bit 7 is set if the codes are terminated by EOB; bit 6
 * is set if the string is impossible; the length is in bits 0-5.
 *
 * Note that this table works only for the AC coefficients, because
 * the DC decoding is complicated by macroblock type context.
 * That's okay because we want the hash table to contain inverse
 * DCTs with 0 DC bias because adding in the DC component during
 * the block copy incurs no additional cost (memory is the bottleneck),
 * and using DC=0 significantly increases the probability of a
 * cache hit.
 *
 * hc		head of the linked list of codes
 * skiptab	output table; must have room for 65536 entries
 *
 * Exits with status 1 if the list holds more codes than the
 * fixed-size work arrays can take.
 */
void
skipbuild(huffcode* hc, u_char* skiptab)
{
	enum { MAXCODES = 1024 };
	int len[MAXCODES];
	int code[MAXCODES];
	int symbol[MAXCODES];

	/*
	 * Codes are individually allocated list nodes, so follow
	 * the next pointers rather than stepping through memory.
	 */
	int n = 0;
	for (; hc != 0; hc = hc->next) {
		if (n >= MAXCODES) {
			fprintf(stderr,
				"mkht: too many codewords for skip table\n");
			exit(1);
		}
		len[n] = strlen(hc->str);
		code[n] = btoi(hc->str);
		symbol[n] = hc->val;
		++n;
	}
	for (int bs = 0; bs < 65536; ++bs)
		skiptab[bs] = skipcode(bs, code, len, symbol, n);
}

/*
 * Return the first code on list p whose value, after masking with
 * `mask', equals v; return 0 if there is no such code.
 */
huffcode*
hc_lookup(huffcode* p, int v, int mask)
{
	while (p != 0 && (p->val & mask) != v)
		p = p->next;
	return (p);
}

/*
 * Describes how one bit field is moved from a decoder symbol value
 * into an encoder table index (filled in by hte_map()).
 */
struct fldmap {
	int src;	/* right-shift that brings the field to bit 0 of the symbol */
	int dst;	/* left-shift that places the field in the table index */
	int width;	/* width of the field in bits */
};

/*
 * Work out how the decoder fields map onto the encoder table index.
 *
 * Fields are walked in declaration order.  A field whose pad entry
 * is negative is dropped from the index; otherwise the pad width is
 * inserted in front of the field.  Offsets are first accumulated
 * from the most significant end and then flipped so that src and
 * dst are shift counts measured from bit 0.
 *
 * Fills in map[] and returns the number of entries written.
 */
int
hte_map(fldmap* map)
{
	const field* pad = &encoder_pad;
	const field* dec = &decoder_field;

	int count = 0;
	int src_off = 0;
	int dst_off = 0;
	for (int f = 0; f < dec->n; ++f) {
		const int width = dec->width[f];
		const int has_pad = f < pad->n;
		if (has_pad && pad->width[f] < 0) {
			/* ignore this field in the destination */
			src_off += width;
			continue;
		}
		if (has_pad) {
			/* account for padding in index */
			dst_off += pad->width[f];
		}
		fldmap* m = &map[count++];
		m->width = width;
		m->src = src_off;
		m->dst = dst_off;

		src_off += width;
		dst_off += width;
	}
	/* convert offsets-from-the-top into shift counts */
	for (fldmap* m = map; m != map + count; ++m) {
		m->dst = dst_off - m->dst - m->width;
		m->src = src_off - m->src - m->width;
#ifdef notdef
printf("src %d dst %d width %d\n", m->src, m->dst, m->width);
#endif
	}
	return (count);
}

void
dump_encode(const char* base, huffcode* hc)
{
	fldmap map[32];
	int nm = hte_map(map);
	int size = map[0].width + map[0].dst;
	/*FIXME ugly: account for first pad bits */
	if (encoder_pad.n > 0)
		size += encoder_pad.width[0];

	size = 1 << size;
	printf("#include \"huffman.h\"\n");
	printf("struct huffent hte_%s[%d] = {\n", base, size);
	huffent* he = new huffent[size];
	memset(he, 0, size * sizeof(*he));

	for (; hc != 0; hc = hc->next) {
		if (hc->decode_only)
			continue;
		int sym = hc->val;
		int out_sym = 0;
		for (int i = 0; i < nm; ++i) {
			int w = map[i].width;
			int mask = (1 << w) - 1;
			int sub_symbol = (sym >> map[i].src) & mask;
			out_sym |= sub_symbol << map[i].dst;
		}
		if (he[out_sym].val) {
			error_header(hc->lineno);
			fprintf(stderr, "encoder symbol collision\n");
			exit(1);
		}
		he[out_sym].val = btoi(hc->str);
		he[out_sym].nb = strlen(hc->str);
	}
	for (int i = 0; i < size; ++i)
		printf("\t{ %d, %d },\n", he[i].val, he[i].nb);

	printf("};\n");
}

/*
 * Print the direct-map decode table for the codes on list hc as
 * the C definition of `const unsigned short htd_<base>[]' on
 * stdout, eight entries per line.
 */
void
dump_decode(const char* base, huffcode* hc)
{
	hufftab ht;
	huffbuild(ht, hc);
	printf("#include \"huffman.h\"\n");
	printf("const unsigned short htd_%s[] = {", base);
	int n = 1 << ht.maxlen;
	for (int i = 0; i < n; ++i)
		printf("%s0x%04x,", (i & 7) ? " " : "\n\t",
			ht.prefix[i] & 0xffff);
	printf("\n};\n");
	/* huffbuild() allocated the table with new[] */
	delete[] ht.prefix;
}

/*
 * Print the skip table built by skipbuild() as the C definition of
 * `const unsigned char hts_skip[]' on stdout, eight entries per line.
 * The base name is not used.
 *
 * NOTE(review): main()'s disabled -s path calls dump_skip(), not
 * dump_skid(); one of the two spellings looks like a typo.
 */
void
dump_skid(const char* /* base */, huffcode* hc)
{
	u_char skiptab[65536];
	skipbuild(hc, skiptab);

	printf("const unsigned char hts_skip[] = {");
	const int nentries = sizeof(skiptab) / sizeof(skiptab[0]);
	for (int k = 0; k < nentries; ++k) {
		/* start a fresh indented line every eighth entry */
		const char* sep = (k % 8 == 0) ? "\n\t" : " ";
		printf("%s0x%02x,", sep, skiptab[k]);
	}
	printf("\n};\n");
}

/*
 * Return a copy of s with every lower-case letter converted to
 * upper case.  The copy is allocated with new[]; the caller owns
 * it and releases it with delete[].
 */
char*
upify(const char* s)
{
	int n = strlen(s);
	char* p = new char[n + 1];
	char* d = p;
	unsigned char c;
	do {
		/*
		 * Go through unsigned char: handing a negative char
		 * value to islower()/toupper() is undefined.
		 */
		c = (unsigned char)*s++;
		if (islower(c))
			c = (unsigned char)toupper(c);
		*d++ = (char)c;
	} while (c != 0);
	return (p);
}

/*
 * Print the header text that accompanies the generated tables on
 * stdout: a HUFF_DECODE_<BASE> macro that invokes HUFF_DECODE with
 * the decode table and the longest codeword length, followed by
 * extern declarations for htd_<base>, hte_<base> and hts_<base>.
 */
void
dump_header(const char* base, huffcode* hc)
{
	int width = code_width(hc);
	char* upper = upify(base);
	printf("#define HUFF_DECODE_%s(nbb, bb, bs, result) \\\n"
	       "\tHUFF_DECODE(htd_%s, %d, nbb, bb, bs, result)\n\n",
	       upper, base, width);
	/* upify() hands back a new[] copy that we own */
	delete[] upper;
	printf("extern const unsigned short htd_%s[];\n", base);
	printf("extern const struct huffent hte_%s[];\n", base);
	printf("extern const unsigned char hts_%s[];\n", base);
}

/*
 * Print the command-line synopsis on stderr and exit with status 1.
 * The option letters listed match the set main() hands to getopt().
 */
void
usage()
{
	fprintf(stderr, "usage: mkht [-Ddehs] base file\n");
	exit(1);
}

/*
 * Open the file named by concatenating base and ext, in the given
 * stdio mode.  Any existing file of that name is unlinked first, so
 * this is only suitable for creating output files.  Exits with
 * status 1 if the open fails; otherwise returns the open stream.
 */
FILE*
openfile(const char* base, const char* ext, const char* mode)
{
	/* room for both parts plus the terminating null */
	int n = strlen(base) + strlen(ext) + 1;
	char* wrk = new char[n];
	sprintf(wrk, "%s%s", base, ext);
	(void)unlink(wrk);
	FILE* f = fopen(wrk, mode);
	if (f == 0) {
		perror(wrk);
		exit(1);
	}
	/* array new must be paired with array delete */
	delete[] wrk;
	return (f);
}

/*
 * Debugging aid: print every code on the list to stdout, one per
 * line, as the symbol value, a tab, and the codeword string.
 */
void
dump_codes(huffcode* p)
{
	while (p != 0) {
		printf("%d\t%s\n", p->val, p->str);
		p = p->next;
	}
}

/*
 * State variables exported by getopt().  Only optind is referenced
 * in this file.
 */
extern "C" char *optarg;
extern "C" int optind;
extern "C" int opterr;

/*
 * mkht [options] base file
 *
 * Parse the table description in `file' and write the requested
 * pieces to stdout, in this order:
 *   -h  header text (macro and extern declarations)
 *   -D  plain listing of the parsed codes
 *   -e  encoder table
 *   -d  decoder table
 * -s is accepted, but the code acting on it is compiled only when
 * `notdef' is defined.  Any other option, or a wrong number of
 * operands, prints the usage message and exits.
 */
int
main(int argc, char **argv)
{
	int sflag = 0, eflag = 0, hflag = 0, dflag = 0, Dflag = 0;

	for (int op; (op = getopt(argc, argv, "Ddehs")) != -1; ) {
		int* counter = 0;
		switch (op) {
		case 's':
			counter = &sflag;
			break;
		case 'd':
			counter = &dflag;
			break;
		case 'D':
			counter = &Dflag;
			break;
		case 'e':
			counter = &eflag;
			break;
		case 'h':
			counter = &hflag;
			break;
		default:
			usage();
			break;
		}
		if (counter != 0)
			++*counter;
	}

	/* step past the options; exactly two operands must remain */
	argv += optind;
	argc -= optind;
	if (argc != 2)
		usage();

	const char* base = argv[0];
	huffcode* hc = parse(argv[1]);

	if (hflag)
		dump_header(base, hc);
	if (Dflag)
		dump_codes(hc);
	if (eflag)
		dump_encode(base, hc);
	if (dflag)
		dump_decode(base, hc);
#ifdef notdef
	if (sflag)
		dump_skip(base, hc);
#endif

	return (0);
}
