/*  $Header: /cvsroot/dvipdfmx/src/agl.c,v 1.13 2004/03/20 13:55:47 hirata Exp $

    This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks.

    Copyright (C) 2002 by Jin-Hwan Cho and Shunsaku Hirata,
    the dvipdfmx project team <dvipdfmx@project.ktug.or.kr>

    Copyright (C) 1998, 1999 by Mark A. Wicks <mwicks@kettering.edu>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
*/

/*
 * References:
 *
 *  Unicode and Glyph Names, ver. 2.3., Adobe Solution Network
 *  http://partners.adobe.com/asn/tech/type/unicodegn.jsp
 */

#if HAVE_CONFIG_H
#include "config.h"
#endif

#include <string.h>
#include <ctype.h>

#include "system.h"
#include "mem.h"
#include "error.h"

#include "mfileio.h"
#include "pdfparse.h"
#include "dpxutil.h"

#include "unicode.h"

#include "agl.h"

/* Tag used as a prefix in this module's diagnostic messages. */
#define AGL_DEBUG_STR "AGL"
/* Debug dumps are printed only when the verbosity level exceeds this value. */
#define AGL_DEBUG     5

/* Module verbosity level; raised by one on every AGL_set_verbose() call. */
static unsigned int __verbose = 0;

/* Forward declarations of file-local helpers defined further below. */
static AGList *AGList_new      (void);
static void    AGList_release  (AGList *agl);
static void    AGList_insert   (AGList *agl, AGList *glyph);
static unsigned int get_hash   (const char *key);

/*
 * Raise the module's verbosity level by one step.
 * Each call makes the diagnostics of this file a little more talkative.
 */
void
AGL_set_verbose (void)
{
  __verbose += 1;
}

/* Compiled out: unimplemented placeholder that always returns NULL. */
#if 0
char *
AGName_normalize (char *glyphname)
{
  /*
   * Not Implemented Yet
   *
   * This has nothing to do with the Unicode normalization forms.
   * This routine is supposed to convert glyph names such as
   * "Asmall" to "a.sc", "summationtext" to "summation.text", ... but not
   * "Aacute" to "A" + "acute".
   */

  return NULL;
}
#endif

/*
 * Split a glyph name at its first '.' in place.
 *
 * The '.' in glyphname is overwritten with a terminating NUL, so that
 * glyphname holds only the base name afterwards.  The text following the
 * '.' is returned as a freshly allocated string (allocated with NEW; the
 * caller owns it).  NULL is returned when glyphname contains no '.' or
 * when nothing follows the '.'.
 */
char *
AGName_strip_suffix (char *glyphname)
{
  char *dot, *tail = NULL;

  dot = strchr(glyphname, '.');
  if (!dot)
    return NULL;

  /* Copy the suffix out only when at least one character follows the dot. */
  if (dot[1] != '\0') {
    tail = NEW(strlen(dot + 1) + 1, char);
    strcpy(tail, dot + 1);
  }
  *dot = '\0';

  return tail;
}

/*
 * Tell whether glyphname looks like a Unicode glyph name, i.e. either
 *
 *   "uni" followed by one or more groups of four characters, or
 *   "u"   followed by four to six uppercase hexadecimal digits.
 *
 * Only the part before the first '.' (if any) is examined.
 * Returns 1 if the name has one of these forms, 0 otherwise (including
 * when glyphname is NULL).
 *
 * For the "uni" form only the first character after the prefix is
 * verified here; callers are expected to validate the remaining digits
 * while converting them.
 */
int
AGName_is_unicode (const char *glyphname)
{
  const char   *suffix;
  unsigned char c;
  int  i, len;

  if (!glyphname)
    return 0;

  suffix = strchr(glyphname, '.');
  len    = (int) (suffix ? suffix - glyphname : strlen(glyphname));
  /*
   * Names with lowercase hexadecimal digits such as uni02ac are invalid
   * glyph names; they are supposed to be mapped to the empty string.
   */
  if (len >= 7 && (len - 3) % 4 == 0 &&
      !strncmp(glyphname, "uni", 3)) {
    /* Cast to unsigned char: passing a negative char to isdigit() is undefined. */
    c = (unsigned char) glyphname[3];
    /*
     * Check if the 4th character is an uppercase hexadecimal digit.
     * "union" should not be treated as a Unicode glyph name.
     */
    if (isdigit(c) || (c >= 'A' && c <= 'F'))
      return 1;
    else
      return 0;
  } else if (len <= 7 && len >= 5 &&
             glyphname[0] == 'u') {
    /* Every character after the 'u', including the last one, must be a digit. */
    for (i = 1; i < len; i++) {
      c = (unsigned char) glyphname[i];
      if (!isdigit(c) && (c < 'A' || c > 'F'))
        return 0;
    }
    return 1;
  }

  return 0;
}

/*
 * Convert a Unicode glyph name ("uniXXXX" or "uXXXX" .. "uXXXXXX",
 * optionally followed by ".suffix") to the code value it names.
 *
 * Returns the code value, or -1 when
 *   - glyphname is not accepted by AGName_is_unicode(),
 *   - a "uni" name encodes more than one character (not supported),
 *   - a character other than an uppercase hexadecimal digit is found, or
 *   - UC_is_valid() rejects the resulting value.
 */
long
AGName_convert_uni (const char *glyphname)
{
  long        ucv = 0;
  const char *p, *dot, *stop;

  if (!AGName_is_unicode(glyphname))
    return -1;

  /* Only the part in front of an optional ".suffix" carries the code. */
  dot  = strchr(glyphname, '.');
  stop = dot ? dot : glyphname + strlen(glyphname);

  if (glyphname[1] == 'n') {
    /*
     * "uni" form: exactly one group of four digits is supported.  The
     * length test is applied to the base name only, so that short
     * "uXXXX.suffix" names are not mistaken for multi-character names.
     */
    if (stop - glyphname > 7) {
      WARN("Mapping to multiple Unicode characters not supported.");
      return -1;
    }
    p = glyphname + 3;
  } else {
    p = glyphname + 1;
  }

  while (p < stop) {
    unsigned char c = (unsigned char) *p;

    if (!isdigit(c) && (c < 'A' || c > 'F')) {
      WARN("Invalid char %c in Unicode glyph name %s.", *p, glyphname);
      return -1;
    }
    ucv <<= 4;
    ucv += isdigit(c) ? c - '0' : c - 'A' + 10;
    p++;
  }

  if (!UC_is_valid(ucv)) {
    /* ucv is a long: use the matching length modifier. */
    WARN("Invalid Unicode code value U+%08lX.", (unsigned long) ucv);
    ucv = -1;
  }

  return ucv;
}


/*
 * Flag bits stored in AGList.flags:
 *  AGL_FLAG_NEXT_IS_ALT  the node's "next" link leads to an alternative
 *                        entry (set in AGList_insert()).
 *  AGL_FLAG_CHAR_DECOMP  the node is a composite whose components hang
 *                        off "comp" (set in make_composite_glyph()).
 */
#define AGL_FLAG_NEXT_IS_ALT (1 << 0)
#define AGL_FLAG_CHAR_DECOMP (1 << 1)

/*
 * The following glyphname length limit is taken from
 * "Glyph Names and Current Implementations".
 * It sizes the name buffer used in AGLmap_encode_UTF16BE().
 */
#define NAME_LEN_MAX 31

/*
 * Adobe glyph list contains 4,000+ glyph name entries...
 *
 * One node of a singly linked list of glyph entries.
 */
struct AGList {
  int   flags;   /* Combination of the AGL_FLAG_* bits above. */

  char *name;    /* Base glyph name; NULL for components of a composite. */
  char *suffix;  /* Part after the first '.', or NULL if there is none. */

  long  code; /* -1 for composite glyph */
  struct AGList *comp;  /* Component list of a composite glyph, else NULL. */

  struct AGList *next;  /* Next node in the list. */
};

/* NULL-safe predicates on a list node. */
#define IS_COMPOSITE(g) ((g) && ((g)->flags & AGL_FLAG_CHAR_DECOMP))
#define HAVE_ALTERN(g)  ((g) && ((g)->flags & AGL_FLAG_NEXT_IS_ALT))
#define HAVE_SUFFIX(g)  ((g) && ((g)->suffix))

/*
 * Allocate a list node in its "empty" state: no flags, no name, no
 * suffix, no components, no successor and a code of -1.
 */
static AGList *
AGList_new (void)
{
  AGList *node = NEW(1, AGList);

  node->next   = NULL;
  node->comp   = NULL;
  node->code   = -1;
  node->suffix = NULL;
  node->name   = NULL;
  node->flags  = 0;

  return node;
}

/*
 * Free a whole list: every node reachable through "next", the name and
 * suffix strings the nodes own and, for composite nodes, the component
 * list as well.  A NULL list is accepted.
 */
static void
AGList_release (AGList *agl)
{
  AGList *node, *following;

  for (node = agl; node; node = following) {
    /* Remember the successor before the node itself is freed. */
    following = node->next;
    if (node->name)
      RELEASE(node->name);
    if (node->suffix)
      RELEASE(node->suffix);
    if (IS_COMPOSITE(node))
      AGList_release(node->comp);
    RELEASE(node);
  }
}

/*
 * Build the full glyph name of an entry: "name" when the entry has no
 * suffix, "name.suffix" otherwise.
 *
 * Returns a newly allocated string (allocated with NEW; the caller owns
 * it).  agl must not be NULL and must have a name; note that component
 * nodes created by make_composite_glyph() have none.
 */
char *
AGList_get_name (AGList *agl)
{
  char   *glyphname;
  size_t  len;

  ASSERT(agl);

  len = strlen(agl->name);
  if (agl->suffix)
    len += strlen(agl->suffix) + 1; /* +1 for the '.' separator */

  glyphname = NEW(len + 1, char);
  if (agl->suffix) {
    sprintf(glyphname, "%s.%s", agl->name, agl->suffix);
  } else {
    /* No suffix: the plain name is the whole result. */
    strcpy(glyphname, agl->name);
  }

  return glyphname;
}

/*
 * Return the code stored in an entry, or -1 if the entry is a composite
 * glyph.  agl must not be NULL.
 */
long
AGList_get_code (AGList *agl)
{
  ASSERT(agl);

  if (IS_COMPOSITE(agl))
    return -1;

  return agl->code;
}

/*
 * Return 1 if the entry is flagged as a composite glyph, 0 otherwise.
 * agl must not be NULL.
 */
int
AGList_is_composite (AGList *agl)
{
  ASSERT(agl);

  if (IS_COMPOSITE(agl))
    return 1;

  return 0;
}

/*
 * Return the entry following agl if agl is flagged as having an
 * alternative, NULL otherwise.  agl must not be NULL.
 */
AGList *
AGList_next_alternative (AGList *agl)
{
  ASSERT(agl);

  if (!HAVE_ALTERN(agl))
    return NULL;

  return agl->next;
}

/*
 * Emit the code(s) of a glyph entry through UC_sputx_UTF16BE().
 *
 * A simple entry emits its own code.  A composite entry emits each of
 * its components in list order; a component flagged as having an
 * alternative is reported through ERROR().
 *
 * Returns the sum of the values returned by UC_sputx_UTF16BE().
 * glyph must not be NULL.
 */
long
AGList_sputx_UTF16BE (AGList *glyph, char **dst, char *dstend)
{
  long    total = 0;
  AGList *part;

  ASSERT(glyph);

  if (!IS_COMPOSITE(glyph))
    return total + UC_sputx_UTF16BE(glyph->code, dst, dstend);

  for (part = glyph->comp; part; part = part->next) {
    if (HAVE_ALTERN(part))
      ERROR("Unexpected error.");
    total += AGList_sputx_UTF16BE(part, dst, dstend);
  }

  return total;
}

/*
 * MATCH_SUFFIX: true when entry g and suffix s are both without suffix,
 *               or both have one and the two strings are equal.
 * EXACT_MATCH:  true when g has name n and MATCH_SUFFIX(g, s) holds.
 */
#define MATCH_SUFFIX(g,s) ((!(s) && !((g)->suffix)) || \
                           ((s) && (g)->suffix && !strcmp((s),(g)->suffix)))
#define EXACT_MATCH(g,n,s) (!strcmp((g)->name,(n)) && MATCH_SUFFIX((g),(s)))

/*
 * Link glyph into the chain starting at agl.
 *
 * The chain is scanned for a node that has the same name as glyph:
 *  - same suffix and same code: glyph is treated as already present and
 *    the function returns without linking it;
 *  - same suffix, different code: the scan stops at that node;
 *  - different suffix: the scan stops at that node unless the node is
 *    flagged as having an alternative, in which case it continues.
 * If the scan runs off the end, glyph is appended.  Otherwise glyph is
 * flagged AGL_FLAG_NEXT_IS_ALT and linked in front of the node at which
 * the scan stopped.
 *
 * NOTE(review): when the scan stops at the very first node (prev is
 * still NULL), glyph->next is set but nothing is made to point at glyph;
 * the head pointer lives in the caller and is passed by value, so glyph
 * appears to become unreachable from the chain -- confirm.
 * NOTE(review): in the "already present" case glyph is neither linked
 * nor released here; ownership stays with the caller.
 */
static void
AGList_insert (AGList *agl, AGList *glyph)
{
  AGList *prev = NULL;

  ASSERT(agl && glyph);
  while (agl) {
    if (!strcmp(agl->name, glyph->name)) {
      if (MATCH_SUFFIX(agl, glyph->suffix)) {
	if (agl->code == glyph->code)
	  return;
	break;
      } else {
	if (!HAVE_ALTERN(agl))
	  break;
      }
    }
    prev = agl;
    agl  = agl->next;
  }
  if (!agl) {
    /* End of chain reached: append. */
    if (prev)
      prev->next = glyph;
  } else {
    /* Insert in front of the node the scan stopped at. */
    glyph->flags |= AGL_FLAG_NEXT_IS_ALT;
    glyph->next  = agl;
    if (prev)
      prev->next = glyph;
  }
}

/*
 * A glyph list loaded from one file, stored as a hash table of chains.
 */
struct AGLmap {
  char    *ident;  /* Name the map was requested under (set in AGLmap_cache_find()). */
  AGList **map;    /* HASH_TABLE_SIZE chain heads, indexed by get_hash() of the glyph name. */
};

/* Number of buckets in an AGLmap hash table. */
#define HASH_TABLE_SIZE 503

/*
 * Hash a NUL-terminated string to a bucket index in the range
 * [0, HASH_TABLE_SIZE): multiply-by-33-and-add over all characters,
 * reduced modulo the table size.
 */
static unsigned int
get_hash (const char *key)
{
  unsigned int  hash = 0;
  const char   *p;

  for (p = key; *p != '\0'; p++)
    hash = hash * 33 + *p;

  return hash % HASH_TABLE_SIZE;
}

AGLmap *
AGLmap_new (void)
{
  AGLmap *aglm;

  aglm = NEW(1, AGLmap);
  aglm->ident = NULL;
  aglm->map   = NEW(HASH_TABLE_SIZE, AGList *);
  memset(aglm->map, 0, HASH_TABLE_SIZE*sizeof(AGList *));

  return aglm;
}

/*
 * Destroy a glyph map created by AGLmap_new(): the identifier string,
 * every chain in the hash table, the table itself and finally the map
 * object.  A NULL map is accepted.  aglm must not be used afterwards.
 */
void
AGLmap_release (AGLmap *aglm)
{
  if (aglm) {
    if (aglm->ident)
      RELEASE(aglm->ident);
    if (aglm->map) {
      int i;
      for (i = 0; i < HASH_TABLE_SIZE; i++) {
        if (aglm->map[i])
          AGList_release(aglm->map[i]);
      }
      RELEASE(aglm->map);
    }
    /* The object itself was allocated in AGLmap_new(); free it too. */
    RELEASE(aglm);
  }
}

/*
 * Build the entry for a glyph that maps to a sequence of codes.
 *
 * name and suffix are stored in the new entry as they are (no copy is
 * made).  The component list starts with "first"; further hexadecimal
 * values are then read from *start, separated by either a '_' or white
 * space, and appended in order.  *start is advanced past what was read.
 *
 * Returns the new entry, flagged AGL_FLAG_CHAR_DECOMP.
 */
static AGList *
make_composite_glyph (char **start, char *end, char *name, char *suffix, long first)
{
  AGList *head, *tail;

  head = AGList_new();
  head->name   = name;
  head->suffix = suffix;
  head->flags |= AGL_FLAG_CHAR_DECOMP;

  /* The names of the components are unknown; only codes are kept. */
  tail = AGList_new();
  tail->code = first;
  head->comp = tail;

  skip_white(start, end);
  for (;;) {
    char *stop;
    long  value;

    if (*start >= end || !isxdigit(**start))
      break;
    value = strtol(*start, &stop, 16);
    if (stop == *start)
      break;

    tail->next = AGList_new();
    tail       = tail->next;
    tail->code = value;

    *start = stop;
    if (**start == '_')
      (*start)++;
    else
      skip_white(start, end);
  }

  return head;
}

/*
 * Print a list of glyph entries through MESG() for debugging: the name
 * (or "UNKNOWN"), the suffix, "[COMPOSITE]" / "[ALTERNATE]" markers and
 * then either the code in hexadecimal or, for composites, the dump of
 * the component list.
 */
static void
dump_list (AGList *agl)
{
  while (agl) {
    MESG("%s", agl->name ? agl->name : "UNKNOWN");
    if (agl->suffix)
      MESG(".%s", agl->suffix);
    if (IS_COMPOSITE(agl))
      MESG("[COMPOSITE]");
    if (HAVE_ALTERN(agl))
      MESG("[ALTERNATE]");
    if (IS_COMPOSITE(agl)) {
      MESG(" ==> ");
      dump_list(agl->comp);
    } else {
      /* code is a long: the conversion needs the 'l' length modifier. */
      if (agl->code > 0xFFFFL)
        MESG(": %08lX", (unsigned long) agl->code);
      else
        MESG(": %04lX", (unsigned long) agl->code);
      MESG(";");
    }
    if (HAVE_ALTERN(agl))
      MESG(" ");
    agl = agl->next;
  }
}

/*
 * Read glyph list entries from fp into aglm.
 *
 * Each line is expected to look like
 *
 *   name;XXXX            single code, or
 *   name;XXXX XXXX ...   decomposition into several codes
 *
 * where name may carry a ".suffix".  Lines starting with '#', lines
 * without a ';' and lines starting with ';' are skipped silently; lines
 * whose name or first code cannot be parsed are skipped with a warning.
 *
 * The format argument is unused.
 *
 * Returns the number of entries read.
 *
 * NOTE(review): AGList_insert() does not report whether it linked the
 * entry, so an entry it treats as a duplicate is not released here.
 */
#define WBUF_SIZE 1024
int
AGLmap_read (AGLmap *aglm, FILE *fp, int format)
{
  int   count = 0;
  char  wbuf[WBUF_SIZE];
  char *start = NULL, *end, *next;

  ASSERT(aglm);

  while ((start = mfgets(wbuf, WBUF_SIZE, fp)) != NULL) {
    AGList      *agl;
    char        *name, *suffix;
    long         code;
    unsigned int idx;

    end = start + strlen(start);
    skip_white(&start, end);
    /* Need table version check. */
    if (!start || *start == '#') {
      continue;
    }
    next = strchr(start, ';');
    if (!next || next == start) {
      continue;
    }
    name = parse_ident(&start, next);
    if (!name) {
      /* Nothing usable in front of the ';': name must not be used below. */
      WARN("Invalid AGL entry (ignored): %s", wbuf);
      continue;
    }
    suffix = AGName_strip_suffix(name);
    start = next+1;
    code  = strtol(start, &next, 16);
    if (next == start) {
      WARN("Invalid AGL entry (ignored): %s", wbuf);
      RELEASE(name);
      if (suffix) RELEASE(suffix);
      continue;
    }
    start = next;
    skip_white(&start, end);
    if (isxdigit((unsigned char) *start)) { /* Decomposition */
      agl = make_composite_glyph(&start, end, name, suffix, code);
    } else {
      agl = AGList_new();
      agl->name   = name;
      agl->suffix = suffix;
      agl->code   = code;
    }

    /* File the entry under the hash of its base name. */
    idx = get_hash(name);
    if (!aglm->map[idx]) {
      aglm->map[idx] = agl;
    } else {
      AGList_insert(aglm->map[idx], agl);
    }
    if (__verbose > AGL_DEBUG) {
      MESG("%s: ", AGL_DEBUG_STR);
      dump_list(agl);
      MESG("\n");
    }
    count++;
  }

  if (__verbose > AGL_DEBUG)
    MESG("\n%s: %d glyph list entries found.\n", AGL_DEBUG_STR, count);

  return count;
}

/*
 * Find the entry for glyphname / suffix in aglm.
 *
 * suffix may be NULL, which only matches entries without a suffix.
 * Returns the first entry in the name's hash chain whose name and suffix
 * both match, or NULL if there is none or if aglm is NULL.
 */
AGList *
AGLmap_lookup (AGLmap *aglm, const char *glyphname, const char *suffix)
{
  AGList *node;

  if (!aglm)
    return NULL;

  for (node = (aglm->map)[get_hash(glyphname)]; node; node = node->next) {
    if (EXACT_MATCH(node, glyphname, suffix))
      return node;
  }

  return NULL;
}

/*
 * Convert the first len characters of start, which must all be
 * uppercase hexadecimal digits (0-9, A-F), to a number.
 *
 * Returns the value, or -1 as soon as any other character (including
 * the terminating NUL) is met.  A len of zero or less yields 0.
 */
static long
xtol (const char *start, int len)
{
  long value = 0;
  int  i;

  for (i = 0; i < len; i++) {
    char ch = start[i];
    int  digit;

    if (isdigit(ch))
      digit = ch - '0';
    else if (ch >= 'A' && ch <= 'F')
      digit = ch - 'A' + 10;
    else
      return -1;

    value = (value << 4) + digit;
  }

  return value;
}

/* True for code values inside one of the three private use ranges. */
#define IS_PUA(u) (((u) >= 0x00E000L && (u) <= 0x00F8FFL) || \
                   ((u) >= 0x0F0000L && (u) <= 0x0FFFFDL) || \
                   ((u) >= 0x100000L && (u) <= 0x10FFFDL))

/*
 * Translate a glyph name into code values and emit them through
 * UC_sputx_UTF16BE() / AGList_sputx_UTF16BE().
 *
 * Anything from the first '.' of glyphstr on is ignored.  The rest is
 * split at '_' into components, and each component is handled as
 * follows:
 *   - "uXXXX" .. "uXXXXXX"  is converted directly,
 *   - "uniXXXX[XXXX...]"    is converted four digits at a time,
 *   - any other name is looked up in aglm (which may be NULL).
 * Components that are too long, invalid or unknown are counted as
 * failures.  An empty component stops the conversion.
 *
 * If fail_count is not NULL, the number of failures is stored there on
 * every return path.
 *
 * Returns the sum of the values returned by the sputx routines
 * (presumably the number of bytes stored at *dst -- confirm against
 * UC_sputx_UTF16BE()).
 */
long
AGLmap_encode_UTF16BE (AGLmap *aglm, const char *glyphstr,
                       char **dst, char *dstend, int *fail_count)
{
  long  len;
  int   count;
  const char *cur, *end;

  cur = glyphstr;
  end = strchr(cur, '.');
  if (!end)
    end = cur + strlen(cur);
  else {
    if (__verbose > 5)
      WARN("Suffix \"%s\" in glyph name \"%s\" ommited.", end, glyphstr);
  }

  len = 0; count = 0;
  while (cur < end) {
    char        name[NAME_LEN_MAX+1];
    const char *delim;
    long        ucv, sublen;
    AGList     *agl;

    delim = strchr(cur, '_');
    if (delim == cur) {
      /*
       * Glyph names starting with an underscore, or two subsequent
       * underscores in a glyph name, not allowed?
       */
      WARN("Invalid glyph name component in \"%s\".", glyphstr);
      count++;
      break; /* Cannot continue; the failure is still reported below. */
    } else if (!delim || delim > end) {
      delim = end;
    }
    sublen = (long) (delim - cur);
    if (sublen > NAME_LEN_MAX) {
      WARN("Glyph name too long.");
      cur = delim + 1;
      count++;
      continue;
    }

    memcpy(name, cur, (size_t) sublen);
    name[sublen] = '\0';
    if (AGName_is_unicode(name)) {
      char *p;
      p   = name;
      ucv = 0;
      if (p[1] != 'n') {
        /* "u" form: all remaining characters make up one code value. */
        p += 1;
        ucv = xtol(p, (int) strlen(p));
        if (!UC_is_valid(ucv)) {
          WARN("Invalid Unicode glyph name \"%s\" (%ld).", name, ucv);
          count++;
        } else {
          if (IS_PUA(ucv)) {
            if (__verbose)
              WARN("Glyph \"%s\" mapped to PUA U+%lX.", name,
                   (unsigned long) ucv);
          }
          len += UC_sputx_UTF16BE(ucv, dst, dstend);
        }
      } else {
        /* "uni" form: one code value per group of four digits. */
        p += 3;
        while (*p != '\0') {
          ucv = xtol(p, 4);
          if (!UC_is_valid(ucv)) {
            WARN("Invalid Unicode glyph name \"%s\".", name);
            count++;
          } else {
            if (IS_PUA(ucv)) {
              if (__verbose)
                WARN("Glyph \"%s\" mapped to PUA U+%04lX.", name,
                     (unsigned long) ucv);
            }
            len += UC_sputx_UTF16BE(ucv, dst, dstend);
          }
          p += 4;
        }
      }
    } else if (aglm &&
               (agl = AGLmap_lookup(aglm, name, NULL)) != NULL) {
      len += AGList_sputx_UTF16BE(agl, dst, dstend);
    } else {
      if (__verbose > 2) {
        WARN("No Unicode mapping available for glyph \"%s\".", name);
      }
      count++;
    }
    cur = delim + 1;
  }

  if (fail_count)
    *fail_count = count;
  return len;
}

/* Initial capacity of the cache, and the step by which it grows. */
#define CACHE_ALLOC_SIZE 8u
/*
 * Report through ERROR() when the cache has not been initialized or
 * when n is not the index of a cached map.
 */
#define CHECK_ID(n) do { \
  if (! __cache) { \
    ERROR("%s: Not initialized.", AGL_DEBUG_STR); \
  } else if ((n) < 0 || (n) >= __cache->num) { \
    ERROR("%s: Invalid ID %d", AGL_DEBUG_STR, (n)); \
  } \
} while (0)

/* Growable array of loaded glyph maps; a map's ID is its index in maps. */
struct AGLmapCache
{
  int      num;   /* Number of maps currently stored. */
  int      max;   /* Number of slots allocated in maps. */
  AGLmap **maps;
};

/* The one cache of this module; NULL until AGLmap_cache_init() has run. */
static struct AGLmapCache *__cache = NULL;

void
AGLmap_cache_init (void)
{
  if (__cache)
    ERROR("%s: Already initialized.", AGL_DEBUG_STR);

  __cache = NEW(1, struct AGLmapCache);
  __cache->max  = CACHE_ALLOC_SIZE;
  __cache->num  = 0;
  __cache->maps = NEW(__cache->max, struct AGLmap *);
}

/*
 * Return the cached glyph map with the given ID, as handed out by
 * AGLmap_cache_find().  The ID is validated with CHECK_ID() first.
 */
AGLmap *
AGLmap_cache_get (int id)
{
  AGLmap *found;

  CHECK_ID(id);
  found = __cache->maps[id];

  return found;
}

/*
 * Return the cache ID of the glyph map called name, loading it first if
 * it is not in the cache yet.
 *
 * The cache is initialized on first use.  The file is searched for with
 * kpse_find_file() under name and, failing that, under name with ".txt"
 * appended.
 *
 * Returns the ID (to be passed to AGLmap_cache_get()), or -1 if the file
 * cannot be found or opened or yields no entries.
 *
 * NOTE(review): the string returned by kpse_find_file() is never freed
 * here -- looks like a leak; confirm which deallocator applies.
 */
int
AGLmap_cache_find (const char *name)
{
  int     id;
  AGLmap *aglm;
  char   *fullname;
  FILE   *fp;

  if (!__cache)
    AGLmap_cache_init();
  ASSERT(__cache);

  /* Already loaded? */
  for (id = 0; id < __cache->num; id++) {
    aglm = __cache->maps[id];
    if (!strcmp(name, aglm->ident))
      return id;
  }

  fullname = kpse_find_file(name, kpse_program_text_format, 0);
  if (!fullname) {
    char *altname = NEW(strlen(name)+strlen(".txt")+1, char);
    sprintf(altname, "%s.txt", name);
    fullname = kpse_find_file(altname, kpse_program_text_format, 0);
    RELEASE(altname);
  }

  if (!fullname || !(fp = MFOPEN(fullname, FOPEN_R_MODE)))
    return -1;

  if (__verbose) {
    MESG("(AGL:%s", name);
    if (__verbose > 1)
      MESG("[%s]", fullname);
  }

  aglm = AGLmap_new();
  if (AGLmap_read(aglm, fp, 0) <= 0) {
    AGLmap_release(aglm);
    MFCLOSE(fp);
    return -1;
  }
  MFCLOSE(fp);

  if (__cache->num >= __cache->max) {
    __cache->max += CACHE_ALLOC_SIZE;
    __cache->maps = RENEW(__cache->maps, __cache->max, struct AGLmap *);
  }

  /*
   * The identifier is freed with RELEASE() in AGLmap_release() and is
   * compared with strcmp() above, so allocate it with NEW() like every
   * other string in this file rather than with an unchecked strdup().
   */
  aglm->ident = NEW(strlen(name) + 1, char);
  strcpy(aglm->ident, name);

  id = __cache->num;
  __cache->maps[id] = aglm;
  (__cache->num)++;

  if (__verbose)
    MESG(")");

  return id;
}

/*
 * Tear down the glyph map cache: release every cached map, the array
 * holding them and the cache object, then mark the cache as absent.
 * Does nothing if there is no cache.
 */
void
AGLmap_cache_close (void)
{
  int i;

  if (!__cache)
    return;

  for (i = 0; i < __cache->num; i++)
    AGLmap_release(__cache->maps[i]);
  RELEASE(__cache->maps);
  RELEASE(__cache);
  __cache = NULL;
}
