#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "catdoc.h"

/* Search path handed to find_file() when read_charset() looks for a    */
/* charset file; initialised from the CHARSETPATH macro.                */
char *charset_path=CHARSETPATH;
/* Forward table (8-bit char -> unicode) consulted by get_8bit_char().  */
short int * source_charset;
/* Not referenced in this part of the file.                             */
/* NOTE(review): presumably a reverse map as built by                   */
/* make_reverse_map() and used for output - confirm against callers.    */
CHARSET target_charset;
/************************************************************************/
/* Looks up the unicode value of 8-bit char c in a forward charset table*/
/* (no range check is done on c). Candidate for conversion to a macro.  */
/* The stored short is returned as is, converted to int.                */
/************************************************************************/
int to_unicode (short int *charset, int c) {
  short int *entry = charset + c;
  return *entry;
}
/************************************************************************/
/* Search inverse charset record for given unicode char and returns     */
/* 0-255 char value if found, -1 otherwise                              */
/*                                                                      */
/* charset - reverse map: pages selected by the high byte of u, each    */
/*           page indexed by the low byte of u (see make_reverse_map)   */
/* u       - unicode value to look up                                   */
/*                                                                      */
/* Values outside 0..0xFFFF (including negative ones) and a NULL map    */
/* yield -1 instead of indexing outside the page table.                 */
/************************************************************************/
int from_unicode (CHARSET charset, int u) {
   short int *page;
   /* The page table built by make_reverse_map() has 256 entries, so    */
   /* only 16-bit values can be looked up safely.                       */
   if (!charset || u<0 || u>0xFFFF) {
     return -1;
   }
   page=charset[u>>8];
   if (!page) {
     /* no character of the charset lives in this page */
     return -1;
   }
   return page[u & 0xff];
}
/************************************************************************/
/*  Converts direct (charset -> unicode) to reverse map                 */
/*                                                                      */
/*  charset - 256-entry table, 8-bit char -> 16-bit unicode value       */
/*                                                                      */
/*  Returns a calloc'ed array of 256 page pointers indexed by the high  */
/*  byte of the unicode value. Each page that is needed is malloc'ed    */
/*  with 256 short ints indexed by the low byte, holding the 8-bit char */
/*  or -1; pages that are never needed stay NULL. When several chars    */
/*  map to the same unicode value the highest char wins.                */
/*  Returns NULL if charset is NULL; exits on allocation failure.       */
/************************************************************************/
CHARSET make_reverse_map(short int *charset) {
   CHARSET newmap;
   int i,l;
   unsigned int k,j;
   short int *p;
   if (!charset) {
     return NULL;
   }
   newmap=calloc(256,sizeof(short int *));
   if (!newmap) {
     fprintf(stderr,"Insufficient memory for  charset\n");
     exit(1);
   }
   for (i=0;i<256;i++) {
     /* Table entries are signed shorts: values of 0x8000 and above are */
     /* stored as negative numbers. Mask to 16 bits so that the page    */
     /* index always stays within 0..255.                               */
     k=(unsigned int)charset[i] & 0xFFFFu;
     j=k>>8;
     if (!newmap[j]) {
       newmap[j]=malloc(sizeof(short int)*256);
       if (!newmap[j]) {
           fprintf(stderr,"Insufficient memory for  charset\n");
           exit(1);
       }
       /* mark every slot of the fresh page as "no such char" */
       for (l=0,p=newmap[j];l<256;l++,p++) *p=-1;
     }
     newmap[j][k & 0xff]=(short int)i;
   }
   return newmap;
}

/************************************************************************/
/* Reads charset file (as got from ftp.unicode.org) and returns array of*/
/* 256 short ints (malloced) mapping from charset to unicode            */
/*                                                                      */
/* filename - charset name; CHARSET_EXT is appended and the result is   */
/*            looked up with find_file() along charset_path             */
/*                                                                      */
/* Chars 0-31 default to themselves, all other chars default to 0.      */
/* Each line of the file that starts with two hex numbers sets one      */
/* entry; any other line is skipped.                                    */
/*                                                                      */
/* Returns NULL (after printing a message to stderr) if the name is too */
/* long, the file cannot be found or opened, memory runs out, or the    */
/* file holds an out-of-range value. The caller owns the returned table.*/
/************************************************************************/
short int * read_charset(char *filename) {
  char filebuf[512];
  char namebuf[64];
  FILE *f;
  short int *new;
  unsigned int c;
  unsigned long uc;
  int ch;
  /* namebuf has a fixed size: refuse names that would overflow it */
  if (strlen(filename)+strlen(CHARSET_EXT)>=sizeof(namebuf)) {
     fprintf(stderr,"Cannot load charset %s - name too long\n",filename);
     return NULL;
  }
  strcpy (namebuf,filename);
  strcat(namebuf,CHARSET_EXT);
  /* Start with an empty result so that a lookup which stores nothing   */
  /* is detected below.                                                 */
  /* NOTE(review): the original tested namebuf only, which is never     */
  /* empty here; find_file()'s failure convention is not visible in     */
  /* this file - confirm. Both buffers are checked to stay compatible.  */
  filebuf[0]='\0';
  find_file(filebuf,namebuf,charset_path);
  if (!filebuf[0] || !namebuf[0]) {
     fprintf(stderr,"Cannot load charset %s - file not found\n",namebuf);
     return NULL;
  }
  f=fopen(filebuf,"rb");
  if (!f) {
    perror(namebuf);
    return NULL;
  }
  if (input_buffer)
     setvbuf(f,input_buffer,_IOFBF,FILE_BUFFER);
  /* Allocate only once the file is open, so the early error returns    */
  /* above have nothing to free.                                        */
  new=calloc(256,sizeof(short int));
  if (!new) {
    fprintf(stderr,"Insufficient memory for  charset\n");
    fclose(f);
    return NULL;
  }
  /* defaults */
  for (c=0;c<32;c++) {
    new[c]=(short int)c;
  }
  for (;;) {
     /* %x and %lx store through pointers to unsigned types */
     if (fscanf(f,"%x %lx",&c,&uc)==2) {
       if (c>255||(uc>0xFEFE&& uc!=0xFFFE)) {
          fprintf(stderr,"Invalid charset file %s\n",filebuf);
          fclose(f);
          free(new);
          return NULL;
       }
       new[c]=(short int)uc;
     }
     /* Skip the rest of the line. Stopping on EOF (rather than testing */
     /* feof) also ends the loop when a read error occurs.              */
     do {
       ch=fgetc(f);
     } while (ch!=EOF && ch!='\n');
     if (ch==EOF) break;
  }
  fclose (f);
  return new;
}


/************************************************************************/
/* Reads one 8-bit char from f and converts it to unicode using         */
/* source_charset. Returns EOF when fgetc() reports EOF.                */
/************************************************************************/

int get_8bit_char (FILE *f)
{
  int byte;
  byte = fgetc(f);
  if (byte != EOF) {
    return to_unicode(source_charset,byte);
  }
  return byte;
}


/************************************************************************/
/* Reads one 16-bit unicode value from f, low byte first. MS-Word runs  */
/* on LSB-first machines only, so the byte order is fixed here.         */
/* Returns EOF if either byte cannot be read. A combined value that     */
/* compares equal to EOF is reported as 0xFEFF instead.                 */
/************************************************************************/

int get_utf16lsb (FILE *f) {
  int lo,hi,unit;
  lo = fgetc(f);
  if (lo == EOF) {
    return EOF;
  }
  hi = fgetc(f);
  if (hi == EOF) {
    return EOF;
  }
  unit = lo | (hi<<8);
  if (unit == EOF) {
    return (int)0xFEFF;
  }
  return unit;
}
  
