#include <lct/local.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <lct/generic.h>
#include <lct/unicode.h>

/* Number of hex digits used for the code point at the start of a
   Unicode Data line, as matched by unicode_charname() */
#define UNICODE_HEXLENGTH 4

/*
 * Encode the UCS2 value C as a zero-terminated UTF8 sequence in UTF.
 *
 * Values below 0x80 produce one byte, values below 0x800 two bytes,
 * and everything else three bytes, so UTF must have room for up to
 * four chars (three sequence bytes plus the terminator).
 *
 * Adapted from Linux kernel 2.0.30
 */
void ucs2_to_utf8(unicode c, char* utf)
{
  char *out = utf;		/* next position to write */

  if (c >= 0x800)
    {
      /*  1110**** 10****** 10******  */
      *out++ = 0xe0 | (c >> 12);
      *out++ = 0x80 | ((c >> 6) & 0x3f);
      *out++ = 0x80 | (c & 0x3f);
    }
  else if (c >= 0x80)
    {
      /*  110***** 10******  */
      *out++ = 0xc0 | (c >> 6);
      *out++ = 0x80 | (c & 0x3f);
    }
  else
    {
      /*  0*******  */
      *out++ = c;
    }

  /* UTF-8 is defined for words of up to 31 bits, but only 16 bits
     are needed here, so three bytes is the longest sequence emitted */
  *out = 0;
}


/*
 * Combine UTF-8 into Unicode: decode one character from the start of
 * BUF and return it.
 *
 * - A byte without the high bit set is returned as-is (this includes
 *   the terminating zero, so scanning never runs past the end of a
 *   zero-terminated string).
 * - A lead byte announces 1 to 5 continuation bytes (10******), each
 *   contributing 6 bits; the value is returned once the last one
 *   has been read.
 * - Incomplete characters are silently ignored: if a sequence is
 *   interrupted by a plain byte or by a new lead byte, the partial
 *   value is dropped and decoding restarts from that byte.
 * - A byte that can neither start nor continue a sequence (a stray
 *   continuation byte, 0xfe or 0xff) ends decoding and yields U+FFFD
 *   REPLACEMENT CHARACTER.  Previously this path returned an
 *   uninitialized value.
 *
 * NOTE(review): sequences longer than 3 bytes encode values above
 * 0xffff; the result is simply assigned to a `unicode', so whether it
 * is truncated depends on the width of that type -- confirm.
 */
unicode utf8_to_ucs2 (char* buf)
{
  int utf_count = 0;		/* continuation bytes still expected */
  long utf_char = 0;		/* value accumulated so far */
  unicode tc = 0xfffd;		/* result; U+FFFD until something decodes */
  unsigned char c;

  do
    {
      c = *buf;
      buf++;

      if (!(c & 0x80))
	{
	  /* not part of a multi-byte sequence - treat as ASCII;
	     this makes incomplete sequences to be ignored */
	  tc = c;
	  utf_count = 0;
	}
      else if (utf_count > 0 && (c & 0xc0) == 0x80)
	{
	  /* continuation byte of the sequence being parsed */
	  utf_char = (utf_char << 6) | (c & 0x3f);
	  utf_count--;
	  if (utf_count == 0)
	    tc = utf_char;
	}
      else if ((c & 0xe0) == 0xc0)	/* 110***** : 1 more byte */
	{
	  utf_count = 1;
	  utf_char = (c & 0x1f);
	}
      else if ((c & 0xf0) == 0xe0)	/* 1110**** : 2 more bytes */
	{
	  utf_count = 2;
	  utf_char = (c & 0x0f);
	}
      else if ((c & 0xf8) == 0xf0)	/* 11110*** : 3 more bytes */
	{
	  utf_count = 3;
	  utf_char = (c & 0x07);
	}
      else if ((c & 0xfc) == 0xf8)	/* 111110** : 4 more bytes */
	{
	  utf_count = 4;
	  utf_char = (c & 0x03);
	}
      else if ((c & 0xfe) == 0xfc)	/* 1111110* : 5 more bytes */
	{
	  utf_count = 5;
	  utf_char = (c & 0x01);
	}
      else
	{
	  /* neither a valid lead byte nor an expected continuation:
	     stop here, tc still holds the replacement character */
	  utf_count = 0;
	}
    }
  while (utf_count);

  return tc;
}


/*
 * Return the character name of U.
 *
 * INVALID_UNICODE yields "(Unknown)".  Otherwise, when UNICODEDATA is
 * defined, the file it names is scanned for a line that starts with
 * the uppercase hex form of U followed by ';', and the text up to the
 * next ';' is returned.  When the file cannot be opened, the value is
 * not listed, or UNICODEDATA is not defined, a "Not found (...)"
 * message is returned instead.
 *
 * Ownership: a name found in the file is returned in a heap buffer
 * that can be released with free(); every other return value is a
 * string literal or whatever _() yields (presumably the translation
 * macro) and must not be freed.
 * NOTE(review): callers cannot distinguish the two cases from the
 * return value alone -- confirm how callers handle ownership.
 */
char* unicode_charname (unicode u)
{
  if (u == INVALID_UNICODE)
    return "(Unknown)";

#ifdef UNICODEDATA
  {
    char* line = NULL;		/* buffer managed by getline() */
    size_t linesize = 0;	/* getline() needs a size_t, not an int */
    char key[32];		/* ample for any unsigned long in hex + ';' */
    int keylen;
    FILE* f = fopen (UNICODEDATA, "r");

    if (f == NULL)
      /* unidata file could not be read */
      return _("Not found (No available Unicode Data)");

    /* uppercase-hex printed form of the value, including the field
       separator so that e.g. "10FF" cannot match a "10FFFD;" line */
    keylen = snprintf (key, sizeof key, "%0*lX;", UNICODE_HEXLENGTH,
		       (unsigned long) u);

    if (keylen > 0 && (size_t) keylen < sizeof key)
      {
	while (getline (&line, &linesize, f) != -1)
	  {
	    char* name;

	    if (strncmp (key, line, (size_t) keylen) != 0)
	      continue;

	    /* the name field follows the code point and semicolon and
	       ends at the next semicolon (or at the end of the line) */
	    name = line + keylen;
	    name[strcspn (name, ";\n")] = '\0';

	    /* hand the getline() buffer itself to the caller: shift
	       the name to its start (regions overlap, hence memmove)
	       so no second allocation is needed */
	    memmove (line, name, strlen (name) + 1);
	    fclose (f);
	    return line;
	  }
      }

    /* char not found */
    free (line);
    fclose (f);
    return _("Not found (Not in installed Unicode Data)");
  }
#else
  return _("Not found (Unicode Data support not available)");
#endif
}
