/* $Header: /home/yav/catty/fkiss/RCS/codeconv.c,v 1.5 2000/08/24 02:10:14 yav Exp $
 * Japanese KANJI code conversion
 * written by yav <yav@bigfoot.com>
 *
 * define CODECONV_STANDALONE_TEST for Self check
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/* RCS "$Id$" keyword string for this file.  Presumably kept as a global
 * so the revision stays visible in the compiled object -- confirm. */
char id_codeconv[] = "$Id: codeconv.c,v 1.5 2000/08/24 02:10:14 yav Exp $";

#ifdef CODECONV_STANDALONE_TEST

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* jiscode.c
 * Converter called below with a 16-bit two-byte code; its result is
 * emitted as the two Shift-JIS bytes.  Declared old-style (no prototype),
 * so the compiler does not check the argument type.
 */
extern unsigned short yjis2sjis();

/* Japanese text coding type
 * Values returned by check_coding() and accepted by coding_name().
 * NOTE(review): the non-standalone build presumably gets these from the
 * project headers -- keep the values in sync if so.
 */
#define CODING_UNKNOWN	0
#define CODING_SJIS	1
#define CODING_EUC	2
#define CODING_JIS	3
#define CODING_MAX	4	/* number of Japanese text coding types */

/* Encoding of the source text; selects the converter in convert_coding(). */
static int coding_type = CODING_UNKNOWN;

#else /* CODECONV_STANDALONE_TEST */

#include <X11/Xos.h>
#include <X11/Xlib.h>
#include <stdio.h>

#include "config.h"
#include "headers.h"
#include "fkiss.h"
#include "work.h"
#define PUBLIC_CODECONV_C
#include "extern.h"

#endif /* CODECONV_STANDALONE_TEST */

/* Get the printable name of a Japanese coding system.
 *
 * n: coding type code (CODING_*).
 *
 * Returns a pointer to a static string; "Illegal coding!" is returned
 * for any value that is not one of the known coding types.
 */
char *coding_name(n)
     int n;
{
  static struct coding_label {
    int code;
    char *label;
  } labels[] = {
    {CODING_UNKNOWN, "unknown"},
    {CODING_SJIS,    "Shift-JIS"},
    {CODING_EUC,     "EUC"},
    {CODING_JIS,     "JIS"},
    {-1,             NULL}      /* end-of-table marker */
  };
  struct coding_label *entry;

  /* Walk the table until the marker entry, which is never matched. */
  entry = labels;
  while (entry->label != NULL) {
    if (entry->code == n)
      return entry->label;
    entry++;
  }
  return "Illegal coding!";
}

/* Search a string and identify its Japanese encoding type.
 *
 * str: NUL-terminated text to examine.
 *
 * Returns CODING_JIS, CODING_SJIS or CODING_EUC as soon as some byte
 * sequence decides the question.  Returns CODING_UNKNOWN when the whole
 * string stays ambiguous (for example text made only of bytes below
 * 0x81 with no escape sequence); the caller should then try again with
 * more sample text.
 *
 * The scan never reads past the terminating NUL: every branch that
 * fetches a 2nd byte returns immediately when that byte is NUL.
 */
int check_coding(str)
     char *str;
{
  unsigned char c;
  unsigned char *p;
  char *esc;

  /* JIS check: an ESC directly followed by '$' (Kanji in) or
   * '(' (Kanji out) marks the text as JIS.
   * strchr() is used instead of the legacy BSD index(), which is not
   * declared by <string.h> in strict ISO C mode.
   */
  for (esc = strchr(str, 033); esc != NULL; esc = strchr(esc + 1, 033)) {
    if (esc[1] == '$' || esc[1] == '(')
      return CODING_JIS;
  }

  /* EUC, Shift-JIS */
  p = (unsigned char *)str;
  while ((c = *p++) != '\0') {
    /* high byte */
    if (c < 0x81)
      continue;                 /* says nothing about the encoding */
    if (c <= 0x9f)
      return CODING_SJIS;       /* 0x81 - 0x9f SJIS 1st byte */
    if (c <= 0xa0)
      return CODING_SJIS;       /* 0xa0 JIS X0201 single byte KANA space */
    if (c <= 0xa8) {
      /* 0xa1 - 0xa8 EUC high or KANA */
      c = *p++;                 /* 2nd byte */
      /* check EUC low byte */
      if (c < 0xa1 || c > 0xfe)
        return CODING_SJIS;     /* not EUC 2nd byte! JIS X0201 single byte KANA */
      continue;                 /* still ambiguous */
    }
    if (c < 0xb0)
      return CODING_SJIS;       /* 0xa9 - 0xaf single byte KANA */
    if (c <= 0xf3) {
      /* 0xb0 - 0xf3 EUC or SJIS 1st byte or JIS X0201 single byte KANA */
      c = *p++;                 /* 2nd byte */
      if (c < 0xa1)
        return CODING_SJIS;     /* single byte KANA (< 0x40) or SJIS 2nd byte */
      if (c == 0xfd || c == 0xfe)
        return CODING_EUC;      /* only valid as EUC 2nd byte */
      /* 0xa1 - 0xfc: EUC or SJIS 2nd byte; 0xff: illegal code */
      continue;
    }
    if (c <= 0xfc)
      return CODING_SJIS;       /* 0xf4 - 0xfc SJIS 1st byte */
    /* 0xfd - 0xff: illegal code, skip it */
  }
  return CODING_UNKNOWN;        /* cannot identify, need more sample */
}

/* Convert n bytes of JIS two-byte codes to Shift-JIS.
 *
 * dst: destination buffer, receives exactly n bytes (for n > 0).
 * src: source bytes, read in pairs.
 * n:   number of source bytes; zero or negative does nothing.
 *
 * Each byte pair is passed through yjis2sjis() as one 16-bit code.
 * When n is odd the last byte has no partner and is copied unchanged.
 * Caution! The destination is NOT NUL-terminated.
 */
void strnjis2sjis(dst, src, n)
     unsigned char *dst;
     unsigned char *src;
     int n;			/* length */
{
  unsigned short pair;
  int pos;

  for (pos = 0; pos + 1 < n; pos += 2) {
    pair = src[pos];
    pair <<= 8;
    pair += src[pos + 1];
    pair = yjis2sjis(pair);
    dst[pos] = pair >> 8;
    dst[pos + 1] = pair;
  }
  /* unpaired trailing byte */
  if (pos < n)
    dst[pos] = src[pos];
}

/* Convert a string from JIS (ESC-switched 7-bit) to Shift-JIS.
 *
 * dst: destination buffer; the result is NUL-terminated.  The output is
 *      never longer than the input (escape sequences are dropped and
 *      every other byte maps to one byte), so strlen(str) + 1 bytes
 *      are enough.
 * str: NUL-terminated source text.
 *
 * Text between "Kanji in" (ESC $ ...) and "Kanji out" (ESC ( ...) is
 * converted pairwise by strnjis2sjis(); everything else is copied as is.
 * An ESC that does not start a recognised sequence is dropped and the
 * bytes after it are treated as ordinary text.
 */
void convert_from_jis(dst, str)
     char *dst;
     char *str;
{
  char *seg;                    /* start of the current run of text */
  char *esc;                    /* next ESC, then the resume position */
  int len;
  int inkanji;

  inkanji = 0;
  for (seg = str; (esc = strchr(seg, 033)) != NULL; seg = esc) {
    /* flush the run that precedes this ESC */
    len = esc - seg;
    if (inkanji)
      strnjis2sjis((unsigned char *)dst, (unsigned char *)seg, len);
    else
      memcpy(dst, seg, len);
    dst += len;

    esc++;                      /* skip the ESC itself */
    if (esc[0] == '$' && esc[1] != '\0') {
      /* ESC '$' ? Kanji in */
      if (esc[1] == '(' && esc[2] != '\0')
        esc += 3;               /* ESC $ ( B */
      else
        esc += 2;               /* ESC $ B (JIS 1983) or ESC $ @ (JIS 1978);
                                 * also a truncated "ESC $ (" at the very end,
                                 * where skipping 3 would step past the NUL */
      inkanji = 1;
    } else if (esc[0] == '(' && esc[1] != '\0') {
      /* ESC '(' ? Kanji out */
      esc += 2;                 /* ESC ( B */
      inkanji = 0;
    }
  }

  /* remaining text after the last escape sequence */
  len = strlen(seg);
  if (inkanji)
    strnjis2sjis((unsigned char *)dst, (unsigned char *)seg, len);
  else
    memcpy(dst, seg, len);
  dst[len] = '\0';
}

/* Convert a string from EUC to Shift-JIS.
 *
 * dst: destination buffer; the result is NUL-terminated and is never
 *      longer than the source, so strlen(src) + 1 bytes are enough.
 * src: NUL-terminated source text.
 *
 * - 0x8e followed by 0xa1 - 0xdf is EUC single-shift 2 (half-width
 *   katakana); Shift-JIS stores that character as the single byte
 *   itself, so only the 2nd byte is emitted.
 * - Any other byte with the high bit set is paired with the next byte,
 *   both are stripped to 7 bits and the pair goes through yjis2sjis().
 * - A high-bit byte directly before the terminating NUL and all 7-bit
 *   bytes are copied unchanged.
 */
void convert_from_euc(dst, src)
     unsigned char *dst;
     unsigned char *src;
{
  unsigned short code;

  while ((code = *src++) != '\0') {
    if (code == 0x8e && *src >= 0xa1 && *src <= 0xdf) {
      /* SS2 + kana byte: must not be run through the two-byte
       * converter, it is not a JIS X 0208 code.
       */
      *dst++ = *src++;
      continue;
    }
    if ((code & 0x80) && (*src != '\0')) {
      code &= 0x7f;
      code <<= 8;
      code += (*src++) & 0x7f;
      code = yjis2sjis(code);
      *dst++ = code >> 8;       /* Shift-JIS 1st byte */
    }
    *dst++ = code;              /* 2nd byte, or the unchanged single byte */
  }
  *dst = '\0';
}

/* Convert text of any supported coding type to Shift-JIS.
 *
 * dst: destination buffer, receives the NUL-terminated result.
 * src: NUL-terminated source text.
 *
 * The source coding type is taken from the coding_type variable.
 * JIS and EUC go through their converters; every other value
 * (Shift-JIS or unknown) is copied without change.
 */
void convert_coding(dst, src)
     char *dst;
     char *src;
{
  if (coding_type == CODING_JIS) {
    convert_from_jis(dst, src);
    return;
  }
  if (coding_type == CODING_EUC) {
    convert_from_euc(dst, src);
    return;
  }
  /* Shift-JIS or Unknown */
  strcpy(dst, src);
}

#ifdef CODECONV_STANDALONE_TEST

/* Lines read so far: lineptr[0 .. linecnt-1], each a heap copy. */
static int linecnt = 0;
static char **lineptr = NULL;

/* Append a private copy of str to the lineptr array.
 *
 * str: NUL-terminated line; the caller may reuse its buffer afterwards.
 *
 * The array grows by one slot per call (realloc(NULL, ...) acts as
 * malloc for the first line).  On allocation failure a message is
 * written to stderr and the program exits with status 1.
 * The copy is made with malloc + memcpy because strdup() is not
 * declared by <string.h> in strict ISO C mode.
 */
static void store_line(char *str)
{
  size_t size = strlen(str) + 1;        /* including the NUL */
  char **grown;
  char *copy;

  /* Keep the old pointer until realloc is known to have succeeded. */
  grown = realloc(lineptr, sizeof *grown * ((size_t)linecnt + 1));
  if (grown == NULL) {
    fprintf(stderr, "store_line: out of memory\n");
    exit(1);
  }
  lineptr = grown;

  copy = malloc(size);
  if (copy == NULL) {
    fprintf(stderr, "store_line: out of memory\n");
    exit(1);
  }
  memcpy(copy, str, size);

  lineptr[linecnt] = copy;
  linecnt++;
}

/* Standalone self check.
 *
 * Reads all of stdin line by line and stores every line.  Until the
 * encoding is identified, each line is echoed to stderr prefixed with
 * '<' and passed to check_coding(); once a coding type is found its
 * name is reported on stderr.  If nothing could be identified, EUC is
 * assumed.  Finally every stored line is converted with
 * convert_coding() and written to stdout.
 *
 * Command line arguments are not used.  Returns 0.
 */
int main(int argc, char **argv)
{
  int i;
  char buf[BUFSIZ];

  (void)argc;
  (void)argv;

  while (fgets(buf, sizeof(buf), stdin) != NULL) {
    if (coding_type == CODING_UNKNOWN)
      fprintf(stderr, "<%s", buf);
    store_line(buf);
    if (coding_type == CODING_UNKNOWN) {
      coding_type = check_coding(buf);
      if (coding_type != CODING_UNKNOWN) {
        fprintf(stderr, "*** %s ***\n", coding_name(coding_type));
      }
    }
  }
  if (coding_type == CODING_UNKNOWN) {
    fprintf(stderr, "unknown coding default * EUC *\n");
    coding_type = CODING_EUC;
  }
  /* Converted text is never longer than its source line, and every
   * stored line came out of buf, so buf is large enough as destination.
   */
  for (i = 0; i < linecnt; i++) {
    convert_coding(buf, *(lineptr+i));
    fputs(buf, stdout);
  }
  return 0;
}

#endif /* CODECONV_STANDALONE_TEST */

/* End of file */
