/* This file is part of Malaga, a system for Natural Language Analysis.
 * Copyright (C) 1995-1999 Bjoern Beutel
 *
 * Bjoern Beutel
 * Universitaet Erlangen-Nuernberg
 * Abteilung fuer Computerlinguistik
 * Bismarckstrasse 12
 * D-91054 Erlangen
 * e-mail: malaga@linguistik.uni-erlangen.de 
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/* description ==============================================================*/

/* This file contains basic types, macros and functions used everywhere. */

/* includes =================================================================*/

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include "basic.h"

#undef GLOBAL
#define GLOBAL

/* constants ================================================================*/

GLOBAL char lower_letter[256] =
/* This table maps each character code to its lower case letter,
 * and to 0 if the code is not a letter.
 * The table is laid out for the Latin1 (ISO 8859-1) character set.
 * Codes above 127 are written as hexadecimal escapes so that the table
 * does not depend on the encoding of this source file. */
{
  /* Codes 0..63: control characters, punctuation and digits (no letters). */
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  /* Codes 64..95: '@', upper case 'A'..'Z', punctuation. */
  0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',   0,   0,   0,   0,   0,
  /* Codes 96..127: '`', lower case 'a'..'z', punctuation. */
  0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',   0,   0,   0,   0,   0,
  /* Codes 128..159: no letters. */
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  /* Codes 160..191: only the ordinal indicators 0xaa and 0xba are letters. */
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0, '\xaa', 0,   0,   0,   0,   0,
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0, '\xba', 0,   0,   0,   0,   0,
  /* Codes 192..223: upper case accented letters map to 224..254;
   * 215 (multiplication sign) is no letter, 223 (sharp s) maps to itself. */
  '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
  '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
  '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', 0,
  '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xdf',
  /* Codes 224..255: lower case accented letters map to themselves;
   * 247 (division sign) is no letter. */
  '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
  '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
  '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', 0,
  '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff'
};

/* types ====================================================================*/

struct TEXT_T /* the implementation of "text_t" */
{
  /* Heap buffer holding the characters of the text, followed by EOS. */
  string_t buffer;
  /* Capacity of <buffer> as tracked by the text functions; they enlarge
   * <buffer> whenever this is less than <string_size> + 1. */
  int_t buffer_size;
  /* Number of characters currently stored in <buffer>, not counting EOS. */
  int_t string_size;
};

/* error function ===========================================================*/

GLOBAL void (*error) (string_t, ...) NO_RETURN;
/* Pointer to the error handler used by the functions in this file.
 * The handler prints an error message, like "printf", and returns to the
 * user input loop (or exits the program if there is no user input loop).
 * No handler is assigned in this file.
 * THE HANDLER SHOULD NEVER RETURN. */

/* memory functions =========================================================*/

GLOBAL void *new_mem (int_t item_size)
/* Return a freshly allocated, zero-filled memory block of <item_size> bytes.
 * If the allocation fails, the function "error" is called. */
{
  void *memory = calloc (1, item_size);

  if (memory != NULL)
    return memory;

  error ("out of memory");
  return memory;
}

/*---------------------------------------------------------------------------*/

GLOBAL void *new_vector (int_t item_size, int_t items)
/* Return a freshly allocated, zero-filled vector holding <items> items of
 * <item_size> bytes each. Return NULL if <items> is 0.
 * If the allocation fails, the function "error" is called. */
{
  void *vector = NULL;

  if (items != 0)
  {
    vector = calloc (items, item_size);
    if (vector == NULL)
      error ("out of memory");
  }

  return vector;
}

/*---------------------------------------------------------------------------*/

GLOBAL int_t renew_vector (void *buffer_ptr,
                           int_t item_size,
                           int_t new_size)
/* Resize the buffer that <*buffer_ptr> points to, so that it can hold
 * <new_size> items of <item_size> bytes each, and store the (possibly moved)
 * buffer back in <*buffer_ptr>. Return <new_size>.
 * <buffer_ptr> is the address of the caller's buffer pointer.
 * If the reallocation fails, the function "error" is called. */
{
  void **slot = (void **) buffer_ptr;
  void *resized = realloc (*slot, item_size * new_size);

  if (resized == NULL)
    error ("out of memory");

  *slot = resized;
  return new_size;
}

/*---------------------------------------------------------------------------*/

GLOBAL void free_mem (void *pointer)
/* <pointer> is the address of a pointer variable.
 * Release the memory that variable points to and reset it to NULL. */
{
  void **slot = (void **) pointer;

  free (*slot);
  *slot = NULL;
}

/* functions for text (indefinitely growing strings) ========================*/

GLOBAL text_t new_text (void)
/* Create an empty text structure and return it. */
{
  text_t text = new_mem (sizeof (struct TEXT_T));

  text->string_size = 0;
  text->buffer_size = 100;
  /* The buffer is cleared on allocation, so it already holds
   * an empty string (text->buffer[0] == EOS). */
  text->buffer = new_vector (sizeof (char), text->buffer_size + 1);

  return text;
}

/*---------------------------------------------------------------------------*/

GLOBAL void clear_text (text_t *text_ptr)
/* Make <*text_ptr> an empty text.
 * A new text structure is created if <*text_ptr> is NULL;
 * otherwise the existing one is reset and its buffer is kept. */
{
  text_t text = *text_ptr;

  if (text == NULL)
  {
    *text_ptr = new_text ();
    return;
  }

  text->string_size = 0;
  text->buffer[0] = EOS;
}

/*---------------------------------------------------------------------------*/

GLOBAL void free_text (text_t *text_ptr)
/* Release the buffer and the structure of <*text_ptr> and set <*text_ptr>
 * to NULL. Do nothing if <*text_ptr> is already NULL. */
{
  if (*text_ptr == NULL)
    return;

  free_mem (&(*text_ptr)->buffer);
  free_mem (text_ptr);
}

/*---------------------------------------------------------------------------*/

GLOBAL void add_to_text (text_t text, string_t string)
/* Append <string> to the end of <text>, enlarging the buffer if needed. */
{
  int_t added_length = strlen (string);
  int_t new_length = text->string_size + added_length;

  /* Grow to twice the required length if the result (plus EOS)
   * does not fit. */
  if (text->buffer_size < new_length + 1)
    text->buffer_size = renew_vector (&text->buffer, sizeof (char),
                                      2 * new_length);

  strcpy (text->buffer + text->string_size, string);
  text->string_size = new_length;
}

/*---------------------------------------------------------------------------*/

GLOBAL void add_char_to_text (text_t text, char character)
/* Append the single char <character> to the end of <text>. */
{
  char single[2];

  /* Wrap the character in a one-char string and append that. */
  single[1] = EOS;
  single[0] = character;
  add_to_text (text, single);
}

/*---------------------------------------------------------------------------*/

GLOBAL void insert_in_text (text_t text, string_t string, int_t position)
/* Insert <string> at <position> in <text> (position counts from 0 onward).
 * A <position> below 0 is treated as 0, a <position> behind the end of
 * <text> is treated as the end of <text>.
 * The content of <text> is EOS-terminated when this function returns. */
{
  int_t string_length = strlen (string);
  int_t new_size = text->string_size + string_length;

  /* Make room for the enlarged text and its terminating EOS. */
  if (text->buffer_size < new_size + 1)
    text->buffer_size = renew_vector (&text->buffer, sizeof (char),
                                      2 * new_size);

  /* Clamp <position> to the valid range. */
  if (position < 0)
    position = 0;
  if (position > text->string_size)
    position = text->string_size;

  /* Shift the tail of the text to the right (the regions may overlap),
   * then copy <string> into the gap. */
  memmove (text->buffer + position + string_length, text->buffer + position,
           sizeof (char) * (text->string_size - position));
  memcpy (text->buffer + position, string, sizeof (char) * string_length);
  text->string_size = new_size;

  /* The tail was moved without its EOS, and the buffer behind it may hold
   * stale or uninitialised characters, so terminate explicitly. */
  text->buffer[new_size] = EOS;
}

/*---------------------------------------------------------------------------*/

GLOBAL void insert_char_in_text (text_t text, char character, int_t position)
/* Insert the single char <character> at <position> in <text>. */
{
  char single[2];

  /* Wrap the character in a one-char string and insert that. */
  single[1] = EOS;
  single[0] = character;
  insert_in_text (text, single, position);
}

/*---------------------------------------------------------------------------*/

GLOBAL int_t text_length (text_t text)
/* Return the number of chars stored in <text>, not counting EOS. */
{
  int_t length = text->string_size;

  return length;
}

/*---------------------------------------------------------------------------*/

GLOBAL string_t text_string (text_t text)
/* Return the content of <text> as a string.
 * The string is the internal buffer of <text>, so it is only valid until
 * another text function is called on <text>. */
{
  string_t content = text->buffer;

  return content;
}

/*---------------------------------------------------------------------------*/

GLOBAL string_t text_to_string (text_t *text_ptr)
/* Copy the content of <*text_ptr> into a new string, delete <*text_ptr>
 * and return the new string.
 * The string must be freed with "free" after use. */
{
  string_t copy;

  copy = new_string ((*text_ptr)->buffer, NULL);
  free_text (text_ptr);

  return copy;
}

/* string functions =========================================================*/

GLOBAL string_t new_string (string_t string, string_t end)
/* Return a newly allocated copy of <string>.
 * If <end> != NULL, only the chars in front of <end> are copied;
 * otherwise <string> is copied up to its terminating EOS.
 * The copy can be deleted with "free". */
{
  string_t copy;
  int_t length, i;

  if (end == NULL)
    end = string + strlen (string);

  length = end - string;
  copy = new_vector (sizeof (char), length + 1);

  /* Copy char by char, then terminate behind the last char copied. */
  for (i = 0; i < length; i++)
    copy[i] = string[i];
  copy[i] = EOS;

  return copy;
}

/*---------------------------------------------------------------------------*/

GLOBAL string_t new_string_readable (string_t from, string_t from_end)
/* Like "new_string", but enclose the string in double quotes, copy a "\" in
 * front of double quotes and backslashes, and copy any char that is not
 * printable (according to "IS_PRINT") as a backslash, an "x" and its
 * two-digit lower case hexadecimal code: "\x0a".
 * If <from_end> != NULL, it marks the end of the string.
 * The result string must be freed with "free" after use. */
{
  text_t text = new_text ();

  if (from_end == NULL)
    from_end = from + strlen (from);

  add_to_text (text, "\"");
  for (; from < from_end; from++)
  {
    if (*from == '\"' || *from == '\\') /* Prepend a backslash. */
    {
      add_to_text (text, "\\");
      add_char_to_text (text, *from);
    }
    else if (IS_PRINT (*from))
      add_char_to_text (text, *from);
    else
    { /* Convert the char to backslash, "x" and hexadecimal code. */
      char escape[5];
      int_t code, digit, i;

      escape[0] = '\\';
      escape[1] = 'x';
      escape[4] = EOS;

      /* Fill in the two digits from the least significant one upward. */
      code = ORD (*from);
      for (i = 3; i >= 2; i--)
      {
        digit = code % 16;
        if (digit >= 10)
          escape[i] = 'a' + digit - 10;
        else
          escape[i] = '0' + digit;
        code = code / 16;
      }
      add_to_text (text, escape);
    }
  }
  add_to_text (text, "\"");
  return text_to_string (&text);
}

/*---------------------------------------------------------------------------*/

GLOBAL string_t concat_strings (string_t first_string, ...)
/* Return a new string that is the concatenation of all argument strings.
 * The argument list must be terminated by a NULL argument.
 * The result string must be freed after use. */
{
  va_list arg;
  size_t total_length;
  string_t part, result, write_ptr;

  /* First pass: add up the lengths of all argument strings. */
  total_length = strlen (first_string);
  va_start (arg, first_string);
  while ((part = va_arg (arg, string_t)) != NULL)
    total_length += strlen (part);
  va_end (arg);

  /* Second pass: copy the strings one behind the other. */
  result = new_vector (sizeof (char), total_length + 1);
  strcpy (result, first_string);
  write_ptr = result + strlen (first_string);
  va_start (arg, first_string);
  while ((part = va_arg (arg, string_t)) != NULL)
  {
    strcpy (write_ptr, part);
    write_ptr += strlen (part);
  }
  va_end (arg);

  return result;
}

/*---------------------------------------------------------------------------*/

GLOBAL string_t next_non_space (string_t string)
/* Return a pointer to the first char in <string> that is no white space
 * (according to "isspace"), i.e. <string> without leading spaces.
 * If <string> consists of spaces only, the result points to its EOS. */
{
  /* "isspace" is only defined for EOF and for values representable as
   * "unsigned char", so a char with the high bit set (e.g. a Latin1 letter)
   * must be converted before it is passed. */
  while (isspace ((unsigned char) *string))
    string++;
  return string;
}

/*---------------------------------------------------------------------------*/

GLOBAL int_t strcmp_no_case (string_t str1, string_t str2)
/* Compare <str1> and <str2> lexically, ignoring case (chars are mapped
 * by "TO_LOWER" before they are compared):
 * Result is -1 if <str1> < <str2>,
 *            0 if <str1> = <str2>,
 *            1 if <str1> > <str2>. */
{
  /* Walk both strings in parallel up to the first difference. */
  for (;; str1++, str2++)
  {
    if (TO_LOWER (*str1) != TO_LOWER (*str2))
      break;

    if (*str1 == EOS) /* Both strings end here, so they are equal. */
      return 0;
  }

  if (ORD (TO_LOWER (*str1)) < ORD (TO_LOWER (*str2)))
    return -1;
  return 1;
}

/*---------------------------------------------------------------------------*/

GLOBAL int_t strncmp_no_case (string_t str1, string_t str2, int_t n)
/* Return (case insensitive) lexical order of <str1> and <str2>,
 * but compare only the first <n> characters.
 * Result is -1 if <str1> < <str2>,
 *            0 if <str1> = <str2>,
 *            1 if <str1> > <str2>.
 * If <n> is 0 or negative, no character is read and the result is 0. */
{
  /* <n> is tested before the chars are read, so at most <n> chars of
   * each string are ever accessed. */
  for (; n > 0; str1++, str2++, n--)
  {
    if (TO_LOWER (*str1) != TO_LOWER (*str2))
      return (ORD (TO_LOWER (*str1)) < ORD (TO_LOWER (*str2))) ? -1 : 1;

    if (*str1 == EOS) /* means *<str2> == EOS, too */
      return 0;
  }

  return 0;
}

/*---------------------------------------------------------------------------*/

GLOBAL string_t replace_arguments (string_t format, string_t chars, ...)
/* Create a new string with a copy of <format>.
 * Replace each sequence "%<c>" in <format>, where <c> is the <n>-th
 * char in <chars>, by the <n>-th additional string argument.
 * If <c> does not occur in <chars>, "%<c>" is replaced by <c> alone.
 * A single "%" at the very end of <format> is dropped.
 * There must be one additional string argument for each char in <chars>.
 * Return the result string. It must be freed with "free" after use. */
{
  va_list arg;
  int_t argument_count, i;
  string_t *arguments;
  text_t text = new_text ();
  string_t from;

  /* Copy the arguments into <arguments>. */
  va_start (arg, chars);
  argument_count = strlen (chars);
  arguments = new_vector (sizeof (string_t), argument_count);
  for (i = 0; i < argument_count; i++)
    arguments[i] = va_arg (arg, string_t);
  va_end (arg);

  /* Copy <format> into <text>, replacing the "%<c>" sequences. */
  for (from = format; *from != EOS; from++)
  {
    if (*from != '%')
    {
      add_char_to_text (text, *from);
      continue;
    }

    from++;

    /* A "%" at the end of <format> has no char behind it. Stop here, so
     * the loop increment does not step behind the terminating EOS. */
    if (*from == EOS)
      break;

    /* Find character <*from> in <chars>. */
    for (i = 0; i < argument_count; i++)
    {
      if (*from == chars[i])
        break;
    }

    if (i < argument_count)
      add_to_text (text, arguments[i]);
    else
      add_char_to_text (text, *from);
  }

  free_mem (&arguments);
  return text_to_string (&text);
}

/*---------------------------------------------------------------------------*/

GLOBAL string_t double_to_string (double number)
/* Return a new string with <number> printed in "%.11G" format.
 * The string must be freed with "free" after use. */
{
  char digits[30];
  string_t result;

  sprintf (digits, "%.11G", number);
  result = new_string (digits, NULL);

  return result;
}

/*---------------------------------------------------------------------------*/

GLOBAL string_t int_to_string (int_t number)
/* Convert <number> to a string in decimal notation.
 * The string must be freed with "free" after use. */
{
  /* A "long" needs fewer than 3 decimal digits per byte; add one char for
   * the sign and one for EOS. This is large enough for any size of "long". */
  char buffer[3 * sizeof (long) + 2];

  /* Convert explicitly, so the argument matches "%ld" whatever the
   * definition of "int_t" is. */
  sprintf (buffer, "%ld", (long) number);
  return new_string (buffer, NULL);
}

/* end of file ==============================================================*/
