<copyright> Unicoding class.
    Written by <a href="mailto:tiggr@ics.ele.tue.nl">Pieter J. Schoenmakers</a>

    Copyright &copy; 1996, 1997 Pieter J. Schoenmakers.

    This file is part of TOM.  TOM is distributed under the terms of the
    TOM License, a copy of which can be found in the TOM distribution; see
    the file LICENSE.

    <id>$Id: Unicoding.t,v 1.10 1998/07/21 14:56:11 tiggr Exp $</id>
    </copyright>

<doc> The {Unicoding} class object maintains information on the {Unicode}
    character coding.

    The character predicates are answered from bitmaps, one per predicate,
    held in the static variables below.  A predicate method obtains its
    bitmap through {loadPredicateSet alternative} when it finds the
    variable unset.  Within a bitmap, the bit for the character {c} is
    bit {c % 8} of the byte at index {c / 8}.  </doc>
implementation class
Unicoding: instance (All)
{
  <doc> Bitmap for digit predicate; consulted and filled by {isDigit}.
      </doc>
  static ByteArray is_digit;

  <doc> Bitmap for letter predicate; consulted and filled by {isAlpha}.
      </doc>
  static ByteArray is_letter;

  <doc> Bitmap for lower predicate; consulted and filled by {isLower}.
      </doc>
  static ByteArray is_lower;

  <doc> Bitmap for punctuation predicate; consulted and filled by
      {isPunct}.  </doc>
  static ByteArray is_punct;

  <doc> Bitmap for space predicate; consulted and filled by {isSpace}.
      </doc>
  static ByteArray is_space;

  <doc> Bitmap for upper predicate; consulted and filled by {isUpper}.
      </doc>
  static ByteArray is_upper;
}

<doc> Return the bitmap for the {predicate} on Unicode characters.

    The bitmap is requested from {CharEncoding}: 8192 bytes, from
    "unicode", with an extension consisting of "p." followed by the
    {predicate}.  If that request yields nothing, a substitute of 8192
    bytes is built instead.  In the substitute only the bits of the
    first 128 characters are considered: the bit of such a character is
    set iff the shared {USASCIIEncoding} answers true when {alt_sel} is
    performed on it with that character as a byte.  </doc>
protected ByteArray
  loadPredicateSet String predicate
       alternative selector alt_sel
{
  String suffix = [[MutableByteString new] print ("p.", predicate)];
  ByteArray bitmap = [CharEncoding loadBytes 8192
				   from "unicode" extension suffix];

  if (!bitmap)
    {
      // Nothing was loaded; fall back to what USASCIIEncoding knows.
      MutableByteArray fallback = [MutableByteArray withCapacity 8192];
      USASCIIEncoding us_ascii = [USASCIIEncoding shared];
      int code;

      [fallback resize 8192];

      for (code = 0; code < 128; code++)
	{
	  if ([us_ascii perform alt_sel with byte (code)])
	    {
	      int slot = code / 8;

	      // Reading the byte into a temporary should not be necessary
	      // (note dated Mon Nov  4 16:12:54 1996,
	      // tiggr@jaguar.ics.ele.tue.nl); it is retained as it was.
	      byte bits = fallback[slot];

	      fallback[slot] = bits | byte (1 << (code % 8));
	    }
	}

      bitmap = fallback;
    }

  = bitmap;
}

/******************** predicates ********************/

<doc> Return {TRUE} iff the bit for the character {c} is set in the
    letter bitmap.  The bitmap is obtained from {loadPredicateSet
    alternative} if {is_letter} is not yet set.  </doc>
boolean
  isAlpha char c
{
  ByteArray letters = is_letter;

  if (!letters)
    {
      letters = [self loadPredicateSet "letter"
		      alternative selector (boolean isAlpha byte)];
      is_letter = letters;
    }

  // Byte c / 8 holds the bit of c, at position c % 8.
  = (letters[c / 8] & (1 << (c % 8))) != 0;
}

<doc> Return {TRUE} iff the bit for the character {c} is set in the
    digit bitmap.  The bitmap is obtained from {loadPredicateSet
    alternative} if {is_digit} is not yet set.  </doc>
boolean
  isDigit char c
{
  ByteArray digits = is_digit;

  if (!digits)
    {
      digits = [self loadPredicateSet "digit"
		     alternative selector (boolean isDigit byte)];
      is_digit = digits;
    }

  // Byte c / 8 holds the bit of c, at position c % 8.
  = (digits[c / 8] & (1 << (c % 8))) != 0;
}

<doc> Return {TRUE} iff the bit for the character {c} is set in the
    lower bitmap.  The bitmap is obtained from {loadPredicateSet
    alternative} if {is_lower} is not yet set.  </doc>
boolean
  isLower char c
{
  ByteArray lowers = is_lower;

  if (!lowers)
    {
      lowers = [self loadPredicateSet "lower"
		     alternative selector (boolean isLower byte)];
      is_lower = lowers;
    }

  // Byte c / 8 holds the bit of c, at position c % 8.
  = (lowers[c / 8] & (1 << (c % 8))) != 0;
}

<doc> Return {TRUE} iff the bit for the character {c} is set in the
    punctuation bitmap.  The bitmap is obtained from {loadPredicateSet
    alternative} if {is_punct} is not yet set.  </doc>
boolean
  isPunct char c
{
  ByteArray puncts = is_punct;

  if (!puncts)
    {
      puncts = [self loadPredicateSet "punct"
		     alternative selector (boolean isPunct byte)];
      is_punct = puncts;
    }

  // Byte c / 8 holds the bit of c, at position c % 8.
  = (puncts[c / 8] & (1 << (c % 8))) != 0;
}

<doc> Return {TRUE} iff the bit for the character {c} is set in the
    space bitmap.  The bitmap is obtained from {loadPredicateSet
    alternative} if {is_space} is not yet set.  </doc>
boolean
  isSpace char c
{
  ByteArray spaces = is_space;

  if (!spaces)
    {
      spaces = [self loadPredicateSet "space"
		     alternative selector (boolean isSpace byte)];
      is_space = spaces;
    }

  // Byte c / 8 holds the bit of c, at position c % 8.
  = (spaces[c / 8] & (1 << (c % 8))) != 0;
}

<doc> Return {TRUE} iff the bit for the character {c} is set in the
    upper bitmap.  The bitmap is obtained from {loadPredicateSet
    alternative} if {is_upper} is not yet set.  </doc>
boolean
  isUpper char c
{
  ByteArray uppers = is_upper;

  if (!uppers)
    {
      uppers = [self loadPredicateSet "upper"
		     alternative selector (boolean isUpper byte)];
      is_upper = uppers;
    }

  // Byte c / 8 holds the bit of c, at position c % 8.
  = (uppers[c / 8] & (1 << (c % 8))) != 0;
}

/******************** character conversion ********************/

<doc> Return the lower case version of the character {c}.  There is no
    Unicode case mapping yet: a {c} below 256 is handed, as a byte, to
    {toLower} of the shared {USASCIIEncoding}; any other {c} is returned
    unchanged.  </doc>
char
  toLower char c
{
  // No real implementation yet...
  // Wed Nov 13 22:14:07 1996, tiggr@jaguar.ics.ele.tue.nl
  //
  // Only characters that fit in a byte may take the byte route: converting
  // a larger character to a byte would drop its high bits, and the result
  // would be the case mapping of an unrelated character.  (This assumes a
  // byte holds the values 0..255 -- to be confirmed.)
  = ({if (c < char (256))
	char ([[USASCIIEncoding shared] toLower byte (c)]);
      else
	c;});
}

<doc> Placeholder for the conversion of the character {c} to title case.
    It is not implemented: the method only sends {unimplemented} to
    {self}, passing {cmd}, and does not set a return value.  </doc>
char
  toTitle char c
{
  [self unimplemented cmd];
}

<doc> Return the upper case version of the character {c}.  There is no
    Unicode case mapping yet: a {c} below 256 is handed, as a byte, to
    {toUpper} of the shared {USASCIIEncoding}; any other {c} is returned
    unchanged.  </doc>
char
  toUpper char c
{
  // No real implementation yet...
  // Wed Nov 13 22:14:07 1996, tiggr@jaguar.ics.ele.tue.nl
  //
  // Only characters that fit in a byte may take the byte route: converting
  // a larger character to a byte would drop its high bits, and the result
  // would be the case mapping of an unrelated character.  (This assumes a
  // byte holds the values 0..255 -- to be confirmed.)
  = ({if (c < char (256))
	char ([[USASCIIEncoding shared] toUpper byte (c)]);
      else
	c;});
}

<doc> Return the value of the character {c} taken as a digit, which is
    computed as the distance of {c} from the character '0'.  It is not
    checked that {c} actually is a digit.  </doc>
int
  digitValue char c
{
  // Ahem...
  // Thu Oct 31 17:31:14 1996, tiggr@jaguar.ics.ele.tue.nl
  int distance = c - '0';

  = distance;
}

<doc> Return the position of the character {c} in the alphabet, counting
    from 0.  For a {c} from 'a' up to and including 'z' this is the
    distance from 'a'; for every other {c} it is the distance from 'A',
    without any check that {c} is a letter.  </doc>
int
  alphaValue char c
{
  // Ahem...
  // Mon Nov  4 16:24:16 1996, tiggr@jaguar.ics.ele.tue.nl
  int offset;

  if (c >= 'a' && c <= 'z')
    offset = c - 'a';
  else
    offset = c - 'A';

  = offset;
}

end;

// The instance implementation is empty: it declares no instance variables
// and no instance methods.
implementation instance
Unicoding: instance (All)

end;
