# Perl5 script to make a table mapping characters to their properties.
# Invoke as:
#   perl cprop.pl /path-to/UnicodeData-Latest.txt /path-to-text-mapping-datas/*.TXT

# Any Perl 5 interpreter is acceptable.
require 5;

# Output file names.  A value that is already defined when this point is
# reached is kept; otherwise the default below is used.
$up_h    = 'ucs-prop.h' unless (defined($up_h));
$ip_h    = 'iso-prop.h' unless (defined($ip_h));
$u_cjk_h = 'ucs-cjk.h'  unless (defined($u_cjk_h));
$i_cjk_h = 'iso-cjk.h'  unless (defined($i_cjk_h));

# Helper libraries, loaded after the defaults above are in place.
require 'ucs-to-iso-lib.pl';
require 'mbbtri.pl';

# The first command line argument names the Unicode data file; it is removed
# from @ARGV, the remaining arguments stay where they are.
my $udata = shift @ARGV;

# &make_map is called in list context and only its first result is kept.
my ($map) = &make_map;

# One table per output: one keyed by UCS code, one by ISO code.
my $ucs_tab = &bt_new;
my $iso_tab = &bt_new;

# State shared by the top-level code that follows.
my @cprop;
my $line;
my $first;
my @cjk;

# Input handle for the Unicode data file.
local (*U);

# Read the Unicode data file named by $udata and record character properties.
#
# Every record is split on ';' and only its first three fields are used: the
# code point in hex, the character name and the category field (tested below
# for the prefixes Z, P, M and L).
#
# A record named "<..., First>" merely remembers its code point in $first.
# The "<..., Last>" record that follows then stands for the whole range
# $first .. $code.  Any other record stands for the single code point $code.
#
# Results:
#   @cjk      flat list of (first, last, property) triples for characters
#             whose names mention HANGUL, CJK, kana, FULLWIDTH or IDEOGRAPHIC
#   $ucs_tab  property entries keyed by code point
#   $iso_tab  property entries keyed by &mb_word_enc of each mapping in $map
open(U, $udata) || die "open(U, \"$udata\"): $!";

while (defined($line = <U>)) {
  my ($hex, $name, $type) = split(/\;/, $line);
  my $code = hex($hex);
  my $cprop;

  if ($name =~ /^<.*First>$/i) {
    # Start of a range: remember it and wait for the matching "Last" record.
    $first = $code;
  }
  else {
    # A "Last" record reuses the start remembered from its "First" record.
    # When no start is pending (a stray "Last" record) fall back to the single
    # code point; otherwise the loops below would count up from an undefined
    # $first, i.e. from 0, and tag every code point below $code.
    $first = $code if (!defined($first) || $name !~ /^<.*Last>$/i);

    # Collect CJK-like ranges; adjacent ranges are merged when written out.
    if ($name =~ /\bHANGUL\b/i) {
      push(@cjk, $first, $code, &may_break);
    }
    elsif ($name =~ /\b(CJK|HIRAGANA|KATAKANA|KANA|FULLWIDTH|IDEOGRAPHIC)\b/i) {
      push(@cjk, $first, $code, &may_break | &eol_to_null);
    }

    # Derive the property bits from the category and the name.  $cprop stays
    # undefined when no rule applies.
    if ($type =~ /^Z/) {
      # Separators, except the ones named NO-BREAK SPACE.
      $cprop = &is_space | &may_break if ($name !~ /\bNO-BREAK\s*SPACE\b/i);
    }
    elsif ($type =~ /^P/) {
      # Punctuation with a kana, FULLWIDTH or IDEOGRAPHIC name.
      $cprop = &may_break | &eol_to_null if ($name =~ /\b(HIRAGANA|KATAKANA|KANA|FULLWIDTH|IDEOGRAPHIC)\b/i);

      if ($type =~ /[is]$/) {
        # Categories Pi and Ps.
        $cprop |= &never_eol;
      }
      elsif ($type =~ /[fe]$/ || $name =~ /(FULL\s*STOP|COMMA)$/i) {
        # Categories Pf and Pe, and names ending in FULL STOP or COMMA.
        $cprop |= &never_bol;
      }
    }
    elsif ($type =~ /^M/) {
      $cprop = &may_break | &never_bol;
    }

    if (defined($cprop)) {
      # Register the property for every code point of the range, and for each
      # mapping that $map holds for that code point.
      for (; $first <= $code ; ++$first) {
        if ($first < @$map) {
          my @def = values %{$map->[$first]};
          my $i;

          for ($i = 0 ; $i < @def ; ++$i) {
            # Elements 1 and 0 of the mapping entry, in that order.
            &bt_add($iso_tab, &mb_word_enc(@{$def[$i]}[1, 0]), $cprop);
          }
        }

        &bt_add($ucs_tab, $first, $cprop);
      }
    }
    elsif ($type =~ /^L/ && $name !~ /\b(HANGUL|CJK|HIRAGANA|KATAKANA|KANA|FULLWIDTH|IDEOGRAPHIC)\b/i) {
      # Remaining letters: mark as breakable only those with at least one
      # mapping for which &is94x94 is true.
      for (; $first <= $code ; ++$first) {
        my $n = 0;

        if ($first < @$map) {
          my @def = values %{$map->[$first]};
          my $i;

          for ($i = 0 ; $i < @def ; ++$i) {
            ++$n if (&is94x94(@{$def[$i]}[1, 0]));
          }
        }

        &bt_add($ucs_tab, $first, &may_break) if ($n);
      }
    }

    # No range start is pending any more.
    $first = undef;
  }
}

close(U);

# Each finished table is first passed to bt_optimize and then handed to
# bt_make_c_header together with the name of its output header.

# UCS-keyed table -> $up_h
bt_optimize($ucs_tab);
bt_make_c_header($ucs_tab, $up_h);

# ISO-keyed table -> $ip_h
bt_optimize($iso_tab);
bt_make_c_header($iso_tab, $ip_h);

# Write the collected CJK ranges to $u_cjk_h, one "{first,last,prop}," line
# per range, all three numbers in upper-case hex.
#
# @cjk is a flat list of (first, last, property) triples.  It is consumed
# here.  A triple that starts right after the current range ends and carries
# the same property value is folded into the current range instead of being
# written separately.
local (*CJK);

open(CJK, ">$u_cjk_h") || die "open(CJK, \">$u_cjk_h\"): $!";

if (@cjk >= 3) {
  # The range currently being accumulated.
  my ($lo, $hi, $prop) = splice(@cjk, 0, 3);

  while (@cjk >= 3) {
    my ($next_lo, $next_hi, $next_prop) = splice(@cjk, 0, 3);

    if ($hi + 1 == $next_lo && $prop == $next_prop) {
      # Contiguous and same property: just extend the current range.
      $hi = $next_hi;
    }
    else {
      printf CJK "{0x%X,0x%X,0x%X},\n", $lo, $hi, $prop;
      ($lo, $hi, $prop) = ($next_lo, $next_hi, $next_prop);
    }
  }

  # Flush the last accumulated range.
  printf CJK "{0x%X,0x%X,0x%X},\n", $lo, $hi, $prop;
}

# Buffered write errors only show up when the handle is closed, so a failed
# close means the header may be incomplete.
close(CJK) || die "close(CJK, \"$u_cjk_h\"): $!";

# Write the list returned by &isocjk to $i_cjk_h.  Each element is an array
# reference whose contents are printed as one "{a,b,c}," line in upper-case
# hex.
open(CJK, ">$i_cjk_h") || die "open(CJK, \">$i_cjk_h\"): $!";

my $range;

foreach $range (&isocjk) {
  printf CJK "{0x%X,0x%X,0x%X},\n", @$range;
}

# Buffered write errors only show up when the handle is closed, so a failed
# close means the header may be incomplete.
close(CJK) || die "close(CJK, \"$i_cjk_h\"): $!";
