#!/usr/bin/perl -w

# Copyright (C) 2005 Enrico Zini <enrico@debian.org>
#
# This code is released under the public domain.
# 
# Converts the thesaurus.txt from the Italian thesaurus project into the .dat
# file for OpenOffice.org 2
#
# The description of the output (.dat) format can be found at:
# 
# http://rpms.alerque.com/BUILD/ooo-build-1.9.104/build/src680-m104/lingucomponent/source/thesaurus/mythes/data_layout.txt

use strict;
use warnings;

# Convert the comma-separated thesaurus list read from the files named on the
# command line (or from standard input when none are given) into the
# thesaurus .dat layout, written to standard output.
#
# Input:  one entry per line: the headword followed by its synonyms, all
#         separated by commas.  The first two lines of the input are skipped.
# Output: a charset line, then for every entry a "headword|1" line followed
#         by one meaning line of the form "-|synonym|synonym|...".

# Print the charset
# NOTE(review): the text is passed through unchanged, with no transcoding --
# this assumes the input really is ISO-8859-1; confirm against the data file.
print "ISO8859-1\n";

# Convert the rest
while (<>)
{
	# Skip the first two lines.  This is done with the line counter inside
	# the loop rather than with two bare <> reads in front of it: once <>
	# has reported end-of-file, reading from it again starts over on STDIN,
	# so an input shorter than two lines would leave the script waiting for
	# terminal input instead of finishing.  $. keeps counting across all the
	# files given on the command line, so only the first two lines of the
	# whole input are dropped, exactly as before.
	next if $. <= 2;

	# Strip any trailing CR/LF characters, so that both Unix and DOS line
	# endings are handled.
	s/[\r\n]+$//;

	# First field is the headword, the remaining ones are its synonyms.
	# Trailing empty fields are dropped by split.
	my ($word, @synonyms) = split /,/;

	# Ignore blank lines and headwords that have no synonyms at all.
	next unless @synonyms;

	# Every entry is emitted with exactly one meaning; "-" fills the first
	# field of the meaning line.
	print $word, "|1\n";
	print "-|", join('|', @synonyms), "\n";
}

# vim:set ts=4 sw=4:
