/*************************************************************************/
/*                                                                       */
/*                Centre for Speech Technology Research                  */
/*                     University of Edinburgh, UK                       */
/*                      Copyright (c) 1996,1997                          */
/*                        All Rights Reserved.                           */
/*                                                                       */
/*  Permission to use, copy, modify and distribute this software and its */
/*  documentation for research, educational and individual use only, is  */
/*  hereby granted without fee, subject to the following conditions:     */
/*   1. The code must retain the above copyright notice, this list of    */
/*      conditions and the following disclaimer.                         */
/*   2. Any modifications must be clearly marked as such.                */
/*   3. Original authors' names are not deleted.                         */
/*  This software may not be used for commercial purposes without        */
/*  specific prior written permission from the authors.                  */
/*                                                                       */
/*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
/*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
/*  THIS SOFTWARE.                                                       */
/*                                                                       */
/*************************************************************************/
/*                     Author :  Alan W Black                            */
/*                     Date   :  October 1997                            */
/*-----------------------------------------------------------------------*/
/*  Parse a list of sentences with a given stochastic context free       */
/*  grammar                                                              */
/*                                                                       */
/*=======================================================================*/
#include <stdlib.h>
#include <stdio.h>
#include <iostream.h>
#include <fstream.h>
#include <string.h>
#include "EST.h"
#include "EST_SCFG.h"
#include "EST_SCFG_Chart.h"
#include "siod.h"

// Name of the output file.  Defaults to "-" and is overwritten from the
// -o command-line option in scfg_parse_main().  Within the code shown in
// this file it is only assigned, never read: the output stream itself is
// opened directly from the option value.
static EST_String outfile = "-";

// Does all the work of the program: option handling, loading the grammar,
// and parsing the corpus.  Defined below main().
static int scfg_parse_main(int argc, char **argv);

// Program entry point.
//
// Delegates all work to scfg_parse_main() and uses its return value as the
// process exit status, so that a non-zero status reported by the worker is
// not silently replaced by success.
int main(int argc, char **argv)
{
    return scfg_parse_main(argc, argv);
}

static int scfg_parse_main(int argc, char **argv)
{
    // Top level function generates a probabilistic grammar
    EST_Option al;
    EST_StrList files;
    EST_SCFG_Chart chart;
    LISP rules,s,parse;
    FILE *corpus,*output;
    int i;

    parse_command_line(argc, argv,
       EST_String("Usage:\n")+
       "scfg_parse <options>\n"+
       "Parse a corpus with a stochastic context free grammar\n"+
       "-grammar <ifile>  Grammar file, one rule per line.\n"+
       "-corpus <ifile>   Corpus file, one bracketed sentence per line.\n"+
       "-brackets          Output bracketing only.\n"+
       "-o <ofile>        Output file for parsed sentences.\n",
		       files, al);
    
    if (al.present("-o"))
	outfile = al.val("-o");
    else
	outfile = "-";

    siod_init();

    if (al.present("-grammar"))
    {
	rules = vload(al.val("-grammar"),1);
	gc_protect(&rules);
    }
    else
    {
	cerr << "scfg_parse: no grammar specified" << endl;
	exit(1);
    }

    if (al.present("-corpus"))
    {
	if ((corpus = fopen(al.val("-corpus"),"r")) == NULL)
	{
	    cerr << "scfg_parse: can't open corpus file \"" << 
		al.val("-corpus") << "\" for reading " << endl;
	    exit(1);
	}
    }
    else
    {
	cerr << "scfg_parse: no corpus specified" << endl;
	exit(1);
    }

    if (al.present("-o"))
    {
	if ((output=fopen(al.val("-o"),"w")) == NULL)
	{
	    cerr << "scfg_parse: can't open output file \"" << 
		al.val("-o") << "\" for writing " << endl;
	    exit(1);
	}
    }
    else
	output = stdout;

    gc_protect(&s);
    gc_protect(&parse);
    for (i=0; ((s=lreadf(corpus)) != get_eof_val()); i++)
    {
	parse = scfg_parse(s,rules);
	if (al.present("-brackets"))
	{
	    LISP bparse = scfg_bracketing_only(parse);
	    if (bparse == NIL)
		bparse = s;
	    pprint_to_fd(output,bparse);
	}
	else 
	    pprint_to_fd(output,parse);
	if (i%100 == 99)
	    user_gc(NIL);
   }

    if (output != stdout)
	fclose(output);
    gc_unprotect(&s);
    gc_unprotect(&parse);
    gc_unprotect(&rules);

    return 0;
}

