/*
 * tagged collection - Experimental programs to test and study tagged collections
 *
 * Copyright (C) 2003,2004,2005,2006  Enrico Zini
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#define APPNAME PACKAGE
#else
#warning No config.h found: using fallback values
#define APPNAME __FILE__
#define VERSION "unknown"
#endif

#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <dirent.h>

#include <stdio.h>

#include <stdlib.h>	// getenv

#include <errno.h>

#include <tagcoll/stringf.h>
#include <tagcoll/Exception.h>
#include <tagcoll/Commandline.h>

#include <tagcoll/CardinalityStore.h>
#include <tagcoll/SmartHierarchy.h>

#include <tagcoll/Consumer.h>
#include <tagcoll/Filter.h>
#include <tagcoll/InputMerger.h>
#include <tagcoll/Implications.h>
#include <tagcoll/Filters.h>
#include <tagcoll/Patches.h>
#include <tagcoll/DerivedTags.h>
#include <tagcoll/ItemGrouper.h>

#include <tagcoll/StdioParserInput.h>
#include <tagcoll/TextFormat.h>
#include <tagcoll/Serializer.h>
#include <tagcoll/Expression.h>

#include <algorithm>
#include <iostream>

#include "TagcollOptions.h"

using namespace std;
using namespace Tagcoll;


/**
 * Tell whether `file` names a directory.
 *
 * The path is examined with stat(); if stat() fails, a SystemException
 * carrying the current errno is thrown.
 */
bool isdir(const std::string& file)
{
	struct stat info;
	const int rc = stat(file.c_str(), &info);
	if (rc != -1)
		return S_ISDIR(info.st_mode);

	throw SystemException(errno, "trying to stat file " + file);
}

void printItems(const set<string>& items, const string& prefix = "")
{
	for (set<string>::const_iterator i = items.begin();
			i != items.end(); i++)
	{
		printf("%.*s%.*s\n", PFSTR(prefix), PFSTR(*i));
	}
}

/**
 * Recursively print a hierarchy, starting at `node`.
 *
 * The items held by `node` are printed as "<prefix>: <item>"; a node that
 * holds no items prints nothing for itself.  Each child is then printed with
 * its own tag appended to the prefix, using '/' as the path separator.
 */
void printNode(HierarchyNode<string, string>* node, string prefix = "")
{
	OpSet<string> own = node->getItems();
	if (!own.empty())
		printItems(own, prefix + ": ");

	// Make a non-empty prefix end with exactly one separator before
	// appending the tags of the children
	if (!prefix.empty() && *prefix.rbegin() != '/')
		prefix += '/';

	HierarchyNode<string, string>::iterator child = node->begin();
	while (child != node->end())
	{
		printNode(*child, prefix + (*child)->tag());
		++child;
	}
}

/**
 * Collect the items held by `node` together with the items held by all of its
 * descendants.
 */
OpSet<string> getItems(HierarchyNode<string, string>* node)
{
	OpSet<string> collected = node->getItems();

	// Merge in what each subtree contributes
	HierarchyNode<string, string>::iterator child = node->begin();
	while (child != node->end())
	{
		collected += getItems(*child);
		++child;
	}

	return collected;
}

void readCollection(const string& file, Consumer<string, string>& builder)
	throw (FileException, ParserException)
{
	TrivialConverter<string, string> conv;

	if (file == "-")
	{
		StdioParserInput input(stdin, "<stdin>");
		TextFormat<string, string>::parse(conv, conv, input, builder);
	}
	else
	{
		StdioParserInput input(file);
		TextFormat<string, string>::parse(conv, conv, input, builder);
	}
}

namespace {

/**
 * Scope guard that calls closedir() on a directory stream when it goes out of
 * scope, so the stream is released even if an exception is thrown while the
 * directory is being scanned.
 */
class DirCloser
{
	DIR* dir;

	// Not copyable: the stream must be closed exactly once
	DirCloser(const DirCloser&);
	DirCloser& operator=(const DirCloser&);

public:
	explicit DirCloser(DIR* dir) : dir(dir) {}
	~DirCloser() { if (dir) closedir(dir); }
};

}

/**
 * Read a tag patch, or a directory of tag patches.
 *
 * @param file
 *   "-" to parse a patch from standard input; the name of a directory to
 *   parse every entry in it whose name does not start with '.', adding each
 *   parsed patch to a single PatchList; otherwise the name of a patch file.
 * @return The patches that were read
 *
 * Deliberately declared without a dynamic exception specification: besides
 * whatever the parser throws, this function throws SystemException (from
 * isdir(), and when the directory cannot be opened or read).
 * NOTE(review): a `throw (FileException, ParserException)` specification
 * would turn those into a call to std::unexpected() unless SystemException is
 * covered by one of the listed types -- confirm against Exception.h.
 */
PatchList<string, string> readPatches(const string& file)
{
	TrivialConverter<string, string> conv;

	if (file == "-")
	{
		StdioParserInput input(stdin, "<stdin>");
		return TextFormat<string, string>::parsePatch(conv, conv, input);
	}
	else if (isdir(file))
	{
		PatchList<string, string> patches;
		DIR* dir = opendir(file.c_str());
		if (dir == NULL)
			throw SystemException(errno, "reading directory " + file);

		// Closes the directory on every exit path, including exceptions
		// thrown while opening or parsing one of the patch files
		DirCloser closer(dir);

		for (;;)
		{
			// readdir() returns NULL both at the end of the directory and
			// on error: errno tells the two cases apart, provided it is
			// cleared before the call
			errno = 0;
			struct dirent* d = readdir(dir);
			if (d == NULL)
			{
				if (errno != 0)
					throw SystemException(errno, "reading directory " + file);
				break;
			}

			// Skip ".", ".." and hidden files
			if (d->d_name[0] == '.')
				continue;

			StdioParserInput input(file + '/' + d->d_name);
			patches.addPatch(TextFormat<string, string>::parsePatch(conv, conv, input));
		}
		return patches;
	}
	else
	{
		StdioParserInput input(file);
		return TextFormat<string, string>::parsePatch(conv, conv, input);
	}
}

/**
 * Parse a derived tags file, adding every definition found to `output`.
 *
 * Each line has the form "tag: expression": the tag ends at the first colon
 * that is followed by a space or a tab, so colons followed by anything else
 * (as in "a::b") are part of the tag.  Blanks between the colon and the
 * expression are skipped; the expression runs to the end of the line.
 *
 * Lines lacking a tag or an expression are reported on standard error and
 * skipped.  The last line is processed even when it is not terminated by a
 * newline.
 *
 * @param in      Input to read characters from
 * @param output  Where the (tag, expression) pairs are added
 */
void parseDerivedTags(ParserInput& in, DerivedTags& output)
{
	string tag;
	string expr;

	int c;
	enum {TAG, TAGCOLON, SEXPR, EXPR} state = TAG;
	int line = 1;
	while ((c = in.nextChar()) != ParserInput::Eof)
	{
		if (c == '\n')
		{
			if (tag.size() > 0 && expr.size() > 0)
				output.add(tag, expr);
			else
				fprintf(stderr, "In derived tags file, ignoring incomplete line %d.\n", line);
			tag = string();
			expr = string();
			state = TAG;
			line++;
		} else
			switch (state)
			{
				// Reading the tag name
				case TAG:
					switch (c)
					{
						case ':':
							state = TAGCOLON;
							break;
						default:
							tag += c;
							break;
					}
					break;
				// After a colon in the tag name: it ends the tag only if a
				// blank follows
				case TAGCOLON:
					switch (c)
					{
						case ' ':
						case '\t':
							state = SEXPR;
							break;
						case ':':
							tag += c;
							break;
						default:
							// The colon was part of the tag: keep it and go
							// on reading the tag.  (Switching to EXPR here
							// would cut the tag after this character and
							// put the rest of it into the expression.)
							tag += ':';
							tag += c;
							state = TAG;
							break;
					}
					break;
				// Blanks before the expression
				case SEXPR:
					switch (c)
					{
						case ' ':
						case '\t':
							break;
						default:
							expr += c;
							state = EXPR;
							break;
					}
					break;
				// Reading the expression
				case EXPR:
					expr += c;
					break;
			}
	}

	// Handle a last line that is not terminated by a newline.  Nothing is
	// pending only when no character was read since the last newline, that
	// is, when the tag is empty and the parser is still in its start state.
	if (!tag.empty() || state != TAG)
	{
		if (tag.size() > 0 && expr.size() > 0)
			output.add(tag, expr);
		else
			fprintf(stderr, "In derived tags file, ignoring incomplete line %d.\n", line);
	}
}

void readDerivedTags(const string& file, DerivedTags& derivedTags)
	throw (FileException)
{
	if (file == "-")
	{
		StdioParserInput input(stdin, "<stdin>");
		parseDerivedTags(input, derivedTags);
	}
	else
	{
		StdioParserInput input(file);
		parseDerivedTags(input, derivedTags);
	}
}

/**
 * Filter that drops all tags: whatever it receives is passed on to the next
 * consumer as untagged items.
 */
template<typename ITEM, typename TAG>
class ItemsOnly : public Filter<ITEM, TAG>
{
protected:
	/// Pass an untagged item through unchanged
	virtual void consumeItemUntagged(const ITEM& item)
	{
		this->consumer->consume(item);
	}
	/// Pass on a tagged item without its tags
	virtual void consumeItem(const ITEM& item, const OpSet<TAG>& tags)
	{
		this->consumer->consume(item);
	}
	/// Pass a set of untagged items through unchanged
	virtual void consumeItemsUntagged(const OpSet<ITEM>& items)
	{
		this->consumer->consume(items);
	}
	/// Pass on a set of identically tagged items without their tags.
	// Named consumeItems, like the corresponding handler in Writer: under
	// the name consumeItem this would only be an extra overload, not the
	// handler for sets of items.
	virtual void consumeItems(const OpSet<ITEM>& items, const OpSet<TAG>& tags)
	{
		this->consumer->consume(items);
	}

public:
	ItemsOnly() {}
	ItemsOnly(Consumer<ITEM, TAG>& cons) : Filter<ITEM, TAG>(cons) {}
};

/**
 * Consumer that swaps the roles of items and tags.
 *
 * What it receives is forwarded to a Consumer<TAG, ITEM>, with the incoming
 * tags passed in the item position and the incoming items passed in the tag
 * position.  Untagged items are forwarded in the tag position of
 * `reversedNull`.
 */
template<typename ITEM, typename TAG>
class Reverser : public Consumer<ITEM, TAG>
{
protected:
	/// Consumer receiving the reversed data (not owned)
	Consumer<TAG, ITEM>* consumer;
	/// Value used in the item position when reversing untagged items
	TAG reversedNull;

	virtual void consumeItemUntagged(const ITEM& item)
	{
		consumer->consume(reversedNull, OpSet<ITEM>() + item);
	}
	virtual void consumeItem(const ITEM& item, const OpSet<TAG>& tags)
	{
		consumer->consume(tags, OpSet<ITEM>() + item);
	}
	virtual void consumeItemsUntagged(const OpSet<ITEM>& items)
	{
		consumer->consume(reversedNull, items);
	}
	// Named consumeItems, like the corresponding handler in Writer: under
	// the name consumeItem this would only be an extra overload, not the
	// handler for sets of items.
	virtual void consumeItems(const OpSet<ITEM>& items, const OpSet<TAG>& tags)
	{
		consumer->consume(tags, items);
	}

public:
	/**
	 * @param cons          Consumer that receives the reversed data
	 * @param reversedNull  Value to use in the item position for untagged
	 *                      input items
	 */
	Reverser(Consumer<TAG, ITEM>& cons, const TAG& reversedNull = TAG()) :
		consumer(&cons), reversedNull(reversedNull) {}
};

/**
 * Reads input collections through the chain of filters selected by the input
 * options on the command line.
 *
 * The constructor loads the auxiliary data requested by the options
 * (substitutions, patches, implications, derived tags) and assembles the
 * chain; the output() methods then parse collections and send the filtered
 * result to a consumer.
 */
class Reader
{
	// The filter chain, and the data its filters work with
	FilterChain<string, string> filters;
	Substitute<string, string> substitutions;
	PatchList<string, string> patches;
	Implications<string> implications;
	DerivedTags derivedTags;

	// Filters that may be appended to the chain
	AddImplied<string, string> addImplied;
	RemoveImplied<string, string> removeImplied;
	AddDerived<string> addDerived;
	RemoveDerived<string> removeDerived;
	UnfacetedRemover<string> unfacetedRemover;
	FilterTagsByExpression<string, string> filterByExpression;

public:
	/**
	 * Load the data and build the filter chain according to `opts`.
	 *
	 * opts.lastCommand() must not be null: its name is used to choose
	 * between compressing and expanding the tag sets.
	 */
	Reader(commandline::TagcollOptions& opts)
		: addImplied(implications), removeImplied(implications),
		  addDerived(derivedTags), removeDerived(derivedTags),
		  unfacetedRemover("::"), filterByExpression("")
	{
		// Substitutions come first in the chain, then patches
		if (opts.inputGroup.rename->boolValue())
		{
			readCollection(opts.inputGroup.rename->stringValue(), substitutions.substitutions());
			filters.appendFilter(substitutions);
		}
		if (opts.inputGroup.patch->boolValue())
		{
			patches = readPatches(opts.inputGroup.patch->stringValue());
			filters.appendFilter(patches);
		}

		// Load the data for the implication and derived tag filters
		if (opts.inputGroup.extimpl->boolValue())
		{
			readCollection(opts.inputGroup.extimpl->stringValue(), implications);
			// Packing makes later expansions faster
			implications.pack();
		}
		if (opts.inputGroup.derived->boolValue())
			readDerivedTags(opts.inputGroup.derived->stringValue(), derivedTags);

		// Decide how implications and derived tags are combined
		const bool compressOutput =
			opts.lastCommand()->name() == "copy" && !opts.outputGroup.redundant->boolValue();
		const bool hasImpl = opts.inputGroup.extimpl->boolValue();
		const bool hasDerv = opts.inputGroup.derived->boolValue();

		// Implications are expanded first, so that derived tags are computed
		// on the expanded tag set; the one exception is when compressing
		// without derived tags, where no expansion is needed
		if (hasImpl && (!compressOutput || hasDerv))
			filters.appendFilter(addImplied);

		if (compressOutput)
		{
			// Remove the derived tags, then compress the implications
			if (hasDerv)
				filters.appendFilter(removeDerived);
			if (hasImpl)
				filters.appendFilter(removeImplied);
		}
		else if (hasDerv)
		{
			// Add the derived tags, then the tags that they imply in turn
			filters.appendFilter(addDerived);
			if (hasImpl)
				filters.appendFilter(addImplied);
		}

		if (opts.inputGroup.rmunfaceted->boolValue())
			filters.appendFilter(unfacetedRemover);

		if (opts.inputGroup.rmtags->boolValue())
		{
			// Filter using the negation of the expression given by the user
			filterByExpression.setExpression(!Expression(opts.inputGroup.rmtags->stringValue()));
			filters.appendFilter(filterByExpression);
		}
	}

	/// Read the collection in `file` ("-" for standard input) through the
	/// filter chain, sending the result to `cons`
	void output(const string& file, Consumer<string, string>& cons)
	{
		filters.setConsumer(cons);
		readCollection(file, filters);
	}

	/// Read all the remaining command line arguments as collections, or
	/// standard input if there are none, sending the result to `cons`
	void output(commandline::TagcollOptions& opts, Consumer<string, string>& cons)
	{
		if (!opts.hasNext())
		{
			output("-", cons);
			return;
		}

		do
			output(opts.next(), cons);
		while (opts.hasNext());
	}
};

/**
 * Consumer that prints what it receives to standard output in TextFormat,
 * honouring the output options on the command line.
 *
 * If grouping is requested, the data is accumulated in an ItemGrouper and
 * only written out when the Writer is destroyed.  If items-only output is
 * requested, tags are dropped and every item is handled as untagged.
 */
class Writer : public Consumer<string, string>
{
	TrivialConverter<string, string> conv;
	TextFormat<string, string> output;
	// Owned by the Writer; null unless grouping was requested
	ItemGrouper<string, string>* grouper;
	bool itemsOnly;

	// Not copyable: a copy would share `grouper`, which would then be
	// written out and deleted twice.  Declared but not defined.
	Writer(const Writer&);
	Writer& operator=(const Writer&);

protected:
	/// Process an untagged item
	virtual void consumeItemUntagged(const string& item)
	{
		if (grouper)
			grouper->consume(item);
		else
			output.consume(item);
	}

	/// Process a tagged item, with its tags
	virtual void consumeItem(const string& item, const OpSet<string>& tags)
	{
		if (itemsOnly)
			consumeItemUntagged(item);
		else if (grouper)
			grouper->consume(item, tags);
		else
			output.consume(item, tags);
	}

	/// Process a set of items, all with no tags
	virtual void consumeItemsUntagged(const OpSet<string>& items)
	{
		if (grouper)
			grouper->consume(items);
		else
			// Explicitly split groups
			for (OpSet<string>::const_iterator i = items.begin();
					i != items.end(); i++)
				output.consume(*i);
	}

	/// Process a set of items identically tagged, with their tags
	virtual void consumeItems(const OpSet<string>& items, const OpSet<string>& tags)
	{
		if (itemsOnly)
			consumeItemsUntagged(items);
		else if (grouper)
			grouper->consume(items, tags);
		else
			// Explicitly split groups
			for (OpSet<string>::const_iterator i = items.begin();
					i != items.end(); i++)
				output.consume(*i, tags);
	}

public:
	/// Set up the output according to the output options in `opts`
	Writer(commandline::TagcollOptions& opts)
		: output(conv, conv, stdout), grouper(0),
		  itemsOnly(opts.outputGroup.itemsOnly->boolValue())
	{
		// Allocating the grouper is the last thing done, so nothing can
		// fail between the allocation and the end of the constructor
		if (opts.outputGroup.group->boolValue())
			grouper = new ItemGrouper<string, string>;
	}

	/// Write out the grouped data, if any, and release the grouper
	// NOTE(review): if ItemGrouper::output() can throw, the exception
	// would leave this destructor -- confirm, and handle it here if so.
	~Writer()
	{
		// Flush output if needed
		if (grouper)
		{
			grouper->output(output);
			delete grouper;
		}
	}
};

/**
 * Program entry point: parse the command line and run the requested command.
 *
 * Handles the help and version requests and the implications, hierarchy,
 * cleanhierarchy, diff, related, reverse, copy, findspecials and grep
 * commands.
 *
 * @return 0 on success; 1 if the command line is invalid (the error and the
 *         help text are printed to standard error), if a tagcoll Exception
 *         is caught, or if the `related` command cannot produce a result
 */
int main(int argc, const char* argv[])
{
	commandline::TagcollOptions opts;

	try {
		opts.parse(argc, argv);
		if (!opts.lastCommand())
			throw commandline::BadOption("could not understand the command to execute");

		// Loads the data requested by the input options and builds the
		// input filter chain
		Reader reader(opts);
		
		// Perform the correct operation
		if (opts.helpGroup.help->boolValue())
		{
			// Provide help as requested
			commandline::Help help(APPNAME, VERSION);
			commandline::OptionParser* o = opts.lastCommand();

			if (o && !o->name().empty())
				// Help on a specific command
				help.outputHelp(cout, *o);
			else
				// General help
				help.outputHelp(cout, opts);
		}
		else if (opts.helpGroup.version->boolValue())
		{
			// Print the program version
			commandline::Help help(APPNAME, VERSION);
			help.outputVersion(cout);
		}
		else if (opts.lastCommand() == &opts.generic)
		{
			commandline::Help help(APPNAME, VERSION);
			help.outputHelp(cout, opts);
		}
		else if (opts.lastCommand() == &opts.help)
		{
			commandline::Help help(APPNAME, VERSION);
			commandline::OptionParser* o = 0;
			if (opts.hasNext())
				o = opts.command(opts.next());

			if (o)
				// Help on a specific command
				help.outputHelp(cout, *o);
			else
				// General help
				help.outputHelp(cout, opts);
		}
		else if (opts.lastCommand() == &opts.implications)
		{
			CardinalityStore<string, string> coll;
			reader.output(opts, coll);

			Implications<string> newImpls;

			// Find tag implications
			OpSet<string> allTags = coll.getAllTags();
			for (OpSet<string>::const_iterator t = allTags.begin();
					t != allTags.end(); ++t)
			{
				OpSet<string> implied = coll.getImpliedBy(*t);
				if (!implied.empty())
					newImpls.consume(*t, implied);
			}

			newImpls.pack();

			TrivialConverter<string, string> conv;
			TextFormat<string, string> output(conv, conv, stdout);
			if (opts.outputGroup.redundant->boolValue())
				newImpls.outputFull(output);
			else
				newImpls.output(output);
		}
		else if (opts.lastCommand() == &opts.hierarchy)
		{
			int flattenThreshold = 0;
			if (opts.hierarchyGroup.flatten->boolValue())
				flattenThreshold = opts.hierarchyGroup.flatten->intValue();

			CardinalityStore<string, string> coll;
			reader.output(opts, coll);

			if (opts.hierarchyGroup.filter->boolValue())
				coll.removeTagsWithCardinalityLessThan(opts.hierarchyGroup.filter->intValue());

			// Default operation: build the smart hierarchy.
			// The root is an automatic object, so that it is released when
			// the block is left
			SmartHierarchyNode<string, string> root("_top", coll, flattenThreshold);
			printNode(&root, "/");
		}
		else if (opts.lastCommand() == &opts.cleanhierarchy)
		{
			int flattenThreshold = 0;
			if (opts.hierarchyGroup.flatten->boolValue())
				flattenThreshold = opts.hierarchyGroup.flatten->intValue();

			CardinalityStore<string, string> coll;
			reader.output(opts, coll);

			if (opts.hierarchyGroup.filter->boolValue())
				coll.removeTagsWithCardinalityLessThan(opts.hierarchyGroup.filter->intValue());

			// Build the clean smart hierarchy.
			// The root is an automatic object, so that it is released when
			// the block is left
			CleanSmartHierarchyNode<string, string> root("_top", coll, flattenThreshold);
			printNode(&root, "/");
		}
		else if (opts.lastCommand() == &opts.diff)
		{
			// Both collections must be named on the command line
			if (!opts.hasNext())
				throw commandline::BadOption("the diff command needs two collections to compare");
			string first = opts.next();
			if (!opts.hasNext())
				throw commandline::BadOption("the diff command needs two collections to compare");
			string second = opts.next();

			InputMerger<string, string> merger1;
			reader.output(first, merger1);

			InputMerger<string, string> merger2;
			reader.output(second, merger2);

			PatchList<string, string> newpatches;
			newpatches.addPatch(merger1, merger2);

			TrivialConverter<string, string> conv;
			TextFormat<string, string>::outputPatch(conv, conv, newpatches, stdout);
		}
		else if (opts.lastCommand() == &opts.related)
		{
			if (!opts.hasNext())
				throw commandline::BadOption("the related command needs the item(s) to start from");
			string item = opts.next();
			InputMerger<string, string> merger;
			reader.output(opts, merger);

			int maxdist = 0;
			if (opts.related.distance->boolValue())
				maxdist = opts.related.distance->intValue();

			// Split the items on commas
			string splititem;
			set<string> splititems;
			for (string::const_iterator c = item.begin();
					c != item.end(); ++c)
				if (*c == ',')
				{
					if (!merger.hasItem(splititem))
					{
						fprintf(stderr, "Item \"%.*s\" does not exist in the collection\n", PFSTR(splititem));
						return 1;
					}
					splititems.insert(splititem);
					splititem = string();
				} else
					splititem += *c;
			// What follows the last comma (or the whole argument, if it has
			// no commas) is the last item
			if (!merger.hasItem(splititem))
			{
				fprintf(stderr, "Item \"%.*s\" does not exist in the collection\n", PFSTR(splititem));
				return 1;
			}
			splititems.insert(splititem);

			// Get the tagset as the intersection of the tagsets of all input items
			set<string>::const_iterator i = splititems.begin();
			OpSet<string> ts = merger.getTags(*i);
			for (++i; i != splititems.end(); ++i)
				ts = ts ^ merger.getTags(*i);

			if (ts.empty())
			{
				if (splititems.size() > 1)
					fprintf(stderr, "The items %.*s are unrelated: cannot find a barycenter to start computing relationships from.\n", PFSTR(item));
				else
					fprintf(stderr, "The items %.*s has no tags attached.\n", PFSTR(item));
				return 1;
			}

			// Build a full TagCollection
			CardinalityStore<string, string> coll;
			merger.output(coll);

			printItems(coll.getItemsExactMatch(ts));

			if (maxdist)
			{
				// Get the related tagsets
				list< OpSet<string> > rel = coll.getRelatedTagsets(ts, maxdist);

				// Print the output list
				for (list< OpSet<string> >::const_iterator r = rel.begin();
						r != rel.end(); ++r)
					printItems(coll.getItemsExactMatch(*r));
			}
		}
		else if (opts.lastCommand() == &opts.reverse)
		{
			string revnull;
			if (opts.reverse.untaggedTag->boolValue())
				revnull = opts.reverse.untaggedTag->stringValue();

			InputMerger<string, string> reversed;
			Reverser<string, string> reverser(reversed, revnull);
			reader.output(opts, reverser);

			Writer writer(opts);
			reversed.output(writer);
		}
		else if (opts.lastCommand() == &opts.copy)
		{
			Writer writer(opts);
			reader.output(opts, writer);
		}
		else if (opts.lastCommand() == &opts.findspecials)
		{
			int flattenThreshold = 0;
			if (opts.hierarchyGroup.flatten->boolValue())
				flattenThreshold = opts.hierarchyGroup.flatten->intValue();

			CardinalityStore<string, string> coll;
			reader.output(opts, coll);

			if (opts.hierarchyGroup.filter->boolValue())
				coll.removeTagsWithCardinalityLessThan(opts.hierarchyGroup.filter->intValue());

			// Default operation: build the smart hierarchy
			SmartHierarchyNode<string, string> root("_top", coll, flattenThreshold);

			OpSet<string> seen;
			for (HierarchyNode<string, string>::iterator i = root.begin();
					i != root.end(); ++i)
			{
				OpSet<string> items = getItems(*i);

				// Find the items in this branch that are not present in
				// any of the previous ones
				OpSet<string> newItems;
				if (!seen.empty())
				{
					for (OpSet<string>::const_iterator j = items.begin();
							j != items.end(); ++j)
					{
						OpSet<string> tags = coll.getTags(*j) ^ seen;
						if (tags.empty())
							newItems += *j;
					}

					// size() does not return an int: convert explicitly to
					// match the %d conversions
					printf("%.*s: %d items, %d special items:\n",
							PFSTR((*i)->tag()),
							static_cast<int>(items.size()),
							static_cast<int>(newItems.size()));

					int indent = static_cast<int>((*i)->tag().size()) + 2;
					for (OpSet<string>::const_iterator j = newItems.begin(); j != newItems.end(); ++j)
						printf("%*s%.*s\n", indent, "", PFSTR(*j));
				}

				seen += (*i)->tag();
			}
		}
		else if (opts.lastCommand() == &opts.grep)
		{
			if (!opts.hasNext())
				throw commandline::BadOption("the grep command needs an expression to match");

			Writer writer(opts);
			FilterItemsByExpression<string, string> filter(writer, opts.next());
			reader.output(opts, filter);
		}
		else
			throw commandline::BadOption(string("unhandled command ") +
						(opts.lastCommand() ? opts.lastCommand()->name() : "(null)"));

		return 0;
	} catch (commandline::BadOption& e) {
		cerr << e.desc() << endl;
		commandline::Help help(APPNAME, VERSION);
		if (opts.lastCommand())
		{
			help.outputHelp(cerr, *opts.lastCommand());
		} else {
			help.outputHelp(cerr, opts);
		}
		// Return instead of calling exit(), so that the objects local to
		// main are destroyed
		return 1;
	} catch (Exception& e)
	{
		fprintf(stderr, "%s: %.*s\n", e.type(), PFSTR(e.desc()));
		return 1;
	}
}

// vim:set ts=4 sw=4:
