/*
 * Fast index for tag data
 *
 * Copyright (C) 2006  Enrico Zini <enrico@debian.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <tagcoll/IntDiskIndex.h>
#include <list>

using namespace std;
using namespace Tagcoll;

//#define TRACE_IS

#ifdef TRACE_IS
#include <iostream>
#endif

// Input iterator over an array of ints, yielding every value converted to T
// with the given converter.
//
// Two iterators compare by position only.  An iterator that has no value left
// holds a null position, which is also what the "end" constructor produces,
// so running out of values makes the iterator equal to the end iterator.
template<typename T>
class IntSetIterator
{
	// Next value to yield, or 0 when nothing is left
	const int* cur;
	// How many values are left starting at cur
	size_t left;
	const Converter<int, T>& conv;

public:
	// Build the end iterator
	IntSetIterator(const Converter<int, T>& conv)
		: cur(0), left(0), conv(conv) {}

	// Build an iterator over the `size` ints starting at `vals`.
	// An empty range immediately compares equal to the end iterator.
	IntSetIterator(const int* vals, size_t size, const Converter<int, T>& conv)
		: cur(size == 0 ? 0 : vals), left(size), conv(conv) {}

	// Move to the next value; does nothing once the range is over
	IntSetIterator<T>& operator++()
	{
		if (left == 0)
			return *this;

		--left;
		cur = (left == 0) ? 0 : cur + 1;
		return *this;
	}

	// Convert and return the current value.  Must not be used on an iterator
	// that has reached the end, since the position is null there.
	T operator*() { return conv(*cur); }

	bool operator!=(const IntSetIterator<T>& mi) { return !(cur == mi.cur); }
};

// Input iterator counting through consecutive ints, yielding each of them
// converted to T with the given converter.
//
// There is no built-in end: a range is delimited by a second iterator
// holding the first int that must not be yielded.
template<typename T>
class IntSeqIterator
{
	// The int the iterator currently designates
	int cur;
	const Converter<int, T>& conv;

public:
	IntSeqIterator(int val, const Converter<int, T>& conv)
		: cur(val), conv(conv) {}

	// Step to the following int
	IntSeqIterator<T>& operator++()
	{
		cur += 1;
		return *this;
	}

	// Convert and return the current int
	T operator*() { return conv(cur); }

	// Iterators differ when they designate different ints
	bool operator!=(const IntSeqIterator<T>& mi) { return !(cur == mi.cur); }
};

// Input iterator over the ids of an IntIndex whose entry is not empty,
// yielding each id converted to T with the given converter.
//
// The end of the range is the iterator built with val == idx.size(): ids are
// only ever looked up in the index while they are below idx.size(), and the
// iterator stops exactly on idx.size() when it runs out of non-empty entries.
template<typename T>
class NonemptyIntSeqIterator
{
	const IntIndex& idx;
	unsigned int val;
	const Converter<int, T>& conv;

	// Move val forward until it designates a non-empty entry or reaches
	// idx.size().  The bound is tested first, so that idx.size(val) is never
	// asked about an id that is past the end of the index.
	void skipEmpty()
	{
		while (val < idx.size() && idx.size(val) == 0)
			++val;
	}

public:
	// Build an iterator starting at the first non-empty entry whose id is
	// `val` or greater.  Passing idx.size() builds the end iterator.
	NonemptyIntSeqIterator(const IntIndex& idx, int val, const Converter<int, T>& conv)
		: idx(idx), val(val), conv(conv)
	{
		// Without this the first id of the range would be yielded even when
		// its entry is empty
		skipEmpty();
	}

	// Move to the next non-empty entry, or to the end if there is none
	NonemptyIntSeqIterator<T>& operator++()
	{
		++val;
		skipEmpty();
		return *this;
	}

	// Convert and return the current id
	T operator*() const { return conv(val); }

	bool operator!=(const NonemptyIntSeqIterator<T>& mi) const { return val != mi.val; }
};


// A list of sorted int sequences, with the operations needed to intersect or
// merge them.
//
// Every element is a (count, pointer) pair: `second` points to the current
// head of a sequence and `first` is the number of values left starting from
// there.  All the algorithms compare heads with < and > and only ever move
// forward, so they rely on every sequence being sorted in ascending order.
//
// intersect() and merge() consume the sequences while they work: fill an
// IntSets, run one of the two, then throw the object away.
class IntSets : public std::list< std::pair<size_t, const int*> >
{
	// Becomes true as soon as advance() drops a sequence that ran out of
	// values.  From that moment no further value can belong to all the
	// sequences, so the intersection code uses it to stop instead of going on
	// with the sequences that are left.
	bool m_exhausted;

#ifdef TRACE_IS
	void print(const std::string& title)
	{
		cerr << " * " << title << ":" << endl;
		size_t idx = 0;
		for (const_iterator i = begin(); i != end(); i++, idx++)
		{
			cerr << idx << ":";
			for (size_t j = 0; j < i->first; j++)
			{
				cerr << " " << i->second[j];
				if (j > 5)
				{
					cerr << "...";
					break;
				}
			}
			cerr << endl;
		}
	}
#else
	inline void print(const std::string&) {}
#endif

	// Remove the sequences that hold no value at all
	void dropEmpty()
	{
		iterator i = begin();
		while (i != end())
			if (i->first == 0)
				i = erase(i);
			else
				++i;
	}

	// Advance every sequence by one value
	void advanceAll()
	{
		iterator i = begin();
		while (i != end())
		{
			// advance() may erase i: work out its successor beforehand
			iterator next = i; ++next;
			advance(i);
			i = next;
		}
	}

	// Get ready for an intersection: forget about previous exhaustions, and
	// consider the sequences exhausted from the start if one of them is
	// empty, since nothing can be common to an empty sequence.
	void startIntersection()
	{
		m_exhausted = false;
		for (const_iterator i = begin(); i != end(); ++i)
			if (i->first == 0)
			{
				m_exhausted = true;
				break;
			}
	}

	// Find the next value common to all the sequences, store it in `value`
	// and move every sequence past it.
	// Returns false, leaving `value` alone, when there are no more common
	// values or when there are less than two sequences to work with.
	bool nextCommon(int& value)
	{
		print("pre-flatten");
		flatten();
		print("post-flatten");
		if (m_exhausted || size() < 2)
			return false;

		// flatten() left all the heads equal: any of them is the common item
		value = *(begin()->second);
		advanceAll();
		print("post-selection");
		return true;
	}

public:
	IntSets() : m_exhausted(false) {}

	// Remove the first item from the beginning of the sequence i.  If the
	// sequence has no items left after that, remove it from the list and
	// take note that a sequence has been exhausted.
	// Returns i if it advanced normally, or the element after i if the
	// sequence ran out and has been removed.
	iterator advance(iterator i)
	{
		// <= 1 rather than == 1: a sequence that was empty to begin with must
		// be dropped as well, decrementing its count would wrap around
		if (i->first <= 1)
		{
			m_exhausted = true;
			return erase(i);
		}
		--(i->first);
		++(i->second);
		return i;
	}

	// Advance the first sequence until its head is not smaller than the head
	// of the second one.  Stops as soon as a sequence is exhausted.
	void ensureFirstIsGreatest()
	{
		print("pre-ensureFirstIsGreatest");
		while (size() > 1 && !m_exhausted)
		{
			iterator second = begin(); ++second;
			if (*begin()->second >= *second->second)
				break;
			advance(begin());
		}
	}

	// Advance the sequences after `start`, one after the other, until the
	// head of each of them is the same as the head of `start`.
	// Returns true if it succeeds.  Returns false if a sequence has a head
	// greater than the one before it (the caller then needs to advance the
	// first sequence and try again) or if a sequence got exhausted.
	bool ensureNextIsSame(iterator start)
	{
		print("pre-ensureNextIsSame(begin)");
		// Advance the next until it's the same
		iterator i = start; ++i;
		while (i != end() && *start->second > *i->second)
		{
			i = advance(i);
			if (m_exhausted)
				return false;
		}

		// If nothing is left, we're ok
		if (i == end())
			return true;

		// If next is the same, keep going
		if (*start->second == *i->second)
			return ensureNextIsSame(i);

		// Else, we need to go back and advance the start
		return false;
	}

	// Advance the sequences until all their heads are the same, or until one
	// of them is exhausted
	void flatten()
	{
		while (size() > 1 && !m_exhausted)
		{
			ensureFirstIsGreatest();
			if (m_exhausted || ensureNextIsSame(begin()))
				break;
			// A sequence has a greater head than the first one: move the
			// first one forward, unless the mismatch was an exhaustion
			if (!m_exhausted)
				advance(begin());
		}
	}

	// Compute the intersection of all the sequences, converting every common
	// value with conv.
	//
	// The result is empty if one of the sequences is empty.  It is also empty
	// if there are less than two sequences: callers are expected to handle
	// those cases by themselves.
	template<typename T>
	OpSet<T> intersect(const Converter<int, T>& conv)
	{
		print("begin");
		/*
		 * * If the first element of every list is the same, add it to res
		 *   and move on
		 * * Else, advance the minor ones until they all get the same
		 * * Stop as soon as one of the lists is over
		 */
		OpSet<T> res;
		startIntersection();
		int value = 0;
		while (nextCommon(value))
			res += conv(value);
		return res;
	}

	// Same as intersect(conv), but the common values are returned as they
	// are, without conversion
	OpSet<int> intersect()
	{
		print("begin");
		OpSet<int> res;
		startIntersection();
		int value = 0;
		while (nextCommon(value))
			res += value;
		return res;
	}


	// Return the smallest of the heads of the sequences, and advance all the
	// sequences that have it as their head.
	// The list must not be empty and must not contain empty sequences.
	int extractmin()
	{
		// Find the minimum item
		int min = *(begin()->second);
		for (const_iterator i = begin(); i != end(); ++i)
			if (*(i->second) < min)
				min = *(i->second);

		// Advance all the lists which have the minimum item as first item
		iterator i = begin();
		while (i != end())
		{
			iterator next = i; ++next;
			if (*(i->second) == min)
				advance(i);
			i = next;
		}

		return min;
	}

	// Input iterator yielding, converted with conv, the values that
	// extractmin() takes out of an IntSets.
	//
	// -1 marks the end of the iteration, so the sequences being merged must
	// not contain -1.
	template<typename T>
	class MergeIterator
	{
		IntSets& is;
		const Converter<int, T>& conv;
		int val;

	public:
		// End iterator
		MergeIterator(IntSets& is, const Converter<int, T>& conv) : is(is), conv(conv), val(-1) {}

		MergeIterator(IntSets& is, const Converter<int, T>& conv, int val) : is(is), conv(conv), val(val) {}

		MergeIterator& operator++()
		{
			if (is.empty())
				val = -1;
			else
				val = is.extractmin();
			return *this;
		}
		T operator*() { return conv(val); }

		// Two iterators are the same only when both are at the end
		bool operator!=(const MergeIterator& mi) { return val != -1 || mi.val != -1; }
	};

	// Start merging: drop the empty sequences, which extractmin() cannot
	// handle, and return an iterator on the smallest value of all, or the end
	// iterator if there is nothing to merge
	template<typename T>
	MergeIterator<T> mergeBegin(const Converter<int, T>& conv)
	{
		dropEmpty();
		if (empty())
			return MergeIterator<T>(*this, conv);
		return MergeIterator<T>(*this, conv, extractmin());
	}

	template<typename T>
	MergeIterator<T> mergeEnd(const Converter<int, T>& conv)
	{
		return MergeIterator<T>(*this, conv);
	}

	// Compute the union of all the sequences, converting every value with
	// conv
	template<typename T>
	OpSet<T> merge(const Converter<int, T>& conv)
	{
		return OpSet<T>(mergeBegin<T>(conv), mergeEnd<T>(conv));
	}
};

// Return the items stored in the tag index under the id of `tag`, each
// converted with *m_toitem
template<typename ITEM, typename TAG>
OpSet<ITEM> IntDiskIndex<ITEM, TAG>::getItemsHavingTag(const TAG& tag) const
{
	const int tagId = fromtag(tag);

	// Walk the ints found in the tag index, up to the null-position end iterator
	IntSetIterator<ITEM> first(tagidx.data(tagId), tagidx.size(tagId), *m_toitem);
	IntSetIterator<ITEM> last(*m_toitem);
	return OpSet<ITEM>(first, last);
}

// Return the items that have all the tags in `tags`.
//
// An empty tag set gives an empty result; a single tag is handed over to
// getItemsHavingTag(); in the other cases the item lists of all the tags are
// intersected.
template<typename ITEM, typename TAG>
OpSet<ITEM> IntDiskIndex<ITEM, TAG>::getItemsHavingTags(const OpSet<TAG>& tags) const
{
	if (tags.empty())
		return OpSet<ITEM>();
	if (tags.size() == 1)
		return getItemsHavingTag(*tags.begin());

	// Create a list with the item lists of all the tags
	IntSets items;
	for (typename OpSet<TAG>::const_iterator i = tags.begin(); i != tags.end(); ++i)
	{
		int id = fromtag(*i);
		size_t count = tagidx.size(id);

		// A tag that no item has makes the intersection empty.  Stop here
		// rather than queueing a list with nothing to read in it.
		if (count == 0)
			return OpSet<ITEM>();

		items.push_back(make_pair(count, tagidx.data(id)));
	}
	return items.intersect<ITEM>(*m_toitem);
}

// Return the tags stored in the package index under the id of `item`, each
// converted with *m_totag
template<typename ITEM, typename TAG>
OpSet<TAG> IntDiskIndex<ITEM, TAG>::getTagsOfItem(const ITEM& item) const
{
	const int itemId = fromitem(item);

	// Walk the ints found in the package index, up to the null-position end iterator
	IntSetIterator<TAG> first(pkgidx.data(itemId), pkgidx.size(itemId), *m_totag);
	IntSetIterator<TAG> last(*m_totag);
	return OpSet<TAG>(first, last);
}

// Return the tags that at least one of the items in `items` has: the union
// of the tag lists of all the items.
template<typename ITEM, typename TAG>
OpSet<TAG> IntDiskIndex<ITEM, TAG>::getTagsOfItems(const OpSet<ITEM>& items) const
{
	if (items.empty())
		return OpSet<TAG>();

	// Create a list with the tag lists of all the items
	IntSets tags;
	for (typename OpSet<ITEM>::const_iterator i = items.begin(); i != items.end(); ++i)
	{
		int id = fromitem(*i);
		size_t count = pkgidx.size(id);

		// An item without tags adds nothing to the union, and a list with
		// nothing to read in it must not reach the merge
		if (count == 0)
			continue;

		tags.push_back(make_pair(count, pkgidx.data(id)));
	}

	// None of the items had any tag
	if (tags.empty())
		return OpSet<TAG>();

	return tags.merge<TAG>(*m_totag);
}

// Return the items found by walking the package index with a
// NonemptyIntSeqIterator from id 0 up to pkgidx.size(), each id converted
// with *m_toitem
template<typename ITEM, typename TAG>
OpSet<ITEM> IntDiskIndex<ITEM, TAG>::getTaggedItems() const
{
	NonemptyIntSeqIterator<ITEM> first(pkgidx, 0, *m_toitem);
	NonemptyIntSeqIterator<ITEM> last(pkgidx, pkgidx.size(), *m_toitem);
	return OpSet<ITEM>(first, last);
}

// Return one tag for every id from 0 up to (and excluding) tagidx.size(),
// each id converted with *m_totag
template<typename ITEM, typename TAG>
OpSet<TAG> IntDiskIndex<ITEM, TAG>::getAllTags() const
{
	IntSeqIterator<TAG> first(0, *m_totag);
	IntSeqIterator<TAG> last(tagidx.size(), *m_totag);
	return OpSet<TAG>(first, last);
}

// Return the tags that appear, together with all the tags in `tags`, on at
// least one item; the tags in `tags` themselves are not part of the result.
template<typename ITEM, typename TAG>
OpSet<TAG> IntDiskIndex<ITEM, TAG>::getCompanionTags(const OpSet<TAG>& tags) const
{
	// This is basically a reimplementation of:
	// return getTagsOfItems(getItemsHavingTags(tags)) - tags;
	// without the conversion to and from ITEM

	if (tags.empty())
		return OpSet<TAG>();

	OpSet<int> itags = (*m_fromtag)(tags);
	OpSet<int> items;

	// First step: the ids of the items that have all the tags
	if (itags.size() == 1)
	{
		int itag = *itags.begin();
		const int* data = tagidx.data(itag);
		size_t count = tagidx.size(itag);
		for (size_t i = 0; i < count; ++i)
			items += data[i];
	} else {
		// Create a list with the item lists of all the tags
		IntSets inters;
		for (OpSet<int>::const_iterator i = itags.begin(); i != itags.end(); ++i)
		{
			size_t count = tagidx.size(*i);

			// A tag that no item has makes the intersection empty, hence
			// there are no companions.  Stop here rather than queueing a
			// list with nothing to read in it.
			if (count == 0)
				return OpSet<TAG>();

			inters.push_back(make_pair(count, tagidx.data(*i)));
		}
		items = inters.intersect();
	}

	if (items.empty())
		return OpSet<TAG>();

	// Second step: the union of the tag lists of those items
	IntSets lists;
	for (OpSet<int>::const_iterator i = items.begin(); i != items.end(); ++i)
	{
		size_t count = pkgidx.size(*i);

		// Nothing to merge for an item without tags
		if (count == 0)
			continue;

		lists.push_back(make_pair(count, pkgidx.data(*i)));
	}

	if (lists.empty())
		return OpSet<TAG>();

	// Every selected item has all the requested tags, so they are always in
	// the union: take them out, as they are no companions of themselves
	return lists.merge<TAG>(*m_totag) - tags;
}

// Send to `consumer` every item of the package index that has at least one
// tag, together with its tags
template<typename ITEM, typename TAG>
void IntDiskIndex<ITEM, TAG>::output(Consumer<ITEM, TAG>& consumer) const
{
	for (unsigned int id = 0; id < pkgidx.size(); ++id)
	{
		// Ids with an empty tag list are not passed on
		if (!(pkgidx.size(id) > 0))
			continue;

		IntSetIterator<TAG> first(pkgidx.data(id), pkgidx.size(id), *m_totag);
		IntSetIterator<TAG> last(*m_totag);
		consumer.consume(toitem(id), OpSet<TAG>(first, last));
	}
}


// Create an indexer that uses `fromitem` and `fromtag` to turn the items and
// tags it is given into the ints that get indexed
template<typename ITEM, typename TAG>
IntDiskIndexer<ITEM, TAG>::IntDiskIndexer(
		const Converter<ITEM, int>& fromitem,
		const Converter<TAG, int>& fromtag)
	: fromitem(fromitem),
	  fromtag(fromtag)
{
}

// Record in both indexes that `item` has the tags in `tags`.
//
// Items and tags are converted to ints with the fromitem and fromtag
// converters.  A conversion result of -1 means that there is nothing to
// index: an item converted to -1 is ignored altogether, and a tag converted
// to -1 is skipped.
template<class ITEM, class TAG>
void IntDiskIndexer<ITEM, TAG>::consumeItem(const ITEM& item, const OpSet<TAG>& tags)
{
	int iitem = fromitem(item);
	if (iitem == -1)
		return;

	// tags is const: it can only be walked with a const_iterator
	for (typename OpSet<TAG>::const_iterator i = tags.begin(); i != tags.end(); ++i)
	{
		int itag = fromtag(*i);
		if (itag == -1)
			continue;
		// Keep the two directions of the mapping in sync
		pkgidx.map(iitem, itag);
		tagidx.map(itag, iitem);
	}
}


#ifndef INSTANTIATING_TEMPLATES
#include <string>

namespace Tagcoll {
	// Explicitly instantiate the index and the indexer for int items and int
	// tags.  This is skipped when INSTANTIATING_TEMPLATES is defined
	// (presumably by files that include this one to instantiate the
	// templates for other types -- TODO confirm).
	template class IntDiskIndex<int, int>;
	template class IntDiskIndexer<int, int>;
}
#endif


#ifdef COMPILE_TESTSUITE

#include <tests/test-utils.h>
#include <string>
#include <map>

namespace tut {
using namespace tut_tagcoll;
using namespace std;

// Name of the index file that the test fixture builds in its constructor and
// unlinks in its destructor
static const char* fname = "tagcoll_intdiskindex.tmp";

class BigMap
{
	int seq;
	map<int, string> tostring;
	map<string, int> toint;
public:
	BigMap() : seq(0) {}

	void map(int a, const string& b)
	{
		tostring[a] = b;
		toint[b] = a;
	}

	int get(const string& val)
	{
		std::map<string, int>::const_iterator i = toint.find(val);
		if (i == toint.end())
		{
			map(seq, val);
			return seq++;
		} else
			return i->second;
	}

	string get(int val)
	{
		std::map<int, string>::const_iterator i = tostring.find(val);
		gen_ensure(i != tostring.end());
		return i->second;
	}
};
}

namespace Tagcoll {

// Converter from int ids to strings, answering with what a BigMap holds.
// The BigMap is referred to, not copied.
class TestFromIntConverter : public Converter<int, std::string>
{
	tut::BigMap& names;

public:
	TestFromIntConverter(tut::BigMap& map) : names(map) {}

	// Return the string that the BigMap has for the id `item`
	virtual std::string operator()(const int& item) const
	{
		return names.get(item);
	}
};

// Converter from strings to int ids, answering with what a BigMap holds.
// The BigMap is referred to, not copied, and it gets a new entry every time
// a string is converted for the first time.
class TestToIntConverter : public Converter<std::string, int>
{
	tut::BigMap& ids;

public:
	TestToIntConverter(tut::BigMap& map) : ids(map) {}

	// Return the id that the BigMap has, or creates, for the string `item`
	virtual int operator()(const std::string& item) const
	{
		return ids.get(item);
	}
};

}

namespace tut {

// Test fixture: writes the test collection to the index file `fname` when it
// is created, and removes the file when it is destroyed.
struct tagcoll_intdiskindex_shar {
	// Id mappings for items and for tags.  They are declared before the
	// converters, which are built with references to them.
	BigMap items;
	BigMap tags;
	// conv1: item string -> int, conv2: tag string -> int,
	// conv3: int -> item string, conv4: int -> tag string
	TestToIntConverter conv1;
	TestToIntConverter conv2;
	TestFromIntConverter conv3;
	TestFromIntConverter conv4;

	tagcoll_intdiskindex_shar()
		: conv1(items), conv2(tags), conv3(items), conv4(tags)
	{
		MasterMMapIndexer master(fname);

		// Feed the test collection to the indexer
		IntDiskIndexer<string, string> indexer(conv1, conv2);
		output_test_collection(indexer);
		
		// The package index is appended first and the tag index second: the
		// test below opens the index passing 0 and 1 in the same order
		master.append(indexer.pkgIndexer());
		master.append(indexer.tagIndexer());
		master.commit();
	}
	~tagcoll_intdiskindex_shar()
	{
		unlink(fname);
	}
};
// NOTE(review): presumably declares the test group for the fixture above and
// the `to` name used by the test below -- confirm in tests/test-utils.h
TESTGRP(tagcoll_intdiskindex);

#include <iostream>

#if 0
// Debugging aid: print the elements of `s` to cerr, separated by ", "
static void outts(const OpSet<string>& s)
{
	bool first = true;
	for (OpSet<string>::const_iterator i = s.begin(); i != s.end(); i++)
	{
		if (!first)
			cerr << ", ";
		cerr << *i;
		first = false;
	}
}
#endif

// Open the index file written by the fixture and run the read-only
// collection checks on it
template<> template<>
void to::test<1>()
{
	MasterMMapIndex master(fname);

	// 0 and 1 match the order in which the fixture appended the package
	// index and the tag index
	IntDiskIndex<string, string> idx(master, 0, 1, &conv1, &conv2, &conv3, &conv4);

	// Disabled debugging output: dumps all the items with their tags, then
	// all the tags with their items
#if 0
	cerr << "Items: ";
	OpSet<string> s = idx.getTaggedItems();
	outts(s);
	cerr << endl;
	for (OpSet<string>::const_iterator i = s.begin(); i != s.end(); i++)
	{
		cerr << "  " << *i << ": ";
		outts(idx.getTags(*i));
		cerr << endl;
	}


	cerr << "Tags: ";
	s = idx.getAllTags();
	outts(s);
	cerr << endl;
	for (OpSet<string>::const_iterator i = s.begin(); i != s.end(); i++)
	{
		cerr << "  " << *i << ": ";
		outts(idx.getItems(*i));
		cerr << endl;
	}
#endif
	
	test_readonly_collection(idx);
}

}

#endif
// vim:set ts=4 sw=4:
