/*
    libmaus2
    Copyright (C) 2018 German Tischler-Höhle

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
#include <libmaus2/fastx/acgtnMap.hpp>
#include <libmaus2/wavelet/ImpCompactHuffmanWaveletTree.hpp>
#include <libmaus2/lf/ImpCompactHuffmanWaveletLF.hpp>
#include <libmaus2/parallel/NumCpus.hpp>
#include <libmaus2/util/ArgParser.hpp>
#include <libmaus2/util/PrefixSums.hpp>
#include <libmaus2/util/FiniteSizeHeap.hpp>
#include <libmaus2/util/MemUsage.hpp>
#include <libmaus2/aio/OutputStreamInstanceArray.hpp>
#include <libmaus2/aio/InputOutputStreamInstance.hpp>
#include <libmaus2/math/ipow.hpp>
#include <libmaus2/aio/ConcatInputStream.hpp>
#include <libmaus2/aio/SerialisedPeeker.hpp>
#include <libmaus2/fm/BiIndex.hpp>
#include <libmaus2/math/numbits.hpp>
#include <libmaus2/bitio/CompactArray.hpp>

// Default number of threads: the number of logical processors as
// reported by libmaus2::parallel::NumCpus.
static uint64_t getDefaultNumThreads()
{
	return libmaus2::parallel::NumCpus::getNumLogicalProcessors();
}

/**
 * Per symbol offset table derived from a wavelet tree.
 *
 * The table is built from the symbol frequencies of the complete sequence
 * stored in the wavelet tree, which are then passed through
 * libmaus2::util::PrefixSums::prefixSums. The code using this class treats
 * D[s] as the rank of the first suffix starting with symbol s
 * (NOTE(review): this presumes prefixSums computes exclusive prefix sums).
 **/
struct DArray
{
	typedef DArray this_type;
	typedef libmaus2::util::unique_ptr<this_type>::type unique_ptr_type;
	typedef libmaus2::util::shared_ptr<this_type>::type shared_ptr_type;

	// number of distinct symbols stored in the wavelet tree
	uint64_t const sigmasize;
	// largest symbol plus one (0 if there are no symbols)
	uint64_t const sigma;
	// table of sigma+1 accumulated symbol frequencies
	libmaus2::autoarray::AutoArray<uint64_t> D;

	// return table entry for symbol s
	uint64_t operator[](uint64_t const s) const
	{
		return D[s];
	}

	// number of distinct symbols in WT
	static uint64_t getSigmaSize(libmaus2::wavelet::ImpCompactHuffmanWaveletTree const & WT)
	{
		return WT.getSymbolArray().size();
	}

	// largest symbol in WT plus one, or 0 if WT has no symbols
	static uint64_t getSigma(libmaus2::wavelet::ImpCompactHuffmanWaveletTree const & WT)
	{
		::libmaus2::autoarray::AutoArray<int64_t> S = WT.getSymbolArray();

		if ( ! S.size() )
			return 0;

		// a linear scan is sufficient, there is no need to sort the array
		int64_t const maxsym = *std::max_element(S.begin(),S.end());
		assert ( maxsym >= 0 );

		return maxsym + 1;
	}

	/**
	 * Compute the table for WT.
	 *
	 * @param WT wavelet tree
	 * @param sigmasize number of distinct symbols in WT (see getSigmaSize)
	 * @param sigma largest symbol plus one (see getSigma)
	 * @return array of size sigma+1 containing the accumulated symbol frequencies
	 **/
	static libmaus2::autoarray::AutoArray<uint64_t> getD(libmaus2::wavelet::ImpCompactHuffmanWaveletTree const & WT, uint64_t const sigmasize, uint64_t const sigma)
	{
		// (symbol,frequency) pairs for the complete sequence
		libmaus2::autoarray::AutoArray< std::pair<int64_t,uint64_t> > P(sigma,false);
		uint64_t const csigma = WT.enumerateSymbolsInRangeSorted(0,WT.size(),P.begin());

		libmaus2::autoarray::AutoArray<uint64_t> D(sigma+1);
		assert ( csigma == sigmasize );

		for ( uint64_t i = 0; i < csigma; ++i )
		{
			int64_t const sym = P[i].first;
			// symbols are used as array indices, so they need to be in [0,sigma)
			assert ( sym >= 0 && static_cast<uint64_t>(sym) < sigma );
			D[ sym ] = P[i].second;
		}

		libmaus2::util::PrefixSums::prefixSums(D.begin(),D.end());
		return D;
	}

	// set up table for wavelet tree WT
	DArray(libmaus2::wavelet::ImpCompactHuffmanWaveletTree const & WT)
	: sigmasize(getSigmaSize(WT)), sigma(getSigma(WT)), D(getD(WT,sigmasize,sigma))
	{

	}
};

// A symbol together with its interval [from,to) on the BWT
struct NodeInfo
{
	// symbol
	int64_t sym;
	// start of interval on BWT (inclusive)
	uint64_t from;
	// end of interval on BWT (exclusive)
	uint64_t to;

	// default constructor, members are left uninitialised
	NodeInfo()
	{
	}

	// construct from symbol and interval bounds
	NodeInfo(int64_t const rsym, uint64_t const rfrom, uint64_t const rto)
	: sym(rsym),
	  from(rfrom),
	  to(rto)
	{
	}

	// return a human readable representation of the object
	std::string toString() const
	{
		std::ostringstream text;

		text << "NodeInfo(sym=" << sym;
		text << ",from=" << from;
		text << ",to=" << to;
		text << ")";

		return text.str();
	}
};

// NodeInfo extended by the symbol on the left (lsym), used as sort key
// when grouping left extensions
struct SortNodeInfo
{
	// symbol on the left
	int64_t lsym;
	// symbol on the right
	int64_t sym;
	// interval bounds
	uint64_t from;
	uint64_t to;

	// default constructor, members are left uninitialised
	SortNodeInfo()
	{
	}

	// construct from left symbol, right symbol and interval bounds
	SortNodeInfo(int64_t const rlsym, int64_t const rsym, uint64_t const rfrom, uint64_t const rto)
	: lsym(rlsym),
	  sym(rsym),
	  from(rfrom),
	  to(rto)
	{
	}

	// order by left symbol first and by interval start second
	bool operator<(SortNodeInfo const & SNI) const
	{
		bool const samelsym = (lsym == SNI.lsym);
		return samelsym ? (from < SNI.from) : (lsym < SNI.lsym);
	}

	// return a human readable representation of the object
	std::string toString() const
	{
		std::ostringstream text;

		text << "SortNodeInfo(lsym=" << lsym;
		text << ",sym=" << sym;
		text << ",from=" << from;
		text << ",to=" << to;
		text << ")";

		return text.str();
	}
};

// node on the depth first traversal stack
struct TraversalNode
{
	// index of the node's first entry in the NodeInfo array
	uint64_t offset;
	// number of entries the node has in the NodeInfo array
	uint64_t size;
	// string depth of the node
	uint64_t depth;
	// symbol followed from the parent to reach this node (via backward search)
	int64_t symbol;

	// default constructor, members are left uninitialised
	TraversalNode()
	{
	}

	// construct from all members
	TraversalNode(uint64_t const roffset, uint64_t const rsize, uint64_t const rdepth, int64_t const rsymbol)
	: offset(roffset),
	  size(rsize),
	  depth(rdepth),
	  symbol(rsymbol)
	{
	}
};

// traversal node candidate carrying the size of its interval,
// which is used as the sort key
struct ExtendedTraversalNode
{
	// index of the node's first entry in the NodeInfo array
	uint64_t offset;
	// number of entries the node has in the NodeInfo array
	uint64_t size;
	// string depth of the node
	uint64_t depth;
	// symbol followed from the parent to reach this node (via backward search)
	int64_t symbol;
	// size of the node's interval
	uint64_t isize;

	// default constructor, members are left uninitialised
	ExtendedTraversalNode()
	{
	}

	// construct from all members
	ExtendedTraversalNode(uint64_t const roffset, uint64_t const rsize, uint64_t const rdepth, int64_t const rsymbol, uint64_t const risize)
	: offset(roffset),
	  size(rsize),
	  depth(rdepth),
	  symbol(rsymbol),
	  isize(risize)
	{
	}

	// nodes with larger intervals are ordered before nodes with smaller ones
	bool operator<(ExtendedTraversalNode const & E) const
	{
		return E.isize < isize;
	}
};

struct SuffixTreeNode
{
	uint64_t depth;
	NodeInfo const * NI;
	uint64_t size;
	int64_t * trace;

	SuffixTreeNode()
	{

	}

	SuffixTreeNode(
		uint64_t const rdepth,
		NodeInfo const * const rNI,
		uint64_t const rsize,
		int64_t * const rtrace
	) : depth(rdepth), NI(rNI), size(rsize), trace(rtrace)
	{

	}

	uint64_t range() const
	{
		if ( size )
		{
			uint64_t const from = NI[0].from;
			uint64_t const to = NI[size-1].to;
			return to-from;
		}
		else
		{
			return 0;
		}
	}

	std::string toString() const
	{
		std::ostringstream ostr;

		ostr << "SuffixTreeNode(depth=" << depth << ",label=";
		for ( uint64_t i = 0; i < depth; ++i )
			ostr << trace[i];
		ostr << ")\n";

		for ( uint64_t i = 0; i < size; ++i )
			ostr << "\t" << NI[i].toString() << "\n";

		return ostr.str();
	}
};

struct Enumerator
{
	// wavelet tree
	libmaus2::wavelet::ImpCompactHuffmanWaveletTree const & WT;
	// prefix sums over symbol counts
	DArray const D;

	// Construct enumerator for wavelet tree rWT. The tree is stored by reference
	// (it is not copied) and the D array is computed from it.
	Enumerator(libmaus2::wavelet::ImpCompactHuffmanWaveletTree const & rWT)
	: WT(rWT), D(WT)
	{
	}

	struct TraversalContext
	{
		typedef TraversalContext this_type;
		typedef libmaus2::util::unique_ptr<this_type>::type unique_ptr_type;
		typedef libmaus2::util::shared_ptr<this_type>::type shared_ptr_type;

		uint64_t const maxdepth;
		uint64_t const sigmasize;
		int64_t const minsym;
		int64_t const maxsym;

		// depth first traversal data
		uint64_t ANIoffset;
		libmaus2::autoarray::AutoArray<NodeInfo> ANI;
		uint64_t ATNoffset;
		libmaus2::autoarray::AutoArray<TraversalNode> ATN;
		libmaus2::autoarray::AutoArray<ExtendedTraversalNode> ATTN;
		libmaus2::autoarray::AutoArray<int64_t> vtrace;

		// array for sorting
		libmaus2::autoarray::AutoArray<SortNodeInfo> ASNI;

		// temporary for copying NodeInfo
		libmaus2::autoarray::AutoArray<NodeInfo> ATNI;

		// return space for NodeInfo
		libmaus2::autoarray::AutoArray<NodeInfo> ARNI;

		// rank information arrays
		libmaus2::autoarray::AutoArray< std::pair<int64_t,uint64_t> > P;
		libmaus2::autoarray::AutoArray< std::pair<int64_t,uint64_t> > PB;
		libmaus2::autoarray::AutoArray< std::pair<int64_t,uint64_t> > PBC;

		SuffixTreeNode STN;

		TraversalContext(uint64_t const rmaxdepth, uint64_t const rsigmasize, int64_t const rminsym, int64_t const rmaxsym)
		: maxdepth(rmaxdepth), sigmasize(rsigmasize), minsym(rminsym), maxsym(rmaxsym), ANIoffset(0), ANI(), ATNoffset(0), ATN(),
		  vtrace(maxdepth), ASNI(), ATNI(), ARNI(), P(sigmasize,false), PB(sigmasize,false), PBC(sigmasize,false), STN()
		{

		}

		std::ostream & printTop(std::ostream & out) const
		{
			assert ( ATNoffset );
			uint64_t const tnid = ATNoffset-1;

			TraversalNode const & TN = ATN[tnid];
			out << "TraversalNode("
				<< "depth=" << TN.depth << ",symbol=" << TN.symbol
				<< ",trace=";

			if ( TN.depth )
				out << TN.symbol;

			int64_t const * vt = vtrace.end() - TN.depth;
			for ( uint64_t i = 1; i < TN.depth; ++i )
				out << vt[i];

			if ( TN.size )
			{
				uint64_t const from = ANI[TN.offset+0].from;
				uint64_t const to = ANI[TN.offset + TN.size - 1].to;
				out << ",range=" << (to-from);
			}

			for ( uint64_t i = 0; i < TN.size; ++i )
				out << "," << ANI[TN.offset+i].toString();

			out << ")";

			return out;
		}
	};

	// Allocate a traversal context. If the sequence contains more than one
	// distinct symbol, then a root node (depth 0, symbol -1) is pushed which has
	// one entry for each symbol in [minsym,maxsym]. Otherwise the stack stays empty.
	TraversalContext::unique_ptr_type getContext(uint64_t const maxdepth, int64_t const minsym, int64_t const maxsym) const
	{
		TraversalContext::unique_ptr_type ptr(new TraversalContext(maxdepth,D.sigmasize,minsym,maxsym));
		TraversalContext & context = *ptr;

		// symbols and frequencies for the complete sequence
		uint64_t const numsyms = WT.enumerateSymbolsInRangeSorted(0,WT.size(),context.P.begin());

		if ( numsyms <= 1 )
			return ptr;

		uint64_t const firstentry = context.ANIoffset;

		for ( uint64_t z = 0; z < numsyms; ++z )
		{
			int64_t const sym = context.P[z].first;

			// skip symbols outside the requested range
			if ( sym < minsym || sym > maxsym )
				continue;

			uint64_t const count = context.P[z].second;
			context.ANI.push(context.ANIoffset,NodeInfo(sym,D[sym],D[sym]+count));
		}

		uint64_t const numentries = context.ANIoffset - firstentry;
		context.ATN.push(context.ATNoffset,TraversalNode(firstentry,numentries,0/* depth */,-1/*symbol */));

		return ptr;
	}

	/**
	 * Run the depth first traversal starting from the root and cut it off at
	 * nodes which either span an interval of size at most maxrange or have
	 * reached string depth maxdepth. For each such node a new traversal context is
	 * created which has this node as the only element on its stack and a copy of
	 * the current symbol trace. Nodes which do not satisfy the cut off condition
	 * are expanded in the same way as in enumerate().
	 *
	 * @param maxdepth maximum string depth
	 * @param minsym smallest symbol considered
	 * @param maxsym largest symbol considered
	 * @param maxrange maximum interval size for cutting off the traversal
	 * @return vector of contexts, one per node the traversal was cut off at
	 **/
	std::vector<TraversalContext::shared_ptr_type> enumerateSplit(uint64_t const maxdepth, int64_t const minsym, int64_t const maxsym, uint64_t const maxrange) const
	{
		TraversalContext::unique_ptr_type pcontext(getContext(maxdepth,minsym,maxsym));
		TraversalContext & context = *pcontext;

		// result vector
		std::vector<TraversalContext::shared_ptr_type> VR;

		// depth first traversal data
		uint64_t & ANIoffset = context.ANIoffset;
		uint64_t & ATNoffset = context.ATNoffset;
		libmaus2::autoarray::AutoArray<NodeInfo> & ANI = context.ANI;
		libmaus2::autoarray::AutoArray<NodeInfo> & ATNI = context.ATNI;
		libmaus2::autoarray::AutoArray<NodeInfo> & ARNI = context.ARNI;
		libmaus2::autoarray::AutoArray<TraversalNode> & ATN = context.ATN;
		libmaus2::autoarray::AutoArray<ExtendedTraversalNode> & ATTN = context.ATTN;
		libmaus2::autoarray::AutoArray<int64_t> & vtrace = context.vtrace;

		// array for sorting
		libmaus2::autoarray::AutoArray<SortNodeInfo> & ASNI = context.ASNI;

		// rank information arrays
		libmaus2::autoarray::AutoArray< std::pair<int64_t,uint64_t> > & P = context.P;
		libmaus2::autoarray::AutoArray< std::pair<int64_t,uint64_t> > & PB = context.PB;
		libmaus2::autoarray::AutoArray< std::pair<int64_t,uint64_t> > & PBC = context.PBC;

		// while the stack is not empty
		while ( ATNoffset )
		{
			// get current traversal node id (pop stack)
			uint64_t const tnodeid = --ATNoffset;
			// get node data
			TraversalNode const TN = ATN[tnodeid];

			// copy child nodes to temporary array
			uint64_t p = 0;
			for ( uint64_t i = TN.offset; i < TN.offset+TN.size; ++i )
				ATNI.push(p,ANI[i]);

			// copy to return array
			p = 0;
			for ( uint64_t i = TN.offset; i < TN.offset+TN.size; ++i )
				ARNI.push(p,ANI[i]);

			// clean up stack (the entries of the popped node are the top most ones on ANI)
			ANIoffset -= TN.size;
			assert ( ANIoffset == TN.offset );

			// store the symbol which led to this node in the trace (filled from the back)
			if ( TN.depth )
			{
				uint64_t const index = context.maxdepth - TN.depth;
				vtrace[index] = TN.symbol;
			}

			// set up view of current node (points into ARNI and vtrace)
			context.STN = SuffixTreeNode(TN.depth,ARNI.begin(),TN.size,vtrace.begin() + context.maxdepth - TN.depth);

			#if 0
			std::cerr << "TraversalNode(depth=" << TN.depth << ") ";

			for ( uint64_t i = 0; i < TN.depth; ++i )
				std::cerr << vtrace[i];

			std::cerr << std::endl;

			#if 1
			for ( uint64_t i = 0; i < TN.size; ++i )
				std::cerr << "\t" << ATNI[i].toString() << std::endl;
			#endif
			#endif

			// cut off: interval is small enough or maximum depth has been reached
			if ( context.STN.range() <= maxrange || TN.depth == context.maxdepth )
			{
				// new context containing the current node only
				TraversalContext::shared_ptr_type ptr(new TraversalContext(maxdepth,D.sigmasize,minsym,maxsym));

				// note: these shadow the references to the outer context's offsets
				uint64_t & ANIoffset = ptr->ANIoffset;
				uint64_t & ATNoffset = ptr->ATNoffset;

				ptr->ATN.push(ATNoffset,TraversalNode(ANIoffset,TN.size,TN.depth,TN.symbol));
				for ( uint64_t i = 0; i < TN.size; ++i )
					ptr->ANI.push(ANIoffset,ARNI[i]);

				// copy symbol trace
				std::copy(vtrace.begin(),vtrace.end(),ptr->vtrace.begin());

				VR.push_back(ptr);
			}
			// otherwise expand the node
			else if ( TN.depth < context.maxdepth )
			{
				// number of elements in ASNI
				uint64_t oASNI = 0;

				// iterate over symbols on the right
				for ( uint64_t symi = 0; symi < TN.size; ++symi )
				{
					// get node info
					NodeInfo const & NI = ATNI[symi];
					int64_t const rightsym = NI.sym;
					uint64_t const from = NI.from;
					uint64_t const to = NI.to;

					assert ( rightsym >= context.minsym && rightsym <= context.maxsym );

					// get rank information for extension on the left
					// P: symbols and frequencies in [from,to), PB: symbols and frequencies in [0,from)
					uint64_t const s  = WT.enumerateSymbolsInRangeSorted(from,to,P.begin());
					uint64_t const sb = WT.enumerateSymbolsInRangeSorted(0,from,PB.begin());

					// merge: for each symbol in P store its frequency in [0,from) in PBC
					// (0 if the symbol does not appear in PB)
					{
						uint64_t i2 = 0;
						uint64_t i0 = 0;
						uint64_t i1 = 0;

						while ( i0 < s && i1 < sb )
						{
							if ( P[i0].first < PB[i1].first )
							{
								PBC.push(i2,std::pair<int64_t,uint64_t>(P[i0].first,0));
								++i0;
							}
							else if ( PB[i1].first < P[i0].first )
							{
								++i1;
							}
							else
							{
								PBC.push(i2,std::pair<int64_t,uint64_t>(P[i0].first,PB[i1].second));
								++i0;
								++i1;
							}
						}

						while ( i0 < s )
						{
							PBC.push(i2,std::pair<int64_t,uint64_t>(P[i0].first,0));
							++i0;
						}

						assert ( i2 == s );
					}

					// iterate over symbols on the left
					for ( uint64_t symj = 0; symj < s; ++symj )
					{
						int64_t const leftsym = P[symj].first;

						if ( leftsym >= context.minsym && leftsym <= context.maxsym )
						{
							// backward search step: new interval is
							// [D[leftsym] + rank before from, + frequency in [from,to))
							uint64_t const freq = P[symj].second;
							uint64_t const nfrom = D[leftsym] + PBC[symj].second;
							uint64_t const nto = nfrom + freq;

							ASNI.push(oASNI,SortNodeInfo(leftsym,rightsym,nfrom,nto));
						}
					}
				}

				// sort ASNI by left extension
				std::sort(ASNI.begin(),ASNI.begin() + oASNI);

				// look for left extensions which are right maximal
				// (ATNI is reused as output here, its previous content is no longer needed)
				uint64_t ilow = 0;
				uint64_t ATNIoffset = 0;
				uint64_t ATTNoffset = 0;
				ATNIoffset = 0;
				while ( ilow < oASNI )
				{
					// find end of run of entries having the same left symbol
					uint64_t ihigh = ilow+1;

					while ( ihigh < oASNI && ASNI[ilow].lsym == ASNI[ihigh].lsym )
						++ihigh;

					// more than one symbol on the right?
					if ( ihigh-ilow > 1 )
					{
						uint64_t lATNIoffset = ATNIoffset;
						for ( uint64_t i = ilow; i < ihigh; ++i )
							ATNI.push(ATNIoffset,NodeInfo(ASNI[i].sym,ASNI[i].from,ASNI[i].to));

						// size of interval spanned by the new node
						uint64_t const from = ATNI[lATNIoffset].from;
						uint64_t const to = ATNI[ATNIoffset-1].to;
						uint64_t const size = to - from;

						ATTN.push(ATTNoffset,ExtendedTraversalNode(lATNIoffset,ihigh-ilow,TN.depth+1/* depth */,ASNI[ilow].lsym,size));
					}

					ilow = ihigh;
				}

				// sort new nodes by descending interval size (see ExtendedTraversalNode::operator<),
				// so the node with the smallest interval ends up on top of the stack
				std::sort(ATTN.begin(),ATTN.begin()+ATTNoffset);

				// push new nodes on the stack
				for ( uint64_t i = 0; i < ATTNoffset; ++i )
				{
					// note: TN and the inner loop variable i shadow the outer ones
					ExtendedTraversalNode const TN = ATTN[i];

					uint64_t lANIoffset = ANIoffset;
					for ( uint64_t i = TN.offset; i < TN.offset + TN.size; ++i )
						ANI.push(ANIoffset,ATNI[i]);

					ATN.push(ATNoffset,TraversalNode(lANIoffset,TN.size,TN.depth,TN.symbol));
				}
			}

			// return &(context.STN);
		}

		return VR;
	}

	/**
	 * Perform one step of the depth first traversal stored in context: pop the
	 * top most node from the stack, push its children (if the node's depth is
	 * smaller than context.maxdepth) and return the popped node.
	 *
	 * The returned object is context.STN. It references the arrays context.ARNI and
	 * context.vtrace, which are overwritten by the next call for the same context.
	 *
	 * @param context traversal state (modified)
	 * @return pointer to the node popped from the stack or NULL if the stack is empty
	 **/
	SuffixTreeNode * enumerate(TraversalContext & context) const
	{
		// depth first traversal data
		uint64_t & ANIoffset = context.ANIoffset;
		uint64_t & ATNoffset = context.ATNoffset;
		libmaus2::autoarray::AutoArray<NodeInfo> & ANI = context.ANI;
		libmaus2::autoarray::AutoArray<NodeInfo> & ATNI = context.ATNI;
		libmaus2::autoarray::AutoArray<NodeInfo> & ARNI = context.ARNI;
		libmaus2::autoarray::AutoArray<TraversalNode> & ATN = context.ATN;
		libmaus2::autoarray::AutoArray<ExtendedTraversalNode> & ATTN = context.ATTN;
		libmaus2::autoarray::AutoArray<int64_t> & vtrace = context.vtrace;

		// array for sorting
		libmaus2::autoarray::AutoArray<SortNodeInfo> & ASNI = context.ASNI;

		// rank information arrays
		libmaus2::autoarray::AutoArray< std::pair<int64_t,uint64_t> > & P = context.P;
		libmaus2::autoarray::AutoArray< std::pair<int64_t,uint64_t> > & PB = context.PB;
		libmaus2::autoarray::AutoArray< std::pair<int64_t,uint64_t> > & PBC = context.PBC;

		// if the stack is not empty (a single node is processed per call)
		if ( ATNoffset )
		{
			// get current traversal node id (pop stack)
			uint64_t const tnodeid = --ATNoffset;
			// get node data
			TraversalNode TN = ATN[tnodeid];

			// copy child nodes to temporary array
			uint64_t p = 0;
			for ( uint64_t i = TN.offset; i < TN.offset+TN.size; ++i )
				ATNI.push(p,ANI[i]);

			// copy to return array
			p = 0;
			for ( uint64_t i = TN.offset; i < TN.offset+TN.size; ++i )
				ARNI.push(p,ANI[i]);

			// clean up stack (the entries of the popped node are the top most ones on ANI)
			ANIoffset -= TN.size;
			assert ( ANIoffset == TN.offset );

			// store the symbol which led to this node in the trace (filled from the back)
			if ( TN.depth )
			{
				uint64_t const index = context.maxdepth - TN.depth;
				vtrace[index] = TN.symbol;
			}

			// set up returned node (points into ARNI and vtrace)
			context.STN = SuffixTreeNode(TN.depth,ARNI.begin(),TN.size,vtrace.begin() + context.maxdepth - TN.depth);

			#if 0
			std::cerr << "TraversalNode(depth=" << TN.depth << ") ";

			for ( uint64_t i = 0; i < TN.depth; ++i )
				std::cerr << vtrace[i];

			std::cerr << std::endl;

			#if 1
			for ( uint64_t i = 0; i < TN.size; ++i )
				std::cerr << "\t" << ATNI[i].toString() << std::endl;
			#endif
			#endif

			// expand node unless the maximum depth has been reached
			if ( TN.depth < context.maxdepth )
			{
				// number of elements in ASNI
				uint64_t oASNI = 0;

				// iterate over symbols on the right
				for ( uint64_t symi = 0; symi < TN.size; ++symi )
				{
					// get node info
					NodeInfo const & NI = ATNI[symi];
					int64_t const rightsym = NI.sym;
					uint64_t const from = NI.from;
					uint64_t const to = NI.to;

					assert ( rightsym >= context.minsym && rightsym <= context.maxsym );

					// get rank information for extension on the left
					// P: symbols and frequencies in [from,to), PB: symbols and frequencies in [0,from)
					uint64_t const s  = WT.enumerateSymbolsInRangeSorted(from,to,P.begin());
					uint64_t const sb = WT.enumerateSymbolsInRangeSorted(0,from,PB.begin());

					// merge: for each symbol in P store its frequency in [0,from) in PBC
					// (0 if the symbol does not appear in PB)
					{
						uint64_t i2 = 0;
						uint64_t i0 = 0;
						uint64_t i1 = 0;

						while ( i0 < s && i1 < sb )
						{
							if ( P[i0].first < PB[i1].first )
							{
								PBC.push(i2,std::pair<int64_t,uint64_t>(P[i0].first,0));
								++i0;
							}
							else if ( PB[i1].first < P[i0].first )
							{
								++i1;
							}
							else
							{
								PBC.push(i2,std::pair<int64_t,uint64_t>(P[i0].first,PB[i1].second));
								++i0;
								++i1;
							}
						}

						while ( i0 < s )
						{
							PBC.push(i2,std::pair<int64_t,uint64_t>(P[i0].first,0));
							++i0;
						}

						assert ( i2 == s );
					}

					// iterate over symbols on the left
					for ( uint64_t symj = 0; symj < s; ++symj )
					{
						int64_t const leftsym = P[symj].first;

						if ( leftsym >= context.minsym && leftsym <= context.maxsym )
						{
							// backward search step: new interval is
							// [D[leftsym] + rank before from, + frequency in [from,to))
							uint64_t const freq = P[symj].second;
							uint64_t const nfrom = D[leftsym] + PBC[symj].second;
							uint64_t const nto = nfrom + freq;

							ASNI.push(oASNI,SortNodeInfo(leftsym,rightsym,nfrom,nto));
						}
					}
				}

				// sort ASNI by left extension
				std::sort(ASNI.begin(),ASNI.begin() + oASNI);

				// look for left extensions which are right maximal
				// (ATNI is reused as output here, its previous content is no longer needed)
				uint64_t ilow = 0;
				uint64_t ATNIoffset = 0;
				uint64_t ATTNoffset = 0;
				ATNIoffset = 0;
				while ( ilow < oASNI )
				{
					// find end of run of entries having the same left symbol
					uint64_t ihigh = ilow+1;

					while ( ihigh < oASNI && ASNI[ilow].lsym == ASNI[ihigh].lsym )
						++ihigh;

					// more than one symbol on the right?
					if ( ihigh-ilow > 1 )
					{
						uint64_t lATNIoffset = ATNIoffset;
						for ( uint64_t i = ilow; i < ihigh; ++i )
							ATNI.push(ATNIoffset,NodeInfo(ASNI[i].sym,ASNI[i].from,ASNI[i].to));

						// size of interval spanned by the new node
						uint64_t const from = ATNI[lATNIoffset].from;
						uint64_t const to = ATNI[ATNIoffset-1].to;
						uint64_t const size = to - from;

						ATTN.push(ATTNoffset,ExtendedTraversalNode(lATNIoffset,ihigh-ilow,TN.depth+1/* depth */,ASNI[ilow].lsym,size));
					}

					ilow = ihigh;
				}

				// sort new nodes by descending interval size (see ExtendedTraversalNode::operator<),
				// so the node with the smallest interval ends up on top of the stack
				std::sort(ATTN.begin(),ATTN.begin()+ATTNoffset);

				// push new nodes on the stack
				for ( uint64_t i = 0; i < ATTNoffset; ++i )
				{
					// note: TN and the inner loop variable i shadow the outer ones
					ExtendedTraversalNode const TN = ATTN[i];

					uint64_t lANIoffset = ANIoffset;
					for ( uint64_t i = TN.offset; i < TN.offset + TN.size; ++i )
						ANI.push(ANIoffset,ATNI[i]);

					ATN.push(ATNoffset,TraversalNode(lANIoffset,TN.size,TN.depth,TN.symbol));
				}
			}

			return &(context.STN);
		}
		else
		{
			// stack is empty, traversal is complete
			return NULL;
		}
	}
};

struct IntervalInfo
{
	uint64_t from;
	uint64_t to;

	IntervalInfo()
	: from(std::numeric_limits<uint64_t>::max()), to(std::numeric_limits<uint64_t>::max())
	{}
	IntervalInfo(uint64_t const rfrom, uint64_t const rto)
	: from(rfrom), to(rto)
	{

	}

	std::ostream & serialise(std::ostream & out) const
	{
		libmaus2::util::NumberSerialisation::serialiseNumber(out,from);
		libmaus2::util::NumberSerialisation::serialiseNumber(out,to);
		return out;
	}

	std::istream & deserialise(std::istream & in)
	{
		from = libmaus2::util::NumberSerialisation::deserialiseNumber(in);
		to = libmaus2::util::NumberSerialisation::deserialiseNumber(in);
		return in;
	}

	bool operator<(IntervalInfo const & O) const
	{
		if ( from != O.from )
			return from < O.from;
		else
			return to > O.to;
	}
};

// Disabled (never compiled) stub of an enumerator working on a
// libmaus2::fm::BiIndex. enumerate() has no implementation.
#if 0
struct BiEnumerator
{
	// index (stored by reference)
	libmaus2::fm::BiIndex const & index;

	// queue element
	struct QueueNode
	{
		uint64_t f_sp;
		uint64_t r_sp;
		uint64_t size;

		QueueNode()
		{
		}
		QueueNode(uint64_t const rf_sp, uint64_t const rr_sp, uint64_t const rsize)
		: f_sp(rf_sp), r_sp(rr_sp), size(rsize) {}
	};

	BiEnumerator(libmaus2::fm::BiIndex const & rindex)
	: index(rindex)
	{

	}

	// not implemented
	void enumerate()
	{
	}
};
#endif

// Layout constants for right maximal entries, which are stored as sequences of
// 64 bit words: from, to, count, symcount followed by the trace words.
// All offsets are given in units of 64 bit words.
struct RightMaxIOBase
{
	// offset of from field
	static uint64_t const rightmaxelementfromoffset;
	// offset of to field
	static uint64_t const rightmaxelementtooffset;
	// offset of count field
	static uint64_t const rightmaxelementcountoffset;
	// offset of symcount field
	static uint64_t const rightmaxelementsymcountoffset;
	// offset of first trace word
	static uint64_t const rightmaxtraceoffset;
	// number of words in front of the trace
	static uint64_t const rightmaxmetawords;
};

uint64_t const RightMaxIOBase::rightmaxelementfromoffset  = 0;
uint64_t const RightMaxIOBase::rightmaxelementtooffset    = 1;
uint64_t const RightMaxIOBase::rightmaxelementcountoffset = 2;
uint64_t const RightMaxIOBase::rightmaxelementsymcountoffset = 3;
uint64_t const RightMaxIOBase::rightmaxtraceoffset        = 4;
// the trace starts right after the meta words
uint64_t const RightMaxIOBase::rightmaxmetawords          = RightMaxIOBase::rightmaxtraceoffset;

// Writing and field access for right maximal entries, i.e. sequences of 64 bit
// words consisting of from, to, count, symcount and s trace words
struct RightMaxIO : public RightMaxIOBase
{
	// append entry to buffered output BO
	static void writeRightMaxEntry(
		libmaus2::aio::BufferedOutput<uint64_t> & BO,
		uint64_t const from,
		uint64_t const to,
		uint64_t const count,
		uint64_t const symcount,
		uint64_t const * D,
		uint64_t const s
	)
	{
		// meta words in the order they are stored
		uint64_t const meta[] = { from, to, count, symcount };
		uint64_t const nummeta = sizeof(meta)/sizeof(meta[0]);

		for ( uint64_t j = 0; j < nummeta; ++j )
			BO.put(meta[j]);

		// trace words
		for ( uint64_t j = 0; j < s; ++j )
			BO.put(D[j]);
	}

	// append entry to array BO starting at offset BOo, returns the offset behind the entry
	static uint64_t writeRightMaxEntry(
		libmaus2::autoarray::AutoArray<uint64_t> & BO,
		uint64_t BOo,
		uint64_t const from,
		uint64_t const to,
		uint64_t const count,
		uint64_t const symcount,
		uint64_t const * D,
		uint64_t const s
	)
	{
		// meta words in the order they are stored
		uint64_t const meta[] = { from, to, count, symcount };
		uint64_t const nummeta = sizeof(meta)/sizeof(meta[0]);

		for ( uint64_t j = 0; j < nummeta; ++j )
			BO.push(BOo,meta[j]);

		// trace words
		for ( uint64_t j = 0; j < s; ++j )
			BO.push(BOo,D[j]);

		return BOo;
	}

	// get from field of entry starting at p
	static uint64_t getFrom(uint8_t const * p)
	{
		uint64_t const * const W = reinterpret_cast<uint64_t const *>(p);
		return W[rightmaxelementfromoffset];
	}

	// get to field of entry starting at p
	static uint64_t getTo(uint8_t const * p)
	{
		uint64_t const * const W = reinterpret_cast<uint64_t const *>(p);
		return W[rightmaxelementtooffset];
	}

	// get count field of entry starting at p
	static uint64_t getCount(uint8_t const * p)
	{
		uint64_t const * const W = reinterpret_cast<uint64_t const *>(p);
		return W[rightmaxelementcountoffset];
	}

	// get symcount field of entry starting at p
	static uint64_t getSymCount(uint8_t const * p)
	{
		uint64_t const * const W = reinterpret_cast<uint64_t const *>(p);
		return W[rightmaxelementsymcountoffset];
	}

	// get pointer to first trace word of entry starting at p
	static uint64_t const * getTrace(uint8_t const * p)
	{
		uint64_t const * const W = reinterpret_cast<uint64_t const *>(p);
		return W + rightmaxtraceoffset;
	}
};

// Comparator for elements whose first two 64 bit words are interpreted as a
// pair (first,second): ascending by first word, ties broken by descending second word
struct ElementComparator
{
	// returns true if the element at E0 is to be sorted in front of the element at E1
	static bool comp(uint8_t const * E0, uint8_t const * E1)
	{
		uint64_t const * const A = reinterpret_cast<uint64_t const *>(E0);
		uint64_t const * const B = reinterpret_cast<uint64_t const *>(E1);

		if ( A[0] < B[0] )
			return true;
		if ( B[0] < A[0] )
			return false;

		// first words are equal, larger second word goes first
		return B[1] < A[1];
	}
};

// Three way comparison function for qsort based on ElementComparator::comp:
// returns -1 if va sorts before vb, 1 if vb sorts before va and 0 otherwise
static int CELcomp(void const * va, void const * vb)
{
	uint8_t const * const lhs = reinterpret_cast<uint8_t const *>(va);
	uint8_t const * const rhs = reinterpret_cast<uint8_t const *>(vb);

	if ( ElementComparator::comp(lhs,rhs) )
		return -1;

	return ElementComparator::comp(rhs,lhs) ? 1 : 0;
}

// Buffered reader for a run of n fixed size elements on an input stream
// featuring a one element look ahead. Pointers handed out point into the
// internal buffer A.
struct InputBlock
{
	typedef InputBlock this_type;
	typedef libmaus2::util::unique_ptr<this_type>::type unique_ptr_type;

	// input stream
	std::istream & in;
	// buffer capacity in elements
	uint64_t blocksize;
	// number of elements not yet read from the stream
	uint64_t n;

	// look ahead element
	uint8_t const * peekslot;
	// true if peekslot is valid
	bool peekslotfilled;

	// size of an element in bytes
	uint64_t const elementsize;

	// buffer
	libmaus2::autoarray::AutoArray<uint8_t> A;
	// next unread element in buffer
	uint8_t const * pc;
	// end of filled part of buffer
	uint8_t const * pe;

	// set up reader for rn elements of size relementsize on rin using a buffer of rblocksize elements
	InputBlock(std::istream & rin, uint64_t const rblocksize, uint64_t const rn, uint64_t const relementsize)
	: in(rin),
	  blocksize(rblocksize),
	  n(rn),
	  peekslot(NULL),
	  peekslotfilled(false),
	  elementsize(relementsize),
	  A(blocksize*elementsize,false),
	  pc(A.begin()),
	  pe(A.begin())
	{
	}

	// get pointer to next element without consuming it, returns false if there is none
	bool peekNext(uint8_t const * & E)
	{
		if ( peekslotfilled == false )
		{
			peekslotfilled = getNextRaw(peekslot);
		}

		E = peekslot;

		return peekslotfilled;
	}

	// get pointer to next element and consume it, returns false if there is none
	bool getNext(uint8_t const * & E)
	{
		// peekNext stores the element in E if there is one
		if ( ! peekNext(E) )
			return false;

		peekslotfilled = false;
		return true;
	}

	// get next element bypassing the look ahead slot, refills the buffer if it is empty
	bool getNextRaw(uint8_t const * & E)
	{
		if ( pc == pe )
		{
			// read up to blocksize elements
			uint64_t const toread = std::min(n,blocksize);
			uint64_t const bytes = toread * elementsize;

			in.read(reinterpret_cast<char *>(A.begin()),bytes);
			assert ( in.gcount() == static_cast<int64_t>(bytes) );

			pc = A.begin();
			pe = pc + bytes;
			n -= toread;
		}

		bool const available = (pc != pe);

		if ( available )
		{
			E = pc;
			pc += elementsize;
		}

		return available;
	}
};

/**
 * Sort the elements stored in each of the files in Vfn (in place) using
 * ElementComparator.
 *
 * Each file is first sorted block wise in memory (blocks of about sortmem bytes,
 * at least one element). The blocks are then merged pairwise until a single
 * sorted run remains. Merging writes to a temporary file named like the input
 * file with suffix ".tmp", which replaces the input file after each pass.
 *
 * @param Vfn file names
 * @param VN number of elements in each file (same length as Vfn)
 * @param elementsize size of an element in bytes (must not be zero)
 * @param sortmem memory used for sorting blocks in bytes
 * @throws libmaus2::exception::LibMausException if elementsize is zero or if
 *         reading or writing a block fails
 **/
static void sortFiles(
	std::vector<std::string> const & Vfn,
	std::vector<uint64_t> const & VN,
	uint64_t const elementsize,
	uint64_t const sortmem
)
{
	// #define SORT_DEBUG

	assert ( Vfn.size() == VN.size() );

	// the block size computations below divide by elementsize
	if ( ! elementsize )
	{
		libmaus2::exception::LibMausException lme;
		lme.getStream() << "[E] sortFiles: elementsize must not be zero" << std::endl;
		lme.finish();
		throw lme;
	}

	// number of elements per in memory block, at least one so the
	// computation of the number of blocks never divides by zero
	uint64_t const sortelements = std::max(static_cast<uint64_t>(1),(sortmem + elementsize - 1) / elementsize);
	libmaus2::autoarray::AutoArray<uint8_t> AS(sortelements*elementsize,false);
	for ( uint64_t i = 0; i < Vfn.size(); ++i )
	{
		uint64_t const numblocks = (VN[i] + sortelements - 1)/sortelements;

		#if defined(SORT_DEBUG)
		std::cerr << "[V] file " << i << " " << Vfn[i] << " num blocks " << numblocks << std::endl;
		#endif

		// element intervals of sorted runs
		std::vector < std::pair<uint64_t,uint64_t> > Vblock;

		libmaus2::aio::InputOutputStreamInstance::unique_ptr_type pOSI(
			new libmaus2::aio::InputOutputStreamInstance(
				Vfn[i],std::ios::in | std::ios::out
			)
		);

		// phase 1: sort blocks in memory and write them back in place
		for ( uint64_t j = 0; j < numblocks; ++j )
		{
			uint64_t const low = j * sortelements;
			uint64_t const high = std::min(low+sortelements,VN[i]);

			#if defined(SORT_DEBUG)
			std::cerr << "[V] sorting block " << j << " [" << low << "," << high << ")" << std::endl;
			#endif

			Vblock.push_back(std::pair<uint64_t,uint64_t>(low,high));

			// read
			{
				uint64_t const blocksize = high - low;
				assert ( blocksize <= sortelements );
				uint64_t const blockbytes = blocksize * elementsize;

				pOSI->clear();
				pOSI->seekg(low * elementsize);
				pOSI->read(reinterpret_cast<char *>(AS.begin()),blockbytes);

				if ( pOSI->gcount() != static_cast<int64_t>(blockbytes) )
				{
					libmaus2::exception::LibMausException lme;
					lme.getStream() << "[E] sortFiles gcount=" << pOSI->gcount() << " != blockbytes=" << blockbytes << std::endl;
					lme.finish();
					throw lme;
				}
			}

			::qsort(AS.begin(),high-low,elementsize,CELcomp);

			// write back
			{
				pOSI->clear();
				pOSI->seekp(low * elementsize);
				pOSI->write(reinterpret_cast<char const *>(AS.begin()),(high-low) * elementsize);

				// report failure also if assertions are disabled
				if ( ! *pOSI )
				{
					libmaus2::exception::LibMausException lme;
					lme.getStream() << "[E] sortFiles failed to write sorted block to " << Vfn[i] << std::endl;
					lme.finish();
					throw lme;
				}
			}
		}

		pOSI->flush();
		pOSI.reset();

		// phase 2: merge pairs of adjacent runs until a single run is left
		while ( Vblock.size() > 1 )
		{
			#if defined(SORT_DEBUG)
			std::cerr << "[V] number of blocks now " << Vblock.size() << std::endl;
			#endif

			uint64_t const runs = (Vblock.size() + 1)/2;

			std::string const infn = Vfn[i];
			std::string const tmpfn = infn + ".tmp";

			libmaus2::aio::OutputStreamInstance::unique_ptr_type OSI(new libmaus2::aio::OutputStreamInstance(tmpfn));
			libmaus2::aio::BufferedOutput<uint8_t> BO(*OSI,1024);

			// runs produced by this pass
			std::vector < std::pair<uint64_t,uint64_t> > Nblock;

			// one input stream for each of the two runs merged
			libmaus2::aio::InputStreamInstance ISI0(infn);
			libmaus2::aio::InputStreamInstance ISI1(infn);

			uint64_t pout = 0;
			for ( uint64_t r = 0; r < runs; ++r )
			{
				uint64_t const r0 = 2*r+0;
				uint64_t const r1 = 2*r+1;

				// the second run is empty if the number of runs is odd and this is the last pair
				std::pair<uint64_t,uint64_t> const VB0 = Vblock[r0];
				std::pair<uint64_t,uint64_t> const VB1 = (r1 < Vblock.size()) ? Vblock[r1] : std::pair<uint64_t,uint64_t>(0,0);

				#if defined(SORT_DEBUG)
				std::cerr
					<< "[V] VB0=[" << VB0.first << "," << VB0.second << ")"
					<< " "
					<< "VB1=[" << VB1.first << "," << VB1.second << ")"
					<< std::endl;
				#endif

				ISI0.clear();
				ISI0.seekg(VB0.first * elementsize);
				ISI1.clear();
				ISI1.seekg(VB1.first * elementsize);

				InputBlock IB0(ISI0,1024,VB0.second-VB0.first,elementsize);
				InputBlock IB1(ISI1,1024,VB1.second-VB1.first,elementsize);

				uint8_t const * E0 = NULL;
				uint8_t const * E1 = NULL;
				uint64_t nout = 0;

				while ( IB0.peekNext(E0) && IB1.peekNext(E1) )
				{
					// take from the second run only if its element is strictly
					// smaller, otherwise (smaller or equal) take from the first run
					if ( ElementComparator::comp(E1,E0) )
					{
						BO.put(E1,elementsize);
						IB1.getNext(E1);
					}
					else
					{
						BO.put(E0,elementsize);
						IB0.getNext(E0);
					}
					++nout;
				}
				// copy rest of first run
				while ( IB0.peekNext(E0) )
				{
					BO.put(E0,elementsize);
					IB0.getNext(E0);
					++nout;
				}
				// copy rest of second run
				while ( IB1.peekNext(E1) )
				{
					BO.put(E1,elementsize);
					IB1.getNext(E1);
					++nout;
				}

				Nblock.push_back(std::pair<uint64_t,uint64_t>(pout,pout+nout));

				assert ( nout == (VB0.second-VB0.first) + (VB1.second-VB1.first) );

				pout += nout;
			}

			BO.flush();
			OSI->flush();
			OSI.reset();

			// replace input file by merged file
			libmaus2::aio::OutputStreamFactoryContainer::rename(tmpfn,infn);

			Vblock = Nblock;
		}

		#if defined(SORT_DEBUG)
		libmaus2::aio::InputStreamInstance ISI(Vfn[i]);
		InputBlock IB(ISI,1024,VN[i],elementsize);
		uint8_t const * E = NULL;
		while ( IB.getNext(E) )
		{
			uint64_t const * U = reinterpret_cast<uint64_t const *>(E);
			std::cerr << "Element [" << U[0] << "," << U[1] << ")" << std::endl;
		}
		#endif
	}
	// release sort buffer
	AS = libmaus2::autoarray::AutoArray<uint8_t>(0);

}

// Abstract callback interface: operator() is invoked with a slot number
// and a pointer to the data of an entry.
struct KmerHandler
{
	// virtual destructor, so objects can be destroyed via a pointer to this base class
	virtual ~KmerHandler() {}
	// handle the entry at p for the given slot
	virtual void operator()(uint64_t const slot, uint8_t const * p) = 0;
};

struct KmerHandlerContainer : public KmerHandler
{
	std::vector < KmerHandler * > V;

	virtual void operator()(uint64_t const slot, uint8_t const * p)
	{
		for ( uint64_t i = 0; i < V.size(); ++i )
			(*(V[i]))(slot,p);
	}
};

struct HistogramKmerHandler : public KmerHandler
{
	typedef HistogramKmerHandler this_type;
	typedef libmaus2::util::unique_ptr<this_type>::type unique_ptr_type;
	typedef libmaus2::util::shared_ptr<this_type>::type shared_ptr_type;

	uint64_t const numslots;
	uint64_t const k;
	uint64_t const sigmabits;
	int64_t const minsym;
	int64_t const maxsym;
	bool const bprint;
	std::ostream * pout;
	libmaus2::parallel::PosixSpinLock lock;

	libmaus2::autoarray::AutoArray < libmaus2::util::Histogram::unique_ptr_type > AH;
	libmaus2::autoarray::AutoArray < uint64_t > Anumk;
	libmaus2::autoarray::AutoArray < uint64_t > Anumkmax;
	libmaus2::autoarray::AutoArray < libmaus2::bitio::CompactArray::unique_ptr_type > AtraceCA;
	libmaus2::autoarray::AutoArray < libmaus2::util::unique_ptr<std::ostringstream>::type > Aost;
	std::string const empty;

	HistogramKmerHandler(uint64_t const rnumslots, uint64_t const rk, uint64_t const rsigmabits, bool const rprint, std::ostream * rpout = NULL, int64_t const rminsym = 0, int64_t const rmaxsym = 3)
	: numslots(rnumslots), k(rk), sigmabits(rsigmabits),  minsym(rminsym), maxsym(rmaxsym),  bprint(rprint), pout(rpout),
	  AH(numslots), Anumk(numslots), Anumkmax(numslots), AtraceCA(numslots), Aost(numslots), empty()
	{
		for ( uint64_t i = 0; i < AH.size(); ++i )
		{
			libmaus2::util::Histogram::unique_ptr_type tptr(
				new libmaus2::util::Histogram()
			);
			AH[i] = UNIQUE_PTR_MOVE(tptr);
		}
		for ( uint64_t i = 0; i < AH.size(); ++i )
		{
			libmaus2::bitio::CompactArray::unique_ptr_type tptr(
				new libmaus2::bitio::CompactArray(k,sigmabits)
			);
			AtraceCA[i] = UNIQUE_PTR_MOVE(tptr);
		}

		std::fill(Anumk.begin(),Anumk.end(),0);
		std::fill(Anumkmax.begin(),Anumkmax.end(),0);

		for ( uint64_t i = 0; i < Aost.size(); ++i )
		{
			libmaus2::util::unique_ptr<std::ostringstream>::type tptr(
				new std::ostringstream
			);
			Aost[i] = UNIQUE_PTR_MOVE(tptr);
		}
	}

	virtual void operator()(uint64_t const slot, uint8_t const * p)
	{
		libmaus2::bitio::CompactArray & traceCA = *(AtraceCA[slot]);
		libmaus2::util::Histogram & H = *(AH[slot]);
		uint64_t & numk    = Anumk[slot];
		uint64_t & numkmax = Anumkmax[slot];

		uint64_t const from = RightMaxIO::getFrom(p);
		uint64_t const to = RightMaxIO::getTo(p);
		uint64_t const tracesymcount = RightMaxIO::getSymCount(p);
		uint64_t const * trace = RightMaxIO::getTrace(p);
		uint64_t const size = to - from;

		for ( uint64_t i = 0; i < traceCA.s; ++i )
			traceCA.D[i] = trace[i];

		bool valid = true;
		for ( uint64_t i = 0; i < k; ++i )
		{
			int64_t const sym = traceCA.get(i);

			if ( sym < minsym || sym > maxsym )
				valid = false;
		}

		if ( valid )
		{
			H(size);

			if ( bprint && pout )
			{
				std::ostringstream & out = *(Aost[slot]);
				out.str(empty);

				out << "[F]\t";
				for ( uint64_t i = 0; i < k; ++i )
					out << traceCA.get(i);
				out << "\t" << size;

				if ( tracesymcount != std::numeric_limits<uint64_t>::max() )
					out << "\t" << tracesymcount;
				else
					out << "\t" << '*';
				out << "\t[" << from << "," << to << ")";

				out << "\n";

				std::string const s = out.str();

				libmaus2::parallel::ScopePosixSpinLock slock(libmaus2::aio::StreamLock::cerrlock);
				pout->write(s.c_str(),s.size());
			}

			numk += 1;

			if (
				tracesymcount != std::numeric_limits<uint64_t>::max()
				&&
				(
					tracesymcount >= 2 || tracesymcount == 0
				)
			)
			{
				numkmax += 1;
			}
		}
	}

	void print(std::ostream & out)
	{
		libmaus2::util::Histogram H;
		for ( uint64_t i = 0; i < AH.size(); ++i )
			H.merge(*(AH[i]));

		std::ostringstream Hstr;
		H.print(Hstr);
		std::string line;
		std::istringstream Histr(Hstr.str());
		while ( std::getline(Histr,line) )
		{
			out << "[H]\t" << line << "\n";
		}

		uint64_t const numk = std::accumulate(Anumk.begin(),Anumk.end(),0ull);
		uint64_t const numkmax = std::accumulate(Anumkmax.begin(),Anumkmax.end(),0ull);

		out << "[HS]\t" << numk << "\t" << numkmax << "\n";
	}
};

int testsuffixtreenum(libmaus2::util::ArgParser const & arg)
{
	std::string const hwtfn = arg[0];
	uint64_t const numthreads = arg.uniqueArgPresent("t") ? arg.getUnsignedNumericArg<uint64_t>("t") : getDefaultNumThreads();
	uint64_t const k = arg.uniqueArgPresent("k") ? arg.getUnsignedNumericArg<uint64_t>("k") : 4;
	uint64_t const sortmem = arg.uniqueArgPresent("m") ? arg.getUnsignedNumericArg<uint64_t>("m") : (1024*1024*1024);
	std::string const tmpfilebase = arg.uniqueArgPresent("T") ? arg["T"] : libmaus2::util::ArgInfo::getDefaultTmpFileName(arg.progname);
	bool const fulltable = arg.uniqueArgPresent("f");
	bool const histogram = arg.uniqueArgPresent("h");

	uint64_t const fanout = 128;

	libmaus2::lf::ImpCompactHuffmanWaveletLF::unique_ptr_type pLF(libmaus2::lf::ImpCompactHuffmanWaveletLF::load(hwtfn,numthreads));
	libmaus2::wavelet::ImpCompactHuffmanWaveletTree const & WT = pLF->getW();
	//libmaus2::wavelet::ImpCompactHuffmanWaveletTree::unique_ptr_type pWT(libmaus2::wavelet::ImpCompactHuffmanWaveletTree::load(hwtfn,numthreads));
	//libmaus2::wavelet::ImpCompactHuffmanWaveletTree const & WT = *pWT;

	Enumerator E(WT);
	uint64_t const maxdepth = k-1;
	int64_t const minsym = 0;
	int64_t const maxsym = static_cast<int64_t>(E.D.sigma)-1;
	int64_t const csymmin = 0;
	int64_t const csymmax = 3;

	//uint64_t const sigma = E.D.sigma;
	uint64_t const sigmabits = libmaus2::math::numbits(maxsym);

	libmaus2::bitio::CompactArray traceCA(k,sigmabits);
	uint64_t const rightmaxwords = RightMaxIO::rightmaxmetawords + traceCA.s;
	uint64_t const rightmaxelementsize = rightmaxwords*sizeof(uint64_t);

	KmerHandlerContainer KHC;
	HistogramKmerHandler HKH(numthreads,k,sigmabits,fulltable /* print */,&std::cout);
	KHC.V.push_back(&HKH);
	KmerHandler & KH = KHC;

	// uint64_t const wordbits = 8 * sizeof(uint64_t);
	Enumerator::TraversalContext::unique_ptr_type pcontext(E.getContext(maxdepth,minsym,maxsym));
	// SuffixTreeNode * STN = NULL;
	// uint64_t const sigmawords = traceCA.s;

	uint64_t const tparts = 4 * numthreads;
	uint64_t const maxrange = (WT.size() + tparts - 1)/tparts;
	uint64_t const ranksperthread = maxrange;

	libmaus2::timing::RealTimeClock rtc;
	rtc.start();

	std::vector<Enumerator::TraversalContext::shared_ptr_type> VT = E.enumerateSplit(maxdepth, minsym, maxsym, maxrange);
	uint64_t volatile splitn = 0;
	libmaus2::parallel::PosixSpinLock splitnlock;

	struct ThreadOutputRightMax
	{
		typedef ThreadOutputRightMax this_type;
		// typedef libmaus2::util::unique_ptr<this_type>::type unique_ptr_type;
		typedef libmaus2::util::shared_ptr<this_type>::type shared_ptr_type;

		libmaus2::autoarray::AutoArray<uint64_t> BO;
		uint64_t BOo;
		libmaus2::autoarray::AutoArray<uint64_t> RO;

		ThreadOutputRightMax()
		{}

		ThreadOutputRightMax(
			libmaus2::autoarray::AutoArray<uint64_t> & rBO,
			uint64_t const rBOo,
			libmaus2::autoarray::AutoArray<uint64_t> & rRO
		) : BO(rBO), BOo(rBOo), RO(rRO)
		{

		}
	};

	std::vector < ThreadOutputRightMax::shared_ptr_type > VTOR(VT.size());

	#if defined(_OPENMP)
	#pragma omp parallel for num_threads(numthreads) schedule(dynamic,1)
	#endif
	for ( uint64_t i = 0; i < VT.size(); ++i )
	{
		Enumerator::TraversalContext & context = *(VT[i]);
		libmaus2::autoarray::AutoArray<uint64_t> RO(tparts + 1);
		libmaus2::autoarray::AutoArray<uint64_t> BO;
		uint64_t BOo = 0;
		libmaus2::bitio::CompactArray traceCA(k,sigmabits);

		#if 0
		context.printTop(std::cerr);
		std::cerr << std::endl;
		#endif

		uint64_t lsplitn = 0;
		SuffixTreeNode * STN = NULL;
		while ( (STN = E.enumerate(context)) != NULL )
		{
			if ( STN->depth == maxdepth )
			{
				lsplitn += 1;

				uint64_t const from = STN->NI[0].from;
				uint64_t const slot = from / ranksperthread;
				uint64_t const to   = STN->NI[STN->size-1].to;
				uint64_t const size = STN->size;
				int64_t const * trace = STN->trace;
				uint64_t const tracelen = STN->depth;
				uint64_t symsize = 0;
				for ( uint64_t i = 0; i < size; ++i )
				{
					int64_t const sym = STN->NI[i].sym;

					if ( sym >= csymmin && sym <= csymmax )
						symsize += 1;
				}

				for ( uint64_t i = 0; i < tracelen; ++i )
					traceCA.set(i,trace[i]);

				BOo = RightMaxIO::writeRightMaxEntry(BO,BOo,from,to,size,symsize /* number of extensions on the right */,traceCA.D,traceCA.s);
				RO[slot]++;
			}
		}

		::qsort(BO.begin(),lsplitn,rightmaxelementsize,CELcomp);

		libmaus2::util::PrefixSums::prefixSums(RO.begin(),RO.end());

		ThreadOutputRightMax::shared_ptr_type TORM(new ThreadOutputRightMax(BO,BOo,RO));
		VTOR[i] = TORM;

		libmaus2::parallel::ScopePosixSpinLock slock(splitnlock);
		splitn += lsplitn;
	}
	libmaus2::autoarray::AutoArray<uint64_t> RO(tparts + 1);
	#if defined(_OPENMP)
	#pragma omp parallel for num_threads(numthreads)
	#endif
	for ( uint64_t i = 0; i < RO.size(); ++i )
		for ( uint64_t j = 0; j < VT.size(); ++j )
			RO[i] += VTOR[j]->RO[i];

	#if 0
	for ( uint64_t i = 0; i < RO.size(); ++i )
		std::cerr << "RO[" << i << "]=" << RO[i] << std::endl;
	#endif

	assert ( RO[RO.size()-1] == splitn );

	libmaus2::autoarray::AutoArray<uint64_t> GBO(splitn * rightmaxwords,false);
	#if defined(_OPENMP)
	#pragma omp parallel for num_threads(numthreads) schedule(dynamic,1)
	#endif
	for ( uint64_t i = 0; i < tparts; ++i )
	{
		uint64_t * outptr        = GBO.begin() + RO[i+0] * rightmaxwords;
		uint64_t * const outptre = GBO.begin() + RO[i+1] * rightmaxwords;

		// Cursor over one run of serialised entries: p points to the current
		// entry, pe to the end of the run.
		struct HeapEntry
		{
			uint64_t * p;
			uint64_t * pe;

			// empty cursor; both pointers are set to NULL instead of being left
			// indeterminate (operator< must not be called on such an entry)
			HeapEntry() : p(NULL), pe(NULL) {}
			HeapEntry(uint64_t * rp, uint64_t * rpe) : p(rp), pe(rpe) {}

			// Order cursors by the from value of the entry they currently point to.
			// NOTE(review): the merge loop relies on H.pop() delivering the entry which
			// is smallest according to this operator - confirm for FiniteSizeHeap
			bool operator<(HeapEntry const & H) const
			{
				uint64_t const from  = RightMaxIO::getFrom(reinterpret_cast<uint8_t const *>(p));
				uint64_t const Hfrom = RightMaxIO::getFrom(reinterpret_cast<uint8_t const *>(H.p));
				return from < Hfrom;
			}
		};

		libmaus2::util::FiniteSizeHeap<HeapEntry> H(VT.size());
		for ( uint64_t j = 0; j < VT.size(); ++j )
		{
			uint64_t const lfrom  = VTOR[j]->RO[i+0];
			uint64_t const lto    = VTOR[j]->RO[i+1];
			uint64_t const lrange = lto - lfrom;
			if ( lrange )
			{
				H.push(
					HeapEntry(
						VTOR[j]->BO.begin() + lfrom * rightmaxwords,
						VTOR[j]->BO.begin() + lto   * rightmaxwords
					)
				);
			}
		}

		while ( ! H.empty() )
		{
			HeapEntry HE = H.pop();

			std::copy(HE.p,HE.p + rightmaxwords,outptr);

			outptr += rightmaxwords;
			HE.p += rightmaxwords;

			if ( HE.p != HE.pe )
				H.push(HE);
		}

		assert ( outptr == outptre );
	}

	for ( uint64_t j = 0; j < VT.size(); ++j )
		VTOR[j] = ThreadOutputRightMax::shared_ptr_type();

	#if 0
	{
		uint64_t const * p = GBO.begin();
		uint64_t const * pe = GBO.end();

		int64_t pre = -1;
		while ( p != pe )
		{
			int64_t const next = RightMaxIO::getFrom(reinterpret_cast<uint8_t const *>(p));
			assert ( next > pre );
			pre = next;

			p += rightmaxwords;
		}

		std::cerr << "checked sorting" << std::endl;
	}
	#endif

	libmaus2::autoarray::AutoArray<uint64_t> Gextend;
	libmaus2::autoarray::AutoArray<uint64_t> Gleft;

	{
		uint64_t const entries = GBO.size()/rightmaxwords;
		uint64_t const entriesperthread = (entries + tparts - 1)/tparts;
		std::vector < ThreadOutputRightMax::shared_ptr_type > VTOR(tparts);

		#if defined(_OPENMP)
		#pragma omp parallel for schedule(dynamic,1) num_threads(numthreads)
		#endif
		for ( uint64_t t = 0; t < tparts; ++t )
		{
			libmaus2::bitio::CompactArray traceCA(k,sigmabits);
			libmaus2::autoarray::AutoArray< std::pair<int64_t,uint64_t> > PPbefore(E.D.sigma,false);
			libmaus2::autoarray::AutoArray< std::pair<int64_t,uint64_t> > PPinside(E.D.sigma,false);
			libmaus2::autoarray::AutoArray< std::pair<int64_t,uint64_t> > PPmerged(E.D.sigma,false);

			libmaus2::autoarray::AutoArray<uint64_t> BO;
			uint64_t BOo = 0;
			libmaus2::autoarray::AutoArray<uint64_t> RO(tparts + 1);

			uint64_t const tfrom = std::min(t * entriesperthread,entries);
			uint64_t const tto   = std::min(tfrom + entriesperthread,entries);

			uint64_t const * p  = GBO.begin() + tfrom * rightmaxwords;
			uint64_t const * pe = GBO.begin() + tto   * rightmaxwords;

			while ( p != pe )
			{
				uint64_t const fromrightmax = RightMaxIO::getFrom(reinterpret_cast<uint8_t const *>(p));
				uint64_t const torightmax   = RightMaxIO::getTo(reinterpret_cast<uint8_t const *>(p));
				uint64_t const * utracerightmax = RightMaxIO::getTrace(reinterpret_cast<uint8_t const *>(p));
				uint64_t const symcountrightmax = RightMaxIO::getSymCount(reinterpret_cast<uint8_t const *>(p));
				std::copy(utracerightmax,utracerightmax+traceCA.s,traceCA.D);

				// shift trace to the right
				for ( uint64_t i = 1; i < k; ++i )
				{
					uint64_t const ito = k-i;
					uint64_t const ifrom = k-i-1;
					traceCA.set(ito,traceCA.get(ifrom));

					// 6 -> 7
					// 5 -> 6
					// ...
				}

				// compute extensions on the left
				uint64_t const csigmabefore = WT.enumerateSymbolsInRangeSorted(0           ,fromrightmax,PPbefore.begin());
				uint64_t const csigmainside = WT.enumerateSymbolsInRangeSorted(fromrightmax,torightmax  ,PPinside.begin());

				for ( uint64_t i = 0; i < csigmainside; ++i )
					PPmerged[i] = std::pair<int64_t,uint64_t>(PPinside[i].first,0);

				uint64_t ibefore = 0;
				uint64_t iinside = 0;
				while ( ibefore < csigmabefore && iinside < csigmainside )
					if ( PPbefore[ibefore].first < PPinside[iinside].first )
						++ibefore;
					else if ( PPinside[iinside].first < PPbefore[ibefore].first )
						++iinside;
					else
					{
						PPmerged[iinside].second = PPbefore[ibefore].second;
						++iinside;
						++ibefore;
					}

				// enumerate extensions on the left
				for ( uint64_t i = 0; i < csigmainside; ++i )
				{
					int64_t const sym = PPinside[i].first;
					uint64_t const from = E.D[sym] + PPmerged[i].second;
					uint64_t const to = from + PPinside[i].second;
					uint64_t const slot = from / ranksperthread;

					#if defined(RIGHTMAX_DEBUG)
					uint64_t const stepfrom = pLF->step(sym,fromrightmax);
					uint64_t const stepto = pLF->step(sym,torightmax);

					std::cerr << "\t" << sym << " [" << from << "," << to << ")" << std::endl;

					assert ( stepfrom == from );
					assert ( stepto == to );
					#endif

					traceCA.set(0,sym);

					BOo = RightMaxIO::writeRightMaxEntry(BO,BOo,from,to,0 /* count */,symcountrightmax /* symcount */,traceCA.D,traceCA.s);
					RO[slot]++;
				}


				p += rightmaxwords;
			}

			libmaus2::util::PrefixSums::prefixSums(RO.begin(),RO.end());

			::qsort(BO.begin(),BOo/rightmaxwords,rightmaxelementsize,CELcomp);

			ThreadOutputRightMax::shared_ptr_type TORM(new ThreadOutputRightMax(BO,BOo,RO));
			VTOR[t] = TORM;
		}

		libmaus2::autoarray::AutoArray<uint64_t> RO(tparts + 1);
		#if defined(_OPENMP)
		#pragma omp parallel for num_threads(numthreads)
		#endif
		for ( uint64_t i = 0; i < RO.size(); ++i )
		{
			uint64_t & ROi = RO[i];

			for ( uint64_t j = 0; j < VTOR.size(); ++j )
				ROi += VTOR[j]->RO[i];
		}

		libmaus2::autoarray::AutoArray<uint64_t> Lextend(RO[tparts] * rightmaxwords,false);
		#if defined(_OPENMP)
		#pragma omp parallel for num_threads(numthreads) schedule(dynamic,1)
		#endif
		for ( uint64_t i = 0; i < tparts; ++i )
		{
			uint64_t * outptr        = Lextend.begin() + RO[i+0] * rightmaxwords;
			uint64_t * const outptre = Lextend.begin() + RO[i+1] * rightmaxwords;

			// Cursor over one run of serialised entries: p points to the current
			// entry, pe to the end of the run.
			struct HeapEntry
			{
				uint64_t * p;
				uint64_t * pe;

				// empty cursor; both pointers are set to NULL instead of being left
				// indeterminate (operator< must not be called on such an entry)
				HeapEntry() : p(NULL), pe(NULL) {}
				HeapEntry(uint64_t * rp, uint64_t * rpe) : p(rp), pe(rpe) {}

				// Order cursors by the from value of the entry they currently point to.
				// NOTE(review): the merge loop relies on H.pop() delivering the entry which
				// is smallest according to this operator - confirm for FiniteSizeHeap
				bool operator<(HeapEntry const & H) const
				{
					uint64_t const from  = RightMaxIO::getFrom(reinterpret_cast<uint8_t const *>(p));
					uint64_t const Hfrom = RightMaxIO::getFrom(reinterpret_cast<uint8_t const *>(H.p));
					return from < Hfrom;
				}
			};

			libmaus2::util::FiniteSizeHeap<HeapEntry> H(VTOR.size());
			for ( uint64_t j = 0; j < VTOR.size(); ++j )
			{
				uint64_t const lfrom  = VTOR[j]->RO[i+0];
				uint64_t const lto    = VTOR[j]->RO[i+1];
				uint64_t const lrange = lto - lfrom;
				if ( lrange )
				{
					H.push(
						HeapEntry(
							VTOR[j]->BO.begin() + lfrom * rightmaxwords,
							VTOR[j]->BO.begin() + lto   * rightmaxwords
						)
					);
				}
			}

			while ( ! H.empty() )
			{
				HeapEntry HE = H.pop();

				std::copy(HE.p,HE.p + rightmaxwords,outptr);

				outptr += rightmaxwords;
				HE.p += rightmaxwords;

				if ( HE.p != HE.pe )
					H.push(HE);
			}

			assert ( outptr == outptre );
		}

		for ( uint64_t j = 0; j < VTOR.size(); ++j )
			VTOR[j] = ThreadOutputRightMax::shared_ptr_type();

		Gextend = Lextend;
		Gleft = GBO;
	}

	uint64_t nleft   = Gleft.size() / rightmaxwords;
	uint64_t nextend = Gextend.size() / rightmaxwords;

	for ( uint64_t mergerun = 1; Gleft.size(); ++mergerun )
	{
		if ( nleft >= 1024*1024 || (mergerun % 1024 == 0) )
			std::cerr << "(" << mergerun << "," << nleft << "," << nextend << ")";

		uint64_t const leftperthread = (nleft + tparts - 1)/tparts;

		// Random access adapter over an array of fixed size serialised entries:
		// get(i) yields the from value of the i'th entry. It is wrapped in
		// libmaus2::util::ConstIterator below so std::lower_bound can search by from.
		struct FromGetter
		{
			// first word of the first entry
			uint64_t const * p;
			// number of uint64_t words per entry
			uint64_t rightmaxwords;

			// empty adapter; members are zeroed instead of being left indeterminate
			// (get must not be called on such an object)
			FromGetter() : p(NULL), rightmaxwords(0) {}
			FromGetter(
				uint64_t const * rp,
				uint64_t rrightmaxwords
			) : p(rp), rightmaxwords(rrightmaxwords) {}

			// return the from field of entry number i; no range check is performed
			uint64_t get(uint64_t i) const
			{
				uint64_t const * pp = p + i * rightmaxwords;
				uint8_t const * ppu = reinterpret_cast<uint8_t const *>(pp);
				return RightMaxIO::getFrom(ppu);
			}
		};
		FromGetter leftget(Gleft.begin(),rightmaxwords);
		FromGetter extendget(Gextend.begin(),rightmaxwords);

		libmaus2::autoarray::AutoArray<uint64_t> OFF(tparts+1);
		#if defined(_OPENMP)
		#pragma omp parallel for num_threads(numthreads)
		#endif
		for ( uint64_t t = 1; t < tparts; ++t )
		{
			uint64_t const tleft = std::min(t * leftperthread,nleft);
			// uint64_t const tright = std::min(tleft + leftperthread,nleft);
			//uint64_t const trange = tright-tleft;

			if ( tleft == nleft )
			{
				OFF[t] = nextend;
			}
			else
			{
				uint64_t const keyleftfrom = leftget.get(tleft);

				libmaus2::util::ConstIterator<FromGetter,uint64_t> ita(&extendget,0);
				libmaus2::util::ConstIterator<FromGetter,uint64_t> ite(&extendget,nextend);
				libmaus2::util::ConstIterator<FromGetter,uint64_t> itc = ::std::lower_bound(ita,ite,keyleftfrom);
				uint64_t const offset = itc - ita;

				OFF[t] = offset;
			}

			// std::cerr << "offset=" << offset << std::endl;
		}
		OFF[0] = 0;
		OFF[tparts] = nextend;

		#if 0
		for ( uint64_t i = 0; i < OFF.size(); ++i )
		{
			std::cerr << "OFF[" << i << "]=" << OFF[i] << std::endl;
		}
		#endif

		struct ThreadOutputLeftExtend
		{
			typedef ThreadOutputLeftExtend this_type;
			// typedef libmaus2::util::unique_ptr<this_type>::type unique_ptr_type;
			typedef libmaus2::util::shared_ptr<this_type>::type shared_ptr_type;

			libmaus2::autoarray::AutoArray<uint64_t> BOleft;
			uint64_t BOlefto;
			libmaus2::autoarray::AutoArray<uint64_t> ROleft;

			libmaus2::autoarray::AutoArray<uint64_t> BOextend;
			uint64_t BOextendo;
			libmaus2::autoarray::AutoArray<uint64_t> ROextend;

			ThreadOutputLeftExtend()
			{}

			ThreadOutputLeftExtend(
				libmaus2::autoarray::AutoArray<uint64_t> & rBOleft,
				uint64_t rBOlefto,
				libmaus2::autoarray::AutoArray<uint64_t> & rROleft,
				libmaus2::autoarray::AutoArray<uint64_t> & rBOextend,
				uint64_t rBOextendo,
				libmaus2::autoarray::AutoArray<uint64_t> & rROextend
			)  : BOleft(rBOleft), BOlefto(rBOlefto), ROleft(rROleft),
			     BOextend(rBOextend), BOextendo(rBOextendo), ROextend(rROextend)
			{

			}
		};

		std::vector<ThreadOutputLeftExtend::shared_ptr_type> VTOR(tparts);

		#if defined(_OPENMP)
		#pragma omp parallel for num_threads(numthreads)
		#endif
		for ( uint64_t t = 0; t < tparts; ++t )
		{
			#if defined(_OPENMP)
			uint64_t const tid = omp_get_thread_num();
			#else
			uint64_t const tid = 0;
			#endif

			uint64_t const tleft  = std::min(t * leftperthread,nleft);
			uint64_t const tright = std::min(tleft + leftperthread,nleft);
			uint64_t const trange = tright-tleft;
			libmaus2::bitio::CompactArray traceCA(k,sigmabits);
			libmaus2::autoarray::AutoArray< std::pair<int64_t,uint64_t> > PPbefore(E.D.sigma,false);
			libmaus2::autoarray::AutoArray< std::pair<int64_t,uint64_t> > PPinside(E.D.sigma,false);
			libmaus2::autoarray::AutoArray< std::pair<int64_t,uint64_t> > PPmerged(E.D.sigma,false);

			libmaus2::autoarray::AutoArray<uint64_t> ROextend(tparts + 1);
			libmaus2::autoarray::AutoArray<uint64_t> BOextend;
			uint64_t BOextendo = 0;
			libmaus2::autoarray::AutoArray<uint64_t> ROleft(tparts + 1);
			libmaus2::autoarray::AutoArray<uint64_t> BOleft;
			uint64_t BOlefto = 0;

			if ( trange )
			{
				uint64_t const exleft  = OFF[t];
				uint64_t const exright = OFF[t+1];

				uint64_t const * pleft      = Gleft.begin() + tleft  * rightmaxwords;
				uint64_t const * pleftend   = Gleft.begin() + tright * rightmaxwords;

				assert ( exleft * rightmaxwords <= Gextend.size() );
				assert ( exright * rightmaxwords <= Gextend.size() );


				uint64_t const * pextend    = Gextend.begin() + exleft * rightmaxwords;
				uint64_t const * pextendend = Gextend.begin() + exright * rightmaxwords;

				while ( pleft != pleftend )
				{
					uint64_t const kleftfrom  = RightMaxIO::getFrom(reinterpret_cast<uint8_t const *>(pleft));
					uint64_t const kleftto    = RightMaxIO::getTo(reinterpret_cast<uint8_t const *>(pleft));
					uint64_t       kleftcount = RightMaxIO::getCount(reinterpret_cast<uint8_t const *>(pleft));
					uint64_t       kleftsymcount = RightMaxIO::getSymCount(reinterpret_cast<uint8_t const *>(pleft));
					uint64_t const * klefttrace = RightMaxIO::getTrace(reinterpret_cast<uint8_t const *>(pleft));

					// while next element in kextend is not maximal
					while (
						pextend != pextendend
						&&
						RightMaxIO::getTo(reinterpret_cast<uint8_t const *>(pextend)) <= kleftfrom
					)
					{
						// add to trace
						KH(tid,reinterpret_cast<uint8_t const *>(pextend));

						uint64_t const kextendfrom  = RightMaxIO::getFrom(reinterpret_cast<uint8_t const *>(pextend));
						uint64_t const kextendto    = RightMaxIO::getTo(reinterpret_cast<uint8_t const *>(pextend));
						uint64_t const * kextendtrace = RightMaxIO::getTrace(reinterpret_cast<uint8_t const *>(pextend));

						// copy trace
						std::copy(kextendtrace,kextendtrace+traceCA.s,traceCA.D);
						// shift
						for ( uint64_t i = 1; i < k; ++i )
						{
							uint64_t const ito = k-i;
							uint64_t const ifrom = k-i-1;
							traceCA.set(ito,traceCA.get(ifrom));
						}

						// compute extensions on the left
						uint64_t const csigmabefore = WT.enumerateSymbolsInRangeSorted(0          ,kextendfrom,PPbefore.begin());
						uint64_t const csigmainside = WT.enumerateSymbolsInRangeSorted(kextendfrom,kextendto  ,PPinside.begin());

						for ( uint64_t i = 0; i < csigmainside; ++i )
							PPmerged[i] = std::pair<int64_t,uint64_t>(PPinside[i].first,0);

						uint64_t ibefore = 0;
						uint64_t iinside = 0;
						while ( ibefore < csigmabefore && iinside < csigmainside )
							if ( PPbefore[ibefore].first < PPinside[iinside].first )
								++ibefore;
							else if ( PPinside[iinside].first < PPbefore[ibefore].first )
								++iinside;
							else
							{
								PPmerged[iinside].second = PPbefore[ibefore].second;
								++iinside;
								++ibefore;
							}

						// store extensions
						for ( uint64_t i = 0; i < csigmainside; ++i )
						{
							int64_t const sym = PPinside[i].first;
							uint64_t const from = E.D[sym] + PPmerged[i].second;
							uint64_t const to = from + PPinside[i].second;
							traceCA.set(0,sym);

							#if defined(RIGHTMAX_DEBUG)
							std::cerr << "\textended to from=" << from << " to=" << to << " kmer=";
							for ( uint64_t i = 0; i < k; ++i )
								std::cerr << traceCA.get(i);
							std::cerr << std::endl;
							#endif

							BOextendo = RightMaxIO::writeRightMaxEntry(BOextend,BOextendo,from,to,0 /* count */,std::numeric_limits<uint64_t>::max() /* symcount */,traceCA.D,traceCA.s);
							ROextend[from/ranksperthread]++;
						}

						pextend += rightmaxwords;
					}

					// inside interval
					while (
						pextend != pextendend
						&&
						RightMaxIO::getFrom(reinterpret_cast<uint8_t const *>(pextend)) >= kleftfrom
						&&
						RightMaxIO::getTo(reinterpret_cast<uint8_t const *>(pextend))   <= kleftto
					)
					{
						// add to trace
						KH(tid,reinterpret_cast<uint8_t const *>(pextend));

						assert ( kleftcount );
						kleftcount -= 1;

						pextend += rightmaxwords;
					}

					if ( kleftcount )
					{
						// output kleft with updated kleftcount
						BOlefto = RightMaxIO::writeRightMaxEntry(BOleft,BOlefto,kleftfrom,kleftto,kleftcount,kleftsymcount,klefttrace,traceCA.s);
						ROleft[kleftfrom/ranksperthread]++;
					}

					pleft += rightmaxwords;
				}

				// while next element
				while (
					pextend != pextendend
				)
				{
					// add to trace
					KH(tid,reinterpret_cast<uint8_t const *>(pextend));

					uint64_t const kextendfrom  = RightMaxIO::getFrom(reinterpret_cast<uint8_t const *>(pextend));
					uint64_t const kextendto    = RightMaxIO::getTo(reinterpret_cast<uint8_t const *>(pextend));
					uint64_t const * kextendtrace = RightMaxIO::getTrace(reinterpret_cast<uint8_t const *>(pextend));

					// copy trace
					std::copy(kextendtrace,kextendtrace+traceCA.s,traceCA.D);
					// shift
					for ( uint64_t i = 1; i < k; ++i )
					{
						uint64_t const ito = k-i;
						uint64_t const ifrom = k-i-1;
						traceCA.set(ito,traceCA.get(ifrom));
					}

					// compute extensions on the left
					uint64_t const csigmabefore = WT.enumerateSymbolsInRangeSorted(0          ,kextendfrom,PPbefore.begin());
					uint64_t const csigmainside = WT.enumerateSymbolsInRangeSorted(kextendfrom,kextendto  ,PPinside.begin());

					for ( uint64_t i = 0; i < csigmainside; ++i )
						PPmerged[i] = std::pair<int64_t,uint64_t>(PPinside[i].first,0);

					uint64_t ibefore = 0;
					uint64_t iinside = 0;
					while ( ibefore < csigmabefore && iinside < csigmainside )
						if ( PPbefore[ibefore].first < PPinside[iinside].first )
							++ibefore;
						else if ( PPinside[iinside].first < PPbefore[ibefore].first )
							++iinside;
						else
						{
							PPmerged[iinside].second = PPbefore[ibefore].second;
							++iinside;
							++ibefore;
						}

					// store extensions
					for ( uint64_t i = 0; i < csigmainside; ++i )
					{
						int64_t const sym = PPinside[i].first;
						uint64_t const from = E.D[sym] + PPmerged[i].second;
						uint64_t const to = from + PPinside[i].second;
						traceCA.set(0,sym);

						#if defined(RIGHTMAX_DEBUG)
						std::cerr << "\textended to from=" << from << " to=" << to << " kmer=";
						for ( uint64_t i = 0; i < k; ++i )
							std::cerr << traceCA.get(i);
						std::cerr << std::endl;
						#endif

						BOextendo = RightMaxIO::writeRightMaxEntry(BOextend,BOextendo,from,to,0 /* count */,std::numeric_limits<uint64_t>::max() /* symcount */,traceCA.D,traceCA.s);
						ROextend[from/ranksperthread]++;
					}

					pextend += rightmaxwords;
				}

				libmaus2::util::PrefixSums::prefixSums(ROextend.begin(),ROextend.end());
				::qsort(BOextend.begin(),BOextendo/rightmaxwords,rightmaxelementsize,CELcomp);

				libmaus2::util::PrefixSums::prefixSums(ROleft.begin(),ROleft.end());
			}

			ThreadOutputLeftExtend::shared_ptr_type ptr(
				new ThreadOutputLeftExtend(
					BOleft,
					BOlefto,
					ROleft,
					BOextend,
					BOextendo,
					ROextend
				)
			);

			VTOR[t] = ptr;
		}

		std::vector<uint64_t> Oleft(VTOR.size()+1);
		for ( uint64_t i = 0; i < VTOR.size(); ++i )
		{
			Oleft[i] = VTOR[i]->BOlefto/rightmaxwords;
		}
		libmaus2::util::PrefixSums::prefixSums(Oleft.begin(),Oleft.end());
		libmaus2::autoarray::AutoArray<uint64_t> Nleft(Oleft[VTOR.size()] * rightmaxwords,false);

		#if defined(_OPENMP)
		#pragma omp parallel for num_threads(numthreads) schedule(dynamic,1)
		#endif
		for ( uint64_t t = 0; t < VTOR.size(); ++t )
		{
			ThreadOutputLeftExtend const & T = *VTOR[t];
			uint64_t const * p  = T.BOleft.begin();
			uint64_t const * pe = T.BOleft.begin() + T.BOlefto;
			uint64_t * out = Nleft.begin() + Oleft[t] * rightmaxwords;

			while ( p != pe )
				*(out++) = *(p++);

			assert ( out == Nleft.begin() + Oleft[t+1] * rightmaxwords );
		}

		Gleft = Nleft;
		nleft = Gleft.size() / rightmaxwords;


		libmaus2::autoarray::AutoArray<uint64_t> RO(tparts + 1);
		#if defined(_OPENMP)
		#pragma omp parallel for num_threads(numthreads)
		#endif
		for ( uint64_t i = 0; i < RO.size(); ++i )
		{
			uint64_t & ROi = RO[i];

			for ( uint64_t j = 0; j < VTOR.size(); ++j )
				ROi += VTOR[j]->ROextend[i];
		}

		libmaus2::autoarray::AutoArray<uint64_t> Lextend(RO[tparts] * rightmaxwords,false);
		#if defined(_OPENMP)
		#pragma omp parallel for num_threads(numthreads) schedule(dynamic,1)
		#endif
		for ( uint64_t i = 0; i < tparts; ++i )
		{
			uint64_t * outptr        = Lextend.begin() + RO[i+0] * rightmaxwords;
			uint64_t * const outptre = Lextend.begin() + RO[i+1] * rightmaxwords;

			// Cursor over one run of serialised entries: p points to the current
			// entry, pe to the end of the run.
			struct HeapEntry
			{
				uint64_t * p;
				uint64_t * pe;

				// empty cursor; both pointers are set to NULL instead of being left
				// indeterminate (operator< must not be called on such an entry)
				HeapEntry() : p(NULL), pe(NULL) {}
				HeapEntry(uint64_t * rp, uint64_t * rpe) : p(rp), pe(rpe) {}

				// Order cursors by the from value of the entry they currently point to.
				// NOTE(review): the merge loop relies on H.pop() delivering the entry which
				// is smallest according to this operator - confirm for FiniteSizeHeap
				bool operator<(HeapEntry const & H) const
				{
					uint64_t const from  = RightMaxIO::getFrom(reinterpret_cast<uint8_t const *>(p));
					uint64_t const Hfrom = RightMaxIO::getFrom(reinterpret_cast<uint8_t const *>(H.p));
					return from < Hfrom;
				}
			};

			libmaus2::util::FiniteSizeHeap<HeapEntry> H(VTOR.size());
			for ( uint64_t j = 0; j < VTOR.size(); ++j )
			{
				uint64_t const lfrom  = VTOR[j]->ROextend[i+0];
				uint64_t const lto    = VTOR[j]->ROextend[i+1];
				uint64_t const lrange = lto - lfrom;
				if ( lrange )
				{
					H.push(
						HeapEntry(
							VTOR[j]->BOextend.begin() + lfrom * rightmaxwords,
							VTOR[j]->BOextend.begin() + lto   * rightmaxwords
						)
					);
				}
			}

			while ( ! H.empty() )
			{
				HeapEntry HE = H.pop();

				std::copy(HE.p,HE.p + rightmaxwords,outptr);

				outptr += rightmaxwords;
				HE.p += rightmaxwords;

				if ( HE.p != HE.pe )
					H.push(HE);
			}

			assert ( outptr == outptre );
		}

		Gextend = Lextend;
		nextend = Gextend.size() / rightmaxwords;

		for ( uint64_t j = 0; j < VTOR.size(); ++j )
			VTOR[j] = ThreadOutputLeftExtend::shared_ptr_type();
	}

	std::cerr << std::endl;

	std::cerr << libmaus2::util::MemUsage() << std::endl;

	if ( histogram )
		HKH.print(std::cout);

	return EXIT_SUCCESS;

	std::cerr << "splitn=" << splitn << " time " << rtc.getElapsedSeconds() << std::endl;


	{
		rtc.start();
		Enumerator::TraversalContext::unique_ptr_type pcontext(E.getContext(maxdepth,minsym,maxsym));
		uint64_t n = 0;
		SuffixTreeNode * STN = NULL;
		while ( (STN = E.enumerate(*pcontext)) != NULL )
		{
			if ( STN->depth == maxdepth )
				n += 1;
		}

		std::cerr << "n=" << n << " time " << rtc.getElapsedSeconds() << std::endl;
	}

	uint64_t n = 0;
	uint64_t p = 0;
	uint64_t const sh = 24;

	uint64_t const ranksperfile = (WT.size() + fanout - 1)/fanout;
	assert ( ranksperfile * fanout >= WT.size() );
	uint64_t const numfiles = ranksperfile ? ((WT.size() + ranksperfile - 1)/ranksperfile) : 0;
	libmaus2::aio::OutputStreamInstanceArray OSIArightmax(tmpfilebase + "_rightmax", numfiles);

	// uint64_t const sortblocksize = (sortelements + numfiles - 1) / numfiles;

	typedef libmaus2::aio::BufferedOutput<uint64_t> buffer_type;
	typedef buffer_type::unique_ptr_type buffer_ptr_type;

	libmaus2::autoarray::AutoArray< buffer_ptr_type > ABrightmax(numfiles);
	for ( uint64_t i = 0; i < numfiles; ++i )
	{
		buffer_ptr_type ptr(new buffer_type(OSIArightmax[i],8192));
		ABrightmax[i] = UNIQUE_PTR_MOVE(ptr);
	}

	SuffixTreeNode * STN = NULL;
	while ( (STN = E.enumerate(*pcontext)) != NULL )
	{
		if ( STN->depth == maxdepth )
		{
			uint64_t const from = STN->NI[0].from;
			uint64_t const to = STN->NI[STN->size-1].to;
			uint64_t const size = STN->size;
			int64_t const * trace = STN->trace;
			uint64_t const tracelen = STN->depth;
			uint64_t symsize = 0;
			for ( uint64_t i = 0; i < size; ++i )
			{
				int64_t const sym = STN->NI[i].sym;

				if ( sym >= csymmin && sym <= csymmax )
					symsize += 1;
			}

			for ( uint64_t i = 0; i < tracelen; ++i )
				traceCA.set(i,trace[i]);

			uint64_t const fileid = from / ranksperfile;
			RightMaxIO::writeRightMaxEntry(*(ABrightmax[fileid]),from,to,size,symsize /* number of extensions on the right */,traceCA.D,traceCA.s);

			n += 1;

			if ( (n >> sh) != (p >> sh) )
			{
				std::cerr << "[V] " << n << std::endl;
				p = n;
			}

			// std::cerr << STN->toString();
		}
	}

	std::cerr << "[V] found " << n << " right maximal nodes of depth " << maxdepth << std::endl;

	// flush files and count number of entries
	std::vector<uint64_t> VNrightmax(numfiles);
	for ( uint64_t i = 0; i < numfiles; ++i )
	{
		ABrightmax[i]->flush();
		VNrightmax[i] = ABrightmax[i]->getWrittenWords();
		assert ( VNrightmax[i] % rightmaxwords == 0 );
		VNrightmax[i] /= rightmaxwords;
		ABrightmax[i].reset();
	}
	// close files
	OSIArightmax.close();

	// sort files
	sortFiles(OSIArightmax.Vfn,VNrightmax,rightmaxelementsize,sortmem);


	std::string tracefile = tmpfilebase + "tracefile";
	libmaus2::util::TempFileRemovalContainer::addTempFile(tracefile);
	//uint64_t ntrace = 0;

	{
		// concatenate input files into one stream
		libmaus2::aio::ConcatInputStream::unique_ptr_type CISrightmax(new libmaus2::aio::ConcatInputStream(OSIArightmax.Vfn));
		InputBlock::unique_ptr_type IBrightmax(new InputBlock(*CISrightmax,1024,std::accumulate(VNrightmax.begin(),VNrightmax.end(),0ull),rightmaxelementsize));

		//
		libmaus2::aio::OutputStreamInstance::unique_ptr_type ptracefile(new libmaus2::aio::OutputStreamInstance(tracefile));

		//
		std::string kleftfile = tmpfilebase + "kleftfile";
		libmaus2::aio::OutputStreamInstance::unique_ptr_type pkleftfile(new libmaus2::aio::OutputStreamInstance(kleftfile));
		libmaus2::util::TempFileRemovalContainer::addTempFile(kleftfile);

		//
		std::string kextendfile = tmpfilebase + "kextendfile";
		libmaus2::aio::OutputStreamInstance::unique_ptr_type pkextendfile(new libmaus2::aio::OutputStreamInstance(kextendfile));
		libmaus2::aio::BufferedOutput<uint64_t>::unique_ptr_type pbokextendfile(
			new libmaus2::aio::BufferedOutput<uint64_t>(*pkextendfile,1024)
		);
		libmaus2::util::TempFileRemovalContainer::addTempFile(kextendfile);

		libmaus2::autoarray::AutoArray< std::pair<int64_t,uint64_t> > PPbefore(E.D.sigma,false);
		libmaus2::autoarray::AutoArray< std::pair<int64_t,uint64_t> > PPinside(E.D.sigma,false);
		libmaus2::autoarray::AutoArray< std::pair<int64_t,uint64_t> > PPmerged(E.D.sigma,false);

		// #define RIGHTMAX_DEBUG

		uint8_t const * prightmax = NULL;
		uint64_t nextend = 0;
		uint64_t nleft = 0;
		while ( IBrightmax->getNext(prightmax) )
		{
			uint64_t const fromrightmax     = RightMaxIO::getFrom(prightmax);
			uint64_t const torightmax       = RightMaxIO::getTo(prightmax);
			uint64_t const * utracerightmax = RightMaxIO::getTrace(prightmax);
			uint64_t const symcountrightmax = RightMaxIO::getSymCount(prightmax);
			std::copy(utracerightmax,utracerightmax+traceCA.s,traceCA.D);

			#if defined(RIGHTMAX_DEBUG)
			uint64_t const sizerightmax = RightMaxIO::getCount(prightmax);
			std::cerr << "rightmax ";
			for ( uint64_t i = 0; i < k; ++i )
				std::cerr << traceCA.get(i);
			std::cerr << " size=" << sizerightmax;
			std::cerr << " I=[" << fromrightmax << "," << torightmax << ") ";
			std::cerr << std::endl;
			#endif

			pkleftfile->write(reinterpret_cast<char const *>(prightmax),rightmaxelementsize);
			nleft += 1;
			// ntrace += 1;

			// shift trace to the right
			for ( uint64_t i = 1; i < k; ++i )
			{
				uint64_t const ito = k-i;
				uint64_t const ifrom = k-i-1;
				traceCA.set(ito,traceCA.get(ifrom));

				// 6 -> 7
				// 5 -> 6
				// ...
			}

			// compute extensions on the left
			uint64_t const csigmabefore = WT.enumerateSymbolsInRangeSorted(0           ,fromrightmax,PPbefore.begin());
			uint64_t const csigmainside = WT.enumerateSymbolsInRangeSorted(fromrightmax,torightmax  ,PPinside.begin());

			for ( uint64_t i = 0; i < csigmainside; ++i )
				PPmerged[i] = std::pair<int64_t,uint64_t>(PPinside[i].first,0);

			uint64_t ibefore = 0;
			uint64_t iinside = 0;
			while ( ibefore < csigmabefore && iinside < csigmainside )
				if ( PPbefore[ibefore].first < PPinside[iinside].first )
					++ibefore;
				else if ( PPinside[iinside].first < PPbefore[ibefore].first )
					++iinside;
				else
				{
					PPmerged[iinside].second = PPbefore[ibefore].second;
					++iinside;
					++ibefore;
				}

			// enumerate extensions on the left
			for ( uint64_t i = 0; i < csigmainside; ++i )
			{
				int64_t const sym = PPinside[i].first;
				uint64_t const from = E.D[sym] + PPmerged[i].second;
				uint64_t const to = from + PPinside[i].second;

				#if defined(RIGHTMAX_DEBUG)
				uint64_t const stepfrom = pLF->step(sym,fromrightmax);
				uint64_t const stepto = pLF->step(sym,torightmax);

				std::cerr << "\t" << sym << " [" << from << "," << to << ")" << std::endl;

				assert ( stepfrom == from );
				assert ( stepto == to );
				#endif

				traceCA.set(0,sym);

				RightMaxIO::writeRightMaxEntry(*pbokextendfile,from,to,0 /* count */,symcountrightmax /* symcount */,traceCA.D,traceCA.s);

				#if 0
				pbokextendfile->put(from);
				pbokextendfile->put(to);
				pbokextendfile->put(0);
				for ( uint64_t i = 0; i < traceCA.s; ++i )
					pbokextendfile->put(traceCA.D[i]);
				#endif

				nextend += 1;
			}
		}

		pbokextendfile->flush();
		pbokextendfile.reset();
		pkextendfile->flush();
		pkextendfile.reset();
		pkleftfile->flush();
		pkleftfile.reset();
		IBrightmax.reset();
		CISrightmax.reset();

		// sort extensions file
		sortFiles(
			std::vector<std::string>(1,kextendfile),
			std::vector<uint64_t>(1,nextend),rightmaxelementsize,
			sortmem
		);

		for ( uint64_t kleftrun = 1; libmaus2::util::GetFileSize::getFileSize(kleftfile); ++kleftrun )
		{
			std::cerr << "(" << kleftrun << ")";

			libmaus2::aio::InputStreamInstance::unique_ptr_type ISIkleft(new libmaus2::aio::InputStreamInstance(kleftfile));
			InputBlock::unique_ptr_type IBkleft(new InputBlock(*ISIkleft,1024,nleft,rightmaxelementsize));
			libmaus2::aio::InputStreamInstance::unique_ptr_type ISIkextend(new libmaus2::aio::InputStreamInstance(kextendfile));
			InputBlock::unique_ptr_type IBkextend(new InputBlock(*ISIkextend,1024,nextend,rightmaxelementsize));

			uint8_t const * pkleft = NULL;
			uint8_t const * pkextend = NULL;

			std::string kleftfilenew = tmpfilebase + "kleftfile_new";
			libmaus2::aio::OutputStreamInstance::unique_ptr_type pkleftfilenew(new libmaus2::aio::OutputStreamInstance(kleftfilenew));
			libmaus2::aio::BufferedOutput<uint64_t>::unique_ptr_type pbokleftfilenew(
				new libmaus2::aio::BufferedOutput<uint64_t>(*pkleftfilenew,1024)
			);
			libmaus2::util::TempFileRemovalContainer::addTempFile(kleftfilenew);

			std::string kextendfilenew = tmpfilebase + "kextendfile_new";
			libmaus2::aio::OutputStreamInstance::unique_ptr_type pkextendfilenew(new libmaus2::aio::OutputStreamInstance(kextendfilenew));
			libmaus2::aio::BufferedOutput<uint64_t>::unique_ptr_type pbokextendfilenew(
				new libmaus2::aio::BufferedOutput<uint64_t>(*pkextendfilenew,1024)
			);
			libmaus2::util::TempFileRemovalContainer::addTempFile(kextendfilenew);

			uint64_t nextendnew = 0;
			uint64_t nleftnew = 0;


			// iterate over entries left
			while ( IBkleft->getNext(pkleft) )
			{
				uint64_t const kleftfrom  = RightMaxIO::getFrom(pkleft);
				uint64_t const kleftto    = RightMaxIO::getTo(pkleft);
				uint64_t       kleftcount = RightMaxIO::getCount(pkleft);
				uint64_t       kleftsymcount = RightMaxIO::getSymCount(pkleft);
				uint64_t const * klefttrace = RightMaxIO::getTrace(pkleft);

				#if defined(RIGHTMAX_DEBUG)
				std::cerr << "left from=" << kleftfrom << " to=" << kleftto << " count=" << kleftcount << " kmer=";
				std::copy(klefttrace,klefttrace+traceCA.s,traceCA.D);
				for ( uint64_t i = 0; i < k; ++i )
					std::cerr << traceCA.get(i);
				std::cerr << std::endl;
				#endif

				// while next element in kextend is not maximal
				while (
					IBkextend->peekNext(pkextend)
					&&
					RightMaxIO::getTo(pkextend) <= kleftfrom
				)
				{
					// output in tracefile and extend
					IBkextend->getNext(pkextend);

					// add to trace
					KH(0,pkextend);
					#if 0
					ptracefile->write(reinterpret_cast<char const *>(pkextend),rightmaxelementsize);
					ntrace += 1;
					#endif

					uint64_t const kextendfrom  = RightMaxIO::getFrom(pkextend);
					uint64_t const kextendto    = RightMaxIO::getTo(pkextend);
					uint64_t const * kextendtrace = RightMaxIO::getTrace(pkextend);

					// copy trace
					std::copy(kextendtrace,kextendtrace+traceCA.s,traceCA.D);
					// shift
					for ( uint64_t i = 1; i < k; ++i )
					{
						uint64_t const ito = k-i;
						uint64_t const ifrom = k-i-1;
						traceCA.set(ito,traceCA.get(ifrom));
					}

					// compute extensions on the left
					uint64_t const csigmabefore = WT.enumerateSymbolsInRangeSorted(0          ,kextendfrom,PPbefore.begin());
					uint64_t const csigmainside = WT.enumerateSymbolsInRangeSorted(kextendfrom,kextendto  ,PPinside.begin());

					for ( uint64_t i = 0; i < csigmainside; ++i )
						PPmerged[i] = std::pair<int64_t,uint64_t>(PPinside[i].first,0);

					uint64_t ibefore = 0;
					uint64_t iinside = 0;
					while ( ibefore < csigmabefore && iinside < csigmainside )
						if ( PPbefore[ibefore].first < PPinside[iinside].first )
							++ibefore;
						else if ( PPinside[iinside].first < PPbefore[ibefore].first )
							++iinside;
						else
						{
							PPmerged[iinside].second = PPbefore[ibefore].second;
							++iinside;
							++ibefore;
						}

					// store extensions
					for ( uint64_t i = 0; i < csigmainside; ++i )
					{
						int64_t const sym = PPinside[i].first;
						uint64_t const from = E.D[sym] + PPmerged[i].second;
						uint64_t const to = from + PPinside[i].second;
						traceCA.set(0,sym);

						#if defined(RIGHTMAX_DEBUG)
						std::cerr << "\textended to from=" << from << " to=" << to << " kmer=";
						for ( uint64_t i = 0; i < k; ++i )
							std::cerr << traceCA.get(i);
						std::cerr << std::endl;
						#endif

						RightMaxIO::writeRightMaxEntry(*pbokextendfilenew,from,to,0 /* count */,std::numeric_limits<uint64_t>::max() /* symcount */,traceCA.D,traceCA.s);

						#if 0
						pbokextendfilenew->put(from);
						pbokextendfilenew->put(to);
						pbokextendfilenew->put(0);
						for ( uint64_t i = 0; i < traceCA.s; ++i )
							pbokextendfilenew->put(traceCA.D[i]);
						#endif

						nextendnew += 1;
					}
				}

				// inside interval
				while (
					IBkextend->peekNext(pkextend)
					&&
					RightMaxIO::getFrom(pkextend) >= kleftfrom
					&&
					RightMaxIO::getTo(pkextend) <= kleftto
				)
				{
					IBkextend->getNext(pkextend);

					// add to trace
					KH(0,pkextend);
					#if 0
					ptracefile->write(reinterpret_cast<char const *>(pkextend),rightmaxelementsize);
					ntrace += 1;
					#endif

					assert ( kleftcount );
					kleftcount -= 1;
				}

				if ( kleftcount )
				{
					// output kleft with updated kleftcount
					RightMaxIO::writeRightMaxEntry(*pbokleftfilenew,kleftfrom,kleftto,kleftcount,kleftsymcount,klefttrace,traceCA.s);

					#if 0
					pbokleftfilenew->put(kleftfrom);
					pbokleftfilenew->put(kleftto);
					pbokleftfilenew->put(kleftcount);
					for ( uint64_t i = 0; i < traceCA.s; ++i )
						pbokleftfilenew->put(klefttrace[i]);
					#endif

					nleftnew += 1;
				}
			}

			while (
				IBkextend->peekNext(pkextend)
			)
			{
				// output in tracefile and extend
				IBkextend->getNext(pkextend);

				KH(0,pkextend);
				#if 0
				ptracefile->write(reinterpret_cast<char const *>(pkextend),rightmaxelementsize);
				ntrace += 1;
				#endif

				uint64_t const kextendfrom  = RightMaxIO::getFrom(pkextend);
				uint64_t const kextendto    = RightMaxIO::getTo(pkextend);
				uint64_t const * kextendtrace = RightMaxIO::getTrace(pkextend);

				std::copy(kextendtrace,kextendtrace+traceCA.s,traceCA.D);
				for ( uint64_t i = 1; i < k; ++i )
				{
					uint64_t const ito = k-i;
					uint64_t const ifrom = k-i-1;
					traceCA.set(ito,traceCA.get(ifrom));
				}

				// compute extensions on the left
				uint64_t const csigmabefore = WT.enumerateSymbolsInRangeSorted(0          ,kextendfrom,PPbefore.begin());
				uint64_t const csigmainside = WT.enumerateSymbolsInRangeSorted(kextendfrom,kextendto  ,PPinside.begin());

				for ( uint64_t i = 0; i < csigmainside; ++i )
					PPmerged[i] = std::pair<int64_t,uint64_t>(PPinside[i].first,0);

				uint64_t ibefore = 0;
				uint64_t iinside = 0;
				while ( ibefore < csigmabefore && iinside < csigmainside )
					if ( PPbefore[ibefore].first < PPinside[iinside].first )
						++ibefore;
					else if ( PPinside[iinside].first < PPbefore[ibefore].first )
						++iinside;
					else
					{
						PPmerged[iinside].second = PPbefore[ibefore].second;
						++iinside;
						++ibefore;
					}

				for ( uint64_t i = 0; i < csigmainside; ++i )
				{
					int64_t const sym = PPinside[i].first;
					uint64_t const from = E.D[sym] + PPmerged[i].second;
					uint64_t const to = from + PPinside[i].second;
					traceCA.set(0,sym);

					#if defined(RIGHTMAX_DEBUG)
					std::cerr << "\textended to from=" << from << " to=" << to << " kmer=";
					for ( uint64_t i = 0; i < k; ++i )
						std::cerr << traceCA.get(i);
					std::cerr << std::endl;
					#endif

					RightMaxIO::writeRightMaxEntry(*pbokextendfilenew,from,to,0 /* count */,std::numeric_limits<uint64_t>::max() /* symcount */,traceCA.D,traceCA.s);

					#if 0
					pbokextendfilenew->put(from);
					pbokextendfilenew->put(to);
					pbokextendfilenew->put(0);
					for ( uint64_t i = 0; i < traceCA.s; ++i )
						pbokextendfilenew->put(traceCA.D[i]);
					#endif

					nextendnew += 1;
				}
			}

			pbokleftfilenew->flush();
			pbokleftfilenew.reset();
			pkleftfilenew->flush();
			pkleftfilenew.reset();

			pbokextendfilenew->flush();
			pbokextendfilenew.reset();
			pkextendfilenew->flush();
			pkextendfilenew.reset();

			IBkleft.reset();
			ISIkleft.reset();
			IBkextend.reset();
			ISIkextend.reset();

			libmaus2::aio::OutputStreamFactoryContainer::rename(kleftfilenew,kleftfile);
			libmaus2::aio::OutputStreamFactoryContainer::rename(kextendfilenew,kextendfile);

			nleft = nleftnew;
			nextend = nextendnew;

			sortFiles(std::vector<std::string>(1,kextendfile),std::vector<uint64_t>(1,nextend),rightmaxelementsize,sortmem);
		}
		std::cerr << std::endl;

		ptracefile->flush();
		ptracefile.reset();

		if ( histogram )
			HKH.print(std::cout);

		#if 0
		// sort trace file (not necessary for some statistics)
		sortFiles(std::vector<std::string>(1,tracefile),std::vector<uint64_t>(1,ntrace),rightmaxelementsize,sortmem);

		libmaus2::util::Histogram H;
		uint64_t numk = 0;
		uint64_t numkmax = 0;

		libmaus2::aio::InputStreamInstance::unique_ptr_type ISItrace(new libmaus2::aio::InputStreamInstance(tracefile));
		InputBlock::unique_ptr_type IBtrace(new InputBlock(*ISItrace,1024,ntrace,rightmaxelementsize));
		uint8_t const * ptrace = NULL;
		while ( IBtrace->getNext(ptrace) )
		{
			uint64_t const tracefrom    = RightMaxIO::getFrom(ptrace);
			uint64_t const traceto      = RightMaxIO::getTo(ptrace);
			uint64_t const tracerange   = traceto - tracefrom;
			uint64_t       tracesymcount   = RightMaxIO::getSymCount(ptrace);
			uint64_t const * tracetrace = RightMaxIO::getTrace(ptrace);

			std::copy(tracetrace,tracetrace+traceCA.s,traceCA.D);

			bool traceok = true;
			for ( uint64_t i = 0; i < k; ++i )
				traceok = traceok && (traceCA.get(i) < 4);

			if ( traceok )
			{
				H(tracerange);

				numk += 1;
				if (
					tracesymcount != std::numeric_limits<uint64_t>::max()
					&&
					(
						tracesymcount >= 2 || tracesymcount == 0
					)
				)
				{
					numkmax += 1;
				}

				if ( fulltable )
				{
					std::cout << "[F]\t";
					for ( uint64_t i = 0; i < k; ++i )
						std::cout << traceCA.get(i);
					std::cout << "\t" << (traceto-tracefrom);

					if ( tracesymcount != std::numeric_limits<uint64_t>::max() )
						std::cout << "\t" << tracesymcount;
					else
						std::cout << "\t" << '*';
					std::cout << "\t[" << tracefrom << "," << traceto << ")";

					#if 0
					uint64_t ifrom = 0;
					uint64_t ito = pLF->W->size();

					for ( uint64_t i = 0; i < k; ++i )
					{
						ifrom = pLF->step(traceCA.get(k-i-1),ifrom);
						ito   = pLF->step(traceCA.get(k-i-1),ito);
					}
					std::cout << "\t[" << ifrom << "," << ito << ")";
					assert ( ifrom == tracefrom );
					assert ( ito == traceto );
					#endif

					std::cout << "\n";
				}
			}
		}

		if ( histogram )
		{
			std::ostringstream Hstr;
			H.print(Hstr);
			std::string line;
			std::istringstream Histr(Hstr.str());
			while ( std::getline(Histr,line) )
			{
				std::cout << "[H]\t" << line << "\n";
			}
		}
		#endif

		// std::cout << "[S]\tnumk=" << numk << "\tnumkmax=" << numkmax << std::endl;
	}

	return EXIT_SUCCESS;
}

/**
 * Program entry point.
 *
 * Wraps the command line in a libmaus2::util::ArgParser and, provided
 * arg.size() is at least 1 (presumably the number of positional arguments;
 * the usage line names a single one, <hwt>), forwards the parsed arguments
 * to testsuffixtreenum and uses its result as the exit status.
 *
 * @param argc number of command line tokens
 * @param argv command line tokens; argv[0] is printed in the usage message
 * @return result of testsuffixtreenum, or EXIT_FAILURE if the argument check
 *         fails or a std::exception is caught
 **/
int main(int argc, char * argv[])
{
	// pessimistic default, only replaced once the worker has returned
	int exitcode = EXIT_FAILURE;

	try
	{
		libmaus2::util::ArgParser const arg(argc,argv);

		if ( arg.size() >= 1 )
			exitcode = testsuffixtreenum(arg);
		else
			std::cerr << "[E] usage " << argv[0] << " <hwt>" << std::endl;
	}
	catch(std::exception const & ex)
	{
		// report the failure reason on the error stream and fail
		std::cerr << ex.what() << std::endl;
		exitcode = EXIT_FAILURE;
	}

	return exitcode;
}
