/*
Copyright 2016 Timo Beller, Enno Ohlebusch

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <algorithm>
#include <chrono>
#include <cstdint>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <limits>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
#include <sdsl/sdsl_concepts.hpp>
#include <sdsl/csa_wt.hpp>
#include <sdsl/suffix_array_algorithm.hpp>
#include "create_datastructures.hpp"
#include "partial_lcp.hpp"
#include "handle_graph.hpp"

using namespace std;
using namespace sdsl;
using namespace std::chrono;

struct node
{
	uint64_t lb;
	uint64_t rb;
	uint64_t len;
	bool exit_node;
	vector<uint64_t> adj_list;
	vector<uint64_t> pos_list;
	node(uint64_t _lb=0, uint64_t _rb=0, uint64_t _len=0, bool _exit_node=false) : lb(_lb), rb(_rb), len(_len), exit_node(_exit_node)
	{
	}
};

struct node_c
{
	uint64_t lb;
	uint64_t rb;
	uint64_t len;
	vector<pair<uint64_t,uint64_t>> successors;
	node_c(uint64_t _lb=0, uint64_t _rb=0, uint64_t _len=0) : lb(_lb), rb(_rb), len(_len)
	{
	}
};

struct tmp_node
{
	uint64_t node_number;
	uint64_t lb;
	uint64_t rb;
	uint64_t len;
	tmp_node(uint64_t _node_number, uint64_t _lb=0, uint64_t _rb=0, uint64_t _len=0) : node_number(_node_number), lb(_lb), rb(_rb), len(_len)
	{
	}
};

tuple<vector<node_c>, vector<uint64_t>> create_cdbg_with_intervals(cache_config& config, uint64_t k)
{
	// Create WT of the BWT
	typedef wt_huff<bit_vector, rank_support_v<>, select_support_scan<1>, select_support_scan<0>> wt;
	wt wt_bwt;
	construct(wt_bwt, cache_file_name(conf::KEY_BWT, config));

	// Create C-array (needed for interval_symbols)
	vector<uint64_t> carray(256, 0);
	for(uint64_t i=0, sum=0; i<256; ++i)
	{
		carray[i] = sum;
		sum += wt_bwt.rank(wt_bwt.size(), i);
	}

	// Create bit-vector of k-Intervals (marks start and end position of each k-interval with 1)
	// and add split_nodes into the graph
	vector<node_c> graph;
	bit_vector bv(wt_bwt.size(), 0);
	{
		auto start = high_resolution_clock::now();
		int_vector<2> lcp_k = construct_partial_lcp<wt>(wt_bwt, carray, k);
		auto stop = high_resolution_clock::now();
		cerr << std::setw(10) << duration_cast<milliseconds>(stop-start).count() << "ms for partial LCP construction (sdsl-bblaca)" << endl;

		start = high_resolution_clock::now();
		uint64_t lb=0;
		bool open=false;
		for(uint64_t i=0; i<lcp_k.size(); ++i)
		{
			if(open)
			{
				if(lcp_k[i] == lt_k)
				{
					graph.emplace_back(node_c(lb, i-1, k));
					bv[i-1] = true;
					open = false;
				}
			}
			else if(lcp_k[i] == eq_k)
			{
				bv[lb] = true;
				open = true;
			}
			if(lcp_k[i] == lt_k)
			{
				lb = i;
			}
		}
		if(open)
		{
			graph.emplace_back(node_c(lb,lcp_k.size()-1, k));
			bv[lcp_k.size()-1] = true;
		}
		// Add exit node(s)
		graph.emplace_back(node_c(0,0,1));
	        for(uint64_t i=carray[1]; i<carray[2]; ++i)
	        {
	            	graph.emplace_back(node_c(i,i,1));
	        }
		stop = high_resolution_clock::now();
		cerr << std::setw(10) << duration_cast<milliseconds>(stop-start).count() << "ms for create bv and initial nodes" << endl;
	}
	auto start = high_resolution_clock::now();

	bit_vector::rank_1_type bv_rank;
	util::init_support(bv_rank, &bv);

	uint64_t quantity;
	vector<uint8_t> cs(wt_bwt.sigma);        // List of characters in the interval
	vector<uint64_t> rank_c_i(wt_bwt.sigma); // Number of occurrence of character in [0 .. i-1]
	vector<uint64_t> rank_c_j(wt_bwt.sigma); // Number of occurrence of character in [0 .. j-1]
	vector<uint64_t> start_nodes;
	for(uint64_t i=0; i<graph.size(); ++i)
	{
		tmp_node cur = tmp_node(i, graph[i].lb, graph[i].rb, graph[i].len);
		bool extend = true;
		while(extend)
		{
			extend = false;
			interval_symbols(wt_bwt, cur.lb, cur.rb+1, quantity, cs, rank_c_i, rank_c_j);
			for(uint64_t i=0; i<quantity; ++i)
			{
				uint8_t c = cs[i];
				uint64_t lb = carray[c] + rank_c_i[i];
				uint64_t rb = carray[c] + rank_c_j[i] - 1;
				uint64_t ones = bv_rank(lb+1);
				uint64_t node_number = numeric_limits<uint64_t>::max();

				if(ones % 2 == 0 and bv[lb] == 0)
				{
				}
				else
				{
					node_number = (ones-1)/2;
				}

				if(node_number != numeric_limits<uint64_t>::max())
				{
					graph[node_number].successors.emplace_back(make_pair(cur.node_number, rb-lb+1));
					graph[cur.node_number].lb = cur.lb;
					graph[cur.node_number].rb = cur.rb;
					graph[cur.node_number].len = cur.len;
				}
				else if(c<=1) // c == sentinal
				{
					graph[cur.node_number].lb = cur.lb;
					graph[cur.node_number].rb = cur.rb;
					graph[cur.node_number].len = cur.len;
					start_nodes.emplace_back(cur.node_number);
				}
				else
				{
					if(quantity == 1)
					{
						extend = true;
						cur.len++;
						cur.lb = lb;
						cur.rb = rb;
					}
					else
					{
						node_number = graph.size();
						graph.emplace_back(node_c(lb, rb, k));
						graph[node_number].successors.emplace_back(make_pair(cur.node_number, rb-lb+1));
						graph[cur.node_number].lb = cur.lb;
						graph[cur.node_number].rb = cur.rb;
						graph[cur.node_number].len = cur.len;
					}
				}
			}
		}
	}
	auto stop = high_resolution_clock::now();
	cerr << std::setw(10) << duration_cast<milliseconds>(stop-start).count() << "ms for creating graph" << endl;
	return make_tuple(graph, start_nodes);
}

vector<node> transform_graph(vector<node_c>& graph_c, const int_vector<>& sa)
{
	vector<node> graph(graph_c.size());
	for(uint64_t i=0; i<graph_c.size(); ++i)
	{
		graph[i].len = graph_c[i].len;
		graph[i].lb = graph_c[i].lb;
		graph[i].rb = graph_c[i].rb;
		for(uint64_t j=graph_c[i].lb; j<=graph_c[i].rb; ++j)
		{
			graph[i].pos_list.emplace_back(sa[j]+1);
		}
		sort(begin(graph[i].pos_list), end(graph[i].pos_list), greater<uint64_t>());
		for(uint64_t j=0; j<graph_c[i].successors.size(); ++j)
		{
			for(uint64_t k=0; k<graph_c[i].successors[j].second; ++k)
			{
				graph[i].adj_list.emplace_back(graph_c[i].successors[j].first);
			}
		}
	}
	return graph;
}

int main(int argc, char *argv[])
{
	// Check parameters
	if(argc != 4)
	{
		cerr << "Usage: " << argv[0] << " inputfile outputfile kFile" << endl;
		return 1;
	}

	// Get parameters
	string inputfile = argv[1];
	string outputfile = argv[2];
	string kfilename = argv[3];

	// Create datastructures
	cache_config config(true, ".", "tmp");
	uint64_t errors = create_datastructures(config, inputfile, kfilename, true);

	// Read k-values
	ifstream kfile(kfilename);
	uint64_t k;
	while(!errors and kfile >> k)
	{
		// Create graph
		vector<node> graph;
		vector<uint64_t> start_nodes;
		{
			vector<node_c> graph_c;
			tie(graph_c, start_nodes) = create_cdbg_with_intervals(config, k);

			// Complete graph
			auto start = high_resolution_clock::now();
			int_vector<> sa;
			load_from_file(sa, cache_file_name(conf::KEY_SA, config));
			graph = transform_graph(graph_c, sa);
			auto stop = high_resolution_clock::now();
			cerr << std::setw(10) << duration_cast<milliseconds>(stop-start).count() << "ms for graph transformation" << endl;
		}
		// Print graph
		{
			auto start = high_resolution_clock::now();
			ofstream output(outputfile+".k"+to_string(k)+".dot");
			ofstream output_start_nodes(outputfile+".k"+to_string(k)+".start_nodes.txt");
			print_graph(graph, start_nodes, output, output_start_nodes);
			auto stop = high_resolution_clock::now();
			cerr << std::setw(10) << duration_cast<milliseconds>(stop-start).count() << "ms for printing graph" << endl;
		}
	}

	// Delete files
	if(config.delete_files)
	{
		util::delete_all_files(config.file_map);
	}

	return errors;
}
