/*
    This file is part of SGDAE, a software to numerically solve differential
    algebraic equations using a steepest descent method based on Sobolev 
    gradients.
    You are welcome to contact the authors via e-mail:
        <manfred-sauter [at] gmx [dot] de>
        <robin.nittka [at] gmx [dot] de>

    Copyright 2005-2008 Manfred Sauter, Robin Nittka.

    SGDAE is free software distributed under the terms of the revised BSD 
    license as illustrated on <http://creativecommons.org/licenses/BSD/>.
    For details consult the accompanying LICENSE.txt file.

    $Id: $
*/


#ifndef __SOLVER_HXX__
#define __SOLVER_HXX__
namespace solver {

using namespace logging;    

// Structure of the system matrix handed to a solver.
// The numeric values of SYM_POS and SYM_INDEF are passed on unchanged as the
// PARDISO matrix type (see PardisoSolver, "mtype = type"), so they must not
// be renumbered.
enum MatrixType
{
	SYM_INDEF     = -2,	// symmetric indefinite matrix
	UNINITIALIZED =  0,	// no matrix type selected
	SYM_POS       =  2	// symmetric positive definite matrix
};


// Naive conjugate gradient solver for A*x = b with a square sparse matrix A.
//
// The solver keeps a reference to A, so the matrix has to outlive the solver.
// Only SYM_POS matrices are accepted (checked by assertion).
//
// Tunables (public on purpose, set them after construction):
//   eps     stop as soon as the SQUARED residual norm <r,r> is <= eps
//   max_it  upper bound on the number of iterations
struct ConjugateGradientSolver 
{
	typedef ublas::compressed_matrix<Real> SparseSymmetricMatrix;
	typedef ublas::vector<Real> DenseVector;

	const SparseSymmetricMatrix &A;
	const int n;

	Real eps;
	int max_it;

	ConjugateGradientSolver(const SparseSymmetricMatrix &A, MatrixType type = SYM_POS)
		: A(A), n(A.size1()), eps(PREC::MACHINE_EPS), max_it(std::numeric_limits<int>::max()) 
	{
		assert(A.size1() == A.size2());
		assert(type == SYM_POS);
		(void) type; // only inspected by the assertion above
	}

	// Solves A*x = b.
	//
	// x  in: initial guess (its size has to match A, it is not resized here)
	//    out: the last iterate
	// b  right hand side
	//
	// Returns the number of iterations performed before the stopping
	// criterion was met, or max_it if the iteration budget was used up.
	int solve(DenseVector &x, const DenseVector &b) 
	{
		using ublas::prod;
		using ublas::noalias;
		using ublas::inner_prod;

		DenseVector Ap(n), r(n);
		noalias(r) = b - prod(A,x);
		DenseVector p = r;
		Real ns_r = inner_prod(r,r);

		// Count completed iterations from zero. The former "k = 1; k <= max_it"
		// form can never terminate for the default max_it == INT_MAX and
		// overflows the counter (undefined behaviour).
		for (int done=0; done<max_it; ++done)
		{
			if (ns_r <= eps) return done;

			Ap = prod(A,p);
			const Real alpha = ns_r/inner_prod(p,Ap);
			noalias(x) += alpha*p;
			// The residual is recomputed from x instead of being updated
			// recursively.
			noalias(r) = b - prod(A,x);

			const Real ns_r_old = ns_r;
			ns_r = inner_prod(r,r);
			const Real beta = ns_r/ns_r_old;
			// No noalias here: p appears on both sides.
			p = beta*p + r;
		}
		return max_it;
	}
};	


// Family of sparse vectors; this is the input type of orthonormalize() and
// the container used for the kernel basis in manual_null_space_projection().
typedef std::vector<SparseVector> tVSV;

void orthonormalize(const tVSV &v, DenseMatrix &M)
{
	using ublas::inner_prod;
	using ublas::noalias;
	using ublas::matrix_column;
	using ublas::column;

	assert(v.size() > 0);
	M.resize(v[0].size(), v.size(), false);
	typedef matrix_column<DenseMatrix> Column;
	
	for (size_t i=0; i<v.size(); i++)
	{
		Column col(M, i);
		col = v[i];

		for (size_t j=0; j<i; j++)
			col -= inner_prod(col, column(M,j)) * column(M, j);

		Real norm = norm_2(col);
		assert(norm > 1e-12);
		col /= norm;
	}
}




#ifdef MKL

extern "C" {

// --- PARDISO sparse direct solver (used by PardisoSolver) -----------------
// All arguments are passed by pointer.

void pardisoinit_(void *pt, int *mtype, int *iparm);

void pardiso_(
	void *pt, int *maxfct, int *mnum, int *mtype, int *phase,
	int *n, const double *a, const int *ia, const int *ja,
	int *perm, int *nrhs, int *iparm, int *msglvl,
	const double *b, double *sol, int *error);

// --- LAPACK dense routines ------------------------------------------------

// Called by the LUSolver constructor to factorize its matrix.
void dgetrf_(
	const int *m, const int *n, double *a, const int *lda,
	int *ipiv, int *info);

// Called by LUSolver::solve with the pivots produced by dgetrf_.
void dgetrs_(
	const char *trans, const int *n, const int *nrhs,
	const double *a, const int *lda, const int *ipiv,
	double *b, const int *ldb, int *info);

// Singular value decomposition, used by null_space_projection.
void dgesvd_(
	const char *jobu, const char *jobvt,
	const int *m, const int *n, double *a, const int *lda,
	double *s, double *u, const int *ldu,
	double *vt, const int *ldvt,
	double *work, const int *lwork, int *info);

} // end extern "C"


struct PardisoSolver
{
	typedef std::vector<int> tVI;
	typedef std::vector<double> tVD;

	typedef ublas::compressed_matrix<double> SparseSymmetricMatrix;
	typedef ublas::vector<double> DenseVector;

    int n, mtype, nrhs, maxfct, mnum, msglvl;
	const int *ia;
	const int *ja;
	const double *a;
	tVI via;
	tVI vja;
	tVD va;
		
    int iparm[64];
    int *pt[64]; 


	PardisoSolver(const SparseSymmetricMatrix &S, MatrixType type = SYM_POS)
	{
		assert(S.size1() == S.size2());

		// Initialize diagonal as the pardiso solver expects this.
//		for (int i=0; i<n; i++) S(i,i) = S(i,i);
		int cnt = 1;

		for (SparseMatrix::const_iterator1 it=S.begin1(); it!=S.end1(); ++it)
		{
			via.push_back(cnt);
			for (SparseMatrix::const_iterator2 jt=it.begin(); jt!=it.end(); ++jt)
			{
				if (jt.index2() < jt.index1()) continue;
				++cnt;
				va.push_back(*jt);
				vja.push_back(jt.index2()+1);
			}
		}
		
		via.push_back(cnt);

		n = S.size1();
		// symmetric positive definite matrix: 2
		// indefinite matrix: -2
		mtype = type; 

		a = &va[0];
		ia = &via[0];
		ja = &vja[0];

		nrhs = 1; // number of right hand sides
		maxfct = 1; // Maximum number of numerical factorizations.
		mnum = 1; // Which factorization to use.
		msglvl = 0; // Print statistical information.


//		pardisoinit_(pt, &mtype, iparm); 

        for (int i=0; i<64; i++) 
        {
            iparm[i] = 0;
            pt[i] = NULL;
        }
        iparm[0] = 1; /* No solver default */
        iparm[1] = 2; /* Fill-in reordering from METIS */
        iparm[2] = 1; /* Numbers of processors, value of OMP_NUM_THREADS */
        iparm[3] = 0; /* No iterative-direct algorithm */
        iparm[4] = 0; /* No user fill-in reducing permutation */
        iparm[5] = 0; /* Write solution into x */
        iparm[6] = 0; /* Not in use */
        iparm[7] = 2; /* Max numbers of iterative refinement steps */
        iparm[8] = 0; /* Not in use */
        iparm[9] = 13; /* Perturb the pivot elements with 1E-13 */
        iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */
        iparm[11] = 0; /* Not in use */
        iparm[12] = 0; /* Not in use */
        iparm[13] = 0; /* Output: Number of perturbed pivots */
        iparm[14] = 0; /* Not in use */
        iparm[15] = 0; /* Not in use */
        iparm[16] = 0; /* Not in use */
        iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
        iparm[18] = -1; /* Output: Mflops for LU factorization */
        iparm[19] = 0; /* Output: Numbers of CG Iterations */

		// Numbers of processors, value of OMP_NUM_THREADS 
		int num_procs;
		char *var = std::getenv("OMP_NUM_THREADS");
		if(var != NULL) std::sscanf(var, "%d", &num_procs);
		else
		{
			LOG(ERROR) << "Set environment OMP_NUM_THREADS to 1" << endl;
			std::exit(1);
		}
		iparm[2]  = num_procs;

		if (type == SYM_INDEF)
		{
			// Some tweaking of the parameters for the PARDISO solver.
		    // #TODO:	
/*
            iparm[9] = 10; 
			iparm[10] = 1;
			iparm[12] = 1;
			iparm[20] = 1;
*/            
		}

		int error = 0;
		int phase = 11; // Symbolic factorization.
		double double_dummy; 
		int int_dummy; 
		pardiso_(pt, &maxfct, &mnum, &mtype, &phase,
			&n, a, ia, ja, &int_dummy, &nrhs,
			iparm, &msglvl, &double_dummy, &double_dummy, &error);
		
		if (error != 0) {
			LOG(ERROR) << "ERROR during symbolic factorization: " << error << endl;
			std::exit(2);
		}

        LOG(DEBUG) << "Number of nonzeros in factors  = " << iparm[17] << endl;
        LOG(DEBUG) << "Number of factorization MFLOPS = " << iparm[18] << endl;
	   
		phase = 22; // Numerical factorization.
		pardiso_(pt, &maxfct, &mnum, &mtype, &phase,
			&n, a, ia, ja, &int_dummy, &nrhs,
			iparm, &msglvl, &double_dummy, &double_dummy, &error);
	   
		if (error != 0)
		{
			LOG(ERROR) << "ERROR during numerical factorization: " << error << endl;
			std::exit(3);
		}
/*
		for (int i=0; i<64; i++)
			std::cout << "iparm(" << i+1 << ") = " << iparm[i] << std::endl;
*/			
	}

	void solve(DenseVector &x, const DenseVector &b)
	{
		assert(b.size() == (size_t) n);
		x.resize(n, false);

		int phase = 33;
		int error = 0;
		int int_dummy;

		iparm[7] = -10; // Max numbers of iterative refinement steps.

		pardiso_(pt, &maxfct, &mnum, &mtype, &phase,
			&n, a, ia, ja, &int_dummy, &nrhs,
			iparm, &msglvl, &b.data()[0], &x.data()[0], &error);
	   
		if (error != 0)
		{
			LOG(ERROR) << "ERROR during solution: " << error << endl;
			std::exit(4);
		}
		LOG(DEBUG) << "Number of iterative refinement steps = " << iparm[6] << endl;
	}


	~PardisoSolver()
	{
		int phase = -1; // Release internal memory.
		int error = 0;
		double double_dummy; 
		int int_dummy;   
		
		pardiso_(pt, &maxfct, &mnum, &mtype, &phase,
			&n, a, ia, ja, &int_dummy, &nrhs,
			iparm, &msglvl, &double_dummy, &double_dummy, &error);
	}
};

// In the MKL configuration the PARDISO based solver is the default solver
// for symmetric systems.
typedef PardisoSolver DefaultSymmetricSolver;


int null_space_projection(const SparseRowMatrix &A, DenseMatrix &P)
{
    SCOPE("null space projection using svd");
	typedef std::vector<double> tVD;

	typedef ublas::matrix<double, ublas::column_major> FortranMat;
	typedef ublas::matrix<double, ublas::row_major> CMat;
	
	// ublas bug: FortranMat = SparseRowMatrix triggers failed assertion.
	CMat tmp(A);
	FortranMat M(tmp);

	const char jobu = 'N';
	const char jobvt = 'A';
	const int m = M.size1();
	const int n = M.size2();

	double dummy;

	tVD sv(std::max(1, std::min(m, n)));

    LOG(DEBUG) << "Initializing singular value decomposition." << endl;
	DenseMatrix Vt(n,n);
	int lwork = -1;
	int info;
	dgesvd_(&jobu, &jobvt, &m, &n, &M.data()[0], &m, &sv[0], &dummy, &m, &Vt.data()[0], &n, &dummy, &lwork, &info);
	assert(info == 0);
	if (info != 0) 
	{
		LOG(ERROR) << "Problem initializing singular value decomposition." << endl;	
		return 1;
	}

	lwork = int(dummy);
	tVD work(lwork);
	
	dgesvd_(&jobu, &jobvt, &m, &n, &M.data()[0], &m, &sv[0], &dummy, &m, &Vt.data()[0], &n, &work[0], &lwork, &info);
	assert(info == 0);
	if (info != 0)
	{
		LOG(ERROR) << "Problem with singular value calculation." << endl;
		return 2;
	}

	using ublas::noalias;
	using ublas::outer_prod;
	using ublas::column;

	P.clear();
	int i = 0;
	for (tVD::const_iterator it=sv.begin(); it!=sv.end(); ++it,i++)
	{
		if (std::fabs(*it) < 1e-12)
		noalias(P) += outer_prod(column(Vt,i), column(Vt,i));
	}
	for (; i<n; i++)
		noalias(P) += outer_prod(column(Vt,i), column(Vt,i));

	assert(ublas::norm_inf(ublas::prod(A, P)) < 1e-12);
	return 0;
}

struct LUSolver
{
	typedef std::vector<int> tVI;
	int n;
	DenseMatrix &A;
	tVI ipiv;


	LUSolver(DenseMatrix &A) : A(A)
	{
		assert(A.size1() == A.size2());
		n = A.size1();
		ipiv.resize(n);

		int info;
		dgetrf_(&n, &n, &A.data()[0], &n, &ipiv[0], &info);

		LOG(DEBUG) << "info dgetrf: " << info << endl;
	}

	void solve(DenseVector &x, const DenseVector &b)
	{
		assert(b.size() == size_t(n));
		x = b;
		char trans = 'N';
		int nrhs = 1;
		int info;
		dgetrs_(&trans, &n, &nrhs, &A.data()[0], &n, &ipiv[0], &x.data()[0], &n, &info);

		LOG(DEBUG) << "info dgetrs: " << info << endl;
	}
};


#elif UMFPACK

// Sparse direct solver built on the umf:: bindings.
//
// The constructor copies the system matrix into the column-major layout
// declared below and computes the symbolic and numeric factorization once;
// solve() reuses the numeric factorization for every right hand side.
struct UMFPackSolver
{
	typedef ublas::compressed_matrix<
		double,
		ublas::column_major,
		0,
		ublas::unbounded_array<int>,
		ublas::unbounded_array<double>
	> umfpack_matrix;

	umf::symbolic_type<double> Symbolic;
	umf::numeric_type<double> Numeric;
	umfpack_matrix A;

	// S  square system matrix; a private copy is stored in A
	// The matrix type argument is accepted but not used.
	UMFPackSolver(const SparseMatrix &S, const MatrixType &)
		: A(S)
	{
		assert(A.size1() == A.size2());

		umf::symbolic(A, Symbolic);
		umf::numeric(A, Symbolic, Numeric);
	}

	// Solves A*x = b.
	//
	// x  output; resized to the dimension of A, old contents are discarded
	// b  right hand side, has to match the dimension of A
	void solve(DenseVector &x, const DenseVector &b)
	{
		const size_t dim = A.size1();

		assert(b.size() == dim);
		x.resize(dim, false);
		umf::solve(A, x, b, Numeric);
	}
};

// In the UMFPACK configuration the UMFPACK based solver is the default
// solver for symmetric systems.
typedef UMFPackSolver DefaultSymmetricSolver;

#else
#warning "We strongly recommend using MKL or UMFPACK for solving the sparse linear systems."
#warning "Currently you are compiling this program so that it uses a naive CG implementation."

// Fallback when neither MKL nor UMFPACK is configured: the conjugate
// gradient implementation from this file is the default symmetric solver.
typedef ConjugateGradientSolver DefaultSymmetricSolver;

#endif



// Computes a projection onto the null space of M without external libraries.
//
// Steps, all performed on a private copy A of M:
//   1. row elimination with row swaps, recording columns without a usable
//      pivot as free variables,
//   2. marking the columns right of the first nonzero entry of the last row
//      as free variables as well,
//   3. back substitution to obtain one kernel vector per free variable,
//   4. orthonormalizing these vectors and summing their outer products in P.
//
// M  matrix whose null space is wanted; not modified
// P  output; it is cleared but NOT resized here, so the caller presumably
//    has to pass a matrix of size size2(M) x size2(M) -- TODO confirm
//
// Always returns 0. The result is only checked by the final assertion.
//
// NOTE(review): orthonormalize() asserts a non-empty input; if no free
// variable is found the basis is empty -- confirm that this cannot happen
// for the matrices passed in.
int manual_null_space_projection(const SparseRowMatrix &M, DenseMatrix &P)
{
    SCOPE("manual null space projection");

//	using std::cout;
//	using std::endl;

	// Work on a copy; the elimination below modifies the rows in place.
	SparseRowMatrix A = M;

/*
   	// ublas bug
    {
        SparseVector tmp = ublas::column(A, 0);
        ublas::column(A, 0) = ublas::column(A, 1);
        ublas::column(A, 1) = tmp;
    }
*/

	using utility::flabs;
	using utility::is_zero;
	using ublas::matrix_row;
	using ublas::row;
	using ublas::project;
	using ublas::range;


	// NOTE(review): the message says "total pivoting", but the search below
	// only looks at rows i >= c of column c and only rows are swapped.
    LOG(DEBUG) << "Gaussian elimination with total pivoting to get free variables." << endl;
	typedef matrix_row<SparseRowMatrix> SparseRow;
	typedef std::set<int> tSI;
	// Indices of the columns treated as free variables.
	tSI vfree;

	// NOTE(review): c runs over the rows but is also used as column index in
	// A(i,c); this looks out of range if A has more rows than columns --
	// confirm that callers never pass such a matrix.
	for (size_t c=0; c<A.size1(); c++)
	{
		// Look for the entry of largest magnitude in column c, rows c and below.
		bool nonzero = false;
		size_t ibest = c;
		for (size_t i=c; i<A.size1(); i++)
		{
			if (!is_zero(A(i,c)))
			{
				nonzero = true;
				if (flabs(A(i,c)) > flabs(A(ibest,c))) ibest = i;
			}
		}

		if (nonzero)
		{
			// Swap rows if necessary.
			if (ibest != c)
			{
				SparseVector tmp = row(A, ibest);
				row(A, ibest) = row(A, c);
				row(A, c) = tmp;
			}

			// Eliminate column c from all rows below the pivot row by adding
			// the matching multiple of the pivot row.
			SparseRow crow(row(A, c));
			for (size_t i=c+1; i<A.size1(); i++)
			{
				if (is_zero(A(i,c))) continue;

				const Real fac = -A(i,c)/A(c,c);
				SparseRow irow(row(A, i));
				for (SparseRow::const_iterator jt=crow.begin(); jt!=crow.end(); ++jt)
					irow(jt.index()) += fac * (*jt);
			}
		}
		// If all following elements in this column are zero we have another free variable.
		else vfree.insert(c);
	}

	// Determine the remaining free variables.
    LOG(DEBUG) << "Determine the remaining free variables." << endl;
	if (A.size1() > 0)
	{
		// Search for the first nonzero element in the last row.
		SparseRow last(row(A, A.size1()-1));
		// Default used when the last row holds no nonzero entry.
		size_t j = A.size1()-1;
		for (SparseRow::const_iterator jt=last.begin(); jt!=last.end(); ++jt)
			if (!is_zero(*jt))
			{
				j = jt.index();
				break;
			}

		// Every column to the right of that entry is a free variable.
		assert(j >= A.size1()-1);
		for (size_t k=j+1; k<A.size2(); k++)
			vfree.insert(k);
	}
	else
	{
		// A matrix without rows imposes no constraint: every column is free.
		for (size_t k=0; k<A.size2(); k++)
			vfree.insert(k);
	}

/*
	cout << "Free Variables: " << endl;
	for (tSI::const_iterator it=vfree.begin(); it!=vfree.end(); ++it)
		cout << *it << " ";
	cout << endl;
*/	

	// Determine a basis of the kernel using the free variables.
    LOG(DEBUG) << "Determine a basis of the kernel using the free variables." << endl;
	tVSV basis;
	// One basis vector per free variable, visited from the largest index down.
	for (tSI::const_reverse_iterator it=vfree.rbegin(); it!=vfree.rend(); ++it)
	{
		SparseVector v(A.size2());
		// NOTE(review): 'known' is never used below.
		tSI known;
		// The current free variable is set to one; the other free variables
		// are never assigned in this vector.
		v(*it) = 1.;

		// Backsubstitution.
		// Rows are processed from the last to the first (i-1 is the row index).
		for (size_t i=A.size1(); i>0; --i)
		{
			SparseRow irow(row(A, i-1));
			Real value = 0.;
			int target = -1;

			// The first nonzero entry in a non-free column becomes the
			// unknown of this row; every other nonzero entry is moved to the
			// right hand side using the values already stored in v.
			for (SparseRow::const_iterator jt=irow.begin(); jt!=irow.end(); ++jt)
			{
				if (is_zero(*jt)) continue;

				if (vfree.find(jt.index()) == vfree.end() && target == -1)
					target = jt.index();
				else value -= *jt * v(jt.index());
			}

			// Rows without such an entry do not determine any variable.
			if (target != -1)
				v(target) = value/irow(target);
		}
		basis.push_back(v);
	}
/*
	cout << "BASIS: " << endl;
	for (tVSV::const_iterator it=basis.begin(); it!=basis.end(); ++it)
	{
		cout << *it << endl;
	}
*/	

	using ublas::noalias;
	using ublas::column;
	using ublas::outer_prod;

	DenseMatrix ONB;
	orthonormalize(basis, ONB);

	// P = sum over the orthonormal basis vectors q of q * q^T.
	P.clear();
	for (size_t i=0; i<ONB.size2(); i++)
		noalias(P) += outer_prod(column(ONB,i), column(ONB,i));

	// Note that the check uses the eliminated copy A, not the input M.
	assert(ublas::norm_inf(ublas::prod(A, P)) < 1e-12);
	return 0;
}
#ifndef MKL
#warning "You really should prefer the null space projection method based on SVD."
// Without MKL no SVD routine is available, so the null space projection is
// delegated to the elimination based implementation. Arguments and return
// value are passed through unchanged.
int null_space_projection(const SparseRowMatrix &M, DenseMatrix &P)
{
	const int status = manual_null_space_projection(M, P);
	return status;
}

#endif // not MKL


} // end namespace solver

#endif // __SOLVER_HXX__
