/*
    This file is part of SGDAE, a software to numerically solve differential
    algebraic equations using a steepest descent method based on Sobolev 
    gradients.
    You are welcome to contact the authors via e-mail:
        <manfred-sauter [at] gmx [dot] de>
        <robin.nittka [at] gmx [dot] de>

    Copyright 2005-2008 Manfred Sauter, Robin Nittka.

    SGDAE is free software distributed under the terms of the revised BSD 
    license as illustrated on <http://creativecommons.org/licenses/BSD/>.
    For details consult the accompanying LICENSE.txt file.

    $Id: $
*/


#ifndef __DESCENT_HXX__
#define __DESCENT_HXX__
namespace steepest_descent {

// Selects the matrix S that DiscretizationBase::init_descent() assembles and
// that is used to turn the Euclidean gradient into the descent direction.
// The numeric values double as indices into SOBOLEV_NORM_NAME, so the order
// of both must be kept in sync.
enum SobolevNorm
{
	NORM_H1        = 0,              // S = H1 + scale*I
	NORM_EUCLID    = 1,              // S = scale*I; descent() skips the linear solve
	NORM_EUCLIDEAN = NORM_EUCLID,    // alias
	NORM_GRAPH     = 2,              // S = QtQ + scale*I
	NORM_H1GRAPH   = 3,              // S = QtQ + scale*H1
	NORM_WEIGHTED  = 4,              // S = W1 + scale*I
	NORM_WEIGHTED1 = NORM_WEIGHTED,  // alias
	NORM_WEIGHTED2 = 5,              // S = W1 + W2 + scale*I
	LEAST_SQUARES  = 6               // S = QtQ without regularisation (symmetric indefinite)
};

// Two-letter tags used in the per-step progress output. The array is indexed
// by the numeric value of a SobolevNorm enumerator, hence one entry per
// distinct value, in ascending order.
const std::string SOBOLEV_NORM_NAME[] =
{
	std::string("H1"),  // 0: NORM_H1
	std::string("EU"),  // 1: NORM_EUCLID / NORM_EUCLIDEAN
	std::string("GR"),  // 2: NORM_GRAPH
	std::string("G1"),  // 3: NORM_H1GRAPH
	std::string("W1"),  // 4: NORM_WEIGHTED / NORM_WEIGHTED1
	std::string("W2"),  // 5: NORM_WEIGHTED2
	std::string("LS")   // 6: LEAST_SQUARES
};


using ublas::prod;
using ublas::trans;
using ublas::norm_2;
using ublas::noalias;
using ublas::subrange;
using ublas::slice;
using ublas::range;


using std::string;
using std::setw;

using namespace logging;



// Finite-difference discretization shared by the linear and the non-linear
// problem classes.
//
// The time interval is split into N sub-intervals of width delta = problem.T/N,
// giving N+1 grid nodes. Vectors of unknowns are stored node by node: entry
// k*problem.n + i is component i at node k.
//
// Derived classes provide eval_psi() and init_caching(); the latter fills the
// cache_* vectors with one matrix per grid node, from which init_Q() assembles
// the sparse operators.
template<typename P> struct DiscretizationBase
{
	typedef typename P::problem_type problem_type;

	const int N;        // number of grid intervals (N+1 nodes); must be >= 2
	const Real delta;   // grid spacing problem.T/N

	const P &problem;
	DenseVector u;      // current estimate (sized by the derived solver class)

	SparseMatrix D0, D0tD0;   // identity on the grid and its (trivial) normal matrix
	SparseMatrix D1, D1tD1;   // second order difference quotient and D1^T D1
	SparseMatrix H1;          // D1tD1 replicated for each of the problem.n components

	SparseMatrix S;             // matrix handed to the linear solver, see init_descent()
	solver::MatrixType type_S;  // structural property of S reported to the solver

	SparseMatrix W1, W2;   // block sums of D1^T AtA D1 and D0^T BtB D0
	SparseMatrix Q;        // block (i,j) = D1(i,j)*A_i + D0(i,j)*B_i
	SparseMatrix QtQ;      // Q^T Q, assembled block-wise

	DenseVector g;         // problem.g sampled at all grid nodes

	// Builds the problem independent matrices and samples g.
	// Note: init_D() and init_g() are virtual but are called from the
	// constructor, so always the versions of this class are executed.
	DiscretizationBase(const int N, const P &problem) : N(N), delta(problem.T/N), problem(problem)
	{
		SCOPE("constructor DiscretizationBase");
		LOG(DEBUG) << "Global initialization" << endl;

		type_S = solver::UNINITIALIZED;

		init_D();
		init_g();
	}
	virtual ~DiscretizationBase() {}

private:
	// Assembles D0, D0tD0, D1, D1tD1 and H1.
	virtual void init_D()
	{
		SCOPE("init_D");

		// The one-sided boundary stencils below access the columns 2 and N-2.
		assert(N >= 2);

		LOG(DEBUG) << "Initialize matrices D0, D1, D0tD0" << endl;
		D0 = IdentityMatrix(N+1);
		D0tD0 = IdentityMatrix(N+1);

		// Central differences in the interior, one-sided three point
		// stencils in the first and the last row.
		D1.resize(N+1, N+1, false);
		D1.clear();
		D1(0,0) = -3.; D1(0,1) = 4.; D1(0,2) = -1.;
		for (int k=1; k<N; k++)
		{
			D1(k,k-1) = -1.;
			D1(k,k+1) = 1.;
		}
		D1(N,N) = 3.; D1(N,N-1) = -4.; D1(N,N-2) = 1.;
		D1 *= 1./(2.*delta);


		LOG(DEBUG) << "Initialize matrix D1tD1" << endl;
		// the following is a faster version than the direct matrix multiplication
		// D1tD1 = prod(trans(D1),D1);
		// Only entries with |i-j| <= 2 are visited, and only rows k of D1
		// within distance 2 of both i and j contribute.
		D1tD1.resize(N+1, N+1, false);
		D1tD1.clear();
		for (int i=0; i<=N; i++)
			for (int j=i-2; j<=i+2; j++)
			{
				if (j>N || j<0) continue;

				for (int k=i-2; k<=i+2; k++)
				{
					if (k>N || k<0) continue;
					if (abs(i-k)>2 || abs(j-k)>2) continue;
					D1tD1(i,j) += D1(k,i)*D1(k,j);
				}
			}


		LOG(DEBUG) << "Initialize matrix H1" << endl;
		// Entry (j,k) of D1tD1 is copied to position (j*n+i, k*n+i) for every
		// component i, i.e. the components are not coupled.
		H1.resize((N+1)*problem.n,(N+1)*problem.n, false);
		H1.clear();
		for (int j=0; j<=N; j++)
			for (int i=0; i<problem.n; i++)
			{
				for (int k=j-2; k<=j+2; k++)
				{
					if (k>N || k<0) continue;
					H1(j*problem.n+i, k*problem.n+i) = D1tD1(j,k);
				}
				// H1(j*problem.n+i, j*problem.n+i) += 1.; // Then H1 alone would be the matrix connected with the H1 inner product.
			}
	}

	// Assembles Q, W1, W2 and QtQ from the cached per-node matrices.
	// Requires that init_caching() has filled cache_A, cache_B, cache_AtA,
	// cache_BtB, cache_AtB and cache_BtA with N+1 entries each.
	// All four matrices are accumulated with += and are therefore emptied
	// first, in the same way init_D() does it.
	virtual void init_Q()
	{
		SCOPE("init_Q");

		LOG(DEBUG) << "Initialize matrix Q" << endl;
		Q.resize(problem.m*(N+1), problem.n*(N+1), false);
		Q.clear();
		for (int i=0; i<=N; i++)
			for (int j=i-2; j<=i+2; j++)
			{
				if (j>N || j<0) continue;

				noalias(project(Q, range(i*problem.m,(i+1)*problem.m), range(j*problem.n,(j+1)*problem.n))) 
					+= D1(i,j)*cache_A[i] + D0(i,j)*cache_B[i];
			}

		LOG(DEBUG) << "Initialize matrix W1" << endl;
		// block (i,j) = sum_k D1(k,i)*D1(k,j) * A_k^T A_k
		W1.resize(problem.n*(N+1), problem.n*(N+1), false);
		W1.clear();
		for (int i=0; i<=N; i++)
			for (int j=i-2; j<=i+2; j++)
			{
				if (j>N || j<0) continue;

				for (int k=i-2; k<=i+2; k++)
				{
					if (k>N || k<0) continue;
					if (abs(i-k)>2 || abs(j-k)>2) continue;

					noalias(project(W1, range(i*problem.n,(i+1)*problem.n), range(j*problem.n,(j+1)*problem.n))) 
						+= D1(k,i)*D1(k,j) * cache_AtA[k];
				}
			}

		LOG(DEBUG) << "Initialize matrix W2" << endl;
		// block (i,j) = sum_k D0(k,i)*D0(k,j) * B_k^T B_k
		W2.resize(problem.n*(N+1), problem.n*(N+1), false);
		W2.clear();
		for (int i=0; i<=N; i++)
			for (int j=i-2; j<=i+2; j++)
			{
				if (j>N || j<0) continue;

				for (int k=i-2; k<=i+2; k++)
				{
					if (k>N || k<0) continue;
					if (abs(i-k)>2 || abs(j-k)>2) continue;

					noalias(project(W2, range(i*problem.n,(i+1)*problem.n), range(j*problem.n,(j+1)*problem.n))) 
						+= D0(k,i)*D0(k,j)*cache_BtB[k];
				}
			}

		LOG(DEBUG) << "init QtQ" << endl;
		// First only the mixed terms B^T A and A^T B are collected; adding
		// W1 and W2 afterwards completes Q^T Q.
		QtQ.resize(problem.n*(N+1), problem.n*(N+1), false);
		QtQ.clear();

		for (int i=0; i<=N; i++)
			for (int j=i-2; j<=i+2; j++)
			{
				if (j>N || j<0) continue;

				for (int k=i-2; k<=i+2; k++)
				{
					if (k>N || k<0) continue;
					if (abs(i-k)>2 || abs(j-k)>2) continue;

					noalias(project(QtQ, range(i*problem.n,(i+1)*problem.n), range(j*problem.n,(j+1)*problem.n))) 
						+= D0(k,i) * D1(k,j) * cache_BtA[k] + D1(k,i) * D0(k,j) * cache_AtB[k];
				}
			}
		QtQ = W1 + W2 + QtQ;
	}

	// Samples the right hand side problem.g at every grid node.
	virtual void init_g()
	{
		g.resize(problem.m*(N+1), false);

		for (int i=0; i<=N; i++)
			noalias(subrange(g, i*problem.m, (i+1)*problem.m)) = problem.g(i*delta);
	}

public:
	// Value of the functional that is minimised, evaluated at the given vector.
	virtual Real eval_psi(const DenseVector &) = 0;

	// Hook that is called after every descent step; does nothing by default.
	virtual void update_descent(SobolevNorm, const DenseVector &, const Real &scale = 1.) {} 


	// Refreshes the caches and Q at the point u and assembles S and type_S
	// for the requested norm:
	//   NORM_H1        S = H1 + scale*I
	//   NORM_EUCLID    S = scale*I
	//   NORM_GRAPH     S = QtQ + scale*I
	//   NORM_H1GRAPH   S = QtQ + scale*H1
	//   NORM_WEIGHTED1 S = W1 + scale*I
	//   NORM_WEIGHTED2 S = W1 + W2 + scale*I
	//   LEAST_SQUARES  S = QtQ (no regularisation, type_S = SYM_INDEF)
	// In all cases but LEAST_SQUARES type_S is SYM_POS.
	// Throws a const char* for any other value of style.
	virtual void init_descent(SobolevNorm style, const DenseVector &u, const Real &scale = 1.)
	{
		SCOPE("DiscretizationBase init_descent");

		LOG(DEBUG) << "Initialize caching and matrix Q" << endl;
		init_caching(u);
		init_Q();

		LOG(DEBUG) << "Initialize S" << endl;

		type_S = solver::SYM_POS;
		S.resize(problem.n*(N+1), problem.n*(N+1), false);

		switch (style)
		{
		case NORM_H1:
			S = H1;
			break;

		case NORM_H1GRAPH:
		case NORM_GRAPH:
			S = QtQ;
			break;

		case LEAST_SQUARES:
			// The normal equations are used as they are.
			S = QtQ;
			type_S = solver::SYM_INDEF;
			return;

		case NORM_EUCLID:
			break;

		case NORM_WEIGHTED1:
			S = W1;
			break;

		case NORM_WEIGHTED2:
			noalias(S) = W1 + W2;
			break;

		default:
			throw "Method not yet implemented.";
		}

		// TODO: shouldn't one add the identity as well
		if (style == NORM_H1GRAPH)
			S = scale*H1 + S;
		else
			S = scale*IdentityMatrix(problem.n*(N+1)) + S;
	}

private:
	// Fills the cache_* vectors for the point given as argument.
	virtual void init_caching(const DenseVector &) = 0;

protected:
	// One matrix per grid node, filled by init_caching().
	std::vector<DenseMatrix> cache_A;
	std::vector<DenseMatrix> cache_B;
	std::vector<DenseMatrix> cache_AtA;
	std::vector<DenseMatrix> cache_BtB;
	std::vector<DenseMatrix> cache_AtB;
	std::vector<DenseMatrix> cache_BtA;

	// Step width along -dir that minimises the parabola through
	// psi(u), psi(u - dir) and psi(u - 2*dir).
	// Throws a const char* if the parabola has (numerically) no curvature.
	Real compute_step_width_quadratic(const DenseVector &u, const DenseVector &dir)
	{
		const Real p_0 = eval_psi(u);
		const Real p_1 = eval_psi(u - dir);
		const Real p_2 = eval_psi(u - 2.*dir);

		// psi(u - s*dir) is modelled as a*s^2 + b*s + p_0
		const Real a = (p_2 + p_0  - 2.0 * p_1)/2.0;
		const Real b = p_1 - p_0 - a; 

		// TODO:
		// This test has to come before the assertions: a vanishing curvature
		// is an expected situation that is reported by an exception, and it
		// must not abort a debug build.
		if (std::fabs(a) < PREC::MACHINE_EPS/100000./N)	
		{
			LOG(WARNING) << "Quadratic step width calculation failed: a = " << a << endl;
			throw "Minimum seems to be reached to machine precision as no direction of descent was found.";
		}
		assert(a > 0.);
		assert(b <= 0.);

		const Real res = -b/(2.0*a);
		return res;
	}

	// Step width along -dir obtained without assuming a quadratic functional:
	// the step is multiplied by STEP_MUL as long as psi decreases, then a
	// ternary search with MAX_ITER iterations is run on [0, last step].
	Real compute_step_width_local_convex(const DenseVector &u, const DenseVector &dir)
	{
		// TODO: dynamic control of minimal/maximal step size and the calculation details
		const Real MIN_STEP = 1e-10;
		const Real MAX_STEP = 1e12;
		const Real STEP_MUL = 8.;
		const int MAX_ITER = 80;

		// Bracketing
		Real step = MIN_STEP;
		Real cur = eval_psi(u);
		Real next = eval_psi(u - step*dir);
		while (step < MAX_STEP && next < cur)
		{
			step *= STEP_MUL;
			cur = next;
			next = eval_psi(u - step*dir);
		}

		// Ternary search. The values at the interval ends are not needed,
		// only the two interior points are compared.
		Real a = 0.0;
		Real b = step;
		for (int fine = 0; fine < MAX_ITER; fine++)
		{
			const Real mid1 = a + (b-a)/3.0;
			const Real mid2 = b - (b-a)/3.0;
			const Real val1 = eval_psi(u - mid1*dir);
			const Real val2 = eval_psi(u - mid2*dir);
			if (val1 < val2)
				b = mid2;
			else
				a = mid1;
		}
		const Real res = (a+b)/2.0;
		return res;
	}

};


// Primary template, selected for problem types without a dedicated
// specialization. It only forwards to DiscretizationBase and implements none
// of the functions the base class leaves pure virtual.
template<typename P, typename TYPE>
struct Discretization : public DiscretizationBase<P>
{
	Discretization(const int intervals, const P &dae)
		: DiscretizationBase<P>(intervals, dae)
	{
	}
};

// Discretization for linear problems: problem.A(t) and problem.B(t) depend on
// the time only, and the functional is psi(u) = |Q u - g|^2 * T/(N+1) / 2.
template<typename P>
struct Discretization<P, problems::problem_linear> : public DiscretizationBase<P>
{
	using DiscretizationBase<P>::problem;
	using DiscretizationBase<P>::N;
	using DiscretizationBase<P>::cache_A;
	using DiscretizationBase<P>::cache_B;
	using DiscretizationBase<P>::cache_AtA;
	using DiscretizationBase<P>::cache_BtB;
	using DiscretizationBase<P>::cache_AtB;
	using DiscretizationBase<P>::cache_BtA;
	using DiscretizationBase<P>::delta;
	using DiscretizationBase<P>::Q;
	using DiscretizationBase<P>::g;

	Discretization(const int N, const P &problem) : DiscretizationBase<P>(N, problem) {}


private:
	// Evaluates A and B at every grid node and stores them together with the
	// products A^T A, B^T B, A^T B and B^T A. The argument is not used.
	//
	// This function runs on every init_descent() call. The caches are emptied
	// first; otherwise each call would append another N+1 matrices to each of
	// the six vectors.
	void init_caching(const DenseVector &)
	{
		SCOPE("init_caching linear");

		LOG(DEBUG) << "Caching A, B, AtA, AtB, BtB, BtA matrices..." << endl;

		cache_A.clear();
		cache_B.clear();
		cache_AtA.clear();
		cache_BtB.clear();
		cache_AtB.clear();
		cache_BtA.clear();

		for (int i=0; i<=N; i++)
		{
			cache_A.push_back(problem.A(delta * i));
			cache_B.push_back(problem.B(delta * i));
			cache_AtA.push_back(prod(trans(cache_A[i]), cache_A[i]));
			cache_BtB.push_back(prod(trans(cache_B[i]), cache_B[i]));
			cache_AtB.push_back(prod(trans(cache_A[i]), cache_B[i]));
			cache_BtA.push_back(prod(trans(cache_B[i]), cache_A[i]));
		}
	}


public:
	// psi(u) = |Q u - g|^2 * T/(N+1) / 2
	Real eval_psi(const DenseVector &u)
	{
		DenseVector y = -g;
		axpy_prod(Q, u, y, false);   // y += Q u
		return inner_prod(y,y)*(problem.T/(N+1))/2.;
	}

	// eg = (Q u - g)^T Q, i.e. the gradient of psi up to the factor T/(N+1).
	void euclidean_gradient(const DenseVector &u, DenseVector &eg) const
	{
		DenseVector y = -g;
		// The old content of eg is overwritten below, so nothing has to be preserved.
		eg.resize(u.size(), false);
		axpy_prod(Q, u, y, false);
		axpy_prod(y, Q, eg, true); 
	}

	// Computes both the ternary search and the quadratic step width and
	// returns the one with the smaller value of psi (the search result on a tie).
	// A const char* thrown by the quadratic fit is passed on to the caller.
	Real compute_step_width(const DenseVector &u, const DenseVector &dir)
	{
		const Real sw1 = this->compute_step_width_local_convex(u, dir);
		const Real sw2 = this->compute_step_width_quadratic(u, dir);
		const Real p_1 = eval_psi(u - sw1*dir);
		const Real p_2 = eval_psi(u - sw2*dir);

		return (p_1 > p_2) ? sw2 : sw1;
	}
};

// Discretization for non-linear problems: problem.A, problem.B and problem.F
// are evaluated at (t, u(t), u'(t)), so the caches and all matrices derived
// from them are rebuilt after every descent step.
template<typename P>
struct Discretization<P, problems::problem_nonlinear> : public DiscretizationBase<P>
{
	using DiscretizationBase<P>::problem;
	using DiscretizationBase<P>::N;
	using DiscretizationBase<P>::cache_A;
	using DiscretizationBase<P>::cache_B;
	using DiscretizationBase<P>::cache_AtA;
	using DiscretizationBase<P>::cache_BtB;
	using DiscretizationBase<P>::cache_AtB;
	using DiscretizationBase<P>::cache_BtA;
	using DiscretizationBase<P>::delta;
	using DiscretizationBase<P>::D1;
	using DiscretizationBase<P>::Q;
	using DiscretizationBase<P>::g;


	Discretization(const int N, const P &problem) : DiscretizationBase<P>(N, problem) {}

private:
	// Applies the difference operator D1 to the time series of every
	// component of u and stores the result, in the layout of u, in Du.
	// Du must already have the size of u.
	void discrete_derivative(const DenseVector &u, DenseVector &Du)
	{
		for (int i=0; i<problem.n; i++) 
			subslice(Du, i, problem.n, N+1) = prod(D1, subslice(u, i, problem.n, N+1));
	}

	// Returns the vector with the node blocks F(t_i, u_i, Du_i) - g(t_i).
	// Exceptions thrown by problem.F are not caught here.
	DenseVector residual(const DenseVector &u)
	{
		DenseVector Du(u.size());
		discrete_derivative(u, Du);

		DenseVector y = -g;
		for (int i=0; i<=N; i++)
		{
			const DenseVector u_i = subrange(u, i*problem.n, (i+1)*problem.n);
			const DenseVector Du_i = subrange(Du, i*problem.n, (i+1)*problem.n);

			noalias(subrange(y, i*problem.m, (i+1)*problem.m)) += problem.F(i*delta, u_i, Du_i);
		}
		return y;
	}

	// Evaluates A and B at every grid node for the point u and caches them
	// together with their products.
	void init_caching(const DenseVector &u)
	{
		SCOPE("init_caching non-linear");

		LOG(DEBUG) << "Caching local A, B, AtA, AtB, BtB, BtA matrices..." << endl;

		cache_A.clear();
		cache_B.clear();
		cache_AtA.clear();
		cache_BtB.clear();
		cache_AtB.clear();
		cache_BtA.clear();

		DenseVector Du(u.size());
		discrete_derivative(u, Du);

		for (int i=0; i<=N; i++)
		{
			const DenseVector u_i = subrange(u, i*problem.n, (i+1)*problem.n);
			const DenseVector Du_i = subrange(Du, i*problem.n, (i+1)*problem.n);

			cache_A.push_back(problem.A(delta * i, u_i, Du_i));
			cache_B.push_back(problem.B(delta * i, u_i, Du_i));
			cache_AtA.push_back(prod(trans(cache_A[i]), cache_A[i]));
			cache_BtB.push_back(prod(trans(cache_B[i]), cache_B[i]));
			cache_AtB.push_back(prod(trans(cache_A[i]), cache_B[i]));
			cache_BtA.push_back(prod(trans(cache_B[i]), cache_A[i]));
		}
	}

public:
	// Re-linearises at the new point u by running init_descent() again.
	virtual void update_descent(SobolevNorm style, const DenseVector &u, const Real &scale = 1.) 
	{
		SCOPE("update_descent non-linear");
		this->init_descent(style, u, scale);
	} 

	// psi(u) = |F(u) - g|^2 * T/(N+1) / 2.
	// Returns HUGE_VAL, after logging a warning, if the value is not a
	// number or if an InvalidInput exception occurs during the evaluation.
	Real eval_psi(const DenseVector &u)
	{
		try
		{
			const DenseVector y = residual(u);

			const Real res = inner_prod(y,y)*(problem.T/(N+1))/2.;
			if (isnan(res)) throw InvalidInput("Result in eval_psi is not a number...");
			return res;
		}
		catch (const InvalidInput &e)
		{
			LOG(WARNING) << "Exception in eval_psi: " << e.what() << endl;
			return HUGE_VAL;
		}
	}

	// eg = (F(u) - g)^T Q with the matrix Q of the last init_descent() call.
	// In contrast to eval_psi() exceptions of problem.F are passed on.
	void euclidean_gradient(const DenseVector &u, DenseVector &eg)
	{
		const DenseVector y = residual(u);

		eg.resize(u.size(), false);
		axpy_prod(y, Q, eg, true); 
	}

	// Only the ternary search is used; no quadratic model is fitted.
	Real compute_step_width(const DenseVector &u, const DenseVector &dir)
	{
		return this->compute_step_width_local_convex(u, dir);
	}
};



// Steepest descent driver. SUPP supplies the problem, the grid size N and the
// treatment of the supplementary conditions through its members finalize(),
// update(), solve() and project().
template<typename SUPP>
struct SteepestDescent : public Discretization<typename SUPP::P, typename SUPP::P::problem_type>
{
	typedef SteepestDescent<SUPP> self_type;
	typedef typename SUPP::P P;
	using Discretization<P, typename P::problem_type>::problem;
	using Discretization<P, typename P::problem_type>::N;
	using Discretization<P, typename P::problem_type>::S;
	using Discretization<P, typename P::problem_type>::type_S;

	using Discretization<P, typename P::problem_type>::eval_psi;
	using Discretization<P, typename P::problem_type>::update_descent;
	using Discretization<P, typename P::problem_type>::init_descent;
	using Discretization<P, typename P::problem_type>::compute_step_width;
	using Discretization<P, typename P::problem_type>::euclidean_gradient;

	using Discretization<P, typename P::problem_type>::u;

	boost::timer timer;

	// Total number of steps performed by all descent() calls on this object.
	int step;

	// Number of consecutive steps with a step width below the threshold used
	// in descent(). Kept per object, so that different solver objects do not
	// influence each other, and kept across descent() calls.
	int small_step_count;

	SUPP &supp;


	// TODO: This is a very primitive smoothing method based on Gauss-Seidel.
	// Applies num sweeps of a weighted three point average, in place, to the
	// time series of every component of u.
	void smooth(int num = 8)
	{
		using ublas::subslice;
		using ublas::noalias;

		for (int i=0; i<problem.n; i++)
		{
			DenseVector f(N+1);
			f = subslice(u, i, problem.n, N+1);

			for (int r=0; r<num; r++)
			{
				f(0) = 1./3.*(2*f(0) + f(1));
				for (int ii=1; ii<N; ii++)
					f(ii) = 0.25*(2*f(ii) + f(ii-1) + f(ii+1));
				f(N) = 1./3.*(2*f(N) + f(N-1));
			}
			subslice(u, i, problem.n, N+1) = f;
		}
	}

	// Sets up the discretization for supp.problem on supp.N intervals, calls
	// supp.finalize() and starts with the zero function as estimate.
	SteepestDescent(SUPP &supp) : Discretization<typename SUPP::P, typename SUPP::P::problem_type>(supp.N, supp.problem), supp(supp)
	{
		SCOPE("Constructor SteepestDescent");
		step = 0;
		small_step_count = 0;
		supp.finalize();

		// Default initial function is the vector of constant zero functions.
		u = ScalarVector(problem.n*(N+1), 0.);
	}

	// Overwrites the problem.n components of the estimate at grid node k.
	void set_current_estimate(int k, const DenseVector &d)
	{
		noalias(subrange(u, k*problem.n, (k+1)*problem.n)) = d;
	}

	// Copies the problem.n components of the estimate at grid node k to d.
	void get_current_estimate(int k, DenseVector &d) const
	{
		d = subrange(u, k*problem.n, (k+1)*problem.n);
	}


	// Performs at most STEPS descent steps with respect to the given norm.
	//   FAC    factor applied to the computed step width
	//   SCALE  regularisation weight passed to init_descent()/update_descent()
	//
	// The loop is left early, after a warning has been logged, when a
	// const char* is thrown inside a step, e.g. because no direction of
	// descent is left. MaximumPrecisionReached (psi is numerically zero) and
	// MinimalStepSize (more than MIN_COUNT consecutive tiny steps) are not
	// caught here and reach the caller.
	void descent(SobolevNorm style, const int STEPS, const Real &FAC = 1., const Real &SCALE = 1.)
	{
		SCOPE("descent");
		LOG(DEBUG) << string(78, '-') << endl;

		const int dim = u.size();

		DenseVector eg(dim);    // Euclidean gradient
		DenseVector sg(dim);    // gradient with respect to the chosen norm
		DenseVector sg0(dim);   // sg projected onto the supplementary conditions
		DenseVector dir(dim);   // normalised direction of descent

		init_descent(style, u, SCALE);
		LOG(INFO) << "psi = " << eval_psi(u) << endl;	

		LOG(DEBUG) << string(78, '-') << endl;
		timer.restart();
		for (int i=0; i<STEPS; i++)
		{
			supp.update(S, type_S);
			euclidean_gradient(u, eg);
			sg = eg;   // also serves as initial guess for the solver

			try
			{
				if (style != NORM_EUCLID) 
					supp.solve(sg, eg);
				supp.project(sg, sg0);

				const Real len = norm_2(sg0);
				// A vanishing (or invalid) gradient cannot be normalised;
				// dividing by its norm would fill u with NaN.
				if (!(len > 0.))
					throw "Gradient vanishes or is not a number, so no direction of descent is available.";

				dir = sg0;
				dir /= len;
				Real sw = compute_step_width(u, dir);
				u -= FAC*sw*dir;

				const Real psi = eval_psi(u);
				++step;	
				std::cout << "Step " << setw(3) << step << " (" << SOBOLEV_NORM_NAME[style] << "): psi = " << psi << ", sw " << sw << ", len " << len << endl;
				LOG(INFO) << "Step " << setw(3) << step << " (" << SOBOLEV_NORM_NAME[style] << "): psi = " << psi << ", sw " << sw << ", len " << len << endl;

				if (std::abs(psi) < PREC::EPS*1e-12) throw MaximumPrecisionReached();

				const Real MIN_STEP = 1e-10;
				const int MIN_COUNT = 5;
				if (std::abs(sw) < MIN_STEP) ++small_step_count;
				else small_step_count = 0;

				if (small_step_count > MIN_COUNT)
					throw MinimalStepSize();
			}
			catch (const char *msg)
			{	
				LOG(WARNING) << "Exception: " << msg << endl;
				break;
			}

			LOG(DEBUG) << "prepare next step" << endl;
			update_descent(style, u, SCALE);
		}
	}
};




template<typename S>
void gnuplot_output(const S &sd, std::ostream &out, const Real &offset = 0.)
{
	using std::endl;

	for (int i=0; i<=sd.N; i++)
	{
        // TODO: we assume a uniform time grid
		const Real t = i*sd.delta + offset;
		out << t;

		DenseVector u = subrange(sd.u, i*sd.problem.n, (i+1)*sd.problem.n);

		for (int j=0; j<sd.problem.n; j++)
			out << " " << u(j);
		out << endl;
	}
}

// Reads an estimate in the format written by gnuplot_output(): for each of
// the sd.N+1 grid nodes one time value, which is ignored, followed by
// sd.problem.n components, and hands every node to sd.set_current_estimate().
//
// If a value cannot be read, reading stops at that node; the estimate of this
// and of all following nodes is left untouched, instead of being overwritten
// with values that were never read. The stream stays in its failed state, so
// the caller can detect an incomplete read by testing `in` afterwards.
template<typename S>
void gnuplot_input(S &sd, std::istream &in)
{
	for (int k=0; k<=sd.N; k++)
	{
		DenseVector dv(sd.problem.n);
		// TODO: we assume that the first column lists a uniform time grid
		Real dummy;
		in >> dummy;
		for (int i=0; i<sd.problem.n; i++)
			in >> dv(i);

		if (!in) return;
		sd.set_current_estimate(k, dv);
	}
}




template<typename PP> struct EmptySupplementaryConditions
{
	typedef PP P;
	const P &problem;
	const int N;

	solver::DefaultSymmetricSolver *solver;

	EmptySupplementaryConditions(const P &problem, const int N) : problem(problem), N(N) 
	{
		solver = NULL;
	}
    virtual ~EmptySupplementaryConditions()
    {
        if (solver != NULL) delete solver;
    }

	void project(const DenseVector &u, DenseVector &Pu)
	{
		Pu = u;
	}

	void update(SparseSymmetricMatrix &S, solver::MatrixType mtype)
	{
		if (solver != NULL) delete solver;
		solver = new solver::DefaultSymmetricSolver(S, mtype);
	}
		
	int solve(DenseVector &x, const DenseVector &b) 
	{
		solver->solve(x, b);
		return 0;
	}

	void finalize() {}
};

template<typename PP> struct SupplementaryConditions 
{
	typedef PP P;

	const P &problem;
	const int N;


	typedef std::vector<int> tVI;
	typedef std::set<int> tSI;
	typedef std::vector<DenseMatrix> tVM;

	std::vector<SparseRowMatrix> supp;
	std::vector<SparseMatrix> proj;
    SparseSymmetricMatrix Proj;

	SupplementaryConditions(const P &problem, const int N) : problem(problem), N(N)
	{
		supp.assign(problem.n, SparseRowMatrix(1,N+1));
		for (std::vector<SparseRowMatrix>::iterator it=supp.begin(); it!=supp.end(); ++it)
			it->clear();

        Proj.clear();
        Proj = IdentityMatrix((N+1)*problem.n);
        for (int i=0; i<problem.n; i++)
            Proj(i,i) = 0.;
        for (int i=problem.n; i<2*problem.n; i++)
        {
            Proj(i,i) = 9./25.;
            Proj(i,i+problem.n) = 12./25.;
        }
        for (int i=2*problem.n; i<3*problem.n; i++)
        {
            Proj(i,i-problem.n) = 12./25.;
            Proj(i,i) = 16./25.;
        }
	}

	void fix_value(int i, int j)
	{
		assert(0<=i && i<problem.n);
		assert(0<=j && j<=N);
		int row = supp[i].size1();
		supp[i].resize(row+1, N+1, true);
		supp[i](row, j) = 1.;
	}

	void fix_derivative(int i, int j)
	{
		assert(0<=i && i<problem.n);
		assert(0<=j && j<=N);
		int row = supp[i].size1();
		supp[i].resize(row+1, N+1, true);

		if (j == 0)
		{
			supp[i](row, 0) = 1.;
			supp[i](row, 1) = -4.;
			supp[i](row, 2) = 3.;
		}
		else if (j == N)
		{
			supp[i](row, N-2) = -3.;
			supp[i](row, N-1) = 4.;
			supp[i](row, N-0) = -1.;
		}
		else
		{
			supp[i](row, j-1) = -1.;
			supp[i](row, j+1) = 1.;
		}
	}

	void fix_equal_values(int i, int j1, int j2)
	{
		assert(0<=i && i<problem.n);
		assert(0<=j1 && j1<=N);
		assert(0<=j2 && j2<=N);

		if (j1 == j2) return;
		int row = supp[i].size1();
		supp[i].resize(row+1, N+1, true);

		supp[i](row, j1) = -1.;
		supp[i](row, j2) = 1.;
	}

	void fix_equal_derivatives(int i, int j1, int j2)
	{
		assert(0<=i && i<problem.n);
		assert(0<=j1 && j1<=N);
		assert(0<=j2 && j2<=N);

		if (j1 == j2) return;
		int row = supp[i].size1();
		supp[i].resize(row+1, N+1, true);

		if (j1 == 0)
		{
			supp[i](row, 0) = 1.;
			supp[i](row, 1) = -4.;
			supp[i](row, 2) = 3.;
		}
		else if (j1 == N)
		{
			supp[i](row, N-2) = -3.;
			supp[i](row, N-1) = 4.;
			supp[i](row, N-0) = -1.;
		}
		else
		{
			supp[i](row, j1-1) = -1.;
			supp[i](row, j1+1) = 1.;
		}

		if (j2 == 0)
		{
			supp[i](row, 0) += -1.;
			supp[i](row, 1) += 4.;
			supp[i](row, 2) += -3.;
		}
		else if (j2 == N)
		{
			supp[i](row, N-2) += 3.;
			supp[i](row, N-1) += -4.;
			supp[i](row, N-0) += 1.;
		}
		else
		{
			supp[i](row, j2-1) += 1.;
			supp[i](row, j2+1) += -1.;
		}
	}

	void finalize()
	{
        SCOPE("finalize general supp");
        
		boost::timer timer;
        proj.clear();

		LOG(DEBUG) << "Prepare supplementary conditions..." << endl;
		for (int i=0; i<problem.n; i++)
		{
			SparseRowMatrix &A = supp[i];
			DenseMatrix Projection(A.size2(), A.size2());
			solver::null_space_projection(A, Projection);

			SparseMatrix SP(A.size2(), A.size2());
			for (size_t i=0; i<A.size2(); i++)
				for (size_t j=0; j<A.size2(); j++)
					if (!utility::is_zero(Projection(i,j))) SP(i,j) = Projection(i,j);
            proj.push_back(SP);
		}
		LOG(DEBUG) << "Supplementary condition preparation: " << timer.elapsed() << " sec" << endl;
	}


	void project(const DenseVector &u, DenseVector &Pu)
	{
        assert(int(u.size()) == (N+1)*problem.n);
        assert(int(Pu.size()) == (N+1)*problem.n);
		assert(int(proj.size()) == problem.n);
		assert(proj[0].size1() == proj[0].size2());
		assert(int(proj[0].size1()) == N+1);

		for (int i=0; i<problem.n; i++) 
			noalias(subslice(Pu, i, problem.n, N+1)) = prod(proj[i], subslice(u, i, problem.n, N+1));
	}


	const SparseSymmetricMatrix *S;
	void update(const SparseSymmetricMatrix &S, solver::MatrixType mtype)
	{
		assert(mtype == solver::SYM_POS);

		if (mtype != solver::SYM_POS)
			throw "When specifying supplementary conditions we currently use a conjugate gradient solver.\n\
This method can only be employed, if one has positive definiteness at least on the space\n\
fulfilling the supplementary conditions. Probably you tried to solve a LEAST_SQUARES problem\n\
with additional supplementary conditions.";

		this->S = &S;
	}

	// Solve Pt S P x = Pt b
    // #TODO: use some appropriate preconditioner
    // #TODO: use something better than naive CG
    // #TODO: reduce code duplication
	int solve(DenseVector &x, const DenseVector &b) 
	{
        SCOPE("solve general supp");
		const SparseSymmetricMatrix &A = *S;

        // #TODO:
		const Real EPS = PREC::MACHINE_EPS;
		const int MAX_IT = 2*A.size1()+A.size1()/10; // std::numeric_limits<int>::max();
		const int n = A.size1();

		DenseVector bb(b.size());
		project(b, bb);

		using ublas::prod;
		using ublas::noalias;
		using ublas::inner_prod;

		DenseVector Ap(n), r(n), Ax(n);
		multiply(A, x, Ax);

		noalias(r) = bb - Ax;
		DenseVector p = r;
		Real ns_r = inner_prod(r,r);
		for (int k=1; k<=MAX_IT; k++)
		{
//            LOG(DEBUG) << "CG solver step " << k << ", residual = " << ns_r << endl;
			if (ns_r <= EPS)
            {
                LOG(DEBUG) << "CG solver: " << k << " steps, residual = " << ns_r << endl;    
                return k-1;
            }

			multiply(A, p, Ap);
			const Real alpha = ns_r/inner_prod(p,Ap);
			noalias(x) += alpha*p;
			multiply(A, x, Ax);
			noalias(r) = bb - Ax;

			const Real tmp = ns_r;
			ns_r = inner_prod(r,r);
			const Real beta = ns_r/tmp;
			p = beta*p + r;
		}
        LOG(DEBUG) << "CG solver: " << MAX_IT << " steps (MAX_IT), residual = " << ns_r << endl;    
		return MAX_IT;
	}

private:    
	void multiply(const SparseSymmetricMatrix &A, const DenseVector &u, DenseVector &res)
	{
		DenseVector tmp(problem.n*(N+1));
		res.resize(problem.n*(N+1), false);

		project(u, res);
		ublas::axpy_prod(A, res, tmp, true);
		project(tmp, res);
	}
};

template<typename PP> struct DirichletSupplementaryConditions 
{
	typedef PP P;

	const P &problem;
	const int N;
   
	typedef std::vector<int> tVI;
	typedef std::set<int> tSI;

	solver::DefaultSymmetricSolver *solver;
	tSI supp;

	DirichletSupplementaryConditions(const P &problem, const int N) : problem(problem), N(N)
	{
		solver = NULL;
	}
    virtual ~DirichletSupplementaryConditions()
    {
        if (solver != NULL) delete solver;
    }

	void fix_value(int i, int j)
	{
		assert(0<=i && i<problem.n);
		assert(0<=j && j<=N);
        supp.insert(j*problem.n + i);
	}

	void project(const DenseVector &u, DenseVector &Pu)
	{
		Pu = u;
        for (tSI::const_iterator it=supp.begin(); it!=supp.end(); ++it)
            Pu(*it) = 0.; 
	}

	void update(SparseSymmetricMatrix &S, solver::MatrixType mtype)
	{
		if (solver != NULL) delete solver;

        // Now we have to remove the corresponding columns and rows of the matrix S.
        const int N_proj = (N+1)*problem.n - supp.size();
        SparseSymmetricMatrix S_proj(N_proj, N_proj);

        int subi = 0;
        tSI::const_iterator posi = supp.begin();
		for (SparseSymmetricMatrix::const_iterator1 it=S.begin1(); it!=S.end1(); ++it)
		{
            while (posi != supp.end() && int(it.index1()) > *posi)
            {
                ++subi;
                ++posi;
            }
            
            if (posi != supp.end() && int(it.index1()) == *posi) continue;

            int subj = 0;
            tSI::const_iterator posj = supp.begin();
			for (SparseSymmetricMatrix::const_iterator2 jt=it.begin(); jt!=it.end(); ++jt)
            {
                while (posj != supp.end() && int(jt.index2()) > *posj)
                {
                    ++subj;
                    ++posj;
                }

                if (posj != supp.end() && int(jt.index2()) == *posj) continue;
                S_proj(jt.index1()-subi, jt.index2()-subj) = *jt;
			}
		}

		solver = new solver::DefaultSymmetricSolver(S_proj, mtype);
	}
		
	int solve(DenseVector &x, const DenseVector &b) 
	{
        // project x and b
        const int N_orig = (N+1)*problem.n;
        const int N_proj = N_orig - supp.size();

        assert(int(x.size()) == N_orig);
        assert(int(b.size()) == N_orig);

        DenseVector x_proj(N_proj);
        DenseVector b_proj(N_proj);
        int sub = 0;
        tSI::const_iterator pos = supp.begin();
        for (int i=0; i<N_orig; i++)
        {
            if (pos != supp.end() && i > *pos)
            {
                sub++;
                pos++;
            }
            if (pos != supp.end() && i == *pos) continue;

            x_proj(i-sub) = x(i);
            b_proj(i-sub) = b(i);
        }

		solver->solve(x_proj, b_proj);

        // now fill in the missing zeros
        sub = 0;
        pos = supp.begin();
        for (int i=0; i<N_orig; i++)
        {
            if (pos != supp.end() && i > *pos)
            {
                sub++;
                pos++;
            }
            if (pos != supp.end() && i == *pos) x(i) = 0.;
            else x(i) = x_proj(i-sub);
        }
        
		return 0;
	}

	void finalize() {}
};

template<typename PP> struct IVSupplementaryConditions 
{
	typedef PP P;

	const P &problem;
	const int N;
   
	typedef std::vector<int> tVI;
	typedef std::set<int> tSI;

	solver::DefaultSymmetricSolver *solver;
	tSI supp;
    SparseSymmetricMatrix Proj;

	IVSupplementaryConditions(const P &problem, const int N) : problem(problem), N(N), Proj((N+1)*problem.n,(N+1)*problem.n)
	{
        SCOPE("constructor IVP supp");

		solver = NULL;

        Proj.clear();
        Proj = IdentityMatrix((N+1)*problem.n);
        for (int i=0; i<problem.n; i++)
            Proj(i,i) = 0.;
        for (int i=problem.n; i<2*problem.n; i++)
        {
            Proj(i,i) = 9./25.;
            Proj(i,i+problem.n) = 12./25.;
        }
        for (int i=2*problem.n; i<3*problem.n; i++)
        {
            Proj(i,i-problem.n) = 12./25.;
            Proj(i,i) = 16./25.;
        }
	}
    virtual ~IVSupplementaryConditions()
    {
        if (solver != NULL) delete solver;
    }

	void project(const DenseVector &u, DenseVector &Pu)
	{
/*        
		Pu = u;
        for (int i=0; i<problem.n; i++)
            Pu(i) = 0.;
        for (int i=problem.n; i<2*problem.n; i++)
            Pu(i) = 9./25. * u(i) + 12./25. * u(i+problem.n);
        for (int i=2*problem.n; i<3*problem.n; i++)
            Pu(i) = 12./25 * u(i-problem.n) + 16./25. * u(i);
*/
        axpy_prod(Proj, u, Pu, true);
	}

	void update(SparseSymmetricMatrix &S, solver::MatrixType mtype)
	{
		if (solver != NULL) delete solver;


        SparseSymmetricMatrix tmp1(S.size1(), S.size2());
        axpy_prod(S, Proj, tmp1, true);
        SparseSymmetricMatrix tmp2(S.size1(), S.size2());
        axpy_prod(Proj, tmp1, tmp2, true);
/*
        // remove the first and second row/col of blocks
        // replace the third row block with 12/25 * second row block + 16/25 * third row block
        const int N_proj = (N+1)*problem.n - 2*problem.n;

        SparseSymmetricMatrix S_proj(N_proj, N_proj);
		for (SparseSymmetricMatrix::const_iterator1 it=S.begin1(); it!=S.end1(); ++it)
		{
            if (int(it.index1()) < 2*problem.n) continue;
			for (SparseSymmetricMatrix::const_iterator2 jt=it.begin(); jt!=it.end(); ++jt)
            {
                if (int(jt.index2()) < 2*problem.n) continue;
                S_proj(jt.index1()-2*problem.n, jt.index2()-2*problem.n) = *jt;
			}
		}
		solver = new solver::DefaultSymmetricSolver(S_proj, solver::SYM_INDEF);
*/        
		solver = new solver::DefaultSymmetricSolver(tmp2, solver::SYM_INDEF);
	}
		
	int solve(DenseVector &x, const DenseVector &b) 
	{
        // project x and b
        const int N_orig = (N+1)*problem.n;
//        const int N_proj = N_orig - 2*problem.n;

        assert(int(x.size()) == N_orig);
        assert(int(b.size()) == N_orig);

        DenseVector x_proj(N_orig);
        DenseVector b_proj(N_orig);
        project(x, x_proj);
        project(b, b_proj);

		solver->solve(x_proj, b_proj);

        project(x_proj, x);
        return 0;

/*
        // project x and b
        const int N_orig = (N+1)*problem.n;
        const int N_proj = N_orig - 2*problem.n;

        assert(int(x.size()) == N_orig);
        assert(int(b.size()) == N_orig);

        DenseVector x_proj(N_proj);
        DenseVector b_proj(N_proj);
        noalias(x_proj) = subrange(x, 2*problem.n, N_orig);
        noalias(b_proj) = subrange(b, 2*problem.n, N_orig);

		solver->solve(x_proj, b_proj);

        // now fill in the missing values
        noalias(ublas::project(x, range(2*problem.n, N_orig))) = x_proj;
                
        for (int i=0; i<problem.n; i++)
            x(i) = 0.;
        for (int i=problem.n; i<2*problem.n; i++)
            x(i) = -4.*x(i+problem.n)/3.;

		return 0;
*/        
	}

	void finalize() {}
};


} // end namespace steepest_descent

#endif // __DESCENT_HXX__
