// Implementation of the functions of the DiMEPACK interface:
// ----------------------------------------------------------
// This module contains the functions dpVcycleConst and dpFMGVcycleConst which
// represent the interface of the DiMEPACK library

include(fortran-interface.m4)

#include "dpConstCoeff.h"
#include "dpUtilities.h"
#include "dpMgData.h"

#include <assert.h>
#include <stdlib.h>
#include <iostream.h>
#include <fstream.h>
#include <math.h>
#include <string>

#include "dpDirectSolve.h"
#include "dpApplyPadding.h"
#include "dpRestrictRhs.h"
#include "relax.h"
#include "restriction.h"
#include "interpolation.h"
#include "ipo-relax.h"
#include "relax-restrict.h"
#include "norm.h"
#include "dpFuncPtr.h"

// Static function prototypes:
static void dpVConst(dpMgData &dat, DIME_REAL omega, int topLevel, int nu1, int nu2,
		     tBoundary *bTypes, tRestrict rType);

static void dpSmoothConst(int niter, int nxp, int nyp, DIME_REAL *coeff, int ncoeff,
			  DIME_REAL *u, int padu, DIME_REAL *f, int padf, tBoundary *btypes,
			  bool	isFinest, bool isHom, bool isIdCoeff, DIME_REAL omega);

static void dpRestrResConst(int nxp, int nyp, int nxpc, int nypc, int ncoeff,
			    DIME_REAL *coeff, tBoundary *btypes, int padu, int padf,
			    int paduc, int padfc, DIME_REAL *u, DIME_REAL *f, DIME_REAL *uc,
			    DIME_REAL *fc, DIME_REAL omega, bool isIdCoeff, bool isFinest,
			    bool isHom, tRestrict rtype);

#ifdef DIME_COMPUTE_NORM
static DIME_REAL dpResNormConst(tNorm ntype, int ncoeff, DIME_REAL *matcoeff, bool isHom,
			     DIME_REAL omega, int nxp, int nyp, DIME_REAL *u, DIME_REAL *f,
			     int padu, int padf, DIME_REAL *coeff, tBoundary *btypes,
			     DIME_REAL hx, DIME_REAL hy, int iter);
#endif

static void dpSmoothRestrResConst(int nxp, int nyp, DIME_REAL *u, int padu, DIME_REAL *f,
				  int padf, int nxpc, int nypc, DIME_REAL *uc, int paduc,
				  DIME_REAL *fc, int padfc, int niter, DIME_REAL *coeff,
				  int ncoeff, tBoundary *btypes, DIME_REAL omega,
				  bool isFinest, bool isHom, bool isIdCoeff, tRestrict rtype);

static void dpInterpoSmoothConst(int nxp, int nyp, DIME_REAL *u, int padu, DIME_REAL *f, int padf,
				 int nxpc, int nypc, DIME_REAL *uc, int paduc, int niter,
				 DIME_REAL *coeff, int ncoeff, tBoundary *btypes,
				 DIME_REAL omega, bool isFinest, bool isHom, bool isIdCoeff);

static int dpMethodSelection(int nxp,int nyp,int niter,int type)
{
  // Chooses the data locality variant for a kernel family.
  //
  // Parameters:
  //   nxp, nyp: Number of grid points in dimensions x and y
  //   niter:    Number of smoothing iterations
  //   type:     0 selects for the plain smoother, 1 and 2 select for the
  //             melted operations; any other value triggers a warning
  //
  // Result: 0 = no locality optimization, 1 = fusion, 2 = 1D blocking,
  //         3 = 2D blocking. An unknown type yields 0.
  //
  // The thresholds 65 and 513 and the hard-coded alternatives are tuning
  // choices; DIME_METHODSELECTION_HARDCODED drops the grid size thresholds.
  int selected=0;

  switch (type) {
  case 0:
    {
#ifdef DIME_USE_OPTIMIZED_SMOOTHER
      // Blocking over niter sweeps needs more than 2*niter points per direction.
      const bool roomInY= (niter*2<nyp);
      const bool roomInX= (niter*2<nxp);
#ifdef DIME_METHODSELECTION_HARDCODED
      // 2D blocking if possible, otherwise 1D blocking, otherwise fusion
      if (roomInY && roomInX)
        selected=3;
      else if (roomInY)
        selected=2;
      else
        selected=1;
#else
      const bool largerThan513= (nxp>513)||(nyp>513);
      const bool largerThan65= (nxp>65)||(nyp>65);

      if (niter==1)
        selected=1; // a single sweep leaves nothing to block
      else if (roomInY && roomInX && largerThan513)
        selected=3; // 2D blocking possible
      else if (roomInY && largerThan65)
        selected=2; // 1D blocking possible
      else
        selected=1; // fusion is always possible
#endif
#else
      selected=0;
#endif
    }
    break;

  case 1:
  case 2:
    {
      // Melted operations need three extra lines on top of 2*niter.
      // 2D blocking is never chosen here: the skewed variant is not hand
      // unrolled and would not run efficiently.
      const bool roomInY= (niter*2+3<nyp);
#ifdef DIME_METHODSELECTION_HARDCODED
      selected= roomInY ? 2 : 1;
#else
      selected= (roomInY && ((nxp>65)||(nyp>65))) ? 2 : 1;
#endif
    }
    break;

  default:
    cout << "DiMEPACK: Warning unknown methodSelection option occured" << endl; 
    break;
  }

  return selected;
}

// Function dpVcycleConst
// ----------------------
// Multigrid V-cycle for constant coefficient problem
//
// Parameters:
// -----------
// nlevels:       Number of levels in the grid hierarchy
// ntype:         Norm type used for the stopping criterion
// epsilon:       Tolerance needed for the stopping criterion
// maxit:         Maximum number of V-cycles
// u:             Solution vector
// isInitialized: Flag denoting if u is initialized
// fIn:           Right-hand side (NULL if problem is homogeneous)
// nu1, nu2:      Numbers of pre- and post-smoothing iterations, respectively
// ncoeff:        Number of coefficients (5 or 9)
// matcoeff:      Array of coefficients (matrix entries)
// btypes:        Array of boundary types
// bvals:         Array of array of boundary values
// rtype:         Type of restriction operator
// omega:         Relaxation parameter for SOR smoother
// fixCGSolution  Flag specifying if SW corner value is to be fixed on the coarsest grid

void dpVcycleConst(int nlevels, tNorm ntype, DIME_REAL epsilon, int maxit,
		   dpGrid2d *u, bool isInitialized, dpGrid2d *fIn, int nu1, int nu2,
		   int ncoeff, DIME_REAL *matcoeff, tBoundary *btypes,
		   DIME_REAL **bvals, tRestrict rtype, DIME_REAL omega,
		   const bool fixCGSolution= false)
{
  // Runs up to maxit V-cycles on the finest level. When DIME_COMPUTE_NORM is
  // set, the residual norm is printed after every cycle and the iteration
  // stops as soon as the norm is no longer greater than epsilon; otherwise
  // ntype and epsilon are ignored and exactly maxit cycles are run.

  // Parameter checks, active in builds with assertions enabled:
  assert(omega>0.0 && omega<2.0);
  assert(maxit>0);
  assert(nu1>=0);
  assert(nu2>=0);

  dpInitFuncPtrArray();

  // NOTE(review): the tenth constructor argument is false here and true in
  // dpFMGVcycleConst; presumably it marks FMG mode. Confirm against dpMgData.
  dpMgData dat(nlevels, ncoeff, u, fIn, matcoeff, btypes, isInitialized, bvals, omega,
	       false, fixCGSolution);
  const int finest= dat.maxLevel;

#ifndef DIME_COMPUTE_NORM
  cout << "DiMEPACK: Ignoring parameters <norm type> and <precision>" << endl;
#endif

  // Zero pre-smoothing steps are rejected at run time, even though the
  // assertion above tolerates them:
  if (nu1==0) {
    cerr << "DiMEPACK: Pre-smoothing iterations required!" << endl;
    exit(1);
  }

#ifdef DIME_COMPUTE_NORM
  // Pick the report texts once; they depend only on the norm type.
  const bool wantL2= (ntype==L2);
  const char *initialLabel= wantL2 ? "DiMEPACK: Initial discrete L2 residual norm: "
                                   : "DiMEPACK: Initial max. residual norm: ";
  const char *cycleLabel= wantL2 ? "DiMEPACK: Discrete L2 residual norm: "
                                 : "DiMEPACK: Max. residual norm: ";

  // Residual norm of the initial guess on the finest level:
  DIME_REAL resnorm= dpResNormConst(ntype, dat.nCoeff, dat.matCoeff, dat.isHom, omega,
                                    dat.xDims[finest], dat.yDims[finest],
                                    dat.uGrids[finest], dat.fGrids[finest],
                                    dat.uPads[finest], dat.fPads[finest],
                                    dat.coeff, btypes, dat.hx[finest], dat.hy[finest], 0);
  cout << initialLabel << resnorm << endl;
#endif

  for (int cycle= 0; cycle<maxit; ++cycle) {
#ifdef DIME_COMPUTE_NORM
    // Stop before starting a cycle once the norm is not above the tolerance.
    if (!(resnorm>epsilon))
      break;
#endif

    dpVConst(dat, omega, finest, nu1, nu2, btypes, rtype);

#ifdef DIME_COMPUTE_NORM
    resnorm= dpResNormConst(ntype, dat.nCoeff, dat.matCoeff, dat.isHom, omega,
                            dat.xDims[finest], dat.yDims[finest],
                            dat.uGrids[finest], dat.fGrids[finest],
                            dat.uPads[finest], dat.fPads[finest],
                            dat.coeff, btypes, dat.hx[finest], dat.hy[finest], cycle+1);
    cout << cycleLabel << resnorm << endl;
#endif
  }

#ifdef DIME_DUMP_RESULT
  {
    // Write the caller-visible solution grid to a file.
    ofstream outfile("u-exit.dat");
    dpPrintGrid(outfile, u->getmem(), u->getdimx(), u->getdimy(), u->getpad());
  }
#endif
}


// Function dpFMGVcycleConst
// -------------------------
// Full Multigrid V-cycle (nested iteration) for constant coefficient problem
//
// Parameters:
// -----------
// nlevels:       Number of levels in the grid hierarchy
// ntype:         Norm type used for the stopping criterion
// epsilon:       Tolerance needed for the stopping criterion
// maxAddIt:      Maximum number of additional (!) V-cycles
// u:             Solution vector
// fIn:           Right-hand side (NULL if problem is homogeneous)
// nu1, nu2:      Numbers of pre- and post-smoothing iterations, respectively
// gamma:         Cycling parameter
// ncoeff:        Number of coefficients (5 or 9)
// matcoeff:      Array of coefficients (matrix entries)
// btypes:        Array of boundary types
// bvals:         Array of array of boundary values
// rtype:         Type of restriction operator
// omega:         Relaxation parameter for SOR smoother
// fixCGSolution  Flag specifying if SW corner value is to be fixed on the coarsest grid

void dpFMGVcycleConst(int nlevels, tNorm ntype, DIME_REAL epsilon, int maxAddIt,
		      dpGrid2d *u, dpGrid2d *fIn, int nu1, int nu2,
		      int gamma, int ncoeff, DIME_REAL *matcoeff, tBoundary *btypes,
		      DIME_REAL **bvals, tRestrict rtype, DIME_REAL omega,
		      const bool fixCGSolution= false)
{
  assert(omega>0.0 && omega<2.0);
  assert(maxAddIt>=0);
  assert(nu1>=0);
  assert(nu2>=0);
  assert(gamma>0);
  
  dpInitFuncPtrArray();
  
  dpMgData dat(nlevels, ncoeff, u, fIn, matcoeff, btypes, false, bvals, omega,
	       true, fixCGSolution);

#ifdef DIME_COMPUTE_NORM
  DIME_REAL resnorm; // Current norm of the residual
#else
  cout << "DiMEPACK: Ignoring parameters <norm type> and <precision>" << endl;
#endif
  
  // We require a positive number of presmoothing steps:
  if (nu1==0) {
    cerr << "DiMEPACK: Pre-smoothing iterations required!" << endl;
    cerr << "This feature is not yet implemented!" << endl;
    exit(1);
  }

  // Restrict the rhs from the finest grid down to the coarsest grid:
  dpRestrictRhs(dat, rtype);

  // Solve coarsest system directly:
  dpDirectSolve(dat.uGrids[0], dat.fGrids[0], dat.xDims[0], dat.yDims[0], btypes);

#ifndef DIME_NDEBUG
  {
    // After direct solution:
 	 ofstream outfile(dpGetFileName("u-ads.fmg",0).c_str()); 
 	 dpPrintGrid(outfile, dat.uGrids[0], dat.xDims[0], dat.yDims[0], dat.uPads[0]);
  }
#endif

  // Perform the FMG V cycle:
  for (int l=0; l<dat.maxLevel; l++) {
    // Interpolate approximation from level l to level l+1:
    dpF77SUBCALL(interp,`&dat.xDims[l+1], &dat.yDims[l+1], dat.uGrids[l+1], &dat.uPads[l+1], &dat.xDims[l], &dat.yDims[l], dat.uGrids[l], &dat.uPads[l], (int *) btypes');

#ifndef DIME_NDEBUG
  {	
 	 ofstream outfile(dpGetFileName("u-start.fmg",l+1).c_str());
 	 dpPrintGrid(outfile, dat.uGrids[l+1], dat.xDims[l+1], dat.yDims[l+1], dat.uPads[l+1]);
  }
#endif

    for (int i=0; i<gamma; i++)
      dpVConst(dat,omega,l+1,nu1,nu2,btypes,rtype);
  }

#ifdef DIME_COMPUTE_NORM
  // Compute the discrete L2 norm or the maximum norm of the residual:
  resnorm= dpResNormConst(ntype, dat.nCoeff, dat.matCoeff, dat.isHom, omega, dat.xDims[dat.maxLevel],
                          dat.yDims[dat.maxLevel], dat.uGrids[dat.maxLevel], dat.fGrids[dat.maxLevel],
			  dat.uPads[dat.maxLevel], dat.fPads[dat.maxLevel], dat.coeff, btypes,
			  dat.hx[dat.maxLevel], dat.hy[dat.maxLevel], 0);

  if (ntype==L2)
    cout << "DiMEPACK: Discrete L2 residual norm after FMG V cycle: " << resnorm << endl;
  else
    cout << "DiMEPACK: Max. residual norm after FMG V cycle: " << resnorm << endl;
#endif

  // Perform additional V cycles (if necessary):
  if (maxAddIt>0) {
    // Start iterating: 
#ifdef DIME_COMPUTE_NORM
    for (int iter= 0; iter<maxAddIt && resnorm>epsilon; iter++)
#else
    for (int iter= 0; iter<maxAddIt; iter++)
#endif
      {
	// Perform one V-cycle:
	dpVConst(dat,omega,dat.maxLevel,nu1,nu2,btypes,rtype);

#ifdef DIME_COMPUTE_NORM
	// Compute the discrete L2 norm or the maximum norm of the residual:
	resnorm= dpResNormConst(ntype, dat.nCoeff, dat.matCoeff, dat.isHom, omega, dat.xDims[dat.maxLevel],
				dat.yDims[dat.maxLevel], dat.uGrids[dat.maxLevel], dat.fGrids[dat.maxLevel],
				dat.uPads[dat.maxLevel], dat.fPads[dat.maxLevel], dat.coeff, btypes,
				dat.hx[dat.maxLevel], dat.hy[dat.maxLevel], iter+1);

        if (ntype==L2)
          cout << "DiMEPACK: Discrete L2 residual norm: " << resnorm << endl;
        else
          cout << "DiMEPACK: Max. residual norm: " << resnorm << endl;
#endif
      }
  }
  
#ifdef DIME_DUMP_RESULT
  {
 	 ofstream outfile("u-exit.dat");
 	 dpPrintGrid(outfile, u->getmem(), u->getdimx(), u->getdimy(), u->getpad());
  }
#endif

  return;
}


// Function dpVConst
// -----------------
// Performs a Multigrid V cycle, used by the standard V cycle function and by the
// Full Multigrid V cycle function (nested iteration)

static void dpVConst(dpMgData& dat, DIME_REAL omega, int topLevel, int nu1, int nu2,
		     tBoundary *bTypes, tRestrict rType)
{
  // One V-cycle starting at topLevel: pre-smooth and restrict the residual
  // on the way down, solve level 0 directly, then interpolate the correction
  // and post-smooth on the way up. With DIME_USE_MELTED_OPS the upward
  // interpolation and post-smoothing are done by one combined routine
  // whenever nu2 is positive.

  // Counts the calls over the whole program run; the value is only used to
  // tag the debug dump files.
  static int cycleNo=0;
  ++cycleNo;

  // ---- Downward leg ----
  for (int fine= topLevel; fine>=1; --fine) {
    const int coarse= fine-1;
    const bool onTop= (fine==topLevel);

    // Pre-smoothing plus computation and restriction of the residual:
    dpSmoothRestrResConst(dat.xDims[fine], dat.yDims[fine], dat.uGrids[fine], dat.uPads[fine],
                          dat.fGrids[fine], dat.fPads[fine],
                          dat.xDims[coarse], dat.yDims[coarse], dat.uGrids[coarse], dat.uPads[coarse],
                          dat.fGrids[coarse], dat.fPads[coarse],
                          nu1, dat.coeff, dat.nCoeff, bTypes, omega, onTop,
                          dat.isHom, dat.isIdCoeff, rType);

#ifndef DIME_NDEBUG
    {
      ofstream preDump(dpGetFileName("u-pre",fine,cycleNo).c_str());
      dpPrintGrid(preDump, dat.uGrids[fine], dat.xDims[fine], dat.yDims[fine], dat.uPads[fine]);
      ofstream rhsDump(dpGetFileName("f",coarse,cycleNo).c_str());
      dpPrintGrid(rhsDump, dat.fGrids[coarse], dat.xDims[coarse], dat.yDims[coarse], dat.fPads[coarse]);
    }
#endif
  }

  // ---- Coarsest level ----
  dpDirectSolve(dat.uGrids[0], dat.fGrids[0], dat.xDims[0], dat.yDims[0], bTypes);

#ifndef DIME_NDEBUG
  {
    ofstream coarseDump(dpGetFileName("u-post",0,cycleNo).c_str());
    dpPrintGrid(coarseDump, dat.uGrids[0], dat.xDims[0], dat.yDims[0], dat.uPads[0]);
  }
#endif

  // ---- Upward leg ----
  for (int coarse= 0; coarse<topLevel; ++coarse) {
    const int fine= coarse+1;
    const bool onTop= (fine==topLevel);

#ifdef DIME_USE_MELTED_OPS
    if (nu2<=0) {
      // No post-smoothing requested: interpolate the correction only.
      dpF77SUBCALL(interp,`&dat.xDims[fine], &dat.yDims[fine], dat.uGrids[fine], &dat.uPads[fine], &dat.xDims[coarse], &dat.yDims[coarse], dat.uGrids[coarse], &dat.uPads[coarse], (int *) bTypes');

#ifndef DIME_NDEBUG
      {
        ofstream corrDump(dpGetFileName("u-corr",fine,cycleNo).c_str());
        dpPrintGrid(corrDump, dat.uGrids[fine], dat.xDims[fine], dat.yDims[fine], dat.uPads[fine]);
      }
#endif
    }
    else {
      // Interpolation and post-smoothing in one combined routine:
      dpInterpoSmoothConst(dat.xDims[fine], dat.yDims[fine], dat.uGrids[fine], dat.uPads[fine],
                           dat.fGrids[fine], dat.fPads[fine],
                           dat.xDims[coarse], dat.yDims[coarse], dat.uGrids[coarse], dat.uPads[coarse],
                           nu2, dat.coeff, dat.nCoeff, bTypes, omega, onTop,
                           dat.isHom, dat.isIdCoeff);

#ifndef DIME_NDEBUG
      {
        ofstream postDump(dpGetFileName("u-post",fine,cycleNo).c_str());
        dpPrintGrid(postDump, dat.uGrids[fine], dat.xDims[fine], dat.yDims[fine], dat.uPads[fine]);
      }
#endif
    }
#else
    // Interpolate the correction:
    dpF77SUBCALL(interp,`&dat.xDims[fine], &dat.yDims[fine], dat.uGrids[fine], &dat.uPads[fine], &dat.xDims[coarse], &dat.yDims[coarse], dat.uGrids[coarse], &dat.uPads[coarse], (int *) bTypes');

#ifndef DIME_NDEBUG
    {
      ofstream corrDump(dpGetFileName("u-corr",fine,cycleNo).c_str());
      dpPrintGrid(corrDump, dat.uGrids[fine], dat.xDims[fine], dat.yDims[fine], dat.uPads[fine]);
    }
#endif

    // Post-smoothing, only if the user asked for it:
    if (nu2>0) {
      dpSmoothConst(nu2, dat.xDims[fine], dat.yDims[fine], dat.coeff, dat.nCoeff,
                    dat.uGrids[fine], dat.uPads[fine], dat.fGrids[fine], dat.fPads[fine],
                    bTypes, onTop, dat.isHom, dat.isIdCoeff, omega);

#ifndef DIME_NDEBUG
      {
        ofstream postDump(dpGetFileName("u-post",fine,cycleNo).c_str());
        dpPrintGrid(postDump, dat.uGrids[fine], dat.xDims[fine], dat.yDims[fine], dat.uPads[fine]);
      }
#endif
    }
#endif
  }
}

// Function dpSmoothConst
// ----------------------
// Wrapper function for fast FORTRAN77 smoothing procedures for the
// case of constant coefficients
//
// Parameters:
// -----------
// niter:     Number of smoothing iterations
// nxp, nyp:  Number of grid points in dimensions x and y, respectively
// coeff:     Array of coefficients
// ncoeff:    Number of coefficients (5 or 9)
// u:         Solution vector
// padu:      Padding in array u
// f:         Rhs (NULL if problem is homogeneous)
// padf:      Padding in array f
// btypes:    Array of boundary types
// isFinest:  Flag specifying if the finest level is processed
// isHom:     Flag specifying if the original problem is homogeneous
// isIdCoeff: Flag specifying if the non-center coefficients are identical
// omega:     Relaxation parameter

static void dpSmoothConst(int niter, int nxp, int nyp, DIME_REAL *coeff, int ncoeff,
	 	   DIME_REAL *u, int padu, DIME_REAL *f, int padf, tBoundary *btypes,
		   bool	isFinest, bool isHom, bool isIdCoeff, DIME_REAL omega)
{
  assert(niter > 0);
  assert(nxp>=3);
  assert(nyp>=3);

#ifdef DIME_NO_ARITHMETIC_OPT
  int funcIndex=3;
  if(ncoeff==9) funcIndex+=8;
#else
  // determine which variant can/should be called
  int funcIndex=0;
  if(ncoeff==9) funcIndex=8;
  if(omega==1.0) funcIndex+=4;
  if(!isIdCoeff) funcIndex+=2;
  if(!(isFinest && isHom)) funcIndex+=1;
#endif

  // determine which data locality optimization can be applied
  int methodIndex=dpMethodSelection(nxp,nyp,niter,0);
  int (*smoother)(int *, int *, DIME_REAL *, int *, DIME_REAL *, int *, int *, DIME_REAL *, int *);
  smoother=dpSmootherFuncPtr[methodIndex][funcIndex];

  (*smoother)(&nxp, &nyp, u, &padu, f, &padf, &niter, coeff, (int *) btypes);

  return;
}


// Function dpRestrResConst
// ------------------------
// Wrapper function for fast FORTRAN77 restriction procedures for the
// case of constant coefficients
//
// Parameters:
// -----------
// nxp, nyp:     Number of grid points in dimensions x and y (fine grid)
// nxpc, nypc:   Number of grid points in dimensions x and y (coarse grid)
// ncoeff:       Number of coefficients (5 or 9)
// coeff:        Array of coefficients
// btypes:       Array of boundary types
// padu, padf:   Paddings in array u and in array f (fine)
// paduc, padfc: Paddings in array u and in array f (coarse)
// u, f:         Solution vector and rhs (fine)
// uc, fc:       Solution vector and rhs (coarse)
// omega:        Relaxation parameter
// isIdCoeff:    Flag specifying if non-center coefficients are identical
// isFinest:     Flag specifying if finer level is the finest level
// isHom:        Flag specifying if the problem is homogeneous
// rtype:        Restriction type

static void dpRestrResConst(int nxp, int nyp, int nxpc, int nypc, int ncoeff,
		     DIME_REAL *coeff, tBoundary *btypes, int padu, int padf,
		     int paduc, int padfc, DIME_REAL *u, DIME_REAL *f, DIME_REAL *uc,
		     DIME_REAL *fc, DIME_REAL omega, bool isIdCoeff, bool isFinest,
		     bool isHom, tRestrict rtype)
{
  // determine which variant can/should be called
#ifdef DIME_NO_ARITHMETIC_OPT
  int funcIndex=7;
  if(ncoeff==9) funcIndex+=8;
#else
  int funcIndex=0;
  if(ncoeff==9) funcIndex=8;
  if(omega==1.0) funcIndex+=4;
  if(!isIdCoeff) funcIndex+=2;
  if(!(isFinest && isHom)) funcIndex+=1;
#endif

  // determines whether half weighting or full weighting is used
  int methodIndex=0;
  if (rtype==FW) methodIndex=1;

  int (*restriction)(int *, int *, DIME_REAL *, int *, DIME_REAL *, int *, int *, int *, DIME_REAL *, int *, DIME_REAL *, int *, DIME_REAL *, int *, DIME_REAL *);
  restriction=dpRestrictionFuncPtr[methodIndex][funcIndex];

  (*restriction)(&nxp, &nyp, u, &padu, f, &padf, &nxpc, &nypc, uc, &paduc, fc, &padfc, coeff, (int *) btypes, &omega);

  return;
}


// Function dpSmoothRestrResConst
// ------------------------------
// Wrapper function for fast FORTRAN77 smoothing and restriction procedures for the
// case of constant coefficients
//
// Parameters:
// -----------
// niter:        Number of smoothing iterations
// nxp, nyp:     Number of grid points in dimensions x and y (fine grid)
// nxpc, nypc:   Number of grid points in dimensions x and y (coarse grid)
// nCoeff:       Number of coefficients (5 or 9)
// coeff:        Array of coefficients
// bTypes:       Array of boundary types
// padu, padf:   Paddings in array u and in array f (fine)
// paduc, padfc: Paddings in array u and in array f (coarse)
// u, f:         Solution vector and rhs (fine)
// uc, fc:       Solution vector and rhs (coarse)
// omega:        Relaxation parameter
// isIdCoeff:    Flag specifying if non-center coefficients are identical
// isFinest:     Flag specifying if finer level is the finest level
// isHom:        Flag specifying if the problem is homogeneous
// rtype:        Restriction type

static void dpSmoothRestrResConst(
 int nxp, int nyp, DIME_REAL *u, int padu, DIME_REAL *f, int padf,
 int nxpc, int nypc, DIME_REAL *uc, int paduc, DIME_REAL *fc, int padfc,
 int niter, DIME_REAL *coeff, int nCoeff, tBoundary *bTypes, DIME_REAL omega,
 bool isFinest, bool isHom, bool isIdCoeff, tRestrict rType)
{
  // Pre-smoothing followed by computation and restriction of the residual
  // for one fine/coarse level pair. With DIME_USE_MELTED_OPS and a large
  // enough grid both steps are done by one combined kernel taken from
  // dpPreCoarseFuncPtr; otherwise dpSmoothConst and dpRestrResConst are
  // called one after the other.

  bool useMeltedOps;
#ifdef DIME_USE_MELTED_OPS
  // The size test is grouped explicitly so that the niter guard covers both
  // directions. Without the inner parentheses the && binds tighter than the
  // || and a grid with nxp>=7 would take the melted path even for niter==0,
  // contradicting the assertion below.
  useMeltedOps=((nxp>=7)||(nyp>=7))&&(niter>0);
#else
  useMeltedOps=false;
#endif

  if(useMeltedOps){

    assert(niter>0);
    assert(nxp>=5);
    assert(nyp>=5);
    assert(bTypes!=NULL);

    // Second table subscript: the arithmetic variant.
#ifdef DIME_NO_ARITHMETIC_OPT
    int funcIndex=7;
    if(nCoeff==9) funcIndex+=8;
#else
    // 8 for the 9 coefficient stencil, plus 4 if omega is exactly 1, plus 2
    // if the non-center coefficients differ, plus 1 unless the level is the
    // finest one of a homogeneous problem.
    int funcIndex=0;
    if(nCoeff==9) funcIndex=8;
    if(omega==1.0) funcIndex+=4;
    if(!isIdCoeff) funcIndex+=2;
    if(!(isFinest && isHom)) funcIndex+=1;
#endif

    // First table subscript: the data locality variant, offset by 4 for rType FW.
    int methodIndex=dpMethodSelection(nxp,nyp,niter,1);
    assert(methodIndex!=0); // Standard red-black is not implemented for melted operations!
    if(rType==FW) methodIndex+=4;

    int (*preCoarseOps)(int *,int *,DIME_REAL *,int *,DIME_REAL *,int *,int *,
	int *,DIME_REAL *,int *,DIME_REAL *,int *,int *,DIME_REAL *,int *,DIME_REAL *)
	=dpPreCoarseFuncPtr[methodIndex][funcIndex];

    (*preCoarseOps)(&nxp,&nyp,u,&padu,f,&padf,&nxpc,&nypc,uc,&paduc,fc,&padfc,&niter,coeff,(int *)bTypes,&omega);
  }
  else{
        // Pre-smoothing:
        dpSmoothConst(niter, nxp, nyp, coeff, nCoeff, u,padu,f, padf, bTypes, isFinest, isHom,isIdCoeff, omega);
      
        // Compute and restrict the residual:
        dpRestrResConst(nxp, nyp, nxpc, nypc, nCoeff,coeff, bTypes, padu, padf, paduc,
		        padfc, u, f, uc, fc, omega, isIdCoeff, isFinest, isHom, rType);
  }    
}


// Function dpInterpoSmoothConst
// -----------------------------
// Wrapper function for fast FORTRAN77 interpolation and smoothing procedures for the
// case of constant coefficients
//
// Parameters:
// -----------
// niter:        Number of smoothing iterations
// nxp, nyp:     Number of grid points in dimensions x and y (fine grid)
// nxpc, nypc:   Number of grid points in dimensions x and y (coarse grid)
// ncoeff:       Number of coefficients (5 or 9)
// coeff:        Array of coefficients
// btypes:       Array of boundary types
// padu, padf:   Paddings in array u and in array f (fine)
// paduc:        Paddings in array u (coarse)
// u, f:         Solution vector and rhs (fine)
// uc:           Solution vector (coarse)
// omega:        Relaxation parameter
// isIdCoeff:    Flag specifying if non-center coefficients are identical
// isFinest:     Flag specifying if finer level is the finest level
// isHom:        Flag specifying if the problem is homogeneous

static void dpInterpoSmoothConst(
 int nxp, int nyp, DIME_REAL *u, int padu, DIME_REAL *f, int padf,
 int nxpc, int nypc, DIME_REAL *uc, int paduc,
 int niter, DIME_REAL *coeff, int ncoeff, tBoundary *btypes,
 DIME_REAL omega, bool isFinest, bool isHom, bool isIdCoeff)
{
  assert(niter>0);
  assert(nxp>=3);
  assert(nyp>=3);
  assert(btypes!=NULL);

#ifdef DIME_NO_ARITHMETIC_OPT
  int funcIndex=7;
  if(ncoeff==9) funcIndex+=8;
#else
  // determine which variant can/should be called
  int funcIndex=0;
  if(ncoeff==9) funcIndex=8;
  if(omega==1.0) funcIndex+=4;
  if(!isIdCoeff) funcIndex+=2;
  if(!(isFinest && isHom)) funcIndex+=1;
#endif

  // determine which data locality optimization can be applied
  int methodIndex=dpMethodSelection(nxp,nyp,niter,2);
  assert(methodIndex!=0); // Standard red-black is not implemented for melted operations!

  int (*postCoarseOps)(int *,int *,DIME_REAL *,int *,DIME_REAL *,int *,int *,int *,DIME_REAL *,int *,int *,DIME_REAL *, int *);

  postCoarseOps=dpPostCoarseFuncPtr[methodIndex][funcIndex];

  (*postCoarseOps)(&nxp,&nyp,u,&padu,f,&padf,&nxpc,&nypc,uc,&paduc,&niter,coeff,(int *)btypes);

  return;
}


#ifdef DIME_COMPUTE_NORM
// Function dpResNormConst
// -----------------------
// Wrapper function for FORTRAN77 residual norm calculation procedures
// for the case of constant coefficients
//
// Parameters:
// -----------
// ntype:      Type of required norm
// ncoeff:     Number of coefficients (5 or 9)
// matcoeff:   Matrix coefficients
// isHom:      Flag specifying if the problem is homogeneous
// omega:      Relaxation parameter
// nxp, nyp:   Number of grid points in dimensions x and y (fine grid)
// u, f:       Solution vector and rhs (fine)
// upad, fpad: Paddings in array u and in array f (fine)
// coeff:      Array of coefficients
// btypes:     Array of boundary types
// hx, hy:     Mesh widths
// iter:       Number of iteration

static DIME_REAL dpResNormConst(tNorm ntype, int ncoeff, DIME_REAL *matcoeff, bool isHom,
                             DIME_REAL omega, int nxp, int nyp, DIME_REAL *u, DIME_REAL *f,
                             int upad, int fpad, DIME_REAL *coeff, tBoundary *btypes,
                             DIME_REAL hx, DIME_REAL hy, int iter)
{
  // Calls the FORTRAN77 norm routine that matches the norm type, the stencil
  // size and the homogeneity flag, then scales the result by the absolute
  // value of a matrix coefficient divided by omega. The coefficient is
  // matcoeff[2] for 5 coefficients and matcoeff[4] otherwise.
  //
  // NOTE(review): the stream flags are switched to scientific and restored
  // again before anything is printed in this function, and iter is only
  // incremented locally. Neither has a visible effect here; both are kept
  // as they were.
  const bool fivePoint= (ncoeff==5);
  const bool wantL2= (ntype==L2);
  DIME_REAL resnorm;

  long savedFlags = cout.flags();
  cout.flags(savedFlags | ios::scientific);

  if (fivePoint) {
    if (wantL2) { // discrete L2 norm, 5 coefficients
      if (isHom)
        resnorm= dpF77SUBCALL(lnmh5,`&nxp, &nyp, u, &upad, f, &fpad, coeff, (int *) btypes, &omega, &hx, &hy');
      else
        resnorm= dpF77SUBCALL(lnmn5,`&nxp, &nyp, u, &upad, f, &fpad, coeff, (int *) btypes, &omega, &hx, &hy');
    }
    else { // maximum norm, 5 coefficients
      if (isHom)
        resnorm= dpF77SUBCALL(mnmh5,`&nxp, &nyp, u, &upad, f, &fpad, coeff, (int *) btypes, &omega, &hx, &hy');
      else
        resnorm= dpF77SUBCALL(mnmn5,`&nxp, &nyp, u, &upad, f, &fpad, coeff, (int *) btypes, &omega, &hx, &hy');
    }
  }
  else {
    if (wantL2) { // discrete L2 norm, 9 coefficients
      if (isHom)
        resnorm= dpF77SUBCALL(lnmh9,`&nxp, &nyp, u, &upad, f, &fpad, coeff, (int *) btypes, &omega, &hx, &hy');
      else
        resnorm= dpF77SUBCALL(lnmn9,`&nxp, &nyp, u, &upad, f, &fpad, coeff, (int *) btypes, &omega, &hx, &hy');
    }
    else { // maximum norm, 9 coefficients
      if (isHom)
        resnorm= dpF77SUBCALL(mnmh9,`&nxp, &nyp, u, &upad, f, &fpad, coeff, (int *) btypes, &omega, &hx, &hy');
      else
        resnorm= dpF77SUBCALL(mnmn9,`&nxp, &nyp, u, &upad, f, &fpad, coeff, (int *) btypes, &omega, &hx, &hy');
    }
  }

  // Scale by the coefficient selected for this stencil size:
  resnorm*= fabs(matcoeff[fivePoint ? 2 : 4]/omega);

  cout.flags(savedFlags);
  ++iter;

  return resnorm;
}
#endif
