#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <MFAUTO.h>

FILE *fp9=(FILE*)NULL;   /* Must be initialized */
#define max(a,b) ((a) >= (b) ? (a) : (b))
double d_imag(doublecomplex *z);
double z_abs(doublecomplex *z);

typedef struct {
  double ***a;
  double ***b;
  double ***c;
  double **d;
  double ***a1;
  double ***a2;
  double ***s1;
  double ***s2;
  double ***bb;
  double ***cc;
  double **faa;
  double ***ca1;

  long *icf;
  long *irf;
  long *ipr;
  long *icf11;
  long *icf1;
  long *icf2;
  long *np;
} main_auto_storage_type;

int stupbv(MFAUTO_IAP iap, MFAUTO_RAP rap, double *par, long *icp, MFAUTO_F funi, double *rlcur, double *rlold, double *rldot, long *ndxloc, double **ups, double **uoldps, double **upoldp)
 {
  /* Local variables */
  long ndim, ncol;
  long nfpr, ntst;
  long i, j, k;
  long n1, ips;
  double *dfdp,*dfdu,*uold,*f,*u;

  dfdp = (double *)malloc(sizeof(double)*(iap->ndim)*NPARX);
  dfdu = (double *)malloc(sizeof(double)*(iap->ndim)*(iap->ndim));
  uold = (double *)malloc(sizeof(double)*(iap->ndim));
  f    = (double *)malloc(sizeof(double)*(iap->ndim));
  u    = (double *)malloc(sizeof(double)*(iap->ndim));


/* Stores U-prime (derivative with respect to T) in UPOLDP. */


/* Local */

  /* Parameter adjustments */
  /*--par;*/
  /*--icp;*/
  /*--rlcur;*/
  /*--rlold;*/
  /*--rldot;*/

  ndim = iap->ndim;
  ips = iap->ips;
  ntst = iap->ntst;
  ncol = iap->ncol;
  nfpr = iap->nfpr;

  for (i = 0; i < nfpr; ++i) {
    par[icp[i]] = rlold[i];
  }

  for (j = 0; j < ntst + 1; ++j) {
    for (i = 0; i < ndim; ++i) {
      u[i] = uoldps[j][i];
      if (ips == 14 || ips == 16) {
	uold[i] = uoldps[j][i] * 2 - ups[j][i];
      } else {
	uold[i] = uoldps[j][i];
      }
    }
    (*funi)(iap, rap, ndim, u, uold, icp, par, 0, f, 
	    dfdu, dfdp);
    for (i = 0; i < ndim; ++i) {
      upoldp[j][i] = f[i];
    }
  }

  for (k = 1; k <= ncol - 1; ++k) {
    n1 = k * ndim;
    for (j = 0; j < ntst; ++j) {
      for (i = 0; i < ndim; ++i) {
	u[i] = uoldps[j][n1 + i];
	if (ips == 14 || ips == 16) {
	  uold[i] = uoldps[j][n1 + i] * 2 - ups[j][n1 + i];
	} else {
	  uold[i] = uoldps[j][n1 + i];
	}
      }
      (*funi)(iap, rap, ndim, u, uold, icp, par, 0, f, dfdu, dfdp);
      for (i = 0; i < ndim; ++i) {
	upoldp[j][n1 + i] = f[i];
      }
    }
  }

  for (i = 0; i < nfpr; ++i) {
    par[icp[i]] = rlcur[i];
  }
  free(dfdp);
  free(dfdu);
  free(uold);
  free(f   );
  free(u   );

  return 0;
 } /* stupbv_ */

int stepbv(MFAUTO_IAP iap, MFAUTO_RAP rap, double *par, long *icp, MFAUTO_F funi, MFAUTO_BC bcni, MFAUTO_IC icni, MFAUTO_PLBV pvli, double *rds, double *rlcur, double *rlold, double *rldot, long ndxloc, double **ups, double **dups, double **uoldps, double **udotps, double **upoldp, double **fa, double *fc, double *tm, double *dtm, double **p0, double **p1, double *thl, double *thu)
 {
  /* Local variables */
  long iads;
  double adrl;
  long done;
  long ndim, ncol;
  double epsl, rdrl;
  long nfpr, ifst;
  double epsu;
  long ntop, itnw, nllv;
  double dumx;
  long ntot, nrow=0, nwtn, ntst, i, j;
  double dsold, dsmin;
  long nitps;
  double rdumx;
  long istop;
  double au;

  double delref=0.0, delmax;

  long iid;
  double adu;
  long ibr, mxt;
  double umx;
  long nit1;

  /* Controls the solution of the nonlinear equations (by Newton's method) 
*/
/* for the next solution (PAR(ICP(*)) , U) on a branch of solutions. */



  /* Parameter adjustments */
  /*--par;*/
  /*--icp;*/
  /*--rlcur;*/
  /*--rlold;*/
  /*--rldot;*/
  /*--fc;*/
  /*--tm;*/
  /*--dtm;*/
    
  ndim = iap->ndim;
  ntst = iap->ntst;
  ncol = iap->ncol;
  iads = iap->iads;
  iid = iap->iid;
  itnw = iap->itnw;
  nwtn = iap->nwtn;
  nfpr = iap->nfpr;
  ibr = iap->ibr;
  ntot = iap->ntot;
  ntop = (ntot + 1) % 10000;

  dsmin = rap->dsmin;
  epsl = rap->epsl;
  epsu = rap->epsu;

 L1:
  dsold = *rds;
  rap->dsold = dsold;
  nitps = 0;
  iap->nit = nitps;

  /* Write additional output on unit 9 if requested. */

  wrtbv9(iap, rap, par, icp, rlcur, &ndxloc, 
	 ups, tm, dtm, thl, thu);

  /* Generate the Jacobian matrix and the right hand side. */

  for (nit1 = 1; nit1 <= itnw; ++nit1) {

    nitps = nit1;
    iap->nit = nitps;
    nllv = 0;

    ifst = 0;
    if (nitps <= nwtn) {
      ifst = 1;
    }
    solvbv(&ifst, iap, rap, par, icp, funi, bcni, icni, 
	   rds, &nllv, rlcur, rlold, rldot, &ndxloc, 
	   ups, dups, uoldps, 
	   udotps, upoldp, dtm, fa, fc, p0, 
	   p1, thl, thu,1,NULL);

    /* Add Newton increments. */

    for (i = 0; i < ndim; ++i) {
      ups[ntst][i] += fc[i];
    }
    for (i = 0; i < nfpr; ++i) {
      rlcur[i] += fc[ndim + i];
      par[icp[i]] = rlcur[i];
    }

    dumx = 0.;
    umx = 0.;
    nrow = ndim * ncol;
    for (j = 0; j < ntst; ++j) {
      for (i = 0; i < nrow; ++i) {
	adu = fabs(fa[j][i]);
	if (adu > dumx) {
	  dumx = adu;
	}
	au = fabs(ups[j][i]);
	if (au > umx) {
	  umx = au;
	}
	ups[j][i] += fa[j][i];
      }
    }
    /* I am not sure why this is here, and it requires a wierd
       special case for the parsing of points which are "special"
       (i.e. bifurcations, folds, etc).  I may want to get rid
       of it at some time.
    */
    wrtbv9(iap, rap, par, icp, rlcur, &ndxloc, ups, tm, dtm, thl, thu);

    /* Check whether user-supplied error tolerances have been met : */

    done = TRUE;
    rdrl = 0.;
    for (i = 0; i < nfpr; ++i) {
      adrl = fabs(fc[ndim + i]) / (fabs(rlcur[i]) + 1.);
      if (adrl > epsl) {
	done = FALSE;
      }
      if (adrl > rdrl) {
	rdrl = adrl;
      }
    }
    rdumx = dumx / (umx + 1.);
    if (done && rdumx < epsu) {
      (*pvli)(iap, rap, icp, dtm, &ndxloc, ups, &ndim, p0, p1, par);
      if (iid >= 2) {
	fprintf(fp9," \n");	
      }
      return 0;
    }

    if (nitps == 1) {
      delref = max(rdrl,rdumx) * 20;
    } else {
      delmax = max(rdrl,rdumx);
      if (delmax > delref) {
	goto L3;
      }
    }

    /* L2: */
  }

  /* Maximum number of iterations reached. */

 L3:
  if (iads == 0 && iap->mynode == 0) {
    fprintf(fp9,"%4li%6li NOTE:No convergence with fixed step size\n",ibr,ntop);	
  }
  if (iads == 0) {
    goto L13;
  }

  /* Reduce stepsize and try again. */

  mxt = itnw;
  iap->nit = mxt;
  adptds(iap, rap, rds);
  if (fabs(*rds) < dsmin) {
    goto L12;
  }
  for (i = 0; i < nfpr; ++i) {
    rlcur[i] = rlold[i] + *rds * rldot[i];
  }
  for (j = 0; j < ntst + 1; ++j) {
    for (i = 0; i < nrow; ++i) {
      ups[j][i] = uoldps[j][i] + *rds * udotps[j][i];
    }
  }
  if (iid >= 2 && iap->mynode == 0) {
    fprintf(fp9,"%4li%6li NOTE:Retrying step\n",ibr,ntop);	

  }
  goto L1;

/* Minimum stepsize reached. */

 L12:
  if (iap->mynode == 0) {
    fprintf(fp9,"%4li%6li, NOTE:No convergence using minimum step size\n",ibr,ntop);	

  }
 L13:
  for (i = 0; i < nfpr; ++i) {
    rlcur[i] = rlold[i];
    par[icp[i]] = rlcur[i];
  }
  for (j = 0; j < ntst + 1; ++j) {
    for (i = 0; i < nrow; ++i) {
      ups[j][i] = uoldps[j][i];
    }
  }
  istop = 1;
  iap->istop = istop;


  return 0;
 } /* stepbv_ */

/* The last two arguments of this function affect how the null vectors of the Jacobian are computed.
   If "useDefaultRHS" is 1 then the normals right hand side is used (i.e. 1.0 is each of the appropriate positions).
   If "useDefaultRHS" is 0 then the right hand side in RHSVector is used. 
   The idea is that we add additional pseudo-arclength constraints for the manifold calculations,
   and this allows us to compute a basis of the tangent space. 
*/
int solvbv(long *ifst, MFAUTO_IAP iap, MFAUTO_RAP rap, double *par, long *icp, MFAUTO_F funi, MFAUTO_BC bcni, MFAUTO_IC icni, double *rds, long *nllv, double *rlcur, double *rlold, double *rldot, long *ndxloc, double **ups, double **dups, double **uoldps, double **udotps, double **upoldp, double *dtm, double **fa, double *fc, double **p0, double **p1, double *thl, double *thu,long useDefaultRHS,double *RHSVector)
 {
  
  
  /* Local variables */
  
  long ndim;
  long ipar;
  long ncol, nclm, nfpr, nint, nrow, ntst, ntst0;
  long nalc;
  
  double **ff, **ft;
  
  long nbc, iid, iam;
  double det;
  long ips, nrc;
  
  long kwt;
  
  static main_auto_storage_type main_auto_storage={NULL,NULL,NULL,NULL,
						   NULL,NULL,NULL,NULL,
						   NULL,NULL,NULL,NULL,
						   NULL,NULL,NULL,NULL,
						   NULL,NULL,NULL};
  
  /*     N AX is the local N TSTX, which is smaller than the global N TSTX. */
  /*     NODES is the total number of nodes. */
  
  
  /* Sets up and solves the linear equations for one Newton/Chord iteration 
   */

  
  
  /* Most of the required memory is allocated below */
  /* This is an interesting section of code.  The main point
     is that setubv and conpar only get called when ifst
     is 1.  This is a optimization since you can solve
     the system using the previously factored jacobian.
     One thing to watch out for is that two seperate calls
     of solvbv_ talk to each other through these arrays,
     so it is only safe to get rid of them when ifst is
     1 (since their entries will then be recreated in conpar
     and setubv).
  */

  ff = dmatrix(iap->ndim * iap->ncol, iap->ntst + 1);
  ft = dmatrix(iap->ndim * iap->ncol, iap->ntst + 1);
    
  if (*ifst==1){
    /* The formulas used for the allocation are somewhat complex, but they
       are based on following macros (the space after the first letter is 
       for the scripts which detect these things automatically, the original
       name does not have the space:
       
       M 1AAR =  (((iap->ndim * iap->ncol ) + iap->ndim ) )      
       M 2AA  =	((iap->ndim * iap->ncol ) )                     
       N AX   =	(iap->ntst /NODES+1)                            
       M 1BB  =	(NPARX)                                         
       M 2BB  =	((iap->ndim * iap->ncol ) )                     
       M 1CC  =	((((iap->ndim * iap->ncol ) + iap->ndim ) ) )   
       M 2CC  =	(((iap->ndim +3) +NINTX+1) )                    
       M 1DD  =	(((iap->ndim +3) +NINTX+1) )                    
       M 2DD  =	(NPARX)                                         
       N RCX  =	((iap->ndim +3) +NINTX+1)                       
       N CLMX =	((iap->ndim * iap->ncol ) + iap->ndim )         
       N ROWX =	(iap->ndim * iap->ncol )                        
    */
    
    /* Free floating point arrays */
    free_dmatrix_3d(main_auto_storage.a);
    free_dmatrix_3d(main_auto_storage.b);
    free_dmatrix_3d(main_auto_storage.c);
    free_dmatrix(main_auto_storage.d);
    free_dmatrix_3d(main_auto_storage.a1);
    free_dmatrix_3d(main_auto_storage.a2);
    free_dmatrix_3d(main_auto_storage.s1);
    free_dmatrix_3d(main_auto_storage.s2);
    free_dmatrix_3d(main_auto_storage.bb);
    free_dmatrix_3d(main_auto_storage.cc);
    free_dmatrix(main_auto_storage.faa);
    free_dmatrix_3d(main_auto_storage.ca1);
    
    /* Free long arrays */
    free(main_auto_storage.icf);
    free(main_auto_storage.irf);
    free(main_auto_storage.ipr);
    free(main_auto_storage.icf11);
    free(main_auto_storage.icf1);
    free(main_auto_storage.icf2);
    free(main_auto_storage.np);
    
    /*(M 1AAR*M 2AA*N AX) */
    main_auto_storage.a=dmatrix_3d(iap->ntst + 1,
                                   iap->ncol * iap->ndim,
                                   (iap->ncol + 1) * iap->ndim); 
    /*(M 1BB*M 2BB*N AX)*/ 
    main_auto_storage.b=dmatrix_3d(iap->ntst + 1, iap->ndim * iap->ncol, NPARX);
    /*(M 1CC*M 2CC*N AX)*/ 
    main_auto_storage.c=dmatrix_3d(iap->ntst + 1,
                                   iap->nbc + iap->nint + iap->nalc,
                                   (iap->ncol + 1) * iap->ndim);
    /*(M 1DD*M 2DD)*/ 
    main_auto_storage.d=dmatrix(iap->nbc + iap->nint + iap->nalc, NPARX);
    /*(iap->ndim * iap->ndim *N AX)*/ 
    main_auto_storage.a1=dmatrix_3d(iap->ntst + 1, iap->ndim, iap->ndim);
    /*(iap->ndim * iap->ndim *N AX)*/ 
    main_auto_storage.a2=dmatrix_3d(iap->ntst + 1, iap->ndim, iap->ndim);
    /*(iap->ndim * iap->ndim *N AX)*/ 
    main_auto_storage.s1=dmatrix_3d(iap->ntst + 1, iap->ndim, iap->ndim);
    /*(iap->ndim * iap->ndim *N AX)*/ 
    main_auto_storage.s2=dmatrix_3d(iap->ntst + 1, iap->ndim, iap->ndim);
    /*(iap->ndim *N PARX*N AX)*/ 
    main_auto_storage.bb=dmatrix_3d(iap->ntst + 1, iap->ndim, NPARX);
    /*(N RCX* iap->ndim *N AX+1)*/ 
    main_auto_storage.cc=dmatrix_3d(iap->ntst + 1, iap->nbc + iap->nint + iap->nalc, iap->ndim);
    /*(iap->ndim *N AX)*/ 
    main_auto_storage.faa=dmatrix(iap->ndim, iap->ntst +1);

    /*(iap->ndim * iap->ndim *K REDO)*/ 
    main_auto_storage.ca1=dmatrix_3d(KREDO, iap->ndim, iap->ndim);
    
    /*(N CLMX*N AX)*/ 
    main_auto_storage.icf=(long *)malloc(sizeof(long)*(((iap->ndim * iap->ncol ) + iap->ndim ) * (iap->ntst +1) ) );
    /*(N ROWX*N AX)*/ 
    main_auto_storage.irf=(long *)malloc(sizeof(long)*((iap->ndim * iap->ncol ) * (iap->ntst +1) ) );
    /*(iap->ndim *N AX)*/ 
    main_auto_storage.ipr=(long *)malloc(sizeof(long)*(iap->ndim * (iap->ntst +1) ) );
    /*(iap->ndim *K REDO)*/ 
    main_auto_storage.icf11=(long *)malloc(sizeof(long)*(iap->ndim *KREDO) );
    /*(iap->ndim *N AX)*/ 
    main_auto_storage.icf1=(long *)malloc(sizeof(long)*(iap->ndim * (iap->ntst +1) ));
    /*(iap->ndim *N AX)*/ 
    main_auto_storage.icf2=(long *)malloc(sizeof(long)*(iap->ndim * (iap->ntst +1) )); 
    /*(2)*/ 
    main_auto_storage.np=(long *)malloc(sizeof(long)*(2) );
  }
  
  
  iam = iap->mynode;
  kwt = iap->numnodes;
  if (kwt > 1) {
    ipar = TRUE;
  } else {
    ipar = FALSE;
  }
  
  ndim = iap->ndim;
  ips = iap->ips;
  ntst = iap->ntst;
  ncol = iap->ncol;
  nbc = iap->nbc;
  nint = iap->nint;
  nalc = iap->nalc;
  iid = iap->iid;
  nfpr = iap->nfpr;
  nrc = nbc + nint + nalc;
  nrow = ndim * ncol;
  nclm = nrow + ndim;
  
  if (kwt > ntst) {
    printf("NTST is less than the number of nodes\n");
    abort();
  } else {
    partition(&ntst, &kwt, main_auto_storage.np);
  }
  
  /*     NTST0 is the global one, NTST is the local one. */
  /*     The value of NTST may be different in different nodes. */
  ntst0 = ntst;
  ntst = main_auto_storage.np[iam];
  
  if (*ifst == 1) {
#ifdef USAGE
    struct rusage *setubv_usage;
    usage_start(&setubv_usage);
#endif
    setubv(ndim, ips, ntst, ncol, nbc, nint, nfpr, nrc, nrow, nclm,
	   funi, bcni, icni, *ndxloc, iap, rap, par, icp, 
	   *rds, main_auto_storage.a, main_auto_storage.b, main_auto_storage.c, main_auto_storage.d, ft, fc, rlcur, 
	   rlold, rldot, ups, uoldps, udotps, upoldp, dups, 
	   dtm, thl, thu);
#ifdef USAGE
    usage_end(setubv_usage,"all of setubv");
#endif
  } else {
    setrhs(&ndim, &ips, &ntst, &ntst0, main_auto_storage.np, &ncol, &nbc, &nint, &nalc, &
	   nfpr, &nrc, &nrow, &nclm, &iam, &kwt, &ipar, funi, bcni, icni,
	   ndxloc, iap, rap, par, icp, rds, ft, fc, rlcur, 
	   rlold, rldot, ups, uoldps, udotps, upoldp, dups, dtm, thl, 
	   thu, p0, p1);
  }
  /*     The matrix D and FC are set to zero for all nodes except the first.
   */
  if (iam > 0) {
    setfcdd(ifst, main_auto_storage.d, fc, &nfpr, &nrc);
  }

#ifdef MATLAB_OUTPUT
  print_jacobian(*iap,main_auto_storage);
  {
    static num_calls = 0;
    char filename[80];
    sprintf(filename,"before%03d",num_calls);
    num_calls++;
    print_fa_fc(*iap,ft,fc,filename);
  }
#endif

  brbd(main_auto_storage.a, main_auto_storage.b, main_auto_storage.c, main_auto_storage.d, ft, fc, p0, p1, 
       ifst, &iid, nllv, &det, ndim, ntst, nbc, nrow, nclm, nfpr,
       nrc, &iam, &kwt, &ipar, main_auto_storage.a1, main_auto_storage.a2, main_auto_storage.bb, 
       main_auto_storage.cc, main_auto_storage.faa, main_auto_storage.ca1, main_auto_storage.s1, main_auto_storage.s2, 
       main_auto_storage.icf11, main_auto_storage.ipr, main_auto_storage.icf1, main_auto_storage.icf2, 
       main_auto_storage.irf, main_auto_storage.icf,useDefaultRHS,RHSVector);

  /*
    This is some stuff from the parallel version that isn't needed anymore 
    ----------------------------------------------------------------------
    lenft = ntst * nrow << 3;
    lenff = ntst0 * nrow << 3;
    jtmp1 = M 2AA;   I added spaces so these don't get flagged as header file macro dependancies
    jtmp2 = M 3AA;   I added spaces so these don't get flagged as header file macro dependancies
    lenff2 = jtmp1 * (jtmp2 + 1) << 3;
  */
  if (ipar) {
    /*        Global concatenation of the solution from each node. */
    long tmp;
    gcol();
    tmp = iap->ntst + 1;
    faft(ff, fa, &ntst0, &nrow, ndxloc);
  } else {
    long tmp;
    tmp = iap->ntst + 1;
    faft(ft, fa, &ntst0, &nrow, ndxloc);
  }
#ifdef MATLAB_OUTPUT
  {
    static num_calls = 0;
    char filename[80];
    sprintf(filename,"after%03d",num_calls);
    num_calls++;
    print_fa_fc(*iap,ft,fc,filename);
  }
#endif  

  rap->det = det;
  free_dmatrix(ff);
  free_dmatrix(ft);
  return 0;
 } /* solvbv_ */

double fnspbv(MFAUTO_IAP iap, MFAUTO_RAP rap, double *par, long *icp, long *chng, MFAUTO_F funi, MFAUTO_BC bcni, MFAUTO_IC icni, double **p0, double **p1, doublecomplex *ev, double *rlcur, double *rlold, double *rldot, long *ndxloc, double **ups, double **uoldps, double **udotps, double **upoldp, double **fa, double *fc, double **dups, double *tm, double *dtm, double *thl, double *thu, long *iuz, double *vuz)
 {
  /* System generated locals */
  double ret_val;

  /* Local variables */
  double amin;
  long ndim, nins, ntop, ntot;
  doublecomplex ztmp;
  long nins1=0;
  double d;
  long i, j;

  long iid,ibr,loc=0,isp,isw;
  double azm1;

  /* This function returns a quantity that changes sign when a complex */
  /* pair of eigenvalues of the linearized Poincare map moves in or out */
  /* of the unit circle or when a real eigenvalues passes through -1. */
  /* Local */



  /* Parameter adjustments */
  /*--ev;*/
  /*--p1;*/
  /*--p0;*/
    
    

  
  ndim = iap->ndim;
  isp = iap->isp;
  isw = iap->isw;
  iid = iap->iid;
  ibr = iap->ibr;
  ntot = iap->ntot;
  ntop = (ntot + 1) % 10000;

  /* Initialize. */

  ret_val = 0.;
  rap->spbf = ret_val;
  d = 0.;
  *chng = FALSE;

/*  Compute the Floquet multipliers */

  flowkm(ndim, p0, p1, iid, ev);
  /* Find the multiplier closest to z=1. */

  amin = RLARGE;
  for (j = 0; j < ndim; ++j) {
    doublecomplex tmp;
    tmp.r = ev[j].r - 1., tmp.i = ev[j].i;
    azm1 = z_abs(&tmp);
    if (azm1 <= amin) {
      amin = azm1;
      loc = j;
    }
  }
  if (loc != 0) {
    ztmp.r = ev[loc].r, ztmp.i = ev[loc].i;
    ev[loc].r = ev[0].r, ev[loc].i = ev[0].i;
    ev[0].r = ztmp.r, ev[0].i = ztmp.i;
  }

  /* Order the remaining Floquet multipliers by distance from |z|=1. */

  if (ndim >= 3) {
    for (i = 1; i < ndim - 1; ++i) {
      amin = RLARGE;
      for (j = i; j < ndim; ++j) {
	azm1 = z_abs(&ev[j]) - 1.;
	azm1 = fabs(azm1);
	if (azm1 <= amin) {
	  amin = azm1;
	  loc = j;
	}
      }
      if (loc != i) {
	ztmp.r = ev[loc].r, ztmp.i = ev[loc].i;
	ev[loc].r = ev[i].r, ev[loc].i = ev[i].i;
	ev[i].r = ztmp.r, ev[i].i = ztmp.i;
      }
    }
  }

  /* Print error message if the Floquet multiplier at z=1 is inaccurate. */
  /* (ISP is set to negative and detection of bifurations is discontinued) 
   */

  {
    doublecomplex tmp;
    tmp.r = ev[0].r - 1., tmp.i = ev[0].i;
    amin = z_abs(&tmp);
  }
  if (amin > (double).05 && isp == 2) {
    if (iap->mynode == 0) {
      if (iid >= 2) {
	fprintf(fp9,"%4li%6li NOTE:Multiplier inaccurate\n",abs(ibr),ntop);	

      }
      for (i = 0; i < ndim; ++i) {
	fprintf(fp9,"%4li%6li        Multiplier %3li %14.6E %14.6E\n",abs(ibr),ntop,i,ev[i].r,ev[i].i);	
      }
    }
    nins = 0;
    iap->nins = nins;
    if (iap->mynode == 0) {
      fprintf(fp9,"%4li%6li        Multipliers:   Stable: %3li\n",abs(ibr),ntop,nins);	
    }
    isp = -isp;
    iap->isp = isp;
    return ret_val;
  }

  /* Restart automatic detection if the Floquet multiplier at z=1 is */
  /* sufficiently accurate again. */

  if (isp < 0) {
    if (amin < (double).01) {
      if (iap->mynode == 0) {
	fprintf(fp9,"%4li%6li NOTE:Multiplier accurate again\n",abs(ibr),ntop);	
      }
      isp = -isp;
      iap->isp = isp;
    } else {
      if (iap->mynode == 0) {
	for (i = 0; i < ndim; ++i) {
	  fprintf(fp9,"%4li%6li        Multiplier %3li %14.6E %14.6E\n",abs(ibr),ntop,i,ev[i].r,ev[i].i);	
	}
      }
      return ret_val;
    }
  }

  /* Count the number of Floquet multipliers inside the unit circle. */

  if (ndim == 1) {
    d = 0.;
    ret_val = d;
    rap->spbf = ret_val;
  } else {
    nins1 = 1;
    for (i = 1; i < ndim; ++i) {
      if (z_abs(&ev[i]) <= 1.) {
	++nins1;
      }
    }
    if (isp == 2) {
      if (d_imag(&ev[1]) == 0. && ev[1].r > 0.) {
	/*            *Ignore if second multiplier is real positive */
	d = 0.;
      } else {
	d = z_abs(&ev[1]) - 1.;
      }
      if (isw == 2) {
	ret_val = 0.;
      } else {
	ret_val = d;
      }
      rap->spbf = ret_val;
      nins = iap->nins;
      if (nins1 != nins) {
	*chng = TRUE;
      }
    }
  }

  nins = nins1;
  iap->nins = nins;
  if (iid >= 2 && (isp == 1 || isp == 2)) {
    if (iap->mynode == 0) {
      fprintf(fp9,"%4li%6li        SPB  Function %14.5E\n",abs(ibr),ntop,d);	

    }
  }

  /* Print the Floquet multipliers. */

  nins = iap->nins;
  if (iap->mynode == 0) {
    fprintf(fp9,"%4li%6li        Multipliers:     Stable %4li\n",abs(ibr),ntop,nins);	

    for (i = 0; i < ndim; ++i) {
      fprintf(fp9,"%4li%6li        Multiplier %3li %14.6E %14.5E\n",abs(ibr),ntop,i,ev[i].r,ev[i].i);	

    }
  }


  return ret_val;
 } /* fnspbv_ */

double fnuzbv(MFAUTO_IAP iap, MFAUTO_RAP rap, double *par, long *icp, long *chng, MFAUTO_F funi, MFAUTO_BC bcni, MFAUTO_IC icni, double **p0, double **p1, doublecomplex *ev, double *rlcur, double *rlold, double *rldot, long *ndxloc, double **ups, double **uoldps, double **udotps, double **upoldp, double **fa, double *fc, double **dups, double *tm, double *dtm, double *thl, double *thu, long *iuz, double *vuz)
 {
  /* System generated locals */
  double ret_val;

    

    /* Local variables */
  long ntop, ntot, iuzr, iid, ibr;

  
  iid = iap->iid;
  iuzr = iap->iuzr;
  ibr = iap->ibr;
  ntot = iap->ntot;
  ntop = (ntot + 1) % 10000;

  ret_val = par[abs(iuz[iuzr])] - vuz[iuzr];
  *chng = TRUE;

  if (iid >= 3) {
    fprintf(fp9,"%4li%6li        User Func. %3li %14.5E\n",abs(ibr),ntop,iuzr,ret_val);	
  }

  return ret_val;
 } /* fnuzbv_ */

double rinpr(MFAUTO_IAP iap, long *ndim1, long *ndxloc, double **ups, double **vps, double *dtm, double *thu)
 {
  /* System generated locals */
  double ret_val;

  /* Local variables */
  long ndim, ncol;

  long ntst, i, j, k;
  double s;
  long k1;
  double sj, *wi;
  long jp1;

  wi = (double *)malloc(sizeof(double)*(iap->ncol+1));

  /* Computes the L2 inner product of UPS and VPS. */
  /* (Using the first NDIM1 components only.) */

  /* Local */

  /* Parameter adjustments */
  /*--dtm;*/
  /*--thu;*/
    
  ndim = iap->ndim;
  ntst = iap->ntst;
  ncol = iap->ncol;

  /* Weights for the integration formulae : */
  wint(ncol + 1, wi);

  s = 0.;
  for (j = 0; j < ntst; ++j) {
    jp1 = j + 1;
    sj = 0.;
    for (i = 0; i < *ndim1; ++i) {
      for (k = 0; k < ncol; ++k) {
	k1 = k * ndim + i;
	sj += wi[k] * thu[i] * ups[j][k1] * vps[j][k1];
      }
      sj += wi[ncol] * thu[i] * ups[jp1][i] * vps[jp1][i];
    }
    s += dtm[j] * sj;
  }

  ret_val = s;
  free(wi);

  return ret_val;
 } /* rinpr_ */

/*
 * dmatrix()
 *
 * memory allocation for a matrix of doubles
 * returns m so that m[0][0],...,m[n_rows-1][n_cols-1] are the valid locations
 *
 * last modified:  8/21/91  paw
 */

double **dmatrix(long n_rows, long n_cols)
{
  long i, total_pts;
  double **m;

  if (n_rows<=0 || n_cols<=0) return(NULL);
  total_pts = n_rows*n_cols;
  
  if ( (m = (double **) malloc( (unsigned) (n_rows * sizeof(double *)))) == NULL)
    {
      printf("dmatrix: memory allocation failure! %ld bytes\n",n_rows * sizeof(double *));
    }
  else
    {
      if ( (m[0] = (double *) malloc( (unsigned) (total_pts * sizeof(double)))) == NULL)
	{
	  free(m);
	  m = NULL;
	  printf("dmatrix: memory allocation failure! %ld bytes\n",total_pts * sizeof(double));
	}
      else
	  for (i=1; i<n_rows; i++) m[i] = m[i-1] + n_cols;
    }
  return(m);
}

double ***dmatrix_3d(long n_levels, long n_rows, long n_cols)
{
  long i, total_ptrs;
  double ***m;

  if (n_levels<=0 || n_rows<=0 || n_cols<=0) return(NULL);
  total_ptrs = n_levels*n_rows;
  
  if ( (m = (double ***) malloc( (unsigned) (n_levels * sizeof(double **)))) == NULL)
    {
      printf("dmatrix_3d: memory allocation failure! %ld bytes\n",n_levels * sizeof(double **));
    }
  else
    {
      if ( (m[0] = dmatrix( (unsigned) total_ptrs, (unsigned) n_cols)) == NULL)
	{
	  free(m);
	  m = NULL;
	  printf("dmatrix_3d: memory allocation failure!\n");
	}
      else
	  for (i=1; i<n_levels; i++) m[i] = m[i-1] + n_rows;
    }
  return(m);
}

/*
 * free_dmatrix()
 *
 * frees memory allocated by dmatrix()
 *
 * last modified: 8/21/91  paw
 */
void free_dmatrix(double **m)
{
  if (m==NULL) return;
  free( (char *) (m[0]) );
  free( (char *) (m) );
}

void free_dmatrix_3d(double ***m)
{
  if (m==NULL) return;
  free_dmatrix(m[0]);
  free( (char *) (m) );
}

int wint(const long n, double *wi)
 {
  double c;

  /* Generates the weights for the integration formula based on polynomial */
  /* interpolation at N equally spaced points in [0,1]. */
  
  switch ((int)(n - 2)) {
  case 1:  goto L3;
  case 2:  goto L4;
  case 3:  goto L5;
  case 4:  goto L6;
  case 5:  goto L7;
  case 6:  goto L8;
  }

 L3:
  c = .16666666666666666;
  wi[0] = c;
  wi[1] = c * 4.;
  wi[2] = c;
  return 0;

 L4:
  c = .125;
  wi[0] = c;
  wi[1] = c * 3.;
  wi[2] = wi[1];
  wi[3] = c;
  return 0;

 L5:
  c = .011111111111111112;
  wi[0] = c * 7.;
  wi[1] = c * 32.;
  wi[2] = c * 12.;
  wi[3] = wi[1];
  wi[4] = wi[0];
  return 0;

 L6:
  wi[0] = .065972222222222224;
  wi[1] = .26041666666666669;
  wi[2] = .1736111111111111;
  wi[3] = wi[2];
  wi[4] = wi[1];
  wi[5] = wi[0];
  return 0;

 L7:
  wi[0] = .04880952380952381;
  wi[1] = .25714285714285712;
  wi[2] = .03214285714285714;
  wi[3] = .32380952380952382;
  wi[4] = wi[2];
  wi[5] = wi[1];
  wi[6] = wi[0];
  return 0;

 L8:
  wi[0] = .043460648148148151;
  wi[1] = .20700231481481482;
  wi[2] = .076562500000000006;
  wi[3] = .17297453703703702;
  wi[4] = wi[3];
  wi[5] = wi[2];
  wi[6] = wi[1];
  wi[7] = wi[0];

  return 0;
 }

double d_imag(doublecomplex *z)
 {
  return z->i;
 }

double z_abs(doublecomplex *z)
 {
  return sqrt(z->r*z->r+z->i*z->i);
 }

int setrhs(long *ndim, long *ips, long *na, long *ntst, long *np, long *ncol, long *nbc, long *nint, long *nalc, long *ncb, long *nrc, long *nra, long *nca, long *iam, long *kwt, long *ipar, MFAUTO_F funi, MFAUTO_BC bcni, MFAUTO_IC icni, long *ndxloc, MFAUTO_IAP iap, MFAUTO_RAP rap, double *par, long *icp, double *rds, double **fa, double *fc, double *rlcur, double *rlold, double *rldot, double **ups, double **uoldps, double **udotps, double **upoldp, double **dups, double *dtm, double *thl, double *thu, double **p0, double **p1)
{
  long i, j, k, l, m;
  long mpart, i1, j1, k1, l1;

  double rlsum;
  long ib, ic, jj;
  long ic1;

  long jp1;
  long ncp1;
  double dt,ddt;

  double *dicd = NULL, *ficd = NULL, *dfdp, *dfdu, *uold;
  double *f;
  double *u, **wploc;
  double *wi, **wp, **wt;
  double *dbc, *fbc, *uic, *uio, *prm, *uid, *uip, *ubc0, *ubc1;
  long udotps_off;

  if (iap->nint > 0)
  {
    dicd = (double *)malloc(sizeof(double)*(iap->nint)*(iap->ndim + NPARX));
    ficd = (double *)malloc(sizeof(double)*(iap->nint));
  }  
  dfdp = (double *)malloc(sizeof(double)*(iap->ndim)*NPARX);
  dfdu = (double *)malloc(sizeof(double)*(iap->ndim)*(iap->ndim));
  uold = (double *)malloc(sizeof(double)*(iap->ndim));
  f    = (double *)malloc(sizeof(double)*(iap->ndim));
  u    = (double *)malloc(sizeof(double)*(iap->ndim));
  wploc= dmatrix(iap->ncol+1, iap->ncol);
  wi   = (double *)malloc(sizeof(double)*(iap->ncol+1) );
  wp   = dmatrix(iap->ncol+1, iap->ncol);
  wt   = dmatrix(iap->ncol+1, iap->ncol);
  dbc  = (double *)malloc(sizeof(double)*(iap->nbc)*(2*iap->ndim + NPARX));
  fbc  = (double *)malloc(sizeof(double)*(iap->nbc));
  uic  = (double *)malloc(sizeof(double)*(iap->ndim));
  uio  = (double *)malloc(sizeof(double)*(iap->ndim));
  prm  = (double *)malloc(sizeof(double)*NPARX);
  uid  = (double *)malloc(sizeof(double)*(iap->ndim));
  uip  = (double *)malloc(sizeof(double)*(iap->ndim));
  ubc0 = (double *)malloc(sizeof(double)*(iap->ndim));
  ubc1 = (double *)malloc(sizeof(double)*(iap->ndim));


  /* Parameter adjustments */
  /*--np;*/
  /*--par;*/
  /*--icp;*/
  /*--fc;*/
  /*--rlcur;*/
  /*--rlold;*/
  /*--rldot;*/
  /*--dtm;*/
  /*--thl;*/
  /*--thu;*/
    
  *iam = iap->mynode;
  *kwt = iap->numnodes;
  if (*kwt > 1) {
    *ipar = TRUE;
  } else {
    *ipar = FALSE;
  }

  wint(*ncol + 1, wi);
  genwts(*ncol, iap->ncol + 1, wt, wp);
  /* Initialize to zero. */
  for (i = 0; i < *nrc; ++i) {
    fc[i] = 0.;
  }

  /* Set constants. */
  ncp1 = *ncol + 1;
  for (i = 0; i < *ncb; ++i) {
    par[icp[i]] = rlcur[i];
  }

  /* Generate FA : */

/*      Partition the mesh intervals. */
  mpart = mypart(iam, np);

  for (jj = 0; jj < *na; ++jj) {
    j = jj + mpart;
    jp1 = j + 1;
    dt = dtm[j];
    ddt = 1. / dt;
    for (ic = 0; ic < *ncol; ++ic) {
      for (ib = 0; ib < ncp1; ++ib) {
	wploc[ib][ic] = ddt * wp[ib][ic];
      }
    }
    for (ic = 0; ic < *ncol; ++ic) {
      for (k = 0; k < *ndim; ++k) {
	u[k] = wt[*ncol][ic] * ups[jp1][k];
	uold[k] = wt[*ncol][ic] * uoldps[jp1][k];
	for (l = 0; l < *ncol; ++l) {
	  l1 = l * *ndim + k;
	  u[k] += wt[l][ic] * ups[j][l1];
	  uold[k] += wt[l][ic] * uoldps[j][l1];
	}
      }
      /*     ** Time evolution computations (parabolic systems) */
      if (*ips == 14 || *ips == 16) {
	rap->tivp = rlold[0];
      }
      for (i = 0; i < NPARX; ++i) {
	prm[i] = par[i];
      }
      (*funi)(iap, rap, *ndim, u, uold, icp, prm, 0, f, dfdu, dfdp);
      ic1 = ic * *ndim;
      for (i = 0; i < *ndim; ++i) {
	fa[ic1 + i][jj] = f[i] - wploc[*ncol][ic] * ups[jp1][i];
	for (k = 0; k < *ncol; ++k) {
	  k1 = k * *ndim + i;
	  fa[ic1 + i][jj] -= wploc[k][ic] * ups[j][k1];
	}
      }
      /* L1: */
    }
    /* L2: */
  }

  /*     Generate FC : */

/*     Boundary conditions : */

  if (*nbc > 0) {
    for (i = 0; i < *ndim; ++i) {
      ubc0[i] = ups[0][i];
      ubc1[i] = ups[*ntst][i];
    }
    (*bcni)(iap, rap, *ndim, par, icp, *nbc, ubc0, ubc1, 
	    fbc, 2, dbc);
    for (i = 0; i < *nbc; ++i) {
      fc[i] = -fbc[i];
    }
    /*       Save difference : */
    for (j = 0; j < *ntst + 1; ++j) {
      for (i = 0; i < *nra; ++i) {
	dups[j][i] = ups[j][i] - uoldps[j][i];
      }
    }
  }

  /*     Integral constraints : */
  if (*nint > 0) {
    for (jj = 0; jj < *na; ++jj) {
      j = jj + mpart;
      jp1 = j + 1;
      for (k = 0; k < ncp1; ++k) {
	for (i = 0; i < *ndim; ++i) {
	  i1 = k * *ndim + i;
	  j1 = j;
	  if (k + 1 == ncp1) {
	    i1 = i;
	  }
	  if (k + 1 == ncp1) {
	    j1 = jp1;
	  }
	  uic[i] = ups[j1][i1];
	  uio[i] = uoldps[j1][i1];
	  uid[i] = udotps[j1][i1];
	  uip[i] = upoldp[j1][i1];
	}
	(*icni)(iap, rap, *ndim, par, icp, *nint, uic, 
		uio, uid, uip, ficd, 0, dicd);
	for (m = 0; m < *nint; ++m) {
	  fc[*nbc + m] -= dtm[j] * wi[k] * ficd[m];
	}
      }
    }
  }

  /*     Pseudo-arclength equation : */
  udotps_off=(iap->ntst + 1)*(iap->ndim * iap->ncol);
  for(m=0;m<*nalc;m++){
    rlsum = 0.;
    for (i = 0; i < *ncb; ++i) {
      rlsum += thl[icp[i]] * (rlcur[i] - rlold[i]) * rldot[i+m*NPARX];
    }

    fc[*nrc-*nalc+m] = rds[m] - rinpr(iap, ndim, ndxloc, udotps+m*(iap->ntst + 1), 
			         dups, dtm, thu) - rlsum;
   }

  free(dicd );
  free(ficd );
  free(dfdp );
  free(dfdu );
  free(uold );
  free(f    );
  free(u    );
  free_dmatrix(wploc);
  free(wi   );
  free_dmatrix(wp);
  free_dmatrix(wt);
  free(dbc  );
  free(fbc  );
  free(uic  );
  free(uio  );
  free(prm  );
  free(uid  );
  free(uip  );
  free(ubc0 );
  free(ubc1 );

  return 0;
} /* setrhs_ */


int brbd(double ***a, double ***b, double ***c, double **d, double **fa, double *fc, double **p0, double **p1, long *ifst, long *idb, long *nllv, double *det, long nov, long na, long nbc, long nra, long nca, long ncb, long nrc, long *iam, long *kwt, long *par, double ***a1, double ***a2, double ***bb, double ***cc, double **faa, double ***ca1, double ***s1, double ***s2, long *icf11, long *ipr, long *icf1, long *icf2, long *irf, long *icf,long useDefaultRHS,double* RHSVector)
{
  double **e;
  double *fcc;
  double *sol1,*sol2,*sol3;

  e = dmatrix(nov + nrc, nov + nrc);
  fcc = (double *)malloc(sizeof(double)*((nov + nrc) + (2*nov*nov)+1));

  sol1 = (double *)malloc(sizeof(double)*nov*(na + 1));
  sol2 = (double *)malloc(sizeof(double)*nov*(na + 1));
  sol3 = (double *)malloc(sizeof(double)*nov*(na + 1));
  
  if (*idb > 4 && *iam == 0) {
    print1(nov, na, nra, nca, ncb, nrc, a, b, c, d, fa, fc);
  }
  if (*ifst == 1) {
#ifdef USAGE
    struct rusage *conpar_usage;
    usage_start(&conpar_usage);
#endif
    conpar(nov, na, nra, nca, a, ncb, b, nbc, nrc, c, d, irf, icf);
#ifdef USAGE
    usage_end(conpar_usage,"all of conpar");
#endif
    copycp(na, nov, nra, nca, a, ncb, b, nrc, c, 
	   a1, a2, bb, cc, irf);
  }

  if (*nllv == 0) {
    conrhs(nov, na, nra, nca, a, nbc, nrc, c, fa, fc, 
	   irf, icf, iam);
    cpyrhs(na, nov, nra, faa, fa, irf);
  } else {
#ifdef RANDY_FIX
    /* The faa array needs to be intialized as well, since it 
       it used in the dimrge_ rountine to print stuff out,
       and in the bcksub_ routine for actual computations! */
    {
      long k;
      for(k=0;k<(nov * (*na + 1));k++)
	faa[k]=0.0;
    }
    setzero(fa, fc, na, nra, nrc);
#else
    setzero(fa, fc, na, nra, nrc);
    cpyrhs(na, nov, nra, faa, fa, irf);
#endif
  }

  if (*ifst == 1) {
#ifdef USAGE
    struct rusage *reduce_usage;
    usage_start(&reduce_usage);
#endif
    reduce(iam, kwt, par, a1, a2, bb, cc, d, na, 
	   nov, ncb, nrc, s1, s2, ca1, icf1, icf2, 
	   icf11, ipr, nbc);
#ifdef USAGE
    usage_end(reduce_usage,"all of reduce");
#endif
  }

  if (*nllv == 0) {
    redrhs(iam, kwt, par, a1, a2, cc, faa, fc, na, 
	   nov, ncb, nrc, ca1, icf1, icf2, icf11, ipr,nbc);
  }

  dimrge(iam, kwt, par, e, cc, d, fc, ifst, na, 
	 nrc, nov, ncb, idb, nllv, fcc, p0, p1, det, s1, a2,
	 faa, bb,useDefaultRHS,RHSVector);

  bcksub(iam, kwt, par, s1, s2, a2, bb, faa, fc, 
	 fcc, sol1, sol2, sol3, na, nov, ncb, icf2);

  infpar(iam, par, a, b, fa, sol1, sol2, fc, na, nov, nra, 
	 nca, ncb, irf, icf);

  free_dmatrix(e);
  free(fcc);
  free(sol1);
  free(sol2);
  free(sol3);
  return 0;
} /* brbd_ */
