Reverse Mode General Case (Checkpointing): Example and Test

reverse_checkpoint.cpp

@(@\newcommand{\W}[1]{ \; #1 \; } \newcommand{\R}[1]{ {\rm #1} } \newcommand{\B}[1]{ {\bf #1} } \newcommand{\D}[2]{ \frac{\partial #1}{\partial #2} } \newcommand{\DD}[3]{ \frac{\partial^2 #1}{\partial #2 \partial #3} } \newcommand{\Dpow}[2]{ \frac{\partial^{#1}}{\partial {#2}^{#1}} } \newcommand{\dpow}[2]{ \frac{ {\rm d}^{#1}}{{\rm d}\, {#2}^{#1}} }@)@Reverse Mode General Case (Checkpointing): Example and Test
See Also
checkpoint

Purpose
Break a large computation into pieces and only store values at the interface of the pieces (this is much easier to do using checkpoint ). In actual applications, there may be many functions, but for this example there are only two. The functions @(@ F : \B{R}^2 \rightarrow \B{R}^2 @)@ and @(@ G : \B{R}^2 \rightarrow \B{R}^2 @)@ defined by @[@ F(x) = \left( \begin{array}{c} x_0 x_1 \\ x_1 - x_0 \end{array} \right) \; , \; G(y) = \left( \begin{array}{c} y_0 - y_1 \\ y_1 y_0 \end{array} \right) @]@

Processing Steps
We apply reverse mode to compute the derivative of @(@ H : \B{R}^2 \rightarrow \B{R} @)@ is defined by @[@ \begin{array}{rcl} H(x) & = & G_0 [ F(x) ] + G_1 [ F(x) ] \\ & = & x_0 x_1 - ( x_1 - x_0 ) + x_0 x_1 ( x_1 - x_0 ) \\ & = & x_0 x_1 ( 1 - x_0 + x_1 ) - x_1 + x_0 \end{array} @]@ Given the zero and first order Taylor coefficients @(@ x^{(0)} @)@ and @(@ x^{(1)} @)@, we use @(@ X(t) @)@, @(@ Y(t) @)@ and @(@ Z(t) @)@ for the corresponding functions; i.e., @[@ \begin{array}{rcl} X(t) & = & x^{(0)} + x^{(1)} t \\ Y(t) & = & F[X(t)] = y^{(0)} + y^{(1)} t + O(t^2) \\ Z(t) & = & G \{ F [ X(t) ] \} = z^{(0)} + z^{(1)} t + O(t^2) \\ h^{(0)} & = & z^{(0)}_0 + z^{(0)}_1 \\ h^{(1)} & = & z^{(1)}_0 + z^{(1)}_1 \end{array} @]@ Here are the processing steps:

Use forward mode on @(@ F(x) @)@ to compute @(@ y^{(0)} @)@ and @(@ y^{(1)} @)@.
Free some, or all, of the memory corresponding to @(@ F(x) @)@.
Use forward mode on @(@ G(y) @)@ to compute @(@ z^{(0)} @)@ and @(@ z^{(1)} @)@
Use reverse mode on @(@ G(y) @)@ to compute the derivative of @(@ h^{(1)} @)@ with respect to @(@ y^{(0)} @)@ and @(@ y^{(1)} @)@.
Free all the memory corresponding to @(@ G(y) @)@.
Use reverse mode on @(@ F(x) @)@ to compute the derivative of @(@ h^{(1)} @)@ with respect to @(@ x^{(0)} @)@ and @(@ x^{(1)} @)@.

This uses the following relations: @[@ \begin{array}{rcl} \partial_{x(0)} h^{(1)} [ x^{(0)} , x^{(1)} ] & = & \partial_{y(0)} h^{(1)} [ y^{(0)} , y^{(1)} ] \partial_{x(0)} y^{(0)} [ x^{(0)} , x^{(1)} ] \\ & + & \partial_{y(1)} h^{(1)} [ y^{(0)} , y^{(1)} ] \partial_{x(0)} y^{(1)} [ x^{(0)} , x^{(1)} ] \\ \partial_{x(1)} h^{(1)} [ x^{(0)} , x^{(1)} ] & = & \partial_{y(0)} h^{(1)} [ y^{(0)} , y^{(1)} ] \partial_{x(1)} y^{(0)} [ x^{(0)} , x^{(1)} ] \\ & + & \partial_{y(1)} h^{(1)} [ y^{(0)} , y^{(1)} ] \partial_{x(1)} y^{(1)} [ x^{(0)} , x^{(1)} ] \end{array} @]@ where @(@ \partial_{x(0)} @)@ denotes the partial with respect to @(@ x^{(0)} @)@.



# include <cppad/cppad.hpp>

namespace {
     template <class Vector>
     Vector F(const Vector& x)
     {     Vector y(2);
          y[0] = x[0] * x[1];
          y[1] = x[1] - x[0];
          return y;
     }
     template <class Vector>
     Vector G(const Vector& y)
     {     Vector z(2);
          z[0] = y[0] - y[1];
          z[1] = y[1] * y[0];
          return z;
     }
}

namespace {
     bool reverse_any_case(bool free_all)
     {     bool ok = true;
     double eps = 10. * CppAD::numeric_limits<double>::epsilon();

          using CppAD::AD;
          using CppAD::NearEqual;
          CppAD::ADFun<double> f, g, empty;

          // specify the Taylor coefficients for X(t)
          size_t n    = 2;
          CPPAD_TESTVECTOR(double) x0(n), x1(n);
          x0[0] = 1.; x0[1] = 2.;
          x1[0] = 3.; x1[1] = 4.;

          // record the function F(x)
          CPPAD_TESTVECTOR(AD<double>) X(n), Y(n);
          size_t i;
          for(i = 0; i < n; i++)
               X[i] = x0[i];
          CppAD::Independent(X);
          Y = F(X);
          f.Dependent(X, Y);

          // a fucntion object with an almost empty operation sequence
          CppAD::Independent(X);
          empty.Dependent(X, X);

          // compute the Taylor coefficients for Y(t)
          CPPAD_TESTVECTOR(double) y0(n), y1(n);
          y0 = f.Forward(0, x0);
          y1 = f.Forward(1, x1);
          if( free_all )
               f = empty;
          else
          {     // free all the Taylor coefficients stored in f
               f.capacity_order(0);
          }

          // record the function G(x)
          CPPAD_TESTVECTOR(AD<double>) Z(n);
          CppAD::Independent(Y);
          Z = G(Y);
          g.Dependent(Y, Z);

          // compute the Taylor coefficients for Z(t)
          CPPAD_TESTVECTOR(double) z0(n), z1(n);
          z0 = g.Forward(0, y0);
          z1 = g.Forward(1, y1);

          // check zero order Taylor coefficient for h^0 = z_0^0 + z_1^0
          double check = x0[0] * x0[1] * (1. - x0[0] + x0[1]) - x0[1] + x0[0];
          double h0    = z0[0] + z0[1];
          ok          &= NearEqual(h0, check, eps, eps);

          // check first order Taylor coefficient h^1
          check     = x0[0] * x0[1] * (- x1[0] + x1[1]) - x1[1] + x1[0];
          check    += x1[0] * x0[1] * (1. - x0[0] + x0[1]);
          check    += x0[0] * x1[1] * (1. - x0[0] + x0[1]);
          double h1 = z1[0] + z1[1];
          ok       &= NearEqual(h1, check, eps, eps);

          // compute the derivative with respect to y^0 and y^0 of h^1
          size_t p = 2;
          CPPAD_TESTVECTOR(double) w(n*p), dw(n*p);
          w[0*p+0] = 0.; // coefficient for z_0^0
          w[0*p+1] = 1.; // coefficient for z_0^1
          w[1*p+0] = 0.; // coefficient for z_1^0
          w[1*p+1] = 1.; // coefficient for z_1^1
          dw       = g.Reverse(p, w);

          // We are done using g, so we can free its memory.
          g = empty;
          // We need to use f next.
          if( free_all )
          {     // we must again record the operation sequence for F(x)
               CppAD::Independent(X);
               Y = F(X);
               f.Dependent(X, Y);
          }
          // now recompute the Taylor coefficients corresponding to F(x)
          // (we already know the result; i.e., y0 and y1).
          f.Forward(0, x0);
          f.Forward(1, x1);

          // compute the derivative with respect to x^0 and x^0 of
          //     h^1 = z_0^1 + z_1^1
          CPPAD_TESTVECTOR(double) dv(n*p);
          dv   = f.Reverse(p, dw);

          // check partial of h^1 w.r.t x^0_0
          check  = x0[1] * (- x1[0] + x1[1]);
          check -= x1[0] * x0[1];
          check += x1[1] * (1. - x0[0] + x0[1]) - x0[0] * x1[1];
          ok    &= NearEqual(dv[0*p+0], check, eps, eps);

          // check partial of h^1 w.r.t x^0_1
          check  = x0[0] * (- x1[0] + x1[1]);
          check += x1[0] * (1. - x0[0] + x0[1]) + x1[0] * x0[1];
          check += x0[0] * x1[1];
          ok    &= NearEqual(dv[1*p+0], check, eps, eps);

          // check partial of h^1 w.r.t x^1_0
          check  = 1. - x0[0] * x0[1];
          check += x0[1] * (1. - x0[0] + x0[1]);
          ok    &= NearEqual(dv[0*p+1], check, eps, eps);

          // check partial of h^1 w.r.t x^1_1
          check  = x0[0] * x0[1] - 1.;
          check += x0[0] * (1. - x0[0] + x0[1]);
          ok    &= NearEqual(dv[1*p+1], check, eps, eps);

          return ok;
     }
}
bool reverse_any(void)
{     bool ok = true;
     ok     &= reverse_any_case(true);
     ok     &= reverse_any_case(false);
     return ok;
}

Input File: example/general/reverse_checkpoint.cpp