CppAD: A C++ Algorithmic Differentiation Package  20171217
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
eigen_mat_inv.hpp
Go to the documentation of this file.
1 // $Id$
2 # ifndef CPPAD_EXAMPLE_EIGEN_MAT_INV_HPP
3 # define CPPAD_EXAMPLE_EIGEN_MAT_INV_HPP
4 
5 /* --------------------------------------------------------------------------
6 CppAD: C++ Algorithmic Differentiation: Copyright (C) 2003-17 Bradley M. Bell
7 
8 CppAD is distributed under multiple licenses. This distribution is under
9 the terms of the
10  Eclipse Public License Version 1.0.
11 
12 A copy of this license is included in the COPYING file of this distribution.
13 Please visit http://www.coin-or.org/CppAD/ for information on other licenses.
14 -------------------------------------------------------------------------- */
15 
16 /*
17 $begin atomic_eigen_mat_inv.hpp$$
18 $spell
19  Eigen
20  Taylor
21 $$
22 
23 $section Atomic Eigen Matrix Inversion Class$$
24 
25 $head Purpose$$
26 Construct an atomic operation that computes the matrix inverse
27 $latex R = A^{-1}$$
28 for any positive integer $latex p$$
29 and invertible matrix $latex A \in \B{R}^{p \times p}$$.
30 
31 $head Matrix Dimensions$$
32 This example puts the matrix dimension $latex p$$
33 in the atomic function arguments,
34 instead of the $cref/constructor/atomic_ctor/$$,
35 so it can be different for different calls to the atomic function.
36 
37 $head Theory$$
38 
39 $subhead Forward$$
40 The zero order forward mode Taylor coefficient is give by
41 $latex \[
42  R_0 = A_0^{-1}
43 \]$$
44 For $latex k = 1 , \ldots$$,
45 the $th k$$ order Taylor coefficient of $latex A R$$ is given by
46 $latex \[
47  0 = \sum_{\ell=0}^k A_\ell R_{k-\ell}
48 \] $$
49 Solving for $latex R_k$$ in terms of the coefficients
50 for $latex A$$ and the lower order coefficients for $latex R$$ we have
51 $latex \[
52  R_k = - R_0 \left( \sum_{\ell=1}^k A_\ell R_{k-\ell} \right)
53 \] $$
54 Furthermore, once we have $latex R_k$$ we can compute the sum using
55 $latex \[
56  A_0 R_k = - \left( \sum_{\ell=1}^k A_\ell R_{k-\ell} \right)
57 \] $$
58 
59 
60 $subhead Product of Three Matrices$$
61 Suppose $latex \bar{E}$$ is the derivative of the
62 scalar value function $latex s(E)$$ with respect to $latex E$$; i.e.,
63 $latex \[
64  \bar{E}_{i,j} = \frac{ \partial s } { \partial E_{i,j} }
65 \] $$
66 Also suppose that $latex t$$ is a scalar valued argument and
67 $latex \[
68  E(t) = B(t) C(t) D(t)
69 \] $$
70 It follows that
71 $latex \[
72  E'(t) = B'(t) C(t) D(t) + B(t) C'(t) D(t) + B(t) C(t) D'(t)
73 \] $$
74 
75 $latex \[
76  (s \circ E)'(t)
77  =
78  \R{tr} [ \bar{E}^\R{T} E'(t) ]
79 \] $$
80 $latex \[
81  =
82  \R{tr} [ \bar{E}^\R{T} B'(t) C(t) D(t) ] +
83  \R{tr} [ \bar{E}^\R{T} B(t) C'(t) D(t) ] +
84  \R{tr} [ \bar{E}^\R{T} B(t) C(t) D'(t) ]
85 \] $$
86 $latex \[
87  =
88  \R{tr} [ B(t) D(t) \bar{E}^\R{T} B'(t) ] +
89  \R{tr} [ D(t) \bar{E}^\R{T} B(t) C'(t) ] +
90  \R{tr} [ \bar{E}^\R{T} B(t) C(t) D'(t) ]
91 \] $$
92 $latex \[
93  \bar{B} = \bar{E} (C D)^\R{T} \W{,}
94  \bar{C} = B^\R{T} \bar{E} D^\R{T} \W{,}
95  \bar{D} = (B C)^\R{T} \bar{E}
96 \] $$
97 
98 $subhead Reverse$$
99 For $latex k > 0$$, reverse mode
100 eliminates $latex R_k$$ and expresses the function values
101 $latex s$$ in terms of the coefficients of $latex A$$
102 and the lower order coefficients of $latex R$$.
103 The effect on $latex \bar{R}_0$$
104 (of eliminating $latex R_k$$) is
105 $latex \[
106 \bar{R}_0
107 = \bar{R}_0 - \bar{R}_k \left( \sum_{\ell=1}^k A_\ell R_{k-\ell} \right)^\R{T}
108 = \bar{R}_0 + \bar{R}_k ( A_0 R_k )^\R{T}
109 \] $$
110 For $latex \ell = 1 , \ldots , k$$,
111 the effect on $latex \bar{R}_{k-\ell}$$ and $latex A_\ell$$
112 (of eliminating $latex R_k$$) is
113 $latex \[
114 \bar{A}_\ell = \bar{A}_\ell - R_0^\R{T} \bar{R}_k R_{k-\ell}^\R{T}
115 \] $$
116 $latex \[
117 \bar{R}_{k-\ell} = \bar{R}_{k-\ell} - ( R_0 A_\ell )^\R{T} \bar{R}_k
118 \] $$
119 We note that
120 $latex \[
121  R_0 '(t) A_0 (t) + R_0 (t) A_0 '(t) = 0
122 \] $$
123 $latex \[
124  R_0 '(t) = - R_0 (t) A_0 '(t) R_0 (t)
125 \] $$
126 The reverse mode formula that eliminates $latex R_0$$ is
127 $latex \[
128  \bar{A}_0
129  = \bar{A}_0 - R_0^\R{T} \bar{R}_0 R_0^\R{T}
130 \]$$
131 
132 $nospell
133 
134 $head Start Class Definition$$
135 $srccode%cpp% */
136 # include <cppad/cppad.hpp>
137 # include <Eigen/Core>
138 # include <Eigen/LU>
139 
140 
141 
142 /* %$$
143 $head Public$$
144 
145 $subhead Types$$
146 $srccode%cpp% */
147 namespace { // BEGIN_EMPTY_NAMESPACE
148 
149 template <class Base>
151 public:
152  // -----------------------------------------------------------
153  // type of elements during calculation of derivatives
154  typedef Base scalar;
155  // type of elements during taping
157  // type of matrix during calculation of derivatives
158  typedef Eigen::Matrix<
159  scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> matrix;
160  // type of matrix during taping
161  typedef Eigen::Matrix<
162  ad_scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor > ad_matrix;
163 /* %$$
164 $subhead Constructor$$
165 $srccode%cpp% */
166  // constructor
167  atomic_eigen_mat_inv(void) : CppAD::atomic_base<Base>(
168  "atom_eigen_mat_inv" ,
169  CppAD::atomic_base<Base>::set_sparsity_enum
170  )
171  { }
172 /* %$$
173 $subhead op$$
174 $srccode%cpp% */
175  // use atomic operation to invert an AD matrix
176  ad_matrix op(const ad_matrix& arg)
177  { size_t nr = size_t( arg.rows() );
178  size_t ny = nr * nr;
179  size_t nx = 1 + ny;
180  assert( nr == size_t( arg.cols() ) );
181  // -------------------------------------------------------------------
182  // packed version of arg
183  CPPAD_TESTVECTOR(ad_scalar) packed_arg(nx);
184  packed_arg[0] = ad_scalar( nr );
185  for(size_t i = 0; i < ny; i++)
186  packed_arg[1 + i] = arg.data()[i];
187  // -------------------------------------------------------------------
188  // packed version of result = arg^{-1}.
189  // This is an atomic_base function call that CppAD uses to
190  // store the atomic operation on the tape.
191  CPPAD_TESTVECTOR(ad_scalar) packed_result(ny);
192  (*this)(packed_arg, packed_result);
193  // -------------------------------------------------------------------
194  // unpack result matrix
195  ad_matrix result(nr, nr);
196  for(size_t i = 0; i < ny; i++)
197  result.data()[i] = packed_result[i];
198  return result;
199  }
200  /* %$$
201 $head Private$$
202 
203 $subhead Variables$$
204 $srccode%cpp% */
205 private:
206  // -------------------------------------------------------------
207  // one forward mode vector of matrices for argument and result
209  // one reverse mode vector of matrices for argument and result
211  // -------------------------------------------------------------
212 /* %$$
213 $subhead forward$$
214 $srccode%cpp% */
215  // forward mode routine called by CppAD
216  virtual bool forward(
217  // lowest order Taylor coefficient we are evaluating
218  size_t p ,
219  // highest order Taylor coefficient we are evaluating
220  size_t q ,
221  // which components of x are variables
222  const CppAD::vector<bool>& vx ,
223  // which components of y are variables
224  CppAD::vector<bool>& vy ,
225  // tx [ j * (q+1) + k ] is x_j^k
226  const CppAD::vector<scalar>& tx ,
227  // ty [ i * (q+1) + k ] is y_i^k
229  )
230  { size_t n_order = q + 1;
231  size_t nr = size_t( CppAD::Integer( tx[ 0 * n_order + 0 ] ) );
232  size_t ny = nr * nr;
233 # ifndef NDEBUG
234  size_t nx = 1 + ny;
235 # endif
236  assert( vx.size() == 0 || nx == vx.size() );
237  assert( vx.size() == 0 || ny == vy.size() );
238  assert( nx * n_order == tx.size() );
239  assert( ny * n_order == ty.size() );
240  //
241  // -------------------------------------------------------------------
242  // make sure f_arg_ and f_result_ are large enough
243  assert( f_arg_.size() == f_result_.size() );
244  if( f_arg_.size() < n_order )
245  { f_arg_.resize(n_order);
246  f_result_.resize(n_order);
247  //
248  for(size_t k = 0; k < n_order; k++)
249  { f_arg_[k].resize(nr, nr);
250  f_result_[k].resize(nr, nr);
251  }
252  }
253  // -------------------------------------------------------------------
254  // unpack tx into f_arg_
255  for(size_t k = 0; k < n_order; k++)
256  { // unpack arg values for this order
257  for(size_t i = 0; i < ny; i++)
258  f_arg_[k].data()[i] = tx[ (1 + i) * n_order + k ];
259  }
260  // -------------------------------------------------------------------
261  // result for each order
262  // (we could avoid recalculting f_result_[k] for k=0,...,p-1)
263  //
264  f_result_[0] = f_arg_[0].inverse();
265  for(size_t k = 1; k < n_order; k++)
266  { // initialize sum
267  matrix f_sum = matrix::Zero(nr, nr);
268  // compute sum
269  for(size_t ell = 1; ell <= k; ell++)
270  f_sum -= f_arg_[ell] * f_result_[k-ell];
271  // result_[k] = arg_[0]^{-1} * sum_
272  f_result_[k] = f_result_[0] * f_sum;
273  }
274  // -------------------------------------------------------------------
275  // pack result_ into ty
276  for(size_t k = 0; k < n_order; k++)
277  { for(size_t i = 0; i < ny; i++)
278  ty[ i * n_order + k ] = f_result_[k].data()[i];
279  }
280  // -------------------------------------------------------------------
281  // check if we are computing vy
282  if( vx.size() == 0 )
283  return true;
284  // ------------------------------------------------------------------
285  // This is a very dumb algorithm that over estimates which
286  // elements of the inverse are variables (which is not efficient).
287  bool var = false;
288  for(size_t i = 0; i < ny; i++)
289  var |= vx[1 + i];
290  for(size_t i = 0; i < ny; i++)
291  vy[i] = var;
292  return true;
293  }
294 /* %$$
295 $subhead reverse$$
296 $srccode%cpp% */
297  // reverse mode routine called by CppAD
298  virtual bool reverse(
299  // highest order Taylor coefficient that we are computing derivative of
300  size_t q ,
301  // forward mode Taylor coefficients for x variables
302  const CppAD::vector<double>& tx ,
303  // forward mode Taylor coefficients for y variables
304  const CppAD::vector<double>& ty ,
305  // upon return, derivative of G[ F[ {x_j^k} ] ] w.r.t {x_j^k}
307  // derivative of G[ {y_i^k} ] w.r.t. {y_i^k}
308  const CppAD::vector<double>& py
309  )
310  { size_t n_order = q + 1;
311  size_t nr = size_t( CppAD::Integer( tx[ 0 * n_order + 0 ] ) );
312  size_t ny = nr * nr;
313 # ifndef NDEBUG
314  size_t nx = 1 + ny;
315 # endif
316  //
317  assert( nx * n_order == tx.size() );
318  assert( ny * n_order == ty.size() );
319  assert( px.size() == tx.size() );
320  assert( py.size() == ty.size() );
321  // -------------------------------------------------------------------
322  // make sure f_arg_ is large enough
323  assert( f_arg_.size() == f_result_.size() );
324  // must have previous run forward with order >= n_order
325  assert( f_arg_.size() >= n_order );
326  // -------------------------------------------------------------------
327  // make sure r_arg_, r_result_ are large enough
328  assert( r_arg_.size() == r_result_.size() );
329  if( r_arg_.size() < n_order )
330  { r_arg_.resize(n_order);
331  r_result_.resize(n_order);
332  //
333  for(size_t k = 0; k < n_order; k++)
334  { r_arg_[k].resize(nr, nr);
335  r_result_[k].resize(nr, nr);
336  }
337  }
338  // -------------------------------------------------------------------
339  // unpack tx into f_arg_
340  for(size_t k = 0; k < n_order; k++)
341  { // unpack arg values for this order
342  for(size_t i = 0; i < ny; i++)
343  f_arg_[k].data()[i] = tx[ (1 + i) * n_order + k ];
344  }
345  // -------------------------------------------------------------------
346  // unpack py into r_result_
347  for(size_t k = 0; k < n_order; k++)
348  { for(size_t i = 0; i < ny; i++)
349  r_result_[k].data()[i] = py[ i * n_order + k ];
350  }
351  // -------------------------------------------------------------------
352  // initialize r_arg_ as zero
353  for(size_t k = 0; k < n_order; k++)
354  r_arg_[k] = matrix::Zero(nr, nr);
355  // -------------------------------------------------------------------
356  // matrix reverse mode calculation
357  //
358  for(size_t k1 = n_order; k1 > 1; k1--)
359  { size_t k = k1 - 1;
360  // bar{R}_0 = bar{R}_0 + bar{R}_k (A_0 R_k)^T
361  r_result_[0] +=
362  r_result_[k] * f_result_[k].transpose() * f_arg_[0].transpose();
363  //
364  for(size_t ell = 1; ell <= k; ell++)
365  { // bar{A}_l = bar{A}_l - R_0^T bar{R}_k R_{k-l}^T
366  r_arg_[ell] -= f_result_[0].transpose()
367  * r_result_[k] * f_result_[k-ell].transpose();
368  // bar{R}_{k-l} = bar{R}_{k-1} - (R_0 A_l)^T bar{R}_k
369  r_result_[k-ell] -= f_arg_[ell].transpose()
370  * f_result_[0].transpose() * r_result_[k];
371  }
372  }
373  r_arg_[0] -=
374  f_result_[0].transpose() * r_result_[0] * f_result_[0].transpose();
375  // -------------------------------------------------------------------
376  // pack r_arg into px
377  for(size_t k = 0; k < n_order; k++)
378  { for(size_t i = 0; i < ny; i++)
379  px[ (1 + i) * n_order + k ] = r_arg_[k].data()[i];
380  }
381  //
382  return true;
383  }
384 /* %$$
385 $head End Class Definition$$
386 $srccode%cpp% */
387 }; // End of atomic_eigen_mat_inv class
388 
389 } // END_EMPTY_NAMESPACE
390 /* %$$
391 $$ $comment end nospell$$
392 $end
393 */
394 
395 
396 # endif
includes the entire CppAD package in the necessary order.
Eigen::Matrix< scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor > matrix
Definition: ad.hpp:34
Type * data(void)
raw pointer to the data
Definition: vector.hpp:391
Eigen::Matrix< ad_scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor > ad_matrix
virtual bool reverse(size_t q, const CppAD::vector< double > &tx, const CppAD::vector< double > &ty, CppAD::vector< double > &px, const CppAD::vector< double > &py)
size_t size(void) const
number of elements currently in this vector.
Definition: vector.hpp:387
int Integer(const std::complex< double > &x)
virtual bool forward(size_t p, size_t q, const CppAD::vector< bool > &vx, CppAD::vector< bool > &vy, const CppAD::vector< scalar > &tx, CppAD::vector< scalar > &ty)