CppAD: A C++ Algorithmic Differentiation Package  20171217
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
eigen_cholesky.hpp
Go to the documentation of this file.
1 // $Id$
2 # ifndef CPPAD_EXAMPLE_EIGEN_CHOLESKY_HPP
3 # define CPPAD_EXAMPLE_EIGEN_CHOLESKY_HPP
4 
5 /* --------------------------------------------------------------------------
6 CppAD: C++ Algorithmic Differentiation: Copyright (C) 2003-17 Bradley M. Bell
7 
8 CppAD is distributed under multiple licenses. This distribution is under
9 the terms of the
10  Eclipse Public License Version 1.0.
11 
12 A copy of this license is included in the COPYING file of this distribution.
13 Please visit http://www.coin-or.org/CppAD/ for information on other licenses.
14 -------------------------------------------------------------------------- */
15 
16 /*
17 $begin atomic_eigen_cholesky.hpp$$
18 $spell
19  Eigen
20  Taylor
21  Cholesky
22  op
23 $$
24 
25 $section Atomic Eigen Cholesky Factorization Class$$
26 
27 $head Purpose$$
28 Construct an atomic operation that computes a lower triangular matrix
29 $latex L $$ such that $latex L L^\R{T} = A$$
30 for any positive integer $latex p$$
31 and symmetric positive definite matrix $latex A \in \B{R}^{p \times p}$$.
32 
33 $head Start Class Definition$$
34 $srccode%cpp% */
35 # include <cppad/cppad.hpp>
36 # include <Eigen/Dense>
37 
38 
39 /* %$$
40 $head Public$$
41 
42 $subhead Types$$
43 $srccode%cpp% */
44 namespace { // BEGIN_EMPTY_NAMESPACE
45 
46 template <class Base>
48 public:
49  // -----------------------------------------------------------
50  // type of elements during calculation of derivatives
51  typedef Base scalar;
52  // type of elements during taping
54  //
55  // type of matrix during calculation of derivatives
56  typedef Eigen::Matrix<
57  scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> matrix;
58  // type of matrix during taping
59  typedef Eigen::Matrix<
60  ad_scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor > ad_matrix;
61  //
62  // lower triangular scalar matrix
63  typedef Eigen::TriangularView<matrix, Eigen::Lower> lower_view;
64 /* %$$
65 $subhead Constructor$$
66 $srccode%cpp% */
67  // constructor
68  atomic_eigen_cholesky(void) : CppAD::atomic_base<Base>(
69  "atom_eigen_cholesky" ,
70  CppAD::atomic_base<Base>::set_sparsity_enum
71  )
72  { }
73 /* %$$
74 $subhead op$$
75 $srccode%cpp% */
76  // use atomic operation to invert an AD matrix
77  ad_matrix op(const ad_matrix& arg)
78  { size_t nr = size_t( arg.rows() );
79  size_t ny = ( (nr + 1 ) * nr ) / 2;
80  size_t nx = 1 + ny;
81  assert( nr == size_t( arg.cols() ) );
82  // -------------------------------------------------------------------
83  // packed version of arg
84  CPPAD_TESTVECTOR(ad_scalar) packed_arg(nx);
85  size_t index = 0;
86  packed_arg[index++] = ad_scalar( nr );
87  // lower triangle of symmetric matrix A
88  for(size_t i = 0; i < nr; i++)
89  { for(size_t j = 0; j <= i; j++)
90  packed_arg[index++] = arg(i, j);
91  }
92  assert( index == nx );
93  // -------------------------------------------------------------------
94  // packed version of result = arg^{-1}.
95  // This is an atomic_base function call that CppAD uses to
96  // store the atomic operation on the tape.
97  CPPAD_TESTVECTOR(ad_scalar) packed_result(ny);
98  (*this)(packed_arg, packed_result);
99  // -------------------------------------------------------------------
100  // unpack result matrix L
101  ad_matrix result = ad_matrix::Zero(nr, nr);
102  index = 0;
103  for(size_t i = 0; i < nr; i++)
104  { for(size_t j = 0; j <= i; j++)
105  result(i, j) = packed_result[index++];
106  }
107  return result;
108  }
109  /* %$$
110 $head Private$$
111 
112 $subhead Variables$$
113 $srccode%cpp% */
114 private:
115  // -------------------------------------------------------------
116  // one forward mode vector of matrices for argument and result
118  // one reverse mode vector of matrices for argument and result
120  // -------------------------------------------------------------
121 /* %$$
122 $subhead forward$$
123 $srccode%cpp% */
124  // forward mode routine called by CppAD
125  virtual bool forward(
126  // lowest order Taylor coefficient we are evaluating
127  size_t p ,
128  // highest order Taylor coefficient we are evaluating
129  size_t q ,
130  // which components of x are variables
131  const CppAD::vector<bool>& vx ,
132  // which components of y are variables
133  CppAD::vector<bool>& vy ,
134  // tx [ j * (q+1) + k ] is x_j^k
135  const CppAD::vector<scalar>& tx ,
136  // ty [ i * (q+1) + k ] is y_i^k
138  )
139  { size_t n_order = q + 1;
140  size_t nr = size_t( CppAD::Integer( tx[ 0 * n_order + 0 ] ) );
141  size_t ny = ((nr + 1) * nr) / 2;
142 # ifndef NDEBUG
143  size_t nx = 1 + ny;
144 # endif
145  assert( vx.size() == 0 || nx == vx.size() );
146  assert( vx.size() == 0 || ny == vy.size() );
147  assert( nx * n_order == tx.size() );
148  assert( ny * n_order == ty.size() );
149  //
150  // -------------------------------------------------------------------
151  // make sure f_arg_ and f_result_ are large enough
152  assert( f_arg_.size() == f_result_.size() );
153  if( f_arg_.size() < n_order )
154  { f_arg_.resize(n_order);
155  f_result_.resize(n_order);
156  //
157  for(size_t k = 0; k < n_order; k++)
158  { f_arg_[k].resize(nr, nr);
159  f_result_[k].resize(nr, nr);
160  }
161  }
162  // -------------------------------------------------------------------
163  // unpack tx into f_arg_
164  for(size_t k = 0; k < n_order; k++)
165  { size_t index = 1;
166  // unpack arg values for this order
167  for(size_t i = 0; i < nr; i++)
168  { for(size_t j = 0; j <= i; j++)
169  { f_arg_[k](i, j) = tx[ index * n_order + k ];
170  f_arg_[k](j, i) = f_arg_[k](i, j);
171  index++;
172  }
173  }
174  }
175  // -------------------------------------------------------------------
176  // result for each order
177  // (we could avoid recalculting f_result_[k] for k=0,...,p-1)
178  //
179  Eigen::LLT<matrix> cholesky(f_arg_[0]);
180  f_result_[0] = cholesky.matrixL();
181  lower_view L_0 = f_result_[0].template triangularView<Eigen::Lower>();
182  for(size_t k = 1; k < n_order; k++)
183  { // initialize sum as A_k
184  matrix f_sum = f_arg_[k];
185  // compute A_k - B_k
186  for(size_t ell = 1; ell < k; ell++)
187  f_sum -= f_result_[ell] * f_result_[k-ell].transpose();
188  // compute L_0^{-1} * (A_k - B_k) * L_0^{-T}
189  matrix temp = L_0.template solve<Eigen::OnTheLeft>(f_sum);
190  temp = L_0.transpose().template solve<Eigen::OnTheRight>(temp);
191  // divide the diagonal by 2
192  for(size_t i = 0; i < nr; i++)
193  temp(i, i) /= scalar(2.0);
194  // L_k = L_0 * low[ L_0^{-1} * (A_k - B_k) * L_0^{-T} ]
195  lower_view view = temp.template triangularView<Eigen::Lower>();
196  f_result_[k] = f_result_[0] * view;
197  }
198  // -------------------------------------------------------------------
199  // pack result_ into ty
200  for(size_t k = 0; k < n_order; k++)
201  { size_t index = 0;
202  for(size_t i = 0; i < nr; i++)
203  { for(size_t j = 0; j <= i; j++)
204  { ty[ index * n_order + k ] = f_result_[k](i, j);
205  index++;
206  }
207  }
208  }
209  // -------------------------------------------------------------------
210  // check if we are computing vy
211  if( vx.size() == 0 )
212  return true;
213  // ------------------------------------------------------------------
214  // This is a very dumb algorithm that over estimates which
215  // elements of the inverse are variables (which is not efficient).
216  bool var = false;
217  for(size_t i = 0; i < ny; i++)
218  var |= vx[1 + i];
219  for(size_t i = 0; i < ny; i++)
220  vy[i] = var;
221  //
222  return true;
223  }
224 /* %$$
225 $subhead reverse$$
226 $srccode%cpp% */
227  // reverse mode routine called by CppAD
228  virtual bool reverse(
229  // highest order Taylor coefficient that we are computing derivative of
230  size_t q ,
231  // forward mode Taylor coefficients for x variables
232  const CppAD::vector<double>& tx ,
233  // forward mode Taylor coefficients for y variables
234  const CppAD::vector<double>& ty ,
235  // upon return, derivative of G[ F[ {x_j^k} ] ] w.r.t {x_j^k}
237  // derivative of G[ {y_i^k} ] w.r.t. {y_i^k}
238  const CppAD::vector<double>& py
239  )
240  { size_t n_order = q + 1;
241  size_t nr = size_t( CppAD::Integer( tx[ 0 * n_order + 0 ] ) );
242 # ifndef NDEBUG
243  size_t ny = ( (nr + 1 ) * nr ) / 2;
244  size_t nx = 1 + ny;
245 # endif
246  //
247  assert( nx * n_order == tx.size() );
248  assert( ny * n_order == ty.size() );
249  assert( px.size() == tx.size() );
250  assert( py.size() == ty.size() );
251  // -------------------------------------------------------------------
252  // make sure f_arg_ is large enough
253  assert( f_arg_.size() == f_result_.size() );
254  // must have previous run forward with order >= n_order
255  assert( f_arg_.size() >= n_order );
256  // -------------------------------------------------------------------
257  // make sure r_arg_, r_result_ are large enough
258  assert( r_arg_.size() == r_result_.size() );
259  if( r_arg_.size() < n_order )
260  { r_arg_.resize(n_order);
261  r_result_.resize(n_order);
262  //
263  for(size_t k = 0; k < n_order; k++)
264  { r_arg_[k].resize(nr, nr);
265  r_result_[k].resize(nr, nr);
266  }
267  }
268  // -------------------------------------------------------------------
269  // unpack tx into f_arg_
270  for(size_t k = 0; k < n_order; k++)
271  { size_t index = 1;
272  // unpack arg values for this order
273  for(size_t i = 0; i < nr; i++)
274  { for(size_t j = 0; j <= i; j++)
275  { f_arg_[k](i, j) = tx[ index * n_order + k ];
276  f_arg_[k](j, i) = f_arg_[k](i, j);
277  index++;
278  }
279  }
280  }
281  // -------------------------------------------------------------------
282  // unpack py into r_result_
283  for(size_t k = 0; k < n_order; k++)
284  { r_result_[k] = matrix::Zero(nr, nr);
285  size_t index = 0;
286  for(size_t i = 0; i < nr; i++)
287  { for(size_t j = 0; j <= i; j++)
288  { r_result_[k](i, j) = py[ index * n_order + k ];
289  index++;
290  }
291  }
292  }
293  // -------------------------------------------------------------------
294  // initialize r_arg_ as zero
295  for(size_t k = 0; k < n_order; k++)
296  r_arg_[k] = matrix::Zero(nr, nr);
297  // -------------------------------------------------------------------
298  // matrix reverse mode calculation
299  lower_view L_0 = f_result_[0].template triangularView<Eigen::Lower>();
300  //
301  for(size_t k1 = n_order; k1 > 1; k1--)
302  { size_t k = k1 - 1;
303  //
304  // L_0^T * bar{L}_k
305  matrix tmp1 = L_0.transpose() * r_result_[k];
306  //
307  //low[ L_0^T * bar{L}_k ]
308  for(size_t i = 0; i < nr; i++)
309  tmp1(i, i) /= scalar(2.0);
310  matrix tmp2 = tmp1.template triangularView<Eigen::Lower>();
311  //
312  // L_0^{-T} low[ L_0^T * bar{L}_k ]
313  tmp1 = L_0.transpose().template solve<Eigen::OnTheLeft>( tmp2 );
314  //
315  // M_k = L_0^{-T} * low[ L_0^T * bar{L}_k ]^{T} L_0^{-1}
316  matrix M_k = L_0.transpose().template
317  solve<Eigen::OnTheLeft>( tmp1.transpose() );
318  //
319  // remove L_k and compute bar{B}_k
320  matrix barB_k = scalar(0.5) * ( M_k + M_k.transpose() );
321  r_arg_[k] += barB_k;
322  barB_k = scalar(-1.0) * barB_k;
323  //
324  // 2.0 * lower( bar{B}_k L_k )
325  matrix temp = scalar(2.0) * barB_k * f_result_[k];
326  temp = temp.template triangularView<Eigen::Lower>();
327  //
328  // remove C_k
329  r_result_[0] += temp;
330  //
331  // remove B_k
332  for(size_t ell = 1; ell < k; ell++)
333  { // bar{L}_ell = 2 * lower( \bar{B}_k * L_{k-ell} )
334  temp = scalar(2.0) * barB_k * f_result_[k-ell];
335  r_result_[ell] += temp.template triangularView<Eigen::Lower>();
336  }
337  }
338  // M_0 = L_0^{-T} * low[ L_0^T * bar{L}_0 ]^{T} L_0^{-1}
339  matrix M_0 = L_0.transpose() * r_result_[0];
340  for(size_t i = 0; i < nr; i++)
341  M_0(i, i) /= scalar(2.0);
342  M_0 = M_0.template triangularView<Eigen::Lower>();
343  M_0 = L_0.template solve<Eigen::OnTheRight>( M_0 );
344  M_0 = L_0.transpose().template solve<Eigen::OnTheLeft>( M_0 );
345  // remove L_0
346  r_arg_[0] += scalar(0.5) * ( M_0 + M_0.transpose() );
347  // -------------------------------------------------------------------
348  // pack r_arg into px
349  // note that only the lower triangle of barA_k is stored in px
350  for(size_t k = 0; k < n_order; k++)
351  { size_t index = 0;
352  px[ index * n_order + k ] = 0.0;
353  index++;
354  for(size_t i = 0; i < nr; i++)
355  { for(size_t j = 0; j < i; j++)
356  { px[ index * n_order + k ] = 2.0 * r_arg_[k](i, j);
357  index++;
358  }
359  px[ index * n_order + k] = r_arg_[k](i, i);
360  index++;
361  }
362  }
363  // -------------------------------------------------------------------
364  return true;
365  }
366 /* %$$
367 $head End Class Definition$$
368 $srccode%cpp% */
369 }; // End of atomic_eigen_cholesky class
370 
371 } // END_EMPTY_NAMESPACE
372 /* %$$
373 $end
374 */
375 
376 
377 # endif
includes the entire CppAD package in the necessary order.
Eigen::TriangularView< matrix, Eigen::Lower > lower_view
Definition: ad.hpp:34
Eigen::Matrix< ad_scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor > ad_matrix
virtual bool reverse(size_t q, const CppAD::vector< double > &tx, const CppAD::vector< double > &ty, CppAD::vector< double > &px, const CppAD::vector< double > &py)
size_t size(void) const
number of elements currently in this vector.
Definition: vector.hpp:387
virtual bool forward(size_t p, size_t q, const CppAD::vector< bool > &vx, CppAD::vector< bool > &vy, const CppAD::vector< scalar > &tx, CppAD::vector< scalar > &ty)
int Integer(const std::complex< double > &x)
Eigen::Matrix< scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor > matrix