63 template <
typename VECTOR>
struct bfgs_invhessian {
65 typedef typename linalg_traits<VECTOR>::value_type T;
66 typedef typename number_traits<T>::magnitude_type R;
68 std::vector<VECTOR> delta, gamma, zeta;
69 std::vector<T> tau, rho;
72 template<
typename VEC1,
typename VEC2>
void hmult(
const VEC1 &X, VEC2 &Y) {
74 for (
size_type k = 0 ; k < delta.size(); ++k) {
78 add(scaled(zeta[k], rho[k]*xdelta), Y);
79 add(scaled(delta[k], rho[k]*(xzeta-rho[k]*tau[k]*xdelta)), Y);
82 add(scaled(delta[k], rho[k]*xdelta), Y);
83 add(scaled(zeta[k], -xzeta/tau[k]), Y);
90 delta.resize(0); gamma.resize(0); zeta.resize(0);
91 tau.resize(0); rho.resize(0);
94 template<
typename VECT1,
typename VECT2>
95 void update(
const VECT1 &deltak,
const VECT2 &gammak) {
96 T vsp =
vect_sp(deltak, gammak);
97 if (vsp == T(0))
return;
98 size_type N = vect_size(deltak), k = delta.size();
101 delta.resize(k+1); gamma.resize(k+1); zeta.resize(k+1);
102 tau.resize(k+1); rho.resize(k+1);
108 add(delta[k], scaled(Y, -1), zeta[k]);
111 tau[k] =
vect_sp(gammak, zeta[k]);
114 bfgs_invhessian(
int v = 0) { version = v; }
118 template <
typename FUNCTION,
typename DERIVATIVE,
typename VECTOR>
119 void bfgs(
const FUNCTION &f,
const DERIVATIVE &grad, VECTOR &x,
120 int restart, iteration& iter,
int version = 0,
121 double lambda_init=0.001,
double print_norm=1.0) {
123 typedef typename linalg_traits<VECTOR>::value_type T;
124 typedef typename number_traits<T>::magnitude_type R;
126 bfgs_invhessian<VECTOR> invhessian(version);
127 VECTOR r(vect_size(x)), d(vect_size(x)), y(vect_size(x)), r2(vect_size(x));
129 R lambda = lambda_init, valx = f(x), valy;
132 if (iter.get_noisy() >= 1) cout <<
"value " << valx / print_norm <<
" ";
133 while (! iter.finished_vect(r)) {
135 invhessian.hmult(r, d); gmm::scale(d, T(-1));
139 R lambda_min(0), lambda_max(0), m1 = 0.27, m2 = 0.57;
140 bool unbounded =
true, blocked =
false, grad_computed =
false;
143 add(x, scaled(d, lambda), y);
145 if (iter.get_noisy() >= 2) {
147 cout <<
"Wolfe line search, lambda = " << lambda
148 <<
" value = " << valy /print_norm << endl;
153 if (valy <= valx + m1 * lambda * derivative) {
154 grad(y, r2); grad_computed =
true;
156 if (derivative2 >= m2*derivative)
break;
163 if (unbounded) lambda *= R(10);
164 else lambda = (lambda_max + lambda_min) / R(2);
165 if (lambda == lambda_max || lambda == lambda_min)
break;
169 if (valy <= valx + R(2)*gmm::abs(derivative)*lambda &&
170 (lambda < R(lambda_init*1E-8) ||
171 (!unbounded && lambda_max-lambda_min < R(lambda_init*1E-8))))
172 { blocked =
true; lambda = lambda_init;
break; }
177 if (!grad_computed) grad(y, r2);
179 if ((iter.get_iteration() % restart) == 0 || blocked) {
180 if (iter.get_noisy() >= 1) cout <<
"Restart\n";
181 invhessian.restart();
182 if (++nb_restart > 10) {
183 if (iter.get_noisy() >= 1) cout <<
"BFGS is blocked, exiting\n";
188 invhessian.update(gmm::scaled(d,lambda), gmm::scaled(r,-1));
191 copy(r2, r);
copy(y, x); valx = valy;
192 if (iter.get_noisy() >= 1)
193 cout <<
"BFGS value " << valx/print_norm <<
"\t";
199 template <
typename FUNCTION,
typename DERIVATIVE,
typename VECTOR>
200 inline void dfp(
const FUNCTION &f,
const DERIVATIVE &grad, VECTOR &x,
201 int restart, iteration& iter,
int version = 1) {
202 bfgs(f, grad, x, restart, iter, version);
void copy(const L1 &l1, L2 &l2)
*/
void resize(V &v, size_type n)
*/
strongest_value_type< V1, V2 >::value_type vect_sp(const V1 &v1, const V2 &v2)
*/
void add(const L1 &l1, L2 &l2)
*/
Include the base gmm files.
size_t size_type
used as the common size type in the library