SHOGUN  6.0.0
SingleLaplaceInferenceMethod.cpp
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2016 Wu Lin
8  * Written (W) 2013 Roman Votyakov
9  * Copyright (C) 2012 Jacob Walker
10  * Copyright (C) 2013 Roman Votyakov
11  *
12  * Code adapted from Gaussian Process Machine Learning Toolbox
13  * http://www.gaussianprocess.org/gpml/code/matlab/doc/
14  * This code specifically adapted from infLaplace.m
15  */
16 
17 #include <shogun/machine/gp/SingleLaplaceInferenceMethod.h>
18 
19 
20 #include <shogun/machine/gp/StudentsTLikelihood.h>
21 #include <shogun/mathematics/Math.h>
22 #include <shogun/lib/external/brent.h>
23 #include <shogun/mathematics/eigen3.h>
25 
26 using namespace shogun;
27 using namespace Eigen;
28 
29 namespace shogun
30 {
31 
32 #ifndef DOXYGEN_SHOULD_SKIP_THIS
33 
34 class PsiLine : public func_base
35 {
36 public:
37  float64_t log_scale;
38  MatrixXd K;
39  VectorXd dalpha;
40  VectorXd start_alpha;
41  Map<VectorXd>* alpha;
42  SGVector<float64_t>* dlp;
43  SGVector<float64_t>* W;
44  SGVector<float64_t>* f;
45  SGVector<float64_t>* m;
46  CLikelihoodModel* lik;
47  CLabels* lab;
48 
49  virtual double operator() (double x)
50  {
51  Map<VectorXd> eigen_f(f->vector, f->vlen);
52  Map<VectorXd> eigen_m(m->vector, m->vlen);
53 
54  // compute alpha=alpha+x*dalpha and f=K*alpha+m
55  (*alpha)=start_alpha+x*dalpha;
56  eigen_f=K*(*alpha)*CMath::exp(log_scale*2.0)+eigen_m;
57 
58  // get first and second derivatives of log likelihood
59  (*dlp)=lik->get_log_probability_derivative_f(lab, (*f), 1);
60 
61  (*W)=lik->get_log_probability_derivative_f(lab, (*f), 2);
62  W->scale(-1.0);
63 
64  // compute psi=alpha'*(f-m)/2-lp
65  float64_t result = (*alpha).dot(eigen_f-eigen_m)/2.0-
66  SGVector<float64_t>::sum(lik->get_log_probability_f(lab, *f));
67 
68  return result;
69  }
70 };
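The functor above is the line-search objective used by the Newton updates further down. Restated in the notation of infLaplace.m, with sigma^2 = exp(2*log_scale), the quantity returned by operator() is

\[
\alpha(x)=\alpha_0+x\,\Delta\alpha,\qquad
f(x)=\sigma^2 K\,\alpha(x)+m,\qquad
\psi(x)=\tfrac{1}{2}\,\alpha(x)^\top\big(f(x)-m\big)-\log p\big(y\mid f(x)\big),
\]

and Brent's local_min from brent.h searches over the step size x. The first and second likelihood derivatives are refreshed as a side effect so that the caller can reuse them after the line search.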
71 
72 class SingleLaplaceInferenceMethodCostFunction: public FirstOrderCostFunction
73 {
74 public:
75  SingleLaplaceInferenceMethodCostFunction():FirstOrderCostFunction() { init(); }
76  virtual ~SingleLaplaceInferenceMethodCostFunction() { SG_UNREF(m_obj); }
77  void set_target(CSingleLaplaceInferenceMethod *obj)
78  {
79  REQUIRE(obj, "Obj must be set\n");
80  if(m_obj != obj)
81  {
82  SG_REF(obj);
83  SG_UNREF(m_obj);
84  m_obj=obj;
85  }
86  }
87  virtual float64_t get_cost()
88  {
89  REQUIRE(m_obj,"Object not set\n");
90  return m_obj->get_psi_wrt_alpha();
91  }
92  void unset_target(bool is_unref)
93  {
94  if(is_unref)
95  {
96  SG_UNREF(m_obj);
97  }
98  m_obj=NULL;
99  }
100  virtual SGVector<float64_t> obtain_variable_reference()
101  {
102  REQUIRE(m_obj,"Object not set\n");
103  m_derivatives = SGVector<float64_t>((m_obj->m_alpha).vlen);
104  return m_obj->m_alpha;
105  }
106  virtual SGVector<float64_t> get_gradient()
107  {
108  REQUIRE(m_obj,"Object not set\n");
109  m_obj->get_gradient_wrt_alpha(m_derivatives);
110  return m_derivatives;
111  }
112  virtual const char* get_name() const { return "SingleLaplaceInferenceMethodCostFunction"; }
113 private:
114  void init()
115  {
116  m_obj=NULL;
117  m_derivatives = SGVector<float64_t>();
118  SG_ADD(&m_derivatives, "SingleLaplaceInferenceMethodCostFunction__m_derivatives",
119  "derivatives in SingleLaplaceInferenceMethodCostFunction", MS_NOT_AVAILABLE);
120  SG_ADD((CSGObject **)&m_obj, "SingleLaplaceInferenceMethodCostFunction__m_obj",
121  "obj in SingleLaplaceInferenceMethodCostFunction", MS_NOT_AVAILABLE);
122 
123  }
124 
125  SGVector<float64_t> m_derivatives;
126  CSingleLaplaceInferenceMethod *m_obj;
127 };
128 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
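The cost function above exposes psi(alpha) and its gradient to any first-order solver. A minimal usage sketch, mirroring the pattern used in update_alpha() below; the pointers `inference` (a CSingleLaplaceInferenceMethod*) and `minimizer` (some FirstOrderMinimizer*) are assumptions supplied by the caller:

// sketch: minimize psi(alpha) with a generic first-order minimizer
SingleLaplaceInferenceMethodCostFunction* cost_fun=
    new SingleLaplaceInferenceMethodCostFunction();
cost_fun->set_target(inference);          // expose alpha, psi and grad psi
minimizer->set_cost_function(cost_fun);   // solver reads alpha via obtain_variable_reference()
minimizer->minimize();                    // iterates using get_cost()/get_gradient()
minimizer->unset_cost_function(false);
cost_fun->unset_target(false);
SG_UNREF(cost_fun);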
129 
130 
131 void CSingleLaplaceNewtonOptimizer::set_target(CSingleLaplaceInferenceMethod *obj)
132 {
133  REQUIRE(obj, "Obj must be set\n");
134  if(m_obj != obj)
135  {
136  SG_REF(obj);
137  SG_UNREF(m_obj);
138  m_obj=obj;
139  }
140 }
141 
142 void CSingleLaplaceNewtonOptimizer::unset_target(bool is_unref)
143 {
144  if(is_unref)
145  {
146  SG_UNREF(m_obj);
147  }
148  m_obj=NULL;
149 
150 }
151 
152 void CSingleLaplaceNewtonOptimizer::init()
153 {
154  m_obj=NULL;
155  m_iter=20;
156  m_tolerance=1e-6;
157  m_opt_tolerance=1e-6;
158  m_opt_max=10;
159 
160  SG_ADD((CSGObject **)&m_obj, "CSingleLaplaceNewtonOptimizer__m_obj",
161  "obj in CSingleLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
162  SG_ADD(&m_iter, "CSingleLaplaceNewtonOptimizer__m_iter",
163  "iter in CSingleLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
164  SG_ADD(&m_tolerance, "CSingleLaplaceNewtonOptimizer__m_tolerance",
165  "tolerance in CSingleLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
166  SG_ADD(&m_opt_tolerance, "CSingleLaplaceNewtonOptimizer__m_opt_tolerance",
167  "opt_tolerance in CSingleLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
168  SG_ADD(&m_opt_max, "CSingleLaplaceNewtonOptimizer__m_opt_max",
169  "opt_max in CSingleLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
170 }
171 
172 float64_t CSingleLaplaceNewtonOptimizer::minimize()
173 {
174  REQUIRE(m_obj,"Object not set\n");
175  float64_t Psi_Old=CMath::INFTY;
176  float64_t Psi_New=m_obj->m_Psi;
177 
178  // get mean vector and create eigen representation of it
179  Map<VectorXd> eigen_mean( (m_obj->m_mean_f).vector, (m_obj->m_mean_f).vlen);
180 
181  // create eigen representation of kernel matrix
182  Map<MatrixXd> eigen_ktrtr( (m_obj->m_ktrtr).matrix, (m_obj->m_ktrtr).num_rows, (m_obj->m_ktrtr).num_cols);
183 
184  Map<VectorXd> eigen_mu(m_obj->m_mu, (m_obj->m_mu).vlen);
185 
186  // compute W = -d2lp
187  m_obj->m_W=m_obj->m_model->get_log_probability_derivative_f(m_obj->m_labels, m_obj->m_mu, 2);
188  m_obj->m_W.scale(-1.0);
189 
190  Map<VectorXd> eigen_alpha(m_obj->m_alpha.vector, m_obj->m_alpha.vlen);
191 
192  // get first derivative of log probability function
193  m_obj->m_dlp=m_obj->m_model->get_log_probability_derivative_f(m_obj->m_labels, m_obj->m_mu, 1);
194 
195  // create shogun and eigen representation of sW
196  m_obj->m_sW=SGVector<float64_t>((m_obj->m_W).vlen);
197  Map<VectorXd> eigen_sW((m_obj->m_sW).vector, (m_obj->m_sW).vlen);
198 
199  index_t iter=0;
200 
201  while (Psi_Old-Psi_New>m_tolerance && iter<m_iter)
202  {
203  Map<VectorXd> eigen_W( (m_obj->m_W).vector, (m_obj->m_W).vlen);
204  Map<VectorXd> eigen_dlp( (m_obj->m_dlp).vector, (m_obj->m_dlp).vlen);
205 
206  Psi_Old = Psi_New;
207  iter++;
208 
209  if (eigen_W.minCoeff() < 0)
210  {
211  // Suggested by Vanhatalo et. al.,
212  // Gaussian Process Regression with Student's t likelihood, NIPS 2009
213  // Quoted from infLaplace.m
214  float64_t df;
215 
216  if (m_obj->m_model->get_model_type()==LT_STUDENTST)
217  {
217  {
218  CStudentsTLikelihood* lik=CStudentsTLikelihood::obtain_from_generic(m_obj->m_model);
219  df=lik->get_degrees_freedom();
220  SG_UNREF(lik);
221  }
222  else
223  df=1;
224 
225  eigen_W+=(2.0/df)*eigen_dlp.cwiseProduct(eigen_dlp);
226  }
227 
228  // compute sW = sqrt(W)
229  eigen_sW=eigen_W.cwiseSqrt();
230 
231  LLT<MatrixXd> L((eigen_sW*eigen_sW.transpose()).cwiseProduct(eigen_ktrtr*CMath::exp((m_obj->m_log_scale)*2.0))+
232  MatrixXd::Identity( (m_obj->m_ktrtr).num_rows, (m_obj->m_ktrtr).num_cols));
233 
234  VectorXd b=eigen_W.cwiseProduct(eigen_mu - eigen_mean)+eigen_dlp;
235 
236  VectorXd dalpha=b-eigen_sW.cwiseProduct(
237  L.solve(eigen_sW.cwiseProduct(eigen_ktrtr*b*CMath::exp((m_obj->m_log_scale)*2.0))))-eigen_alpha;
238 
239  // perform Brent's optimization
240  PsiLine func;
241 
242  func.log_scale=m_obj->m_log_scale;
243  func.K=eigen_ktrtr;
244  func.dalpha=dalpha;
245  func.start_alpha=eigen_alpha;
246  func.alpha=&eigen_alpha;
247  func.dlp=&(m_obj->m_dlp);
248  func.f=&(m_obj->m_mu);
249  func.m=&(m_obj->m_mean_f);
250  func.W=&(m_obj->m_W);
251  func.lik=m_obj->m_model;
252  func.lab=m_obj->m_labels;
253 
254  float64_t x;
255  Psi_New=local_min(0, m_opt_max, m_opt_tolerance, func, x);
256  }
257 
258  if (Psi_Old-Psi_New>m_tolerance && iter>=m_iter)
259  {
260  SG_SWARNING("Max iterations (%d) reached, but convergence level (%f) is not yet below tolerance (%f)\n", m_iter, Psi_Old-Psi_New, m_tolerance);
261  }
262  return Psi_New;
263 }
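Each pass of the loop above is one Newton step written in the numerically stable form of infLaplace.m. With the scaled kernel sigma^2*K and B = I + W^{1/2} sigma^2 K W^{1/2} = L^T L (the LLT factor computed in the loop),

\[
b = W(f-m)+\nabla_f\log p(y\mid f),\qquad
\Delta\alpha = b - W^{1/2}B^{-1}W^{1/2}\,\sigma^2K\,b \;-\; \alpha,
\]

after which Brent's method picks the step size x in [0, m_opt_max] that minimizes psi(alpha + x*Delta alpha). When W has negative entries (possible for non-log-concave likelihoods such as Student's t), W is first shifted by (2/df)*(nabla log p)^2, as suggested by Vanhatalo et al., before the square root is taken.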
264 
265 CSingleLaplaceInferenceMethod::CSingleLaplaceInferenceMethod() : CLaplaceInference()
266 {
267  init();
268 }
269 
270 CSingleLaplaceInferenceMethod::CSingleLaplaceInferenceMethod(CKernel* kern,
271  CFeatures* feat, CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod)
272  : CLaplaceInference(kern, feat, m, lab, mod)
273 {
274  init();
275 }
276 
277 void CSingleLaplaceInferenceMethod::init()
278 {
279  m_Psi=0;
280  SG_ADD(&m_Psi, "Psi", "posterior log likelihood without constant terms", MS_NOT_AVAILABLE);
281  SG_ADD(&m_sW, "sW", "square root of W", MS_NOT_AVAILABLE);
282  SG_ADD(&m_d2lp, "d2lp", "second derivative of log likelihood with respect to function location", MS_NOT_AVAILABLE);
283  SG_ADD(&m_d3lp, "d3lp", "third derivative of log likelihood with respect to function location", MS_NOT_AVAILABLE);
284  register_minimizer(new CSingleLaplaceNewtonOptimizer());
285 }
286 
287 SGVector<float64_t> CSingleLaplaceInferenceMethod::get_diagonal_vector()
288 {
289  if (parameter_hash_changed())
290  update();
291 
292  return SGVector<float64_t>(m_sW);
293 }
294 
295 CSingleLaplaceInferenceMethod* CSingleLaplaceInferenceMethod::obtain_from_generic(
296  CInference* inference)
297 {
298  if (inference==NULL)
299  return NULL;
300 
301  if (inference->get_inference_type()!=INF_LAPLACE_SINGLE)
302  SG_SERROR("Provided inference is not of type CSingleLaplaceInferenceMethod\n")
303 
304  SG_REF(inference);
305  return (CSingleLaplaceInferenceMethod*)inference;
306 }
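A small usage sketch for the helper above; the variable `gp_inference` is illustrative only, and releasing the returned reference stays with the caller:

// gp_inference is assumed to be a CInference* that was created as a
// CSingleLaplaceInferenceMethod (otherwise SG_SERROR is raised)
CSingleLaplaceInferenceMethod* laplace=
    CSingleLaplaceInferenceMethod::obtain_from_generic(gp_inference);
SGVector<float64_t> diag=laplace->get_diagonal_vector();
SG_UNREF(laplace); // obtain_from_generic() SG_REFs the returned object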
307 
308 CSingleLaplaceInferenceMethod::~CSingleLaplaceInferenceMethod()
309 {
310 }
311 
312 float64_t CSingleLaplaceInferenceMethod::get_negative_log_marginal_likelihood()
313 {
314  if (parameter_hash_changed())
315  update();
316 
317  // create eigen representations alpha, f, W, L
318  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
319  Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
320  Map<VectorXd> eigen_W(m_W.vector, m_W.vlen);
321  Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
322 
323  // get mean vector and create eigen representation of it
324  SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
325  Map<VectorXd> eigen_mean(mean.vector, mean.vlen);
326 
327  // get log likelihood
328  float64_t lp=SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels,
329  m_mu));
330 
331  float64_t result;
332 
333  if (eigen_W.minCoeff()<0)
334  {
335  Map<VectorXd> eigen_sW(m_sW.vector, m_sW.vlen);
336  Map<MatrixXd> eigen_ktrtr(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
337 
338  FullPivLU<MatrixXd> lu(MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols)+
339  eigen_ktrtr*CMath::exp(m_log_scale*2.0)*eigen_sW.asDiagonal());
340 
341  result=(eigen_alpha.dot(eigen_mu-eigen_mean))/2.0-
342  lp+log(lu.determinant())/2.0;
343  }
344  else
345  {
346  result=eigen_alpha.dot(eigen_mu-eigen_mean)/2.0-lp+
347  eigen_L.diagonal().array().log().sum();
348  }
349 
350  return result;
351 }
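Both branches above evaluate the standard Laplace approximation to the negative log marginal likelihood at the mode f_hat = sigma^2*K*alpha_hat + m:

\[
-\log Z_{\mathrm{La}} \;=\; \tfrac{1}{2}\,\hat\alpha^\top(\hat f-m)\;-\;\log p(y\mid\hat f)\;+\;\tfrac{1}{2}\log\det B,
\]

where the log-determinant term is read off the diagonal of the Cholesky factor of B = I + W^{1/2} sigma^2 K W^{1/2} when W is non-negative, and is otherwise taken from the full-pivoting LU determinant of I + sigma^2 K diag(sW), exactly as the two code paths compute it.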
352 
353 void CSingleLaplaceInferenceMethod::update_approx_cov()
354 {
355  Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
356  Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
357  Map<VectorXd> eigen_sW(m_sW.vector, m_sW.vlen);
358 
359  m_Sigma=SGMatrix<float64_t>(m_ktrtr.num_rows, m_ktrtr.num_cols);
360  Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, m_Sigma.num_cols);
361 
362  // compute V = L^(-1) * W^(1/2) * K, using upper triangular factor L^T
363  MatrixXd eigen_V=eigen_L.triangularView<Upper>().adjoint().solve(
364  eigen_sW.asDiagonal()*eigen_K*CMath::exp(m_log_scale*2.0));
365 
366  // compute covariance matrix of the posterior:
367  // Sigma = K - K * W^(1/2) * (L * L^T)^(-1) * W^(1/2) * K =
368  // K - (K * W^(1/2)) * (L^T)^(-1) * L^(-1) * W^(1/2) * K =
369  // K - (W^(1/2) * K)^T * (L^(-1))^T * L^(-1) * W^(1/2) * K = K - V^T * V
370  eigen_Sigma=eigen_K*CMath::exp(m_log_scale*2.0)-eigen_V.adjoint()*eigen_V;
371 }
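The comment block spells out the algebra; compactly, with B = I + W^{1/2} sigma^2 K W^{1/2} factored in m_L,

\[
\Sigma \;=\; \sigma^2K \;-\; \sigma^2K\,W^{1/2}B^{-1}W^{1/2}\,\sigma^2K
\;=\;\big((\sigma^2K)^{-1}+W\big)^{-1},
\]

the last equality following from the Woodbury identity, so m_Sigma is the covariance of the Gaussian posterior approximation.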
372 
373 void CSingleLaplaceInferenceMethod::update_chol()
374 {
375  // get log probability derivatives
376  m_dlp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 1);
377  m_d2lp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 2);
378  m_d3lp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 3);
379 
380  // W = -d2lp
381  m_W=m_d2lp.clone();
382  m_W.scale(-1.0);
383  m_sW=SGVector<float64_t>(m_W.vlen);
384 
385  // compute sW
386  Map<VectorXd> eigen_W(m_W.vector, m_W.vlen);
387  Map<VectorXd> eigen_sW(m_sW.vector, m_sW.vlen);
388 
389  if (eigen_W.minCoeff()>0)
390  eigen_sW=eigen_W.cwiseSqrt();
391  else
392  //post.sW = sqrt(abs(W)).*sign(W);
393  eigen_sW=((eigen_W.array().abs()+eigen_W.array())/2).sqrt()-((eigen_W.array().abs()-eigen_W.array())/2).sqrt();
394 
395  // create eigen representation of kernel matrix
396  Map<MatrixXd> eigen_ktrtr(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
397 
398  // create shogun and eigen representation of posterior cholesky
399  m_L=SGMatrix<float64_t>(m_ktrtr.num_rows, m_ktrtr.num_cols);
400  Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
401 
402  if (eigen_W.minCoeff() < 0)
403  {
404  //A = eye(n)+K.*repmat(w',n,1);
405  FullPivLU<MatrixXd> lu(
406  MatrixXd::Identity(m_ktrtr.num_rows,m_ktrtr.num_cols)+
407  eigen_ktrtr*CMath::exp(m_log_scale*2.0)*eigen_W.asDiagonal());
408  // compute cholesky: L = -(K + 1/W)^-1
409  //-iA = -inv(A)
410  eigen_L=-lu.inverse();
411  // -repmat(w,1,n).*iA == (-iA'.*repmat(w',n,1))'
412  eigen_L=eigen_W.asDiagonal()*eigen_L;
413  }
414  else
415  {
416  // compute cholesky: L = chol(sW * sW' .* K + I)
417  LLT<MatrixXd> L(
418  (eigen_sW*eigen_sW.transpose()).cwiseProduct(eigen_ktrtr*CMath::exp(m_log_scale*2.0))+
419  MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols));
420 
421  eigen_L = L.matrixU();
422  }
423 }
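For likelihoods that are not log-concave, W can have negative entries. The elementwise expression used above is simply the signed square root,

\[
sW_i \;=\; \sqrt{\tfrac{|W_i|+W_i}{2}}-\sqrt{\tfrac{|W_i|-W_i}{2}} \;=\; \operatorname{sign}(W_i)\sqrt{|W_i|},
\]

and in that branch m_L stores \(-\,\mathrm{diag}(W)\,(I+\sigma^2K\,\mathrm{diag}(W))^{-1}\) instead of a Cholesky factor, matching the two corresponding code paths in infLaplace.m.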
424 
425 void CSingleLaplaceInferenceMethod::update()
426 {
427  SG_DEBUG("entering\n");
428 
429  CInference::update();
430  update_init();
431  update_alpha();
432  update_chol();
433  m_gradient_update=false;
434  update_parameter_hash();
435 
436  SG_DEBUG("leaving\n");
437 }
438 
439 
440 void CSingleLaplaceInferenceMethod::update_init()
441 {
442  float64_t Psi_New;
443  float64_t Psi_Def;
444  // get mean vector and create eigen representation of it
445  m_mean_f=m_mean->get_mean_vector(m_features);
446  Map<VectorXd> eigen_mean(m_mean_f.vector, m_mean_f.vlen);
447 
448  // create eigen representation of kernel matrix
449  Map<MatrixXd> eigen_ktrtr(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
450 
451  // create shogun and eigen representation of function vector
452  m_mu=SGVector<float64_t>(m_mean_f.vlen);
453  Map<VectorXd> eigen_mu(m_mu, m_mu.vlen);
454 
455  if (m_alpha.vlen!=m_labels->get_num_labels())
456  {
457  // set alpha a zero vector
458  m_alpha=SGVector<float64_t>(m_labels->get_num_labels());
459  m_alpha.zero();
460 
461  // f = mean, if length of alpha and length of y doesn't match
462  eigen_mu=eigen_mean;
463 
464  Psi_New=-SGVector<float64_t>::sum(m_model->get_log_probability_f(
465  m_labels, m_mu));
466  }
467  else
468  {
469  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
470 
471  // compute f = K * alpha + m
472  eigen_mu=eigen_ktrtr*CMath::exp(m_log_scale*2.0)*eigen_alpha+eigen_mean;
473 
474  Psi_New=eigen_alpha.dot(eigen_mu-eigen_mean)/2.0-
475  SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels, m_mu));
476 
477  Psi_Def=-SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels, m_mean_f));
478 
479  // if default is better, then use it
480  if (Psi_Def < Psi_New)
481  {
482  m_alpha.zero();
483  eigen_mu=eigen_mean;
484  Psi_New=Psi_Def;
485  }
486  }
487  m_Psi=Psi_New;
488 }
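The initialization compares the objective at the previously stored alpha with the default start alpha = 0 (so that f = m) and keeps whichever value is lower:

\[
\Psi(\alpha)=\tfrac{1}{2}\,\alpha^\top(f-m)-\log p(y\mid f),\qquad f=\sigma^2K\alpha+m,
\qquad \Psi_{\mathrm{def}}=\Psi(0)=-\log p(y\mid m).
\]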
489 
490 
491 void CSingleLaplaceInferenceMethod::register_minimizer(Minimizer* minimizer)
492 {
493  REQUIRE(minimizer, "Minimizer must be set\n");
494  if (!dynamic_cast<CSingleLaplaceNewtonOptimizer*>(minimizer))
495  {
496  FirstOrderMinimizer* opt= dynamic_cast<FirstOrderMinimizer*>(minimizer);
497  REQUIRE(opt, "The provided minimizer is not supported\n")
498  }
499  CInference::register_minimizer(minimizer);
500 }
501 
502 void CSingleLaplaceInferenceMethod::update_alpha()
503 {
504  CSingleLaplaceNewtonOptimizer *opt=dynamic_cast<CSingleLaplaceNewtonOptimizer*>(m_minimizer);
505  bool cleanup=false;
506  if (opt)
507  {
508  opt->set_target(this);
509 #ifdef USE_REFERENCE_COUNTING
510  if(this->ref_count()>1)
511  cleanup=true;
512 #endif
513  opt->minimize();
514  opt->unset_target(cleanup);
515  }
516  else
517  {
518  FirstOrderMinimizer* minimizer= dynamic_cast<FirstOrderMinimizer*>(m_minimizer);
519  REQUIRE(minimizer, "The provided minimizer is not supported\n");
520 
521  SingleLaplaceInferenceMethodCostFunction *cost_fun=new SingleLaplaceInferenceMethodCostFunction();
522  cost_fun->set_target(this);
523 #ifdef USE_REFERENCE_COUNTING
524  if(this->ref_count()>1)
525  cleanup=true;
526 #endif
527  minimizer->set_cost_function(cost_fun);
528  minimizer->minimize();
529  minimizer->unset_cost_function(false);
530  cost_fun->unset_target(cleanup);
531  SG_UNREF(cost_fun);
532  }
533  // get mean vector and create eigen representation of it
534  Map<VectorXd> eigen_mean(m_mean_f.vector, m_mean_f.vlen);
535 
536  // create eigen representation of kernel matrix
537  Map<MatrixXd> eigen_ktrtr(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
538 
539  Map<VectorXd> eigen_mu(m_mu, m_mu.vlen);
540 
541  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
542 
543  // compute f = K * alpha + m
544  eigen_mu=eigen_ktrtr*CMath::exp(m_log_scale*2.0)*eigen_alpha+eigen_mean;
545 }
546 
547 void CSingleLaplaceInferenceMethod::update_deriv()
548 {
549  // create eigen representation of W, sW, dlp, d3lp, K, alpha and L
550  Map<VectorXd> eigen_W(m_W.vector, m_W.vlen);
551  Map<VectorXd> eigen_sW(m_sW.vector, m_sW.vlen);
552  Map<VectorXd> eigen_dlp(m_dlp.vector, m_dlp.vlen);
553  Map<VectorXd> eigen_d3lp(m_d3lp.vector, m_d3lp.vlen);
554  Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
555  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
556  Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
557 
558  // create shogun and eigen representation of matrix Z
559  m_Z=SGMatrix<float64_t>(m_L.num_rows, m_L.num_cols);
560  Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);
561 
562  // create shogun and eigen representation of the vector g
563  m_g=SGVector<float64_t>(m_Z.num_rows);
564  Map<VectorXd> eigen_g(m_g.vector, m_g.vlen);
565 
566  if (eigen_W.minCoeff()<0)
567  {
568  eigen_Z=-eigen_L;
569 
570  // compute iA = (I + K * diag(W))^-1
571  FullPivLU<MatrixXd> lu(MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols)+
572  eigen_K*CMath::exp(m_log_scale*2.0)*eigen_W.asDiagonal());
573  MatrixXd iA=lu.inverse();
574 
575  // compute derivative ln|L'*L| wrt W: g=sum(iA.*K,2)/2
576  eigen_g=(iA.cwiseProduct(eigen_K*CMath::exp(m_log_scale*2.0))).rowwise().sum()/2.0;
577  }
578  else
579  {
580  // solve L'*L*Z=diag(sW) and compute Z=diag(sW)*Z
581  eigen_Z=eigen_L.triangularView<Upper>().adjoint().solve(
582  MatrixXd(eigen_sW.asDiagonal()));
583  eigen_Z=eigen_L.triangularView<Upper>().solve(eigen_Z);
584  eigen_Z=eigen_sW.asDiagonal()*eigen_Z;
585 
586  // solve L'*C=diag(sW)*K
587  MatrixXd C=eigen_L.triangularView<Upper>().adjoint().solve(
588  eigen_sW.asDiagonal()*eigen_K*CMath::exp(m_log_scale*2.0));
589 
590  // compute derivative ln|L'*L| wrt W: g=(diag(K)-sum(C.^2,1)')/2
591  eigen_g=(eigen_K.diagonal()*CMath::exp(m_log_scale*2.0)-
592  (C.cwiseProduct(C)).colwise().sum().adjoint())/2.0;
593  }
594 
595  // create shogun and eigen representation of the vector dfhat
596  m_dfhat=SGVector<float64_t>(m_g.vlen);
597  Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
598 
599  // compute derivative of nlZ wrt fhat
600  eigen_dfhat=eigen_g.cwiseProduct(eigen_d3lp);
601 }
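For the log-concave branch the quantities prepared here are, with B = I + W^{1/2} sigma^2 K W^{1/2},

\[
Z = W^{1/2}B^{-1}W^{1/2},\qquad
g = \tfrac{1}{2}\operatorname{diag}\!\big(\sigma^2K-\sigma^2K\,Z\,\sigma^2K\big)=\tfrac{1}{2}\operatorname{diag}(\Sigma),\qquad
\widehat{df} = g\circ\nabla_f^3\log p(y\mid\hat f),
\]

so g carries the sensitivity of the log-determinant term to the diagonal of W, and m_dfhat captures the implicit dependence of the approximate marginal likelihood on the location of the mode f_hat. The negative-W branch replaces Z by the stored -diag(W)*(I+sigma^2*K*diag(W))^{-1} and computes g row-wise from the explicit inverse.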
602 
603 SGVector<float64_t> CSingleLaplaceInferenceMethod::get_derivative_wrt_inference_method(
604  const TParameter* param)
605 {
606  REQUIRE(!strcmp(param->m_name, "log_scale"), "Can't compute derivative of "
607  "the negative log marginal likelihood wrt %s.%s parameter\n",
608  get_name(), param->m_name)
609 
610  // create eigen representation of K, Z, dfhat, dlp and alpha
611  Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
612  Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);
613  Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
614  Map<VectorXd> eigen_dlp(m_dlp.vector, m_dlp.vlen);
615  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
616 
617  SGVector<float64_t> result(1);
618 
619  // compute derivative K wrt scale
620  // compute dnlZ=sum(sum(Z.*dK))/2-alpha'*dK*alpha/2
621  result[0]=(eigen_Z.cwiseProduct(eigen_K)).sum()/2.0-
622  (eigen_alpha.adjoint()*eigen_K).dot(eigen_alpha)/2.0;
623 
624  // compute b=dK*dlp
625  VectorXd b=eigen_K*eigen_dlp;
626 
627  // compute dnlZ=dnlZ-dfhat'*(b-K*(Z*b))
628  result[0]=result[0]-eigen_dfhat.dot(b-eigen_K*CMath::exp(m_log_scale*2.0)*(eigen_Z*b));
629  result[0]*=CMath::exp(m_log_scale*2.0)*2.0;
630 
631  return result;
632 }
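Since the only inference-level hyperparameter handled here is the log kernel scale, and the kernel enters as sigma^2*K with dK/d(log sigma) = 2*sigma^2*K, the returned scalar is

\[
\frac{\partial(-\log Z)}{\partial\log\sigma}
= 2\sigma^2\Big[\tfrac{1}{2}\operatorname{tr}(ZK)-\tfrac{1}{2}\alpha^\top K\alpha
-\widehat{df}^{\,\top}\big(b-\sigma^2KZb\big)\Big],\qquad
b = K\,\nabla_f\log p(y\mid\hat f),
\]

which is exactly the explicit term plus the implicit (mode-shift) term assembled in the two result[0] assignments above.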
633 
634 SGVector<float64_t> CSingleLaplaceInferenceMethod::get_derivative_wrt_likelihood_model(
635  const TParameter* param)
636 {
637  // create eigen representation of K, Z, g and dfhat
638  Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
639  Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);
640  Map<VectorXd> eigen_g(m_g.vector, m_g.vlen);
641  Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
642 
643  // get derivatives wrt likelihood model parameters
644  SGVector<float64_t> lp_dhyp=m_model->get_first_derivative(m_labels,
645  m_mu, param);
646  SGVector<float64_t> dlp_dhyp=m_model->get_second_derivative(m_labels,
647  m_mu, param);
648  SGVector<float64_t> d2lp_dhyp=m_model->get_third_derivative(m_labels,
649  m_mu, param);
650 
651  // create eigen representation of the derivatives
652  Map<VectorXd> eigen_lp_dhyp(lp_dhyp.vector, lp_dhyp.vlen);
653  Map<VectorXd> eigen_dlp_dhyp(dlp_dhyp.vector, dlp_dhyp.vlen);
654  Map<VectorXd> eigen_d2lp_dhyp(d2lp_dhyp.vector, d2lp_dhyp.vlen);
655 
656  SGVector<float64_t> result(1);
657 
658  // compute b vector
659  VectorXd b=eigen_K*eigen_dlp_dhyp;
660 
661  // compute dnlZ=-g'*d2lp_dhyp-sum(lp_dhyp)-dfhat'*(b-K*(Z*b))
662  result[0]=-eigen_g.dot(eigen_d2lp_dhyp)-eigen_lp_dhyp.sum()-
663  eigen_dfhat.dot(b-eigen_K*CMath::exp(m_log_scale*2.0)*(eigen_Z*b));
664 
665  return result;
666 }
667 
668 SGVector<float64_t> CSingleLaplaceInferenceMethod::get_derivative_wrt_kernel(
669  const TParameter* param)
670 {
671  // create eigen representation of K, Z, dfhat, dlp and alpha
672  Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
673  Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);
674  Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
675  Map<VectorXd> eigen_dlp(m_dlp.vector, m_dlp.vlen);
676  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
677 
678  REQUIRE(param, "Param not set\n");
679  SGVector<float64_t> result;
680  int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
681  result=SGVector<float64_t>(len);
682 
683  for (index_t i=0; i<result.vlen; i++)
684  {
685  SGMatrix<float64_t> dK;
686 
687  if (result.vlen==1)
688  dK=m_kernel->get_parameter_gradient(param);
689  else
690  dK=m_kernel->get_parameter_gradient(param, i);
691 
692  Map<MatrixXd> eigen_dK(dK.matrix, dK.num_rows, dK.num_cols);
693 
694  // compute dnlZ=sum(sum(Z.*dK))/2-alpha'*dK*alpha/2
695  result[i]=(eigen_Z.cwiseProduct(eigen_dK)).sum()/2.0-
696  (eigen_alpha.adjoint()*eigen_dK).dot(eigen_alpha)/2.0;
697 
698  // compute b=dK*dlp
699  VectorXd b=eigen_dK*eigen_dlp;
700 
701  // compute dnlZ=dnlZ-dfhat'*(b-K*(Z*b))
702  result[i]=result[i]-eigen_dfhat.dot(b-eigen_K*CMath::exp(m_log_scale*2.0)*
703  (eigen_Z*b));
704  result[i]*=CMath::exp(m_log_scale*2.0);
705  }
706 
707  return result;
708 }
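Each kernel hyperparameter theta follows the same explicit-plus-implicit pattern, with the unscaled parameter gradient dK supplied by the kernel:

\[
\frac{\partial(-\log Z)}{\partial\theta}
= \sigma^2\Big[\tfrac{1}{2}\operatorname{tr}\!\big(Z\,\partial_\theta K\big)
-\tfrac{1}{2}\alpha^\top\partial_\theta K\,\alpha
-\widehat{df}^{\,\top}\big(b-\sigma^2KZb\big)\Big],\qquad
b=\partial_\theta K\,\nabla_f\log p(y\mid\hat f).
\]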
709 
710 SGVector<float64_t> CSingleLaplaceInferenceMethod::get_derivative_wrt_mean(
711  const TParameter* param)
712 {
713  // create eigen representation of K, Z, dfhat and alpha
714  Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
715  Map<MatrixXd> eigen_Z(m_Z.matrix, m_Z.num_rows, m_Z.num_cols);
716  Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
717  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
718 
719  REQUIRE(param, "Param not set\n");
720  SGVector<float64_t> result;
721  int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
722  result=SGVector<float64_t>(len);
723 
724  for (index_t i=0; i<result.vlen; i++)
725  {
726  SGVector<float64_t> dmu;
727 
728  if (result.vlen==1)
729  dmu=m_mean->get_parameter_derivative(m_features, param);
730  else
731  dmu=m_mean->get_parameter_derivative(m_features, param, i);
732 
733  Map<VectorXd> eigen_dmu(dmu.vector, dmu.vlen);
734 
735  // compute dnlZ=-alpha'*dm-dfhat'*(dm-K*(Z*dm))
736  result[i]=-eigen_alpha.dot(eigen_dmu)-eigen_dfhat.dot(eigen_dmu-
737  eigen_K*CMath::exp(m_log_scale*2.0)*(eigen_Z*eigen_dmu));
738  }
739 
740  return result;
741 }
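The mean-function gradient has no explicit log-determinant contribution, only the direct term and the implicit term through the mode:

\[
\frac{\partial(-\log Z)}{\partial\theta_m}
= -\alpha^\top\partial_{\theta_m}m
-\widehat{df}^{\,\top}\big(\partial_{\theta_m}m-\sigma^2KZ\,\partial_{\theta_m}m\big).
\]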
742 
743 SGVector<float64_t> CSingleLaplaceInferenceMethod::get_posterior_mean()
744 {
746 
747  SGVector<float64_t> res(m_mu.vlen);
748  Map<VectorXd> eigen_res(res.vector, res.vlen);
749 
750  Map<VectorXd> eigen_mu(m_mu, m_mu.vlen);
751  SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
752  Map<VectorXd> eigen_mean(mean.vector, mean.vlen);
753  eigen_res=eigen_mu-eigen_mean;
754 
755  return res;
756 }
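The vector returned here is the posterior mean of the latent function expressed relative to the prior mean, i.e. \( \hat f - m \), which by the fixed-point condition of the Laplace mode equals \( \sigma^2K\hat\alpha \).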
757 
758 
759 float64_t CSingleLaplaceInferenceMethod::get_psi_wrt_alpha()
760 {
761  Eigen::Map<Eigen::VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
762  SGVector<float64_t> f(m_alpha.vlen);
763  Eigen::Map<Eigen::VectorXd> eigen_f(f.vector, f.vlen);
764  Eigen::Map<Eigen::MatrixXd> kernel(m_ktrtr.matrix,
765  m_ktrtr.num_rows,
766  m_ktrtr.num_cols);
767  Eigen::Map<Eigen::VectorXd> eigen_mean_f(m_mean_f.vector,
768  m_mean_f.vlen);
769  /* f = K * alpha + mean_f given alpha*/
770  eigen_f
771  = kernel * ((eigen_alpha) * CMath::exp(m_log_scale*2.0)) + eigen_mean_f;
772 
773  /* psi = 0.5 * alpha .* (f - m) - sum(dlp)*/
774  float64_t psi = eigen_alpha.dot(eigen_f - eigen_mean_f) * 0.5;
775  psi -= SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels, f));
776  return psi;
777 }
778 
779 void CSingleLaplaceInferenceMethod::get_gradient_wrt_alpha(SGVector<float64_t> gradient)
780 {
781  REQUIRE(gradient.vlen==m_alpha.vlen,
782  "The length of gradients (%d) should be the same as the length of parameters (%d)\n",
783  gradient.vlen, m_alpha.vlen);
784 
785  Eigen::Map<Eigen::VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
786  Eigen::Map<Eigen::VectorXd> eigen_gradient(gradient.vector, gradient.vlen);
787  SGVector<float64_t> f(m_alpha.vlen);
788  Eigen::Map<Eigen::VectorXd> eigen_f(f.vector, f.vlen);
789  Eigen::Map<Eigen::MatrixXd> kernel(m_ktrtr.matrix,
790  m_ktrtr.num_rows,
791  m_ktrtr.num_cols);
792  Eigen::Map<Eigen::VectorXd> eigen_mean_f(m_mean_f.vector,
793  m_mean_f.vlen);
794 
795  /* f = K * alpha + mean_f given alpha*/
796  eigen_f = kernel * ((eigen_alpha) * CMath::exp(m_log_scale*2.0)) + eigen_mean_f;
797 
798  SGVector<float64_t> dlp_f =
799  m_model->get_log_probability_derivative_f(m_labels, f, 1);
800 
801  Eigen::Map<Eigen::VectorXd> eigen_dlp_f(dlp_f.vector, dlp_f.vlen);
802 
803  /* g_alpha = K * (alpha - dlp_f)*/
804  eigen_gradient = kernel * ((eigen_alpha - eigen_dlp_f) * CMath::exp(m_log_scale*2.0));
805 }
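Differentiating psi(alpha) = 0.5*alpha^T(f-m) - log p(y|f) with f = sigma^2*K*alpha + m gives the gradient assembled above:

\[
\nabla_\alpha\psi = \sigma^2K\big(\alpha-\nabla_f\log p(y\mid f)\big),
\]

which vanishes exactly at the self-consistent mode \( \hat\alpha = \nabla_f\log p(y\mid\hat f) \), so a first-order minimizer driven by get_psi_wrt_alpha()/get_gradient_wrt_alpha() converges to the same point as the Newton optimizer.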
806 
807 
808 }
809 