SHOGUN  6.0.0
SingleFITCLaplaceInferenceMethod.cpp
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (W) 2015 Wu Lin
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  *
30  */
31 
32 #include <shogun/machine/gp/SingleFITCLaplaceInferenceMethod.h>
33 
34 #include <shogun/machine/gp/StudentsTLikelihood.h>
35 #include <shogun/mathematics/Math.h>
36 #include <shogun/lib/external/brent.h>
37 #include <shogun/mathematics/eigen3.h>
40 
41 using namespace shogun;
42 using namespace Eigen;
43 
44 namespace shogun
45 {
46 
47 #ifndef DOXYGEN_SHOULD_SKIP_THIS
48 
50 class CFITCPsiLine : public func_base
51 {
52 public:
53  float64_t log_scale;
54  VectorXd dalpha;
55  VectorXd start_alpha;
56  SGVector<float64_t>* alpha;
57  SGVector<float64_t>* dlp;
58  SGVector<float64_t>* W;
59  SGVector<float64_t>* f;
60  SGVector<float64_t>* m;
61  CLikelihoodModel* lik;
62  CLabels* lab;
63  CSingleFITCLaplaceInferenceMethod* inf;
64 
65  virtual double operator() (double x)
66  {
67  //time complexity O(m*n)
68  Map<VectorXd> eigen_f(f->vector, f->vlen);
69  Map<VectorXd> eigen_m(m->vector, m->vlen);
70  Map<VectorXd> eigen_alpha(alpha->vector, alpha->vlen);
71 
72  //alpha = alpha + s*dalpha;
73  eigen_alpha=start_alpha+x*dalpha;
74  SGVector<float64_t> tmp=inf->compute_mvmK(*alpha);
75  Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
76  //f = mvmK(alpha,V,d0)+m;
77  eigen_f=eigen_tmp+eigen_m;
78 
79  // get first and second derivatives of log likelihood
80  (*dlp)=lik->get_log_probability_derivative_f(lab, (*f), 1);
81 
82  (*W)=lik->get_log_probability_derivative_f(lab, (*f), 2);
83  W->scale(-1.0);
84 
85  // compute psi=alpha'*(f-m)/2-lp
86  float64_t result = eigen_alpha.dot(eigen_f-eigen_m)/2.0-
87  SGVector<float64_t>::sum(lik->get_log_probability_f(lab, *f));
88 
89  return result;
90  }
91 };
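// Note: CFITCPsiLine is the one-dimensional objective used by the Newton
// optimizer's line search below. For a step size x it forms
// alpha = start_alpha + x*dalpha, recomputes f = K*alpha + m via compute_mvmK()
// in O(m*n), and returns psi = alpha'*(f-m)/2 - sum(log p(y|f)), which Brent's
// local_min() minimizes over x.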
92 
93 class SingleFITCLaplaceInferenceMethodCostFunction: public FirstOrderCostFunction
94 {
95 public:
96  SingleFITCLaplaceInferenceMethodCostFunction():FirstOrderCostFunction() { init(); }
97  virtual ~SingleFITCLaplaceInferenceMethodCostFunction() { clean(); }
98  void set_target(CSingleFITCLaplaceInferenceMethod *obj)
99  {
100  REQUIRE(obj, "Object must be set\n");
101  if(m_obj != obj)
102  {
103  SG_REF(obj);
104  SG_UNREF(m_obj);
105  m_obj=obj;
106  }
107  }
108 
109  void clean()
110  {
111  SG_UNREF(m_obj);
112  }
113 
114  virtual float64_t get_cost()
115  {
116  REQUIRE(m_obj,"Object not set\n");
117  return m_obj->get_psi_wrt_alpha();
118  }
119  void unset_target(bool is_unref)
120  {
121  if(is_unref)
122  {
123  SG_UNREF(m_obj);
124  }
125  m_obj=NULL;
126  }
127  virtual SGVector<float64_t> obtain_variable_reference()
128  {
129  REQUIRE(m_obj,"Object not set\n");
130  m_derivatives = SGVector<float64_t>((m_obj->m_al).vlen);
131  return m_obj->m_al;
132  }
133  virtual SGVector<float64_t> get_gradient()
134  {
135  REQUIRE(m_obj,"Object not set\n");
136  m_obj->get_gradient_wrt_alpha(m_derivatives);
137  return m_derivatives;
138  }
139  virtual const char* get_name() const { return "SingleFITCLaplaceInferenceMethodCostFunction"; }
140 private:
141  void init()
142  {
143  m_obj=NULL;
144  m_derivatives = SGVector<float64_t>();
145  SG_ADD(&m_derivatives, "SingleFITCLaplaceInferenceMethodCostFunction__m_derivatives",
146  "derivatives in SingleFITCLaplaceInferenceMethodCostFunction", MS_NOT_AVAILABLE);
147  SG_ADD((CSGObject **)&m_obj, "SingleFITCLaplaceInferenceMethodCostFunction__m_obj",
148  "obj in SingleFITCLaplaceInferenceMethodCostFunction", MS_NOT_AVAILABLE);
149  }
150 
151  SGVector<float64_t> m_derivatives;
152  CSingleFITCLaplaceInferenceMethod *m_obj;
153 };
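// Note: this cost function is a thin adapter exposing psi(alpha) and its
// gradient (get_psi_wrt_alpha / get_gradient_wrt_alpha) so that any
// FirstOrderMinimizer registered with the inference method can be used for
// mode finding instead of the built-in Newton optimizer (see update_alpha()).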
154 
155 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
156 
157 void CSingleFITCLaplaceNewtonOptimizer::set_target(CSingleFITCLaplaceInferenceMethod *obj)
158 {
159  REQUIRE(obj, "Object must be set\n");
160  if(m_obj != obj)
161  {
162  SG_REF(obj);
163  SG_UNREF(m_obj);
164  m_obj=obj;
165  }
166 }
167 
168 void CSingleFITCLaplaceNewtonOptimizer::unset_target(bool is_unref)
169 {
170  if(is_unref)
171  {
172  SG_UNREF(m_obj);
173  }
174  m_obj=NULL;
175 
176 }
177 
178 void CSingleFITCLaplaceNewtonOptimizer::init()
179 {
180  m_obj=NULL;
181  m_iter=20;
182  m_tolerance=1e-6;
183  m_opt_tolerance=1e-6;
184  m_opt_max=10;
185 
186  SG_ADD((CSGObject **)&m_obj, "CSingleFITCLaplaceNewtonOptimizer__m_obj",
187  "obj in CSingleFITCLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
188  SG_ADD(&m_iter, "CSingleFITCLaplaceNewtonOptimizer__m_iter",
189  "iter in CSingleFITCLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
190  SG_ADD(&m_tolerance, "CSingleFITCLaplaceNewtonOptimizer__m_tolerance",
191  "tolerance in CSingleFITCLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
192  SG_ADD(&m_opt_tolerance, "CSingleFITCLaplaceNewtonOptimizer__m_opt_tolerance",
193  "opt_tolerance in CSingleFITCLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
194  SG_ADD(&m_opt_max, "CSingleFITCLaplaceNewtonOptimizer__m_opt_max",
195  "opt_max in CSingleFITCLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
196 }
197 
198 float64_t CSingleFITCLaplaceNewtonOptimizer::minimize()
199 {
200  REQUIRE(m_obj,"Object not set\n");
201  //time complexity O(m^2*n)
202  Map<MatrixXd> eigen_kuu((m_obj->m_kuu).matrix, (m_obj->m_kuu).num_rows, (m_obj->m_kuu).num_cols);
203  Map<MatrixXd> eigen_V((m_obj->m_V).matrix, (m_obj->m_V).num_rows, (m_obj->m_V).num_cols);
204  Map<VectorXd> eigen_dg((m_obj->m_dg).vector, (m_obj->m_dg).vlen);
205  Map<MatrixXd> eigen_R0((m_obj->m_chol_R0).matrix, (m_obj->m_chol_R0).num_rows, (m_obj->m_chol_R0).num_cols);
206  Map<VectorXd> eigen_mu(m_obj->m_mu, (m_obj->m_mu).vlen);
207 
208  SGVector<float64_t> mean=m_obj->m_mean->get_mean_vector(m_obj->m_features);
209  Map<VectorXd> eigen_mean(mean.vector, mean.vlen);
210 
211  float64_t Psi_Old=CMath::INFTY;
212  float64_t Psi_New=m_obj->m_Psi;
213 
214  // compute W = -d2lp
215  m_obj->m_W=m_obj->m_model->get_log_probability_derivative_f(m_obj->m_labels, m_obj->m_mu, 2);
216  m_obj->m_W.scale(-1.0);
217 
218  //n-by-1 vector
219  Map<VectorXd> eigen_al((m_obj->m_al).vector, (m_obj->m_al).vlen);
220 
221  // get first derivative of log probability function
222  m_obj->m_dlp=m_obj->m_model->get_log_probability_derivative_f(m_obj->m_labels, m_obj->m_mu, 1);
223 
224  index_t iter=0;
225 
226  m_obj->m_Wneg=false;
227  while (Psi_Old-Psi_New>m_tolerance && iter<m_iter)
228  {
229  //time complexity O(m^2*n)
230  Map<VectorXd> eigen_W((m_obj->m_W).vector, (m_obj->m_W).vlen);
231  Map<VectorXd> eigen_dlp((m_obj->m_dlp).vector, (m_obj->m_dlp).vlen);
232 
233  Psi_Old = Psi_New;
234  iter++;
235 
236  if (eigen_W.minCoeff() < 0)
237  {
238  // Suggested by Vanhatalo et al.,
239  // Gaussian Process Regression with Student's t likelihood, NIPS 2009
240  // Quoted from infFITC_Laplace.m
241  float64_t df;
242 
243  if (m_obj->m_model->get_model_type()==LT_STUDENTST)
244  {
245  CStudentsTLikelihood* lik=CStudentsTLikelihood::obtain_from_generic(m_obj->m_model);
246  df=lik->get_degrees_freedom();
247  SG_UNREF(lik);
248  }
249  else
250  df=1;
251  eigen_W+=(2.0/(df+1))*eigen_dlp.cwiseProduct(eigen_dlp);
252  }
253 
254  //b = W.*(f-m) + dlp;
255  VectorXd b=eigen_W.cwiseProduct(eigen_mu-eigen_mean)+eigen_dlp;
256 
257  //dd = 1./(1+W.*d0);
258  VectorXd dd=MatrixXd::Ones(b.rows(),1).cwiseQuotient(eigen_W.cwiseProduct(eigen_dg)+MatrixXd::Ones(b.rows(),1));
259 
260  VectorXd eigen_t=eigen_W.cwiseProduct(dd);
261  //m-by-m matrix
262  SGMatrix<float64_t> tmp( (m_obj->m_V).num_rows, (m_obj->m_V).num_rows);
263  Map<MatrixXd> eigen_tmp(tmp.matrix, tmp.num_rows, tmp.num_cols);
264  //eye(nu)+(V.*repmat((W.*dd)',nu,1))*V'
265  eigen_tmp=eigen_V*eigen_t.asDiagonal()*eigen_V.transpose()+MatrixXd::Identity(tmp.num_rows,tmp.num_rows);
266  tmp=m_obj->get_chol_inv(tmp);
267  //chol_inv(eye(nu)+(V.*repmat((W.*dd)',nu,1))*V')
268  Map<MatrixXd> eigen_tmp2(tmp.matrix, tmp.num_rows, tmp.num_cols);
269  //RV = chol_inv(eye(nu)+(V.*repmat((W.*dd)',nu,1))*V')*V;
270  // m-by-n matrix
271  MatrixXd eigen_RV=eigen_tmp2*eigen_V;
272  //dalpha = dd.*b - (W.*dd).*(RV'*(RV*(dd.*b))) - alpha; % Newt dir + line search
273  VectorXd dalpha=dd.cwiseProduct(b)-eigen_t.cwiseProduct(eigen_RV.transpose()*(eigen_RV*(dd.cwiseProduct(b))))-eigen_al;
274 
275  //perform Brent's optimization
276  CFITCPsiLine func;
277 
278  func.log_scale=m_obj->m_log_scale;
279  func.dalpha=dalpha;
280  func.start_alpha=eigen_al;
281  func.alpha=&(m_obj->m_al);
282  func.dlp=&(m_obj->m_dlp);
283  func.f=&(m_obj->m_mu);
284  func.m=&mean;
285  func.W=&(m_obj->m_W);
286  func.lik=m_obj->m_model;
287  func.lab=m_obj->m_labels;
288  func.inf=m_obj;
289 
290  float64_t x;
291  Psi_New=local_min(0, m_opt_max, m_opt_tolerance, func, x);
292  }
293 
294  if (Psi_Old-Psi_New>m_tolerance && iter>=m_iter)
295  {
296  SG_SWARNING("Max iterations (%d) reached, but convergence level (%f) is not yet below tolerance (%f)\n", m_iter, Psi_Old-Psi_New, m_tolerance);
297  }
298  return Psi_New;
299 }
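// Note: each Newton iteration above costs O(m^2*n). When the likelihood is not
// log-concave (e.g. Student's t) and W has negative entries, W is shifted by
// (2/(df+1))*dlp.^2 as suggested by Vanhatalo et al. before the Newton direction
// is computed, and Brent's method then line-searches along that direction rather
// than taking the full step.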
300 
301 CSingleFITCLaplaceInferenceMethod::CSingleFITCLaplaceInferenceMethod() : CSingleFITCInference()
302 {
303  init();
304 }
305 
306 CSingleFITCLaplaceInferenceMethod::CSingleFITCLaplaceInferenceMethod(CKernel* kern, CFeatures* feat,
307  CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod, CFeatures* lat)
308 : CSingleFITCInference(kern, feat, m, lab, mod, lat)
309 {
310  init();
311 }
312 
313 void CSingleFITCLaplaceInferenceMethod::init()
314 {
315  m_Psi=0;
316  m_Wneg=false;
317 
318  SG_ADD(&m_dlp, "dlp", "derivative of log likelihood with respect to function location", MS_NOT_AVAILABLE);
319  SG_ADD(&m_W, "W", "the noise matrix", MS_NOT_AVAILABLE);
320 
321  SG_ADD(&m_sW, "sW", "square root of W", MS_NOT_AVAILABLE);
322  SG_ADD(&m_d2lp, "d2lp", "second derivative of log likelihood with respect to function location", MS_NOT_AVAILABLE);
323  SG_ADD(&m_d3lp, "d3lp", "third derivative of log likelihood with respect to function location", MS_NOT_AVAILABLE);
324  SG_ADD(&m_chol_R0, "chol_R0", "Cholesky of inverse covariance of inducing features", MS_NOT_AVAILABLE);
325  SG_ADD(&m_dfhat, "dfhat", "derivative of negative log (approximated) marginal likelihood wrt f", MS_NOT_AVAILABLE);
326  SG_ADD(&m_g, "g", "variable g defined in infFITC_Laplace.m", MS_NOT_AVAILABLE);
327  SG_ADD(&m_dg, "dg", "variable d0 defined in infFITC_Laplace.m", MS_NOT_AVAILABLE);
328  SG_ADD(&m_Psi, "Psi", "the negative log likelihood without constant terms used in Newton's method", MS_NOT_AVAILABLE);
329  SG_ADD(&m_Wneg, "Wneg", "whether W contains negative elements", MS_NOT_AVAILABLE);
330 
332 }
333 
334 void CSingleFITCLaplaceInferenceMethod::compute_gradient()
335 {
336  CInference::compute_gradient();
337 
338  if (!m_gradient_update)
339  {
341  update_deriv();
342  m_gradient_update=true;
343  update_parameter_hash();
344  }
345 }
346 
347 void CSingleFITCLaplaceInferenceMethod::update()
348 {
349  SG_DEBUG("entering\n");
350 
351  CInference::update();
352  update_init();
353  update_alpha();
354  update_chol();
355  m_gradient_update=false;
356  update_parameter_hash();
357 
358  SG_DEBUG("leaving\n");
359 }
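// Note: update() drives the whole approximation: update_init() precomputes the
// FITC quantities (R0, V, d0) and a starting alpha, update_alpha() finds the
// posterior mode, and update_chol() builds the factors (sW, L, Rvdd, g) that the
// marginal likelihood and its derivatives reuse.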
360 
361 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_diagonal_vector()
362 {
363  if (parameter_hash_changed())
364  update();
365 
366  return SGVector<float64_t>(m_sW);
367 }
368 
369 CSingleFITCLaplaceInferenceMethod::~CSingleFITCLaplaceInferenceMethod()
370 {
371 }
372 
373 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::compute_mvmZ(SGVector<float64_t> x)
374 {
375  //time complexity O(m*n)
376  Map<MatrixXd> eigen_Rvdd(m_Rvdd.matrix, m_Rvdd.num_rows, m_Rvdd.num_cols);
377  Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
378  Map<VectorXd> eigen_x(x.vector, x.vlen);
379 
380  SGVector<float64_t> res(x.vlen);
381  Map<VectorXd> eigen_res(res.vector, res.vlen);
382 
383  //Zx = t.*x - RVdd'*(RVdd*x);
384  eigen_res=eigen_x.cwiseProduct(eigen_t)-eigen_Rvdd.transpose()*(eigen_Rvdd*eigen_x);
385  return res;
386 }
387 
388 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::compute_mvmK(SGVector<float64_t> al)
389 {
390  //time complexity O(m*n)
391  Map<MatrixXd> eigen_V(m_V.matrix, m_V.num_rows, m_V.num_cols);
392  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
393  Map<VectorXd> eigen_al(al.vector, al.vlen);
394 
395  SGVector<float64_t> res(al.vlen);
396  Map<VectorXd> eigen_res(res.vector, res.vlen);
397 
398  //Kal = V'*(V*al) + d0.*al;
399  eigen_res= eigen_V.transpose()*(eigen_V*eigen_al)+eigen_dg.cwiseProduct(eigen_al);
400  return res;
401 }
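// Note: compute_mvmK() and compute_mvmZ() apply the FITC prior covariance
// K = V'*V + diag(d0) and the matrix Z = diag(t) - RVdd'*RVdd to a vector in
// O(m*n), so the n-by-n matrices never have to be formed explicitly.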
402 
403 CSingleFITCLaplaceInferenceMethod* CSingleFITCLaplaceInferenceMethod::obtain_from_generic(
404  CInference* inference)
405 {
406  REQUIRE(inference!=NULL, "Inference should not be NULL");
407 
408  if (inference->get_inference_type()!=INF_FITC_LAPLACE_SINGLE)
409  SG_SERROR("Provided inference is not of type CSingleFITCLaplaceInferenceMethod!\n")
410 
411  SG_REF(inference);
412  return (CSingleFITCLaplaceInferenceMethod*)inference;
413 }
414 
415 SGMatrix<float64_t> CSingleFITCLaplaceInferenceMethod::get_chol_inv(SGMatrix<float64_t> mtx)
416 {
417  //time complexity O(m^3), where mtx is a m-by-m matrix
418  REQUIRE(mtx.num_rows==mtx.num_cols, "Matrix must be square\n");
419 
420  Map<MatrixXd> eigen_mtx(mtx.matrix, mtx.num_rows, mtx.num_cols);
421  LLT<MatrixXd> chol(eigen_mtx.colwise().reverse().rowwise().reverse().matrix());
422  //tmp=chol(rot180(A))'
423  MatrixXd tmp=chol.matrixL();
424  SGMatrix<float64_t> res(mtx.num_rows, mtx.num_cols);
425  Map<MatrixXd> eigen_res(res.matrix, res.num_rows, res.num_cols);
426  //chol_inv = @(A) rot180(chol(rot180(A))')\eye(nu); % chol(inv(A))
427  eigen_res=tmp.colwise().reverse().rowwise().reverse().matrix().triangularView<Upper>(
428  ).solve(MatrixXd::Identity(mtx.num_rows, mtx.num_cols));
429  return res;
430 }
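// Note: for a symmetric positive-definite A, get_chol_inv(A) returns the
// upper-triangular factor R0 with R0'*R0 = inv(A) (the rot180/Cholesky trick of
// infFITC_Laplace.m); it is applied below to Kuu+snu2*I and to the m-by-m matrix
// A of the Newton system.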
431 
432 float64_t CSingleFITCLaplaceInferenceMethod::get_negative_log_marginal_likelihood()
433 {
434  if (parameter_hash_changed())
435  update();
436 
437  if (m_Wneg)
438  {
439  SG_WARNING("nlZ cannot be computed since W is too negative");
440  //nlZ = NaN;
441  return CMath::INFTY;
442  }
443  //time complexity O(m^2*n)
444  Map<VectorXd> eigen_alpha(m_al.vector, m_al.vlen);
445  Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
446  SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
447  Map<VectorXd> eigen_mean(mean.vector, mean.vlen);
448  // get log likelihood
449  float64_t lp=SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels,
450  m_mu));
451 
452  Map<MatrixXd> eigen_V(m_V.matrix, m_V.num_rows, m_V.num_cols);
453  Map<VectorXd>eigen_t(m_t.vector, m_t.vlen);
454  MatrixXd A=eigen_V*eigen_t.asDiagonal()*eigen_V.transpose()+MatrixXd::Identity(m_V.num_rows,m_V.num_rows);
455  LLT<MatrixXd> chol(A);
456  A=chol.matrixU();
457 
458  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
459  Map<VectorXd> eigen_W(m_W.vector, m_W.vlen);
460 
461  //nlZ = alpha'*(f-m)/2 - sum(lp) - sum(log(dd))/2 + sum(log(diag(chol(A))));
462  float64_t result=eigen_alpha.dot(eigen_mu-eigen_mean)/2.0-lp+
463  A.diagonal().array().log().sum()+(eigen_W.cwiseProduct(eigen_dg)+MatrixXd::Ones(eigen_dg.rows(),1)).array().log().sum()/2.0;
464 
465  return result;
466 }
467 
468 void CSingleFITCLaplaceInferenceMethod::update_approx_cov()
469 {
470 }
471 
472 void CSingleFITCLaplaceInferenceMethod::update_init()
473 {
474  //time complexity O(m^2*n)
475  //m-by-m matrix
476  Map<MatrixXd> eigen_kuu(m_kuu.matrix, m_kuu.num_rows, m_kuu.num_cols);
477  //m-by-n matrix
478  Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
479 
480  Map<VectorXd> eigen_ktrtr_diag(m_ktrtr_diag.vector, m_ktrtr_diag.vlen);
481 
482  SGMatrix<float64_t> cor_kuu(m_kuu.num_rows, m_kuu.num_cols);
483  Map<MatrixXd> eigen_cor_kuu(cor_kuu.matrix, cor_kuu.num_rows, cor_kuu.num_cols);
484  eigen_cor_kuu=eigen_kuu*CMath::exp(m_log_scale*2.0)+CMath::exp(m_log_ind_noise)*MatrixXd::Identity(
485  m_kuu.num_rows, m_kuu.num_cols);
486  //R0 = chol_inv(Kuu+snu2*eye(nu)); m-by-m matrix
487  m_chol_R0=get_chol_inv(cor_kuu);
488  Map<MatrixXd> eigen_R0(m_chol_R0.matrix, m_chol_R0.num_rows, m_chol_R0.num_cols);
489 
490  //V = R0*Ku; m-by-n matrix
491  m_V=SGMatrix<float64_t>(m_ktru.num_rows, m_ktru.num_cols);
492  Map<MatrixXd> eigen_V(m_V.matrix, m_V.num_rows, m_V.num_cols);
493 
494  eigen_V=eigen_R0*(eigen_ktru*CMath::exp(m_log_scale*2.0));
495  m_dg=SGVector<float64_t>(m_ktrtr_diag.vlen);
496  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
497  //d0 = diagK-sum(V.*V,1)';
498  eigen_dg=eigen_ktrtr_diag*CMath::exp(m_log_scale*2.0)-(eigen_V.cwiseProduct(eigen_V)).colwise().sum().adjoint();
499 
500  // get mean vector and create eigen representation of it
501  SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
502  Map<VectorXd> eigen_mean(mean.vector, mean.vlen);
503 
504  // create shogun and eigen representation of function vector
505  m_mu=SGVector<float64_t>(mean.vlen);
506  Map<VectorXd> eigen_mu(m_mu, m_mu.vlen);
507 
508  float64_t Psi_New;
509  float64_t Psi_Def;
510  if (m_al.vlen!=m_labels->get_num_labels())
511  {
512  // set alpha a zero vector
513  m_al=SGVector<float64_t>(m_labels->get_num_labels());
514  m_al.zero();
515 
516  // f = mean, if length of alpha and length of y do not match
517  eigen_mu=eigen_mean;
518 
519  Psi_New=-SGVector<float64_t>::sum(m_model->get_log_probability_f(
520  m_labels, m_mu));
521  }
522  else
523  {
524  Map<VectorXd> eigen_alpha(m_al.vector, m_al.vlen);
525 
526  // compute f = K * alpha + m
527  SGVector<float64_t> tmp=compute_mvmK(m_al);
528  Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
529  eigen_mu=eigen_tmp+eigen_mean;
530 
531  Psi_New=eigen_alpha.dot(eigen_tmp)/2.0-
532  SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels, m_mu));
533 
534  Psi_Def=-SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels, mean));
535 
536  // if default is better, then use it
537  if (Psi_Def < Psi_New)
538  {
539  m_al.zero();
540  eigen_mu=eigen_mean;
541  Psi_New=Psi_Def;
542  }
543  }
544  m_Psi=Psi_New;
545 }
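// Note: update_init() keeps the previous alpha as a warm start when its length
// matches the labels, but falls back to alpha=0 (giving f=m and objective
// Psi_Def) whenever the zero vector scores a lower psi.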
546 
547 void CSingleFITCLaplaceInferenceMethod::register_minimizer(Minimizer* minimizer)
548 {
549  REQUIRE(minimizer, "Minimizer must be set\n");
550  if (!dynamic_cast<CSingleFITCLaplaceNewtonOptimizer*>(minimizer))
551  {
552  FirstOrderMinimizer* opt= dynamic_cast<FirstOrderMinimizer*>(minimizer);
553  REQUIRE(opt, "The provided minimizer is not supported\n")
554  }
555  CInference::register_minimizer(minimizer);
556 }
557 
558 
559 void CSingleFITCLaplaceInferenceMethod::update_alpha()
560 {
561  CSingleFITCLaplaceNewtonOptimizer *opt=dynamic_cast<CSingleFITCLaplaceNewtonOptimizer*>(m_minimizer);
562  bool cleanup=false;
563  if (opt)
564  {
565  opt->set_target(this);
566 #ifdef USE_REFERENCE_COUNTING
567  if(this->ref_count()>1)
568  cleanup=true;
569 #endif
570  opt->minimize();
571  opt->unset_target(cleanup);
572  }
573  else
574  {
575  FirstOrderMinimizer* minimizer= dynamic_cast<FirstOrderMinimizer*>(m_minimizer);
576  REQUIRE(minimizer, "The provided minimizer is not supported\n");
577 
578  SingleFITCLaplaceInferenceMethodCostFunction *cost_fun=new SingleFITCLaplaceInferenceMethodCostFunction();
579  cost_fun->set_target(this);
580 #ifdef USE_REFERENCE_COUNTING
581  if(this->ref_count()>1)
582  cleanup=true;
583 #endif
584  minimizer->set_cost_function(cost_fun);
585  minimizer->minimize();
586  minimizer->unset_cost_function(false);
587  cost_fun->unset_target(cleanup);
588  SG_UNREF(cost_fun);
589  }
590 
594  Map<VectorXd> eigen_mu(m_mu, m_mu.vlen);
595  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
596 
597 
598  // compute f = K * alpha + m
599  SGVector<float64_t> tmp=compute_mvmK(m_al);
600  Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
601  eigen_mu=eigen_tmp+eigen_mean;
602 
603  m_alpha=SGVector<float64_t>(m_chol_R0.num_rows);
604  Map<VectorXd> eigen_post_alpha(m_alpha.vector, m_alpha.vlen);
605  //post.alpha = R0'*(V*alpha);
606  //m-by-1 vector
607  eigen_post_alpha=eigen_R0.transpose()*(eigen_V*eigen_al);
608 }
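// Note: update_alpha() prefers the registered CSingleFITCLaplaceNewtonOptimizer;
// only if a different FirstOrderMinimizer was registered does it wrap psi(alpha)
// in the cost-function adapter defined above. Afterwards f is refreshed from the
// final alpha and the stored posterior vector is rotated into the inducing basis
// as post.alpha = R0'*(V*alpha).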
609 
610 void CSingleFITCLaplaceInferenceMethod::update_chol()
611 {
612  //time complexity O(m^2*n)
613  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
614  Map<MatrixXd> eigen_V(m_V.matrix, m_V.num_rows, m_V.num_cols);
615  Map<MatrixXd> eigen_R0(m_chol_R0.matrix, m_chol_R0.num_rows, m_chol_R0.num_cols);
616 
617  // get log probability derivatives
618  m_dlp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 1);
619  m_d2lp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 2);
620  m_d3lp=m_model->get_log_probability_derivative_f(m_labels, m_mu, 3);
621 
622  // W = -d2lp
623  m_W=m_d2lp.clone();
624  m_W.scale(-1.0);
625 
626  Map<VectorXd> eigen_W(m_W.vector, m_W.vlen);
627  m_sW=SGVector<float64_t>(m_W.vlen);
628  Map<VectorXd> eigen_sW(m_sW.vector, m_sW.vlen);
629 
630  VectorXd Wd0_1=eigen_W.cwiseProduct(eigen_dg)+MatrixXd::Ones(eigen_W.rows(),1);
631 
632  // compute sW
633  // post.sW = sqrt(abs(W)).*sign(W); % preserve sign in case of negative
634  if (eigen_W.minCoeff()>0)
635  {
636  eigen_sW=eigen_W.cwiseSqrt();
637  }
638  else
639  {
640  eigen_sW=((eigen_W.array().abs()+eigen_W.array())/2).sqrt()-((eigen_W.array().abs()-eigen_W.array())/2).sqrt();
641  //any(1+d0.*W<0)
642  if (!(Wd0_1.array().abs().matrix()==Wd0_1))
643  m_Wneg=true;
644  }
645 
646  m_t=SGVector<float64_t>(m_W.vlen);
647  Map<VectorXd>eigen_t(m_t.vector, m_t.vlen);
648 
649  //dd = 1./(1+d0.*W);
650  VectorXd dd=MatrixXd::Ones(Wd0_1.rows(),1).cwiseQuotient(Wd0_1);
651  eigen_t=eigen_W.cwiseProduct(dd);
652 
653  //m-by-m matrix
654  SGMatrix<float64_t> A(m_kuu.num_rows, m_kuu.num_cols);
655  Map<MatrixXd> eigen_A(A.matrix, A.num_rows, A.num_cols);
656  //A = eye(nu)+(V.*repmat((W.*dd)',nu,1))*V';
657  eigen_A=eigen_V*eigen_t.asDiagonal()*eigen_V.transpose()+MatrixXd::Identity(A.num_rows,A.num_rows);
658 
659  //R0tV = R0'*V; m-by-n
660  MatrixXd R0tV=eigen_R0.transpose()*eigen_V;
661 
662  //B = R0tV.*repmat((W.*dd)',nu,1); m-by-n matrix
663  MatrixXd B=R0tV*eigen_t.asDiagonal();
664 
665  //m-by-m matrix
666  m_L=SGMatrix<float64_t>(m_kuu.num_rows, m_kuu.num_cols);
667  Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
668 
669  //post.L = -B*R0tV';
670  eigen_L=-B*R0tV.transpose();
671 
672  SGMatrix<float64_t> tmp=get_chol_inv(A);
673  Map<MatrixXd> eigen_tmp(tmp.matrix, tmp.num_rows, tmp.num_cols);
674  //RV = chol_inv(A)*V; m-by-n matrix
675  MatrixXd eigen_RV=eigen_tmp*eigen_V;
676  //RVdd m-by-n matrix
677  m_Rvdd=SGMatrix<float64_t>(m_V.num_rows, m_V.num_cols);
678  Map<MatrixXd> eigen_Rvdd(m_Rvdd.matrix, m_Rvdd.num_rows, m_Rvdd.num_cols);
679  //RVdd = RV.*repmat((W.*dd)',nu,1);
680  eigen_Rvdd=eigen_RV*eigen_t.asDiagonal();
681 
682  if (!m_Wneg)
683  {
684  //B = B*RV';
685  B=B*eigen_RV.transpose();
686  //post.L = post.L + B*B';
687  eigen_L+=B*B.transpose();
688  }
689  else
690  {
691  //B = B*V';
692  B=B*eigen_V.transpose();
693  //post.L = post.L + (B*inv(A))*B';
694  FullPivLU<MatrixXd> lu(eigen_A);
695  eigen_L+=B*lu.inverse()*B.transpose();
696  }
697 
698  Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
699  m_g=SGVector<float64_t>(m_dg.vlen);
700  Map<VectorXd> eigen_g(m_g.vector, m_g.vlen);
701  //g = d/2 + sum(((R*R0)*P).^2,1)'/2
702  eigen_g=((eigen_dg.cwiseProduct(dd)).array()+
703  ((eigen_tmp*eigen_R0)*(eigen_ktru*CMath::exp(m_log_scale*2.0))*dd.asDiagonal()
704  ).array().pow(2).colwise().sum().transpose())/2;
705 }
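// Note: when 1+d0.*W has non-positive entries (m_Wneg), the Cholesky-based
// expressions are no longer valid, so update_chol() forms post.L through an
// explicit LU-based inverse of A instead, and the derivative code later refuses
// to differentiate (see derivative_helper_when_Wneg).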
706 
707 void CSingleFITCLaplaceInferenceMethod::update_deriv()
708 {
709  //time complexity O(m^2*n)
710  Map<MatrixXd> eigen_R0(m_chol_R0.matrix, m_chol_R0.num_rows, m_chol_R0.num_cols);
711  Map<MatrixXd> eigen_V(m_V.matrix, m_V.num_rows, m_V.num_cols);
712  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
714  // create shogun and eigen representation of B
715  // m-by-n matrix
716  m_B=SGMatrix<float64_t>(m_V.num_rows, m_V.num_cols);
717  Map<MatrixXd> eigen_B(m_B.matrix, m_B.num_rows, m_B.num_cols);
718 
719  //B = (R0'*R0)*Ku
720  eigen_B=eigen_R0.transpose()*eigen_V;
721 
722  // create shogun and eigen representation of w
723  m_w=SGVector<float64_t>(m_B.num_rows);
724  //w = B*al;
725  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
726  eigen_w=eigen_B*eigen_al;
727 
728  // create shogun and eigen representation of the vector dfhat
729  Map<VectorXd> eigen_d3lp(m_d3lp.vector, m_d3lp.vlen);
730  Map<VectorXd> eigen_g(m_g.vector, m_g.vlen);
731  m_dfhat=SGVector<float64_t>(m_g.vlen);
732  Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
733 
734  // compute derivative of nlZ wrt fhat
735  // dfhat = g.*d3lp;
736  eigen_dfhat=eigen_g.cwiseProduct(eigen_d3lp);
737 }
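// Note: dfhat = g.*d3lp is the derivative of the approximate negative log
// marginal likelihood with respect to the mode f (g is the "g" variable of
// infFITC_Laplace.m computed in update_chol()); every get_derivative_* method
// below uses it to build the implicit part of the gradient.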
738 
739 float64_t CSingleFITCLaplaceInferenceMethod::get_derivative_related_cov(SGVector<float64_t> ddiagKi,
740  SGMatrix<float64_t> dKuui, SGMatrix<float64_t> dKui)
741 {
742  //time complexity O(m^2*n)
744  Map<VectorXd> eigen_ddiagKi(ddiagKi.vector, ddiagKi.vlen);
745  //m-by-m matrix
746  Map<MatrixXd> eigen_dKuui(dKuui.matrix, dKuui.num_rows, dKuui.num_cols);
747  //m-by-n matrix
748  Map<MatrixXd> eigen_dKui(dKui.matrix, dKui.num_rows, dKui.num_cols);
749 
750  // compute R=2*dKui-dKuui*B
751  SGMatrix<float64_t> dA(dKui.num_rows, dKui.num_cols);
752  Map<MatrixXd> eigen_dA(dA.matrix, dA.num_rows, dA.num_cols);
753  //dA = 2*dKu'-R0tV'*dKuu;
754  //dA' = 2*dKu-dKuu'*R0tV;
755  eigen_dA=2*eigen_dKui-eigen_dKuui*eigen_R0tV;
756 
757  SGVector<float64_t> v(ddiagKi.vlen);
758  Map<VectorXd> eigen_v(v.vector, v.vlen);
759  //w = sum(dA.*R0tV',2);
760  //w' = sum(dA'.*R0tV,1);
761  //v = ddiagK-w;
762  eigen_v=eigen_ddiagKi-eigen_dA.cwiseProduct(eigen_R0tV).colwise().sum().transpose();
763 
764  //explicit term
765  float64_t result=CSingleFITCInference::get_derivative_related_cov(ddiagKi, dKuui, dKui, v, dA);
766 
767  //implicit term
768  Map<VectorXd> eigen_dlp(m_dlp.vector, m_dlp.vlen);
769  Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
770 
771  SGVector<float64_t> b(ddiagKi.vlen);
772  Map<VectorXd> eigen_b(b.vector, b.vlen);
773  //b = dA*(R0tV*dlp) + v.*dlp;
774  eigen_b=eigen_dA.transpose()*(eigen_R0tV*eigen_dlp)+eigen_v.cwiseProduct(eigen_dlp);
775  //KZb = mvmK(mvmZ(b,RVdd,t),V,d0);
776  SGVector<float64_t> KZb=compute_mvmK(compute_mvmZ(b));
777  Map<VectorXd> eigen_KZb(KZb.vector, KZb.vlen);
778  //dnlZ.cov(i) = dnlZ.cov(i) - dfhat'*( b-KZb );
779  result-=eigen_dfhat.dot(eigen_b-eigen_KZb);
780  return result;
781 }
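// Note: every covariance derivative below has the same shape: an explicit part
// from CSingleFITCInference plus the implicit correction -dfhat'*(b - K*Z*b)
// that accounts for the dependence of the Laplace mode f on the hyperparameter.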
782 
783 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_derivative_wrt_inference_method(
784  const TParameter* param)
785 {
786  REQUIRE(param, "Param not set\n");
787  //time complexity O(m^2*n)
788  REQUIRE(!(strcmp(param->m_name, "log_scale")
789  && strcmp(param->m_name, "log_inducing_noise")
790  && strcmp(param->m_name, "inducing_features")),
791  "Can't compute derivative of"
792  " the negative log marginal likelihood wrt %s.%s parameter\n",
793  get_name(), param->m_name)
794 
795  SGVector<float64_t> result;
796  int32_t len;
797  if (!strcmp(param->m_name, "inducing_features"))
798  {
799  if(m_Wneg)
800  {
801  int32_t dim=m_inducing_features.num_rows;
802  int32_t num_samples=m_inducing_features.num_cols;
803  len=dim*num_samples;
804  }
805  else if (!m_fully_sparse)
806  return CSingleFITCInference::get_derivative_wrt_inducing_features(param);
807  else
808  return get_derivative_wrt_inducing_features(param);
809  }
810  else
811  len=1;
812 
813  if (m_Wneg)
814  {
815  result=SGVector<float64_t>(len);
816  return derivative_helper_when_Wneg(result, param);
817  }
818 
819  if (!strcmp(param->m_name, "log_inducing_noise"))
820  // wrt inducing_noise
821  // compute derivative wrt inducing noise
822  return get_derivative_wrt_inducing_noise(param);
823 
824  result=SGVector<float64_t>(len);
825  // wrt scale
826  // clone kernel matrices
827  SGVector<float64_t> deriv_trtr=m_ktrtr_diag.clone();
828  SGMatrix<float64_t> deriv_uu=m_kuu.clone();
829  SGMatrix<float64_t> deriv_tru=m_ktru.clone();
830 
831  // create eigen representation of kernel matrices
832  Map<VectorXd> ddiagKi(deriv_trtr.vector, deriv_trtr.vlen);
833  Map<MatrixXd> dKuui(deriv_uu.matrix, deriv_uu.num_rows, deriv_uu.num_cols);
834  Map<MatrixXd> dKui(deriv_tru.matrix, deriv_tru.num_rows, deriv_tru.num_cols);
835 
836  // compute derivatives wrt scale for each kernel matrix
837  result[0]=get_derivative_related_cov(deriv_trtr, deriv_uu, deriv_tru);
838  result[0]*=CMath::exp(m_log_scale*2.0)*2.0;
839  return result;
840 }
841 
842 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_derivative_wrt_likelihood_model(
843  const TParameter* param)
844 {
845  SGVector<float64_t> result(1);
846  if (m_Wneg)
847  return derivative_helper_when_Wneg(result, param);
848 
849  // get derivatives wrt likelihood model parameters
850  SGVector<float64_t> lp_dhyp=m_model->get_first_derivative(m_labels,
851  m_mu, param);
852  SGVector<float64_t> dlp_dhyp=m_model->get_second_derivative(m_labels,
853  m_mu, param);
854  SGVector<float64_t> d2lp_dhyp=m_model->get_third_derivative(m_labels,
855  m_mu, param);
856 
857  // create eigen representation of the derivatives
858  Map<VectorXd> eigen_lp_dhyp(lp_dhyp.vector, lp_dhyp.vlen);
859  Map<VectorXd> eigen_dlp_dhyp(dlp_dhyp.vector, dlp_dhyp.vlen);
860  Map<VectorXd> eigen_d2lp_dhyp(d2lp_dhyp.vector, d2lp_dhyp.vlen);
861  Map<VectorXd> eigen_g(m_g.vector, m_g.vlen);
862  Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
863 
864  //explicit term
865  //dnlZ.lik(i) = -g'*d2lp_dhyp - sum(lp_dhyp);
866  result[0]=-eigen_g.dot(eigen_d2lp_dhyp)-eigen_lp_dhyp.sum();
867 
868  //implicit term
869  //b = mvmK(dlp_dhyp,V,d0);
870  SGVector<float64_t> b=compute_mvmK(dlp_dhyp);
871  //dnlZ.lik(i) = dnlZ.lik(i) - dfhat'*(b-mvmK(mvmZ(b,RVdd,t),V,d0));
872  result[0]-=get_derivative_implicit_term_helper(b);
873 
874  return result;
875 }
876 
877 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_derivative_wrt_kernel(
878  const TParameter* param)
879 {
880  REQUIRE(param, "Param not set\n");
881  SGVector<float64_t> result;
882  int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
883  result=SGVector<float64_t>(len);
884 
885  if (m_Wneg)
886  return derivative_helper_when_Wneg(result, param);
887 
888  m_lock->lock();
889  CFeatures *inducing_features=get_inducing_features();
890  for (index_t i=0; i<result.vlen; i++)
891  {
892  //time complexity O(m^2*n)
893  SGVector<float64_t> deriv_trtr;
894  SGMatrix<float64_t> deriv_uu;
895  SGMatrix<float64_t> deriv_tru;
896 
897  m_kernel->init(m_features, m_features);
898  deriv_trtr=m_kernel->get_parameter_gradient_diagonal(param, i);
899 
900  m_kernel->init(inducing_features, inducing_features);
901  deriv_uu=m_kernel->get_parameter_gradient(param, i);
902 
903  m_kernel->init(inducing_features, m_features);
904  deriv_tru=m_kernel->get_parameter_gradient(param, i);
905 
906  // create eigen representation of derivatives
907  Map<VectorXd> ddiagKi(deriv_trtr.vector, deriv_trtr.vlen);
908  Map<MatrixXd> dKuui(deriv_uu.matrix, deriv_uu.num_rows,
909  deriv_uu.num_cols);
910  Map<MatrixXd> dKui(deriv_tru.matrix, deriv_tru.num_rows,
911  deriv_tru.num_cols);
912 
913  result[i]=get_derivative_related_cov(deriv_trtr, deriv_uu, deriv_tru);
914  result[i]*=CMath::exp(m_log_scale*2.0);
915  }
916  SG_UNREF(inducing_features);
917  m_lock->unlock();
918 
919  return result;
920 }
921 
922 float64_t CSingleFITCLaplaceInferenceMethod::get_derivative_related_mean(SGVector<float64_t> dmu)
923 {
924  //time complexity O(m*n)
925  //explicit term
926  float64_t result=CSingleFITCInference::get_derivative_related_mean(dmu);
927 
928  //implicit term
929  //Zdm = mvmZ(dm,RVdd,t);
930  //tmp = mvmK(Zdm,V,d0)
931  //dnlZ.mean(i) = dnlZ.mean(i) - dfhat'*(dm-mvmK(Zdm,V,d0));
932  result-=get_derivative_implicit_term_helper(dmu);
933 
934  return result;
935 }
936 
937 float64_t CSingleFITCLaplaceInferenceMethod::get_derivative_implicit_term_helper(SGVector<float64_t> d)
938 {
939  //time complexity O(m*n)
940  Map<VectorXd> eigen_d(d.vector, d.vlen);
941  SGVector<float64_t> tmp=compute_mvmK(compute_mvmZ(d));
942  Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
943  Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
944  return eigen_dfhat.dot(eigen_d-eigen_tmp);
945 }
946 
947 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_derivative_wrt_mean(
948  const TParameter* param)
949 {
950  //time complexity O(m*n)
951  REQUIRE(param, "Param not set\n");
952  SGVector<float64_t> result;
953  int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
954  result=SGVector<float64_t>(len);
955 
956  if (m_Wneg)
957  return derivative_helper_when_Wneg(result, param);
958 
959  for (index_t i=0; i<result.vlen; i++)
960  {
961  SGVector<float64_t> dmu;
962  dmu=m_mean->get_parameter_derivative(m_features, param, i);
963  result[i]=get_derivative_related_mean(dmu);
964  }
965 
966  return result;
967 }
968 
969 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::derivative_helper_when_Wneg(
970  SGVector<float64_t> res, const TParameter *param)
971 {
972  REQUIRE(param, "Param not set\n");
973  SG_WARNING("Derivative wrt %s cannot be computed since W (the Hessian (diagonal) matrix) is too negative\n", param->m_name);
974  //dnlZ = struct('cov',0*hyp.cov, 'mean',0*hyp.mean, 'lik',0*hyp.lik);
975  res.zero();
976  return res;
977 }
978 
979 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_derivative_wrt_inducing_features(
980  const TParameter* param)
981 {
982  //time complexity depends on the implementation of the provided kernel
983  //time complexity is at least O(max((p*n*m),(m^2*n))), where p is the dimension (#) of features
984  //For an ARD kernel with KL_FULL, the time complexity is O(max((p*n*m*d),(m^2*n)))
985  //where the parameter \f$\Lambda\f$ of the ARD kernel is a \f$d\f$-by-\f$p\f$ matrix,
986  //For an ARD kernel with KL_SCALE and KL_DIAG, the time complexity is O(max((p*n*m),(m^2*n)))
987  //efficiently compute the implicit term and explicit term at one shot
988  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
989  Map<MatrixXd> eigen_B(m_B.matrix, m_B.num_rows, m_B.num_cols);
990  //w=B*al
991  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
992  Map<MatrixXd> eigen_Rvdd(m_Rvdd.matrix, m_Rvdd.num_rows, m_Rvdd.num_cols);
993  Map<VectorXd> eigen_dlp(m_dlp.vector, m_dlp.vlen);
994  Map<VectorXd> eigen_dfhat(m_dfhat.vector, m_dfhat.vlen);
995 
996  //q = dfhat - mvmZ(mvmK(dfhat,V,d0),RVdd,t);
997  SGVector<float64_t> q=compute_mvmZ(compute_mvmK(m_dfhat));
998  Map<VectorXd> eigen_q(q.vector, q.vlen);
999  eigen_q=eigen_dfhat-eigen_q;
1000 
1001  //explicit term
1002  //diag_dK = alpha.*alpha + sum(RVdd.*RVdd,1)'-t, where t can be cancelled out
1003  //-v_1=get_derivative_related_cov_diagonal= -(alpha.*alpha + sum(RVdd.*RVdd,1)')
1004  //implicit term
1005  //-v_2=-2*dlp.*q
1006  //neg_v = -(diag_dK+ 2*dlp.*q);
1007  SGVector<float64_t> neg_v=get_derivative_related_cov_diagonal();
1008  Map<VectorXd> eigen_neg_v(neg_v.vector, neg_v.vlen);
1009  eigen_neg_v-=2*eigen_dlp.cwiseProduct(eigen_q);
1010 
1011  SGMatrix<float64_t> BdK(m_B.num_rows, m_B.num_cols);
1012  Map<MatrixXd> eigen_BdK(BdK.matrix, BdK.num_rows, BdK.num_cols);
1013  //explicit
1014  //BdK = (B*alpha)*alpha' + (B*RVdd')*RVdd - B.*repmat(v_1',nu,1),
1015  //implicit
1016  //BdK = BdK + (B*dlp)*q' + (B*q)*dlp' - B.*repmat(v_2',nu,1)
1017  //where v_1 is the explicit part of v and v_2 is the implicit part of v
1018  //v=v_1+v_2
1019  eigen_BdK=eigen_B*eigen_neg_v.asDiagonal()+eigen_w*(eigen_al.transpose())+
1020  (eigen_B*eigen_Rvdd.transpose())*eigen_Rvdd+
1021  (eigen_B*eigen_dlp)*eigen_q.transpose()+(eigen_B*eigen_q)*eigen_dlp.transpose();
1022 
1023  return get_derivative_related_inducing_features(BdK, param);
1024 }
1025 
1026 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_derivative_wrt_inducing_noise(
1027  const TParameter* param)
1028 {
1029  //time complexity O(m^2*n)
1030  //explicit term
1031  SGVector<float64_t> result=CSingleFITCInference::get_derivative_wrt_inducing_noise(param);
1032 
1033  //implicit term
1034  Map<MatrixXd> eigen_B(m_B.matrix, m_B.num_rows, m_B.num_cols);
1035  Map<VectorXd> eigen_dlp(m_dlp.vector, m_dlp.vlen);
1036 
1037  //snu = sqrt(snu2);
1038  //T = chol_inv(Kuu + snu2*eye(nu)); T = T'*(T*(snu*Ku));
1039  //t1 = sum(T.*T,1)';
1040  VectorXd eigen_t1=eigen_B.cwiseProduct(eigen_B).colwise().sum().adjoint();
1041 
1042  //b = (t1.*dlp-T'*(T*dlp))*2;
1043  SGVector<float64_t> b(eigen_t1.rows());
1044  Map<VectorXd> eigen_b(b.vector, b.vlen);
1045  float64_t factor=2.0*CMath::exp(m_log_ind_noise);
1046  eigen_b=(eigen_t1.cwiseProduct(eigen_dlp)-eigen_B.transpose()*(eigen_B*eigen_dlp))*factor;
1047 
1048  //KZb = mvmK(mvmZ(b,RVdd,t),V,d0);
1049  //z = z - dfhat'*( b-KZb );
1050  result[0]-=get_derivative_implicit_term_helper(b);
1051 
1052  return result;
1053 }
1054 
1055 SGVector<float64_t> CSingleFITCLaplaceInferenceMethod::get_posterior_mean()
1056 {
1057  compute_gradient();
1058 
1059  SGVector<float64_t> res(m_mu.vlen);
1060  Map<VectorXd> eigen_res(res.vector, res.vlen);
1061 
1062  /*
1063  //true posterior mean with equivalent FITC prior approximated by Newton method
1064  //time complexity O(n)
1065  Map<VectorXd> eigen_mu(m_mu, m_mu.vlen);
1066  SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);
1067  Map<VectorXd> eigen_mean(mean.vector, mean.vlen);
1068  eigen_res=eigen_mu-eigen_mean;
1069  */
1070 
1071  //FITC (further) approximated posterior mean with Newton method
1072  //time complexity of the following operation is O(m*n)
1073  Map<VectorXd> eigen_post_alpha(m_alpha.vector, m_alpha.vlen);
1074  Map<MatrixXd> eigen_Ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
1075  eigen_res=CMath::exp(m_log_scale*2.0)*eigen_Ktru.adjoint()*eigen_post_alpha;
1076 
1077  return res;
1078 }
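// Note: the value returned here is the FITC-approximate posterior mean
// scale^2 * Ktru' * post.alpha at the training inputs; the commented block above
// keeps the alternative "true posterior mean under the equivalent FITC prior"
// (mu - mean) for reference.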
1079 
1080 SGMatrix<float64_t> CSingleFITCLaplaceInferenceMethod::get_posterior_covariance()
1081 {
1082  compute_gradient();
1083  //time complexity of the following operations is O(m*n^2)
1084  //Warning: the time complexity increases from O(m^2*n) to O(n^2*m) if this method is called
1085  m_Sigma=SGMatrix<float64_t>(m_ktrtr_diag.vlen, m_ktrtr_diag.vlen);
1086  Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows,
1087  m_Sigma.num_cols);
1088 
1089  //FITC (further) approximated posterior covariance with Newton method
1090  Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
1091  Map<MatrixXd> eigen_Ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
1092  Map<VectorXd> eigen_dg(m_dg.vector, m_dg.vlen);
1093  Map<MatrixXd> eigen_V(m_V.matrix, m_V.num_rows, m_V.num_cols);
1094 
1095  MatrixXd diagonal_part=eigen_dg.asDiagonal();
1096  //FITC equivalent prior
1097  MatrixXd prior=eigen_V.transpose()*eigen_V+diagonal_part;
1098 
1099  MatrixXd tmp=CMath::exp(m_log_scale*2.0)*eigen_Ktru;
1100  eigen_Sigma=prior-tmp.adjoint()*eigen_L*tmp;
1101 
1102  /*
1103  //true posterior mean with equivalent FITC prior approximated by Newton method
1104  Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
1105  Map<MatrixXd> eigen_Rvdd(m_Rvdd.matrix, m_Rvdd.num_rows, m_Rvdd.num_cols);
1106  Map<VectorXd> eigen_W(m_W.vector, m_W.vlen);
1107  MatrixXd tmp1=eigen_Rvdd*prior;
1108  eigen_Sigma=prior+tmp.transpose()*tmp;
1109 
1110  MatrixXd tmp2=((eigen_dg.cwiseProduct(eigen_t)).asDiagonal()*eigen_V.transpose())*eigen_V;
1111  eigen_Sigma-=(tmp2+tmp2.transpose());
1112  eigen_Sigma-=eigen_V.transpose()*(eigen_V*eigen_t.asDiagonal()*eigen_V.transpose())*eigen_V;
1113  MatrixXd tmp3=((eigen_dg.cwiseProduct(eigen_dg)).cwiseProduct(eigen_t)).asDiagonal();
1114  eigen_Sigma-=tmp3;
1115  */
1116 
1117  return SGMatrix<float64_t>(m_Sigma);
1118 }
1119 
1120 float64_t CSingleFITCLaplaceInferenceMethod::get_psi_wrt_alpha()
1121 {
1122  //time complexity O(m*n)
1123  Map<VectorXd> eigen_alpha(m_al, m_al.vlen);
1124  SGVector<float64_t> f(m_al.vlen);
1125  Map<VectorXd> eigen_f(f.vector, f.vlen);
1126  Map<VectorXd> eigen_mean_f(m_mean_f.vector,m_mean_f.vlen);
1127  /* f = K * alpha + mean_f given alpha*/
1128  SGVector<float64_t> tmp=compute_mvmK(m_al);
1129  Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
1130  eigen_f=eigen_tmp+eigen_mean_f;
1131 
1132  /* psi = 0.5 * alpha' * (f - m) - sum(lp) */
1133  float64_t psi=eigen_alpha.dot(eigen_tmp) * 0.5;
1134  psi-=SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels, f));
1135 
1136  return psi;
1137 }
1138 
1139 void CSingleFITCLaplaceInferenceMethod::get_gradient_wrt_alpha(SGVector<float64_t> gradient)
1140 {
1141  //time complexity O(m*n)
1142  Map<VectorXd> eigen_alpha(m_al, m_al.vlen);
1143  Map<VectorXd> eigen_gradient(gradient.vector, gradient.vlen);
1144  SGVector<float64_t> f(m_al.vlen);
1145  Map<VectorXd> eigen_f(f.vector, f.vlen);
1146  Map<MatrixXd> kernel(m_ktrtr.matrix,
1147  m_ktrtr.num_rows,
1148  m_ktrtr.num_cols);
1149  Map<VectorXd> eigen_mean_f(m_mean_f.vector, m_mean_f.vlen);
1150 
1151  /* f = K * alpha + mean_f given alpha*/
1152  SGVector<float64_t> tmp=compute_mvmK(m_al);
1153  Map<VectorXd> eigen_tmp(tmp.vector, tmp.vlen);
1154  eigen_f=eigen_tmp+eigen_mean_f;
1155 
1156  SGVector<float64_t> dlp_f =
1157  m_model->get_log_probability_derivative_f(m_labels, f, 1);
1158 
1159  Map<VectorXd> eigen_dlp_f(dlp_f.vector, dlp_f.vlen);
1160 
1161  /* g_alpha = K * (alpha - dlp_f)*/
1162  SGVector<float64_t> tmp2(m_al.vlen);
1163  Map<VectorXd> eigen_tmp2(tmp2.vector, tmp2.vlen);
1164  eigen_tmp2=eigen_alpha-eigen_dlp_f;
1165  tmp2=compute_mvmK(tmp2);
1166  Map<VectorXd> eigen_tmp3(tmp2.vector, tmp2.vlen);
1167  eigen_gradient=eigen_tmp3;
1168 }
1169 
1170 }
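// ---------------------------------------------------------------------------
// Usage sketch (editor's note, not part of the original file). Assuming the
// standard Shogun 6.0 classes named below, this inference method is normally
// plugged into a GP classification machine roughly as follows:
//
//   CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(X);
//   CDenseFeatures<float64_t>* inducing=new CDenseFeatures<float64_t>(X_inducing);
//   CBinaryLabels* labels=new CBinaryLabels(y);
//   CGaussianKernel* kernel=new CGaussianKernel(10, 2.0);
//   CZeroMean* mean=new CZeroMean();
//   CLogitLikelihood* lik=new CLogitLikelihood();
//   CSingleFITCLaplaceInferenceMethod* inf=new CSingleFITCLaplaceInferenceMethod(
//       kernel, feats, mean, labels, lik, inducing);
//   CGaussianProcessClassification* gpc=new CGaussianProcessClassification(inf);
//   gpc->train();
//
// where X, X_inducing and y are user-provided SGMatrix/SGVector data; the class
// names above are the usual Shogun ones and are listed here only for context.
// ---------------------------------------------------------------------------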