#include <shogun/lib/external/brent.h>

using namespace Eigen;
#ifndef DOXYGEN_SHOULD_SKIP_THIS

class CFITCPsiLine : public func_base
virtual double operator() (double x)
{
	// step along the search direction: alpha = start_alpha + x*dalpha
	eigen_alpha=start_alpha+x*dalpha;
	// ...
	// f = mvmK(alpha) + m
	eigen_f=eigen_tmp+eigen_m;
	// ...
	// psi = alpha'*(f-m)/2 - log p(y|f)
	float64_t result = eigen_alpha.dot(eigen_f-eigen_m)/2.0-
		SGVector<float64_t>::sum(lik->get_log_probability_f(lab, *f));
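For a step size x, the functor evaluates the Laplace objective along the search direction; in the usual GPML notation (a restatement of the code above, not text from the file):

\[ \Psi(\alpha) = \tfrac{1}{2}\,\alpha^\top (f - m) - \log p(y \mid f), \qquad f = K\alpha + m, \quad \alpha = \alpha_0 + x\,\Delta\alpha, \]

which is the scalar function of x that the Brent minimizer below searches over.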
virtual ~SingleFITCLaplaceInferenceMethodCostFunction() { clean(); }
REQUIRE(obj, "Obj must be set\n");
REQUIRE(m_obj, "Object not set\n");
return m_obj->get_psi_wrt_alpha();
void unset_target(bool is_unref)
REQUIRE(m_obj, "Object not set\n");
REQUIRE(m_obj, "Object not set\n");
m_obj->get_gradient_wrt_alpha(m_derivatives);
return m_derivatives;
virtual const char* get_name() const { return "SingleFITCLaplaceInferenceMethodCostFunction"; }
SG_ADD(&m_derivatives, "SingleFITCLaplaceInferenceMethodCostFunction__m_derivatives",
	"derivatives in SingleFITCLaplaceInferenceMethodCostFunction", MS_NOT_AVAILABLE);
SG_ADD((CSGObject **)&m_obj, "SingleFITCLaplaceInferenceMethodCostFunction__m_obj",
	"obj in SingleFITCLaplaceInferenceMethodCostFunction", MS_NOT_AVAILABLE);
REQUIRE(obj, "Obj must be set\n");
void CSingleFITCLaplaceNewtonOptimizer::init()
m_opt_tolerance=1e-6;
SG_ADD((CSGObject **)&m_obj, "CSingleFITCLaplaceNewtonOptimizer__m_obj",
	"obj in CSingleFITCLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
SG_ADD(&m_iter, "CSingleFITCLaplaceNewtonOptimizer__m_iter",
	"iter in CSingleFITCLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
SG_ADD(&m_tolerance, "CSingleFITCLaplaceNewtonOptimizer__m_tolerance",
	"tolerance in CSingleFITCLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
SG_ADD(&m_opt_tolerance, "CSingleFITCLaplaceNewtonOptimizer__m_opt_tolerance",
	"opt_tolerance in CSingleFITCLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
SG_ADD(&m_opt_max, "CSingleFITCLaplaceNewtonOptimizer__m_opt_max",
	"opt_max in CSingleFITCLaplaceNewtonOptimizer", MS_NOT_AVAILABLE);
REQUIRE(m_obj, "Object not set\n");
Map<MatrixXd> eigen_kuu((m_obj->m_kuu).matrix, (m_obj->m_kuu).num_rows, (m_obj->m_kuu).num_cols);
Map<MatrixXd> eigen_V((m_obj->m_V).matrix, (m_obj->m_V).num_rows, (m_obj->m_V).num_cols);
Map<VectorXd> eigen_dg((m_obj->m_dg).vector, (m_obj->m_dg).vlen);
Map<MatrixXd> eigen_R0((m_obj->m_chol_R0).matrix, (m_obj->m_chol_R0).num_rows, (m_obj->m_chol_R0).num_cols);
// W = -d2lp: negated second derivative of the log likelihood wrt f
m_obj->m_W=m_obj->m_model->get_log_probability_derivative_f(m_obj->m_labels, m_obj->m_mu, 2);
m_obj->m_W.scale(-1.0);
Map<VectorXd> eigen_al((m_obj->m_al).vector, (m_obj->m_al).vlen);
// dlp: first derivative of the log likelihood wrt f
m_obj->m_dlp=m_obj->m_model->get_log_probability_derivative_f(m_obj->m_labels, m_obj->m_mu, 1);
while (Psi_Old-Psi_New>m_tolerance && iter<m_iter)
Map<VectorXd> eigen_W((m_obj->m_W).vector, (m_obj->m_W).vlen);
Map<VectorXd> eigen_dlp((m_obj->m_dlp).vector, (m_obj->m_dlp).vlen);
// non-log-concave case (e.g. Student's t likelihood): stabilize W
if (eigen_W.minCoeff() < 0)
{
	eigen_W+=(2.0/(df+1))*eigen_dlp.cwiseProduct(eigen_dlp);
}
// b = W.*(f-m) + dlp
VectorXd b=eigen_W.cwiseProduct(eigen_mu-eigen_mean)+eigen_dlp;
// dd = 1./(1 + W.*d0)
VectorXd dd=MatrixXd::Ones(b.rows(),1).cwiseQuotient(eigen_W.cwiseProduct(eigen_dg)+MatrixXd::Ones(b.rows(),1));
VectorXd eigen_t=eigen_W.cwiseProduct(dd);
eigen_tmp=eigen_V*eigen_t.asDiagonal()*eigen_V.transpose()+MatrixXd::Identity(tmp.num_rows,tmp.num_rows);
tmp=m_obj->get_chol_inv(tmp);
MatrixXd eigen_RV=eigen_tmp2*eigen_V;
// Newton direction via Woodbury: dalpha = dd.*b - t.*(RV'*(RV*(dd.*b))) - alpha
VectorXd dalpha=dd.cwiseProduct(b)-eigen_t.cwiseProduct(eigen_RV.transpose()*(eigen_RV*(dd.cwiseProduct(b))))-eigen_al;
func.log_scale=m_obj->m_log_scale;
func.start_alpha=eigen_al;
func.alpha=&(m_obj->m_al);
func.dlp=&(m_obj->m_dlp);
func.f=&(m_obj->m_mu);
func.W=&(m_obj->m_W);
func.lik=m_obj->m_model;
func.lab=m_obj->m_labels;
// Brent line search over the step size x in [0, m_opt_max]
Psi_New=local_min(0, m_opt_max, m_opt_tolerance, func, x);
if (Psi_Old-Psi_New>m_tolerance && iter>=m_iter)
{
	SG_SWARNING("Max iterations (%d) reached, but convergence level (%f) is not yet below tolerance (%f)\n", m_iter, Psi_Old-Psi_New, m_tolerance);
}
void CSingleFITCLaplaceInferenceMethod::init()
// mvmZ(x) = t.*x - Rvdd'*(Rvdd*x)
eigen_res=eigen_x.cwiseProduct(eigen_t)-eigen_Rvdd.transpose()*(eigen_Rvdd*eigen_x);
// mvmK(al) = (V'*V + diag(dg))*al, grouped so the n x n matrix is never formed
eigen_res=eigen_V.transpose()*(eigen_V*eigen_al)+eigen_dg.cwiseProduct(eigen_al);
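Why the grouping matters: with V of size m-by-n (m inducing points, m much smaller than n), the product \((V^\top V + \mathrm{diag}(d_0))\,\alpha\) costs O(nm) instead of O(n^2). A minimal standalone sketch in plain Eigen (illustration only, not Shogun code; all names here are made up):

#include <Eigen/Dense>
#include <iostream>

int main()
{
	using namespace Eigen;
	const int n=6, m=2;
	MatrixXd V=MatrixXd::Random(m, n);      // low-rank factor, m << n
	VectorXd dg=VectorXd::Constant(n, 0.5); // FITC diagonal correction
	VectorXd al=VectorXd::Random(n);

	// O(n*m): the n x n covariance is never materialized
	VectorXd fast=V.transpose()*(V*al)+dg.cwiseProduct(al);

	// O(n^2) reference computed with the explicit matrix
	VectorXd ref=(V.transpose()*V+MatrixXd(dg.asDiagonal()))*al;

	std::cout << (fast-ref).norm() << std::endl; // ~1e-15
	return 0;
}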
REQUIRE(inference!=NULL, "Inference should not be NULL");
if (inference->get_inference_type()!=INF_FITC_LAPLACE_SINGLE)
	SG_SERROR("Provided inference is not of type CSingleFITCLaplaceInferenceMethod!\n")
// Cholesky of the 180-degree-rotated matrix: chol(rot180(A))
LLT<MatrixXd> chol(eigen_mtx.colwise().reverse().rowwise().reverse().matrix());
// chol_inv(A) = rot180(chol(rot180(A))')\eye(n)
eigen_res=tmp.colwise().reverse().rowwise().reverse().matrix().triangularView<Upper>(
	).solve(MatrixXd::Identity(mtx.num_rows, mtx.num_cols));
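A self-contained sketch of the rot180 trick above (plain Eigen, hypothetical example data, not Shogun code): rotating A by 180 degrees, taking the lower Cholesky factor, and rotating back yields an upper-triangular M with M*M' = A, and get_chol_inv then returns M^{-1} via a triangular solve.

#include <Eigen/Dense>
#include <iostream>

int main()
{
	using namespace Eigen;
	MatrixXd A(2, 2);
	A << 4.0, 1.0,
	     1.0, 3.0; // symmetric positive definite

	MatrixXd rot=A.colwise().reverse().rowwise().reverse(); // rot180(A)
	LLT<MatrixXd> chol(rot);
	MatrixXd L=chol.matrixL();                              // L*L' = rot180(A)
	MatrixXd M=L.colwise().reverse().rowwise().reverse();   // upper triangular, M*M' = A

	MatrixXd Minv=M.triangularView<Upper>().solve(MatrixXd::Identity(2, 2));

	std::cout << Minv*A*Minv.transpose() << std::endl;      // ~identity
	return 0;
}

The rotations are needed because Eigen (like MATLAB's chol) only hands out factors of the L*L' form, while the code downstream works with an upper-triangular factor M satisfying M*M' = A.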
SG_WARNING("nlZ cannot be computed since W is too negative");
LLT<MatrixXd> chol(A);
A=chol.matrixU();

// nlZ = alpha'*(f-m)/2 - lp + sum(log(diag(chol(A)))) + sum(log(1+W.*d0))/2
float64_t result=eigen_alpha.dot(eigen_mu-eigen_mean)/2.0-lp+
	A.diagonal().array().log().sum()+(eigen_W.cwiseProduct(eigen_dg)+MatrixXd::Ones(eigen_dg.rows(),1)).array().log().sum()/2.0;
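In the standard Laplace approximation (our summary, matching the comment above), the quantity assembled here is

\[ -\log Z \;\approx\; \tfrac{1}{2}\,\hat\alpha^\top(\hat f - m) \;-\; \log p(y \mid \hat f) \;+\; \tfrac{1}{2}\log\det\!\big(I_n + K W\big), \]

with the log-determinant split into a low-rank part read off the Cholesky diagonal of A (log det = 2*sum(log(diag)), hence no extra factor 1/2 on that term) and the diagonal FITC part \(\sum_i \log(1 + w_i d_{0,i})/2\).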
// d0 = diag(K)*scale^2 - diag(V'*V)
eigen_dg=eigen_ktrtr_diag*CMath::exp(m_log_scale*2.0)-(eigen_V.cwiseProduct(eigen_V)).colwise().sum().adjoint();
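In other words (our notation), the FITC diagonal correction is

\[ d_0 = e^{2\theta}\,\mathrm{diag}(K) - \mathrm{diag}(V^\top V), \]

the gap between the exact prior variances (with \(\theta\) the log kernel scale) and the variances already captured by the low-rank part \(V^\top V\).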
eigen_mu=eigen_tmp+eigen_mean;
// Psi_New = alpha'*(f-m)/2 - log p(y|f)
Psi_New=eigen_alpha.dot(eigen_tmp)/2.0-
	SGVector<float64_t>::sum(m_model->get_log_probability_f(m_labels, m_mu));

if (Psi_Def < Psi_New)
REQUIRE(minimizer, "Minimizer must be set\n");
if (!dynamic_cast<CSingleFITCLaplaceNewtonOptimizer*>(minimizer))
{
	FirstOrderMinimizer* opt=dynamic_cast<FirstOrderMinimizer*>(minimizer);
	REQUIRE(opt, "The provided minimizer is not supported\n")
}
#ifdef USE_REFERENCE_COUNTING
	if(this->ref_count()>1)
REQUIRE(minimizer, "The provided minimizer is not supported\n");
cost_fun->set_target(this);
#ifdef USE_REFERENCE_COUNTING
	if(this->ref_count()>1)
		cleanup=true;
#endif
cost_fun->unset_target(cleanup);
eigen_mu=eigen_tmp+eigen_mean;
// post.alpha = R0'*(V*al)
eigen_post_alpha=eigen_R0.transpose()*(eigen_V*eigen_al);
VectorXd Wd0_1=eigen_W.cwiseProduct(eigen_dg)+MatrixXd::Ones(eigen_W.rows(),1);
// ...
if (eigen_W.minCoeff()>0)
{
	eigen_sW=eigen_W.cwiseSqrt();
}
else
{
	eigen_sW=((eigen_W.array().abs()+eigen_W.array())/2).sqrt()-((eigen_W.array().abs()-eigen_W.array())/2).sqrt();
	// flag the case where some 1+W.*d0 went negative (W "too negative")
	if (!(Wd0_1.array().abs().matrix()==Wd0_1))
		m_Wneg=true;
}
// dd = 1./(1+W.*d0)
VectorXd dd=MatrixXd::Ones(Wd0_1.rows(),1).cwiseQuotient(Wd0_1);
eigen_t=eigen_W.cwiseProduct(dd);
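The branchless expression for sW above is the elementwise signed square root; written out (a restatement, not from the file):

\[ s_{W,i} \;=\; \sqrt{\tfrac{|w_i| + w_i}{2}} \;-\; \sqrt{\tfrac{|w_i| - w_i}{2}} \;=\; \begin{cases} \sqrt{w_i}, & w_i \ge 0,\\ -\sqrt{-w_i}, & w_i < 0, \end{cases} \]

so non-log-concave likelihoods (negative entries of W) are handled without complex arithmetic.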
eigen_A=eigen_V*eigen_t.asDiagonal()*eigen_V.transpose()+MatrixXd::Identity(A.num_rows,A.num_rows);
MatrixXd R0tV=eigen_R0.transpose()*eigen_V;
MatrixXd B=R0tV*eigen_t.asDiagonal();
eigen_L=-B*R0tV.transpose();
MatrixXd eigen_RV=eigen_tmp*eigen_V;
eigen_Rvdd=eigen_RV*eigen_t.asDiagonal();
B=B*eigen_RV.transpose();
eigen_L+=B*B.transpose();
B=B*eigen_V.transpose();
FullPivLU<MatrixXd> lu(eigen_A);
eigen_L+=B*lu.inverse()*B.transpose();
eigen_g=((eigen_dg.cwiseProduct(dd)).array()+
	(/* ... */).array().pow(2).colwise().sum().transpose())/2;
eigen_B=eigen_R0.transpose()*eigen_V;
eigen_w=eigen_B*eigen_al;
// dfhat = g.*d3lp
eigen_dfhat=eigen_g.cwiseProduct(eigen_d3lp);
// dA = 2*dKui - dKuui*R0tV
eigen_dA=2*eigen_dKui-eigen_dKuui*eigen_R0tV;
// v = ddiagKi - sum(dA.*R0tV,1)'
eigen_v=eigen_ddiagKi-eigen_dA.cwiseProduct(eigen_R0tV).colwise().sum().transpose();
eigen_b=eigen_dA.transpose()*(eigen_R0tV*eigen_dlp)+eigen_v.cwiseProduct(eigen_dlp);
// implicit part of the derivative
result-=eigen_dfhat.dot(eigen_b-eigen_KZb);
REQUIRE(param, "Param not set\n");
REQUIRE(!(strcmp(param->m_name, "log_scale")
	&& strcmp(param->m_name, "log_inducing_noise")
	&& strcmp(param->m_name, "inducing_features")),
	"Can't compute derivative of"
	" the negative log marginal likelihood wrt %s.%s parameter\n",
	get_name(), param->m_name)
if (!strcmp(param->m_name, "inducing_features"))
if (!strcmp(param->m_name, "log_inducing_noise"))
result[0]=-eigen_g.dot(eigen_d2lp_dhyp)-eigen_lp_dhyp.sum();
REQUIRE(param, "Param not set\n");
int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
return eigen_dfhat.dot(eigen_d-eigen_tmp);
REQUIRE(param, "Param not set\n");
int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
REQUIRE(param, "Param not set\n");
SG_WARNING("Derivative wrt %s cannot be computed since W (the diagonal of the Hessian) is too negative\n", param->m_name);
eigen_q=eigen_dfhat-eigen_q;
eigen_neg_v-=2*eigen_dlp.cwiseProduct(eigen_q);
eigen_BdK=eigen_B*eigen_neg_v.asDiagonal()+eigen_w*(eigen_al.transpose())+
	(eigen_B*eigen_Rvdd.transpose())*eigen_Rvdd+
	(eigen_B*eigen_dlp)*eigen_q.transpose()+(eigen_B*eigen_q)*eigen_dlp.transpose();
VectorXd eigen_t1=eigen_B.cwiseProduct(eigen_B).colwise().sum().adjoint();
eigen_b=(eigen_t1.cwiseProduct(eigen_dlp)-eigen_B.transpose()*(eigen_B*eigen_dlp))*factor;
MatrixXd diagonal_part=eigen_dg.asDiagonal();
// FITC prior covariance: V'*V + diag(dg)
MatrixXd prior=eigen_V.transpose()*eigen_V+diagonal_part;
eigen_Sigma=prior-tmp.adjoint()*eigen_L*tmp;
eigen_f=eigen_tmp+eigen_mean_f;
// psi = 0.5*alpha'*(f-m), before subtracting the log likelihood
float64_t psi=eigen_alpha.dot(eigen_tmp) * 0.5;
eigen_f=eigen_tmp+eigen_mean_f;
// gradient = K*(alpha - dlp)
eigen_tmp2=eigen_alpha-eigen_dlp_f;
eigen_gradient=eigen_tmp3;
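These last two routines supply the cost and gradient handed to the first-order minimizers; since \(f = K\alpha + m\), a short derivation (ours, not text from the file) gives

\[ \Psi(\alpha) = \tfrac{1}{2}\,\alpha^\top K \alpha - \log p(y \mid K\alpha + m), \qquad \nabla_\alpha \Psi = K\big(\alpha - \nabla_f \log p(y \mid f)\big), \]

consistent with the eigen_alpha-eigen_dlp_f intermediate formed above.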