#include <shogun/lib/config.h>
// ...
void CLibLinear::init()
// ...
	SG_ERROR("Specified features are not of type CDotFeatures\n")
// ...
	if (num_feat!=num_train_labels)
		SG_ERROR("L1 methods require the data to be transposed: "
				"number of features %d does not match number of "
				"training labels %d\n", num_feat, num_train_labels);
// ...
	if (num_vec!=num_train_labels)
		SG_ERROR("number of vectors %d does not match "
				"number of training labels %d\n", num_vec, num_train_labels);
// ...
	liblinear_problem prob;
// ...
	prob.y=SG_MALLOC(double, prob.l);
// ...
	for (int32_t i=0; i<prob.l; i++)
// ...
		else if (prob.y[i] == -1)
// ...
			SG_ERROR("labels should be +1/-1 only\n")
// ...
	for (int i=0; i<prob.l; i++)
// ...
	SG_INFO("%d training points %d dims\n", prob.l, prob.n)
// ...
	function* fun_obj=NULL;
// ...
			fun_obj=new l2r_lr_fun(&prob, Cs);
// ...
			SG_DEBUG("starting L2R_LR training via tron\n")
// ...
			fun_obj=new l2r_l2_svc_fun(&prob, Cs);
// ...
			solve_l2r_lr_dual(w, &prob, epsilon, Cp, Cn);
// ...
			SG_ERROR("Error: unknown solver_type\n")
#define GETI(i) (y[i]+1)
// ...
void CLibLinear::solve_l2r_l1l2_svc(
// ...
	int w_size = prob->n;
// ...
	double* QD = SG_MALLOC(double, l);
	int* index = SG_MALLOC(int, l);
	double* alpha = SG_MALLOC(double, l);
	int32_t* y = SG_MALLOC(int32_t, l);
// ...
	double PGmax_new, PGmin_new;
// ...
	double diag[3] = {0.5/Cn, 0, 0.5/Cp};
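// Editor's note (not in the original file): GETI(i) maps a label
// y[i] in {-1, +1} to an index in {0, 2}, so 3-element arrays hold
// per-class constants with the middle slot unused:
//
//   y[i] == -1  ->  GETI(i) == 0  ->  diag[0] == 0.5/Cn
//   y[i] == +1  ->  GETI(i) == 2  ->  diag[2] == 0.5/Cp
//
// For the L1-loss variant the diagonal terms are zero and the upper
// bound is {Cn, 0, Cp}; for the L2-loss variant the diagonal is
// {0.5/Cn, 0, 0.5/Cp} and the bound is infinite, which is how one
// routine serves both losses.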
	for (i=0; i<w_size; i++)
// ...
		QD[i] = diag[GETI(i)];
// ...
		QD[i] += prob->x->dot(i, prob->x, i);
// ...
		for (i=0; i<active_size; i++)
// ...
		for (s=0; s<active_size; s++)
// ...
			G = prob->x->dense_dot(i, w.vector, n);
// ...
			C = upper_bound[GETI(i)];
			G += alpha[i]*diag[GETI(i)];
// ...
			else if (alpha[i] == C)
// ...
			if (fabs(PG) > 1.0e-12)
// ...
				double alpha_old = alpha[i];
// ...
				d = (alpha[i] - alpha_old)*yi;
// ...
				prob->x->add_to_dense_vec(d, i, w.vector, n);
// ...
		PGmax_old = PGmax_new;
		PGmin_old = PGmin_new;
// ...
	SG_INFO("optimization finished, #iter = %d\n", iter)
// ...
	SG_WARNING("reaching max number of iterations\nUsing -s 2 may be faster "
			"(also see liblinear FAQ)\n\n");
// ...
	for (i=0; i<w_size; i++)
// ...
		v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
// ...
	SG_INFO("Objective value = %lf\n", v/2)
#define GETI(i) (y[i]+1)
// ...
void CLibLinear::solve_l1r_l2_svc(
	// ...
	liblinear_problem* prob_col, double eps, double Cp, double Cn)
// ...
	int w_size = prob_col->n;
// ...
	int active_size = w_size;
	int max_num_linesearch = 20;
// ...
	double d, G_loss, G, H;
// ...
	double d_old, d_diff;
	double loss_old=0, loss_new;
	double appxcond, cond;
// ...
	int* index = SG_MALLOC(int, w_size);
	int32_t* y = SG_MALLOC(int32_t, l);
	double* b = SG_MALLOC(double, l);
	double* xj_sq = SG_MALLOC(double, w_size);
// ...
	double C[3] = {Cn, 0, Cp};
// ...
	if (prob_col->use_bias)
// ...
	if (prob_col->y[j] > 0)
// ...
	for (j=0; j<w_size; j++)
// ...
		for (ind=0; ind<l; ind++)
			xj_sq[n] += C[GETI(ind)];
// ...
		iterator=x->get_feature_iterator(j);
		while (x->get_next_feature(ind, val, iterator))
			xj_sq[j] += C[GETI(ind)]*val*val;
		x->free_feature_iterator(iterator);
// ...
	for (j=0; j<active_size; j++)
// ...
	for (s=0; s<active_size; s++)
// ...
		for (ind=0; ind<l; ind++)
// ...
			double tmp = C[GETI(ind)]*y[ind];
			G_loss -= tmp*b[ind];
// ...
		iterator=x->get_feature_iterator(j);
		while (x->get_next_feature(ind, val, iterator))
// ...
			double tmp = C[GETI(ind)]*val*y[ind];
			G_loss -= tmp*b[ind];
// ...
		x->free_feature_iterator(iterator);
// ...
		double violation = 0;
// ...
		else if (Gp>Gmax_old/l && Gn<-Gmax_old/l)
// ...
			violation = fabs(Gp);
// ...
			violation = fabs(Gn);
// ...
		else if (Gn >= H*w.vector[j])
// ...
		if (fabs(d) < 1.0e-12)
// ...
		double delta = fabs(w.vector[j]+d) - fabs(w.vector[j]) + G*d;
// ...
		for (num_linesearch=0; num_linesearch<max_num_linesearch; num_linesearch++)
// ...
			cond = fabs(w.vector[j]+d) - fabs(w.vector[j]) - sigma*delta;
// ...
			appxcond = xj_sq[j]*d*d + G_loss*d + cond;
// ...
			for (ind=0; ind<l; ind++)
				b[ind] += d_diff*y[ind];
// ...
			iterator=x->get_feature_iterator(j);
			while (x->get_next_feature(ind, val, iterator))
				b[ind] += d_diff*val*y[ind];
			x->free_feature_iterator(iterator);
// ...
			if (num_linesearch == 0)
// ...
				for (ind=0; ind<l; ind++)
// ...
					loss_old += C[GETI(ind)]*b[ind]*b[ind];
					double b_new = b[ind] + d_diff*y[ind];
// ...
					loss_new += C[GETI(ind)]*b_new*b_new;
// ...
				iterator=x->get_feature_iterator(j);
				while (x->get_next_feature(ind, val, iterator))
// ...
					loss_old += C[GETI(ind)]*b[ind]*b[ind];
					double b_new = b[ind] + d_diff*val*y[ind];
// ...
					loss_new += C[GETI(ind)]*b_new*b_new;
// ...
				x->free_feature_iterator(iterator);
// ...
				for (ind=0; ind<l; ind++)
// ...
					double b_new = b[ind] + d_diff*y[ind];
// ...
					loss_new += C[GETI(ind)]*b_new*b_new;
// ...
				iterator=x->get_feature_iterator(j);
				while (x->get_next_feature(ind, val, iterator))
// ...
					double b_new = b[ind] + d_diff*val*y[ind];
// ...
					loss_new += C[GETI(ind)]*b_new*b_new;
// ...
				x->free_feature_iterator(iterator);
// ...
			cond = cond + loss_new - loss_old;
// ...
		if (num_linesearch >= max_num_linesearch)
// ...
			for (int i=0; i<l; i++)
// ...
			for (int i=0; i<n; i++)
// ...
				iterator=x->get_feature_iterator(i);
				while (x->get_next_feature(ind, val, iterator))
					b[ind] -= w.vector[i]*val*y[ind];
				x->free_feature_iterator(iterator);
// ...
			for (ind=0; ind<l; ind++)
				b[ind] -= w.vector[n]*y[ind];
// ...
	Gmax_init = Gmax_new;
// ...
	if (Gmax_new <= eps*Gmax_init)
// ...
		if (active_size == w_size)
// ...
		active_size = w_size;
// ...
	SG_INFO("optimization finished, #iter = %d\n", iter)
// ...
	SG_WARNING("\nWARNING: reaching max number of iterations\n")
// ...
	for (j=0; j<w_size; j++)
// ...
		v += C[GETI(j)]*b[j]*b[j];
// ...
	SG_INFO("Objective value = %lf\n", v)
	SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size)
#define GETI(i) (y[i]+1)
// ...
void CLibLinear::solve_l1r_lr(
	// ...
	const liblinear_problem* prob_col, double eps, double Cp, double Cn)
// ...
	int w_size = prob_col->n;
// ...
	int active_size = w_size;
	int max_num_linesearch = 20;
// ...
	double sum1, appxcond1;
	double sum2, appxcond2;
// ...
	int* index = SG_MALLOC(int, w_size);
	int32_t* y = SG_MALLOC(int32_t, l);
	double* exp_wTx = SG_MALLOC(double, l);
	double* exp_wTx_new = SG_MALLOC(double, l);
	double* xj_max = SG_MALLOC(double, w_size);
	double* C_sum = SG_MALLOC(double, w_size);
	double* xjneg_sum = SG_MALLOC(double, w_size);
	double* xjpos_sum = SG_MALLOC(double, w_size);
// ...
	double C[3] = {Cn, 0, Cp};
// ...
	if (prob_col->use_bias)
// ...
	if (prob_col->y[j] > 0)
// ...
	for (j=0; j<w_size; j++)
// ...
		for (ind=0; ind<l; ind++)
// ...
			C_sum[j] += C[GETI(ind)];
// ...
			xjneg_sum[j] += C[GETI(ind)];
// ...
			xjpos_sum[j] += C[GETI(ind)];
// ...
			C_sum[j] += C[GETI(ind)];
// ...
			xjneg_sum[j] += C[GETI(ind)]*val;
// ...
			xjpos_sum[j] += C[GETI(ind)]*val;
// ...
	for (j=0; j<active_size; j++)
// ...
	for (s=0; s<active_size; s++)
// ...
		for (ind=0; ind<l; ind++)
// ...
			double exp_wTxind = exp_wTx[ind];
			double tmp1 = 1.0/(1+exp_wTxind);
			double tmp2 = C[GETI(ind)]*tmp1;
			double tmp3 = tmp2*exp_wTxind;
// ...
			double exp_wTxind = exp_wTx[ind];
			double tmp1 = val/(1+exp_wTxind);
			double tmp2 = C[GETI(ind)]*tmp1;
			double tmp3 = tmp2*exp_wTxind;
// ...
		G = -sum2 + xjneg_sum[j];
// ...
		double violation = 0;
// ...
		else if (Gp>Gmax_old/l && Gn<-Gmax_old/l)
// ...
			violation = fabs(Gp);
// ...
			violation = fabs(Gn);
// ...
		else if (Gn >= H*w.vector[j])
// ...
		if (fabs(d) < 1.0e-12)
// ...
		double delta = fabs(w.vector[j]+d) - fabs(w.vector[j]) + G*d;
// ...
		for (num_linesearch=0; num_linesearch<max_num_linesearch; num_linesearch++)
// ...
			cond = fabs(w.vector[j]+d) - fabs(w.vector[j]) - sigma*delta;
// ...
			double tmp = exp(d*xj_max[j]);
			appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j]
					+ cond - d*xjpos_sum[j];
			appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j]
					+ cond + d*xjneg_sum[j];
// ...
			for (ind=0; ind<l; ind++)
				exp_wTx[ind] *= exp(d);
// ...
				exp_wTx[ind] *= exp(d*val);
// ...
			cond += d*xjneg_sum[j];
// ...
			for (ind=0; ind<l; ind++)
// ...
				double exp_dx = exp(d);
				exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
				cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
// ...
				double exp_dx = exp(d*val);
				exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
				cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
// ...
			for (ind=0; ind<l; ind++)
// ...
				exp_wTx[ind] = exp_wTx_new[i];
// ...
				exp_wTx[ind] = exp_wTx_new[i];
// ...
		if (num_linesearch >= max_num_linesearch)
// ...
			for (int i=0; i<l; i++)
// ...
			for (int i=0; i<w_size; i++)
// ...
				if (w.vector[i]==0)
					continue;
// ...
				for (ind=0; ind<l; ind++)
					exp_wTx[ind] += w.vector[i];
// ...
					exp_wTx[ind] += w.vector[i]*val;
// ...
			for (int i=0; i<l; i++)
				exp_wTx[i] = exp(exp_wTx[i]);
// ...
	Gmax_init = Gmax_new;
// ...
	if (Gmax_new <= eps*Gmax_init)
// ...
		if (active_size == w_size)
// ...
		active_size = w_size;
// ...
	Gmax_old = Gmax_new;
// ...
	SG_INFO("optimization finished, #iter = %d\n", iter)
// ...
	SG_WARNING("\nWARNING: reaching max number of iterations\n")
// ...
	for (j=0; j<w_size; j++)
// ...
		v += C[GETI(j)]*log(1+1/exp_wTx[j]);
// ...
		v += C[GETI(j)]*log(1+exp_wTx[j]);
// ...
	SG_INFO("Objective value = %lf\n", v)
	SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size)
// ...
	SG_FREE(exp_wTx_new);
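// Editor's sketch (not in the original file): the objective the SG_INFO
// above reports, written standalone. It uses the same cached quantities
// as the solver: exp_wTx[i] = exp(w^T x_i), with per-class costs Cp/Cn
// selected by the sign of y[i].
static double l1r_lr_objective(const double* w, int w_size,
		const double* exp_wTx, const int32_t* y, int l,
		double Cp, double Cn)
{
	double v = 0;
	for (int j = 0; j < w_size; j++)
		v += fabs(w[j]);                        // L1 penalty ||w||_1
	for (int i = 0; i < l; i++)
	{
		double Ci = (y[i] > 0) ? Cp : Cn;
		if (y[i] > 0)                           // log(1 + exp(-w^T x_i))
			v += Ci * log(1 + 1/exp_wTx[i]);
		else                                    // log(1 + exp(+w^T x_i))
			v += Ci * log(1 + exp_wTx[i]);
	}
	return v;
}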
#define GETI(i) (y[i]+1)
// ...
void CLibLinear::solve_l2r_lr_dual(
	SGVector<float64_t>& w, const liblinear_problem* prob,
	double eps, double Cp, double Cn)
// ...
	int w_size = prob->n;
// ...
	double* xTx = new double[l];
	int max_iter = 1000;
	int* index = new int[l];
	double* alpha = new double[2*l];
	int32_t* y = new int32_t[l];
	int max_inner_iter = 100;
	double innereps = 1e-2;
// ...
	double upper_bound[3] = {Cn, 0, Cp};
	double Gmax_init = 0;
// ...
		alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
// ...
	for (i=0; i<w_size; i++)
// ...
		xTx[i] = prob->x->dot(i, prob->x, i);
		prob->x->add_to_dense_vec(y[i]*alpha[2*i], i, w.vector, w_size);
// ...
			w.vector[w_size] += y[i]*alpha[2*i];
// ...
	while (iter < max_iter)
// ...
		int newton_iter = 0;
// ...
			double C = upper_bound[GETI(i)];
			double ywTx = 0, xisq = xTx[i];
// ...
			ywTx = prob->x->dense_dot(i, w.vector, w_size);
// ...
			double a = xisq, b = ywTx;
// ...
			int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
			if (0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
// ...
			double alpha_old = alpha[ind1];
			double z = alpha_old;
// ...
			double gp = a*(z-alpha_old) + sign*b + CMath::log(z/(C-z));
// ...
			const double eta = 0.1;
// ...
			while (inner_iter <= max_inner_iter)
// ...
				if (fabs(gp) < innereps)
// ...
				double gpp = a + C/(C-z)/z;
				double tmpz = z - gp/gpp;
// ...
				gp = a*(z-alpha_old) + sign*b + CMath::log(z/(C-z));
// ...
			prob->x->add_to_dense_vec(sign*(z-alpha_old)*yi, i, w.vector, w_size);
// ...
				w.vector[w_size] += sign*(z-alpha_old)*yi;
// ...
		if (newton_iter <= l/10)
			innereps = CMath::max(innereps_min, 0.1*innereps);
// ...
	SG_INFO("optimization finished, #iter = %d\n", iter)
	if (iter >= max_iter)
		SG_WARNING("reaching max number of iterations\n"
				"Using -s 0 may be faster (also see FAQ)\n\n")
// ...
	for (i=0; i<w_size; i++)
// ...
		v += alpha[2*i]*log(alpha[2*i]) + alpha[2*i+1]*log(alpha[2*i+1])
				- upper_bound[GETI(i)]*log(upper_bound[GETI(i)]);
// ...
	SG_INFO("Objective value = %lf\n", v)
// ...
		SG_ERROR("Please assign labels first!\n")
// ...
	if (num_labels!=linear_term.vlen)
	{
		SG_ERROR("Number of labels (%d) does not match number"
				" of entries (%d) in linear term \n", num_labels,
				linear_term.vlen);
	}
// ...
		SG_ERROR("Please assign linear term first!\n")
// ...
		SG_ERROR("Please assign labels first!\n")