SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
libncbm.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Copyright (C) 2012 Viktor Gal
8  *
9  */
10 
12 #include <shogun/lib/Time.h>
13 
14 #include <shogun/lib/external/libqp.h>
16 
17 #include <vector>
18 
19 namespace shogun
20 {
21 
23 static uint32_t maxCPs;
24 static const float64_t epsilon=0.0;
25 
26 static const float64_t *get_col(uint32_t i)
27 {
28  return (&HMatrix[maxCPs*i]);
29 }
30 
32 {
33  /* */
39 };
40 
41 inline static line_search_res zoom
42 (
43  CDualLibQPBMSOSVM *machine,
44  float64_t lambda,
45  float64_t a_lo,
46  float64_t a_hi,
47  float64_t initial_fval,
48  SGVector<float64_t>& initial_solution,
49  SGVector<float64_t>& search_dir,
50  float64_t wolfe_c1,
51  float64_t wolfe_c2,
52  float64_t init_lgrad,
53  float64_t f_lo,
54  float64_t g_lo,
55  float64_t f_hi,
56  float64_t g_hi
57 )
58 {
59  line_search_res ls_res;
60  ls_res.solution.resize_vector(initial_solution.vlen);
61  ls_res.gradient.resize_vector(initial_solution.vlen);
62 
63  SGVector<float64_t> cur_solution(initial_solution.vlen);
64  cur_solution.zero();
65  SGVector<float64_t> cur_grad(initial_solution.vlen);
66 
67  uint32_t iter = 0;
68  while (1)
69  {
70  float64_t d1 = g_lo+g_hi - 3*(f_lo-f_hi)/(a_lo-a_hi);
71  float64_t d2 = CMath::sqrt(d1*d1 - g_lo*g_hi);
72  float64_t a_j = a_hi -(a_hi-a_lo)*(g_hi+d2-d1)/(g_hi-g_lo+2*d2);
73 
74  if (a_lo < a_hi)
75  {
76  if ((a_j < a_lo) || (a_j > a_hi))
77  {
78  a_j = 0.5*(a_lo+a_hi);
79  }
80  }
81  else
82  {
83  if ((a_j > a_lo) || (a_j < a_hi))
84  {
85  a_j = 0.5*(a_lo+a_hi);
86  }
87  }
88 
89  cur_solution.add(cur_solution.vector, 1.0,
90  initial_solution.vector, a_j,
91  search_dir.vector, cur_solution.vlen);
92 
93  float64_t cur_fval = machine->risk(cur_grad.vector, cur_solution.vector);
94  float64_t cur_reg
95  = 0.5*lambda*cur_solution.dot(cur_solution.vector,
96  cur_solution.vector, cur_solution.vlen);
97  cur_fval += cur_reg;
98 
99  cur_grad.vec1_plus_scalar_times_vec2(cur_grad.vector, lambda, cur_solution.vector, cur_grad.vlen);
100 
101  if
102  (
103  (cur_fval > (initial_fval+wolfe_c1*a_j*init_lgrad))
104  ||
105  (cur_fval > f_lo)
106  )
107  {
108  a_hi = a_j;
109  f_hi = cur_fval;
110  g_hi = cur_grad.dot(cur_grad.vector, search_dir.vector, cur_grad.vlen);
111  }
112  else
113  {
114  float64_t cur_lgrad
115  = cur_grad.dot(cur_grad.vector, search_dir.vector, cur_grad.vlen);
116 
117  if (CMath::abs(cur_lgrad) < -wolfe_c2*init_lgrad)
118  {
119  ls_res.a = a_j;
120  ls_res.fval = cur_fval;
121  ls_res.reg = cur_reg;
122  ls_res.gradient = cur_grad;
123  ls_res.solution = cur_solution;
124 // SG_SPRINT("in zoom (wolfe2): %f\n", cur_fval)
125  return ls_res;
126  }
127 
128  if (cur_lgrad*(a_hi - a_lo) >= 0)
129  {
130  a_hi = a_lo;
131  f_hi = f_lo;
132  g_hi = g_lo;
133  }
134  a_lo = a_j;
135  f_lo = cur_fval;
136  g_lo = cur_lgrad;
137  }
138 
139  if
140  (
141  (CMath::abs(a_lo - a_hi) <= 0.01*a_lo)
142  ||
143  (iter >= 5)
144  )
145  {
146  ls_res.a = a_j;
147  ls_res.fval = cur_fval;
148  ls_res.reg = cur_reg;
149  ls_res.gradient = cur_grad;
150  ls_res.solution = cur_solution;
151 // SG_SPRINT("in zoom iter: %d %f\n", iter, cur_fval)
152  return ls_res;
153  }
154  iter++;
155  }
156 }
157 
158 inline std::vector<line_search_res> line_search_with_strong_wolfe
159 (
160  CDualLibQPBMSOSVM *machine,
161  float64_t lambda,
162  float64_t initial_val,
163  SGVector<float64_t>& initial_solution,
164  SGVector<float64_t>& initial_grad,
165  SGVector<float64_t>& search_dir,
166  float64_t astart,
167  float64_t amax = 1.1,
168  float64_t wolfe_c1 = 1E-4,
169  float64_t wolfe_c2 = 0.9,
170  float64_t max_iter = 5
171 )
172 {
173  /* NOTE:
174  * model->risk returns only the risk as it's name says
175  * have to cur_fval = model.risk + (lambda*0.5*w*w')
176  *
177  * subgrad should be: subgrad + (lambda*w)
178  *
179  */
180 
181  uint32_t iter = 0;
182 
183  initial_grad.vec1_plus_scalar_times_vec2(initial_grad.vector, lambda, initial_solution.vector, initial_grad.vlen);
184 
185  float64_t initial_lgrad = initial_grad.dot(initial_grad.vector, search_dir.vector, initial_grad.vlen);
186  float64_t prev_lgrad = initial_lgrad;
187  float64_t prev_fval = initial_val;
188 
189  float64_t prev_a = 0;
190  float64_t cur_a = astart;
191 
192  std::vector<line_search_res> ret;
193  while (1)
194  {
195  SGVector<float64_t> x(initial_solution.vlen);
196  SGVector<float64_t> cur_subgrad(initial_solution.vlen);
197 
198  x.add(x.vector, 1.0, initial_solution.vector, cur_a, search_dir.vector, x.vlen);
199  float64_t cur_fval = machine->risk(cur_subgrad.vector, x.vector);
200  float64_t cur_reg = 0.5*lambda*x.dot(x.vector, x.vector, x.vlen);
201  cur_fval += cur_reg;
202 
203  cur_subgrad.vec1_plus_scalar_times_vec2(cur_subgrad.vector, lambda, x.vector, x.vlen);
204  if (iter == 0)
205  {
206  line_search_res initial_step;
207  initial_step.fval = cur_fval;
208  initial_step.reg = cur_reg;
209  initial_step.gradient = cur_subgrad;
210  initial_step.solution = x;
211  ret.push_back(initial_step);
212  }
213 
214  float64_t cur_lgrad
215  = cur_subgrad.dot(cur_subgrad.vector, search_dir.vector,
216  cur_subgrad.vlen);
217  if
218  (
219  (cur_fval > initial_val+wolfe_c1*cur_a*initial_lgrad)
220  ||
221  (cur_fval >= prev_fval && iter > 0)
222  )
223  {
224  ret.push_back(
225  zoom(machine, lambda, prev_a, cur_a, initial_val,
226  initial_solution, search_dir, wolfe_c1, wolfe_c2,
227  initial_lgrad, prev_fval, prev_lgrad, cur_fval, cur_lgrad)
228  );
229  return ret;
230  }
231 
232  if (CMath::abs(cur_lgrad) <= -wolfe_c2*initial_lgrad)
233  {
234  line_search_res ls_res;
235  ls_res.a = cur_a;
236  ls_res.fval = cur_fval;
237  ls_res.reg = cur_reg;
238  ls_res.solution = x;
239  ls_res.gradient = cur_subgrad;
240  ret.push_back(ls_res);
241  return ret;
242  }
243 
244  if (cur_lgrad >= 0)
245  {
246  ret.push_back(
247  zoom(machine, lambda, cur_a, prev_a, initial_val,
248  initial_solution, search_dir, wolfe_c1, wolfe_c2,
249  initial_lgrad, cur_fval, cur_lgrad, prev_fval, prev_lgrad)
250  );
251  return ret;
252  }
253  iter++;
254  if ((CMath::abs(cur_a - amax) <= 0.01*amax) || (iter >= max_iter))
255  {
256  line_search_res ls_res;
257  ls_res.a = cur_a;
258  ls_res.fval = cur_fval;
259  ls_res.reg = cur_reg;
260  ls_res.solution = x;
261  ls_res.gradient = cur_subgrad;
262  ret.push_back(ls_res);
263  return ret;
264  }
265 
266  prev_a = cur_a;
267  prev_fval = cur_fval;
268  prev_lgrad = cur_lgrad;
269 
270  cur_a = (cur_a + amax)*0.5;
271  }
272 }
273 
274 inline void update_H(BmrmStatistics& ncbm,
275  bmrm_ll* head,
276  bmrm_ll* tail,
278  SGVector<float64_t>& diag_H,
279  float64_t lambda,
280  uint32_t maxCP,
281  int32_t w_dim)
282 {
283  float64_t* a_2 = get_cutting_plane(tail);
284  bmrm_ll* cp_ptr=head;
285 
286  for (uint32_t i=0; i < ncbm.nCP; ++i)
287  {
288  float64_t* a_1 = get_cutting_plane(cp_ptr);
289  cp_ptr=cp_ptr->next;
290 
291  float64_t dot_val = SGVector<float64_t>::dot(a_2, a_1, w_dim);
292 
293  H.matrix[LIBBMRM_INDEX(ncbm.nCP, i, maxCP)]
294  = H.matrix[LIBBMRM_INDEX(i, ncbm.nCP, maxCP)]
295  = dot_val/lambda;
296  }
297 
298  /* set the diagonal element, i.e. subgrad_i*subgrad_i' */
299  float64_t dot_val = SGVector<float64_t>::dot(a_2, a_2, w_dim);
300  H[LIBBMRM_INDEX(ncbm.nCP, ncbm.nCP, maxCP)]=dot_val/lambda;
301 
302  diag_H[ncbm.nCP]=H[LIBBMRM_INDEX(ncbm.nCP, ncbm.nCP, maxCP)];
303 
304  ncbm.nCP++;
305 }
306 
307 
309  CDualLibQPBMSOSVM *machine,
310  float64_t *w,
311  float64_t TolRel,
312  float64_t TolAbs,
313  float64_t _lambda,
314  uint32_t _BufSize,
315  bool cleanICP,
316  uint32_t cleanAfter,
317  bool is_convex,
318  bool line_search,
319  bool verbose
320  )
321 {
322  BmrmStatistics ncbm;
323  libqp_state_T qp_exitflag={0, 0, 0, 0};
324  int32_t w_dim = machine->get_model()->get_dim();
325 
326  maxCPs = _BufSize;
327  BufSize = _BufSize;
328 
329  ncbm.nCP=0;
330  ncbm.nIter=0;
331  ncbm.exitflag=0;
332 
333  /* variables for timing the algorithm*/
334  CTime ttime;
335  float64_t tstart, tstop;
336  tstart=ttime.cur_time_diff(false);
337 
338  /* matrix of subgradiends */
339  SGMatrix<float64_t> A(w_dim, maxCPs);
340 
341  /* bias vector */
343  bias.zero();
344 
345  /* Matrix for storing H = A*A' */
347  HMatrix = H.matrix;
348 
349  /* diag_H */
350  SGVector<float64_t> diag_H(maxCPs);
351  diag_H.zero();
352 
353  /* x the solution vector of the dual problem:
354  * 1/lambda x*H*x' - x*bias
355  */
357  x.zero();
358 
359  /* for sum_{i in I_k} x[i] <= b[k] for all k such that S[k] == 1 */
360  float64_t b = 1.0;
361  uint8_t S = 1;
363  I.set_const(1);
364 
365  /* libqp paramteres */
366  uint32_t QPSolverMaxIter = 0xFFFFFFFF;
367  float64_t QPSolverTolRel = 1E-9;
368 
369  /* variables for maintaining inactive planes */
370  SGVector<bool> map(maxCPs);
371  map.set_const(true);
372  ICP_stats icp_stats;
373  icp_stats.maxCPs = maxCPs;
374  icp_stats.ICPcounter = (uint32_t*) LIBBMRM_CALLOC(maxCPs, uint32_t);
375  icp_stats.ICPs = (float64_t**) LIBBMRM_CALLOC(maxCPs, float64_t*);
376  icp_stats.ACPs = (uint32_t*) LIBBMRM_CALLOC(maxCPs, uint32_t);
378  if
379  (
380  icp_stats.ICPcounter == NULL || icp_stats.ICPs == NULL
381  || icp_stats.ACPs == NULL || icp_stats.H_buff == NULL
382  )
383  {
384  ncbm.exitflag=-2;
385  LIBBMRM_FREE(icp_stats.ICPcounter);
386  LIBBMRM_FREE(icp_stats.ICPs);
387  LIBBMRM_FREE(icp_stats.ACPs);
388  LIBBMRM_FREE(icp_stats.H_buff);
389  return ncbm;
390  }
391 
392  /* best */
393  float64_t best_Fp = CMath::INFTY;
394  float64_t best_risk = CMath::INFTY;
395  SGVector<float64_t> best_w(w_dim);
396  best_w.zero();
397  SGVector<float64_t> best_subgrad(w_dim);
398  best_subgrad.zero();
399 
400  /* initial solution */
401  SGVector<float64_t> cur_subgrad(w_dim);
402  SGVector<float64_t> cur_w(w_dim);
403  memcpy(cur_w.vector, w, sizeof(float64_t)*w_dim);
404 
405  float64_t cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector);
406  bias[0] = -cur_risk;
407  best_Fp = 0.5*_lambda*cur_w.dot(cur_w.vector, cur_w.vector, cur_w.vlen) + cur_risk;
408  best_risk = cur_risk;
409  memcpy(best_w.vector, cur_w.vector, sizeof(float64_t)*w_dim);
410  memcpy(best_subgrad.vector, cur_subgrad.vector, sizeof(float64_t)*w_dim);
411 
412  /* create a double-linked list over the A the subgrad matrix */
413  bmrm_ll *CPList_head, *CPList_tail, *cp_ptr, *cp_list=NULL;
414  cp_list = (bmrm_ll*) SG_CALLOC(bmrm_ll, 1);
415  if (cp_list==NULL)
416  {
417  ncbm.exitflag=-2;
418  return ncbm;
419  }
420  /* save the subgradient */
421  memcpy(A.matrix, cur_subgrad.vector, sizeof(float64_t)*w_dim);
422  map[0] = false;
423  cp_list->address=&A[0];
424  cp_list->idx=0;
425  cp_list->prev=NULL;
426  cp_list->next=NULL;
427  CPList_head=cp_list;
428  CPList_tail=cp_list;
429 
430  update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
431  tstop=ttime.cur_time_diff(false);
432  if (verbose)
433  SG_SPRINT("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, R=%lf\n",
434  ncbm.nIter, tstop-tstart, ncbm.Fp, ncbm.Fd, cur_risk);
435 
436  float64_t astar = 0.01;
437 
438  SG_SPRINT("clean icps: %d\n", cleanICP)
439  while (ncbm.exitflag==0)
440  {
441  tstart=ttime.cur_time_diff(false);
442  ncbm.nIter++;
443 
444  //diag_H.display_vector();
445  //bias.display_vector();
446 
447  /* solve the dual of the problem, namely:
448  *
449  */
450  qp_exitflag =
451  libqp_splx_solver(&get_col, diag_H.vector, bias.vector, &b, I.vector, &S, x.vector,
452  ncbm.nCP, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBBMRM_PLUS_INF, 0);
453 
454  ncbm.Fd = -qp_exitflag.QP;
455 
456  ncbm.qp_exitflag=qp_exitflag.exitflag;
457 
458  /* Update ICPcounter (add one to unused and reset used)
459  * + compute number of active CPs */
460  ncbm.nzA=0;
461 
462  for (uint32_t i=0; i < ncbm.nCP; ++i)
463  {
464  if (x[i] > epsilon)
465  {
466  /* cp was active => reset counter */
467  ++ncbm.nzA;
468  icp_stats.ICPcounter[i]=0;
469  }
470  else
471  {
472  icp_stats.ICPcounter[i]++;
473  }
474  }
475 
476  /* Inactive Cutting Planes (ICP) removal */
477  if (cleanICP)
478  {
479  clean_icp(&icp_stats, ncbm, &CPList_head, &CPList_tail,
480  H.matrix, diag_H.vector, x.vector,
481  map.vector, cleanAfter, bias.vector, I.vector);
482  }
483 
484  /* calculate the new w
485  * w[i] = -1/lambda*A[i]*x[i]
486  */
487  cur_w.zero();
488  cp_ptr=CPList_head;
489  for (uint32_t i=0; i < ncbm.nCP; ++i)
490  {
491  float64_t* A_1 = get_cutting_plane(cp_ptr);
492  cp_ptr=cp_ptr->next;
493  SGVector<float64_t>::vec1_plus_scalar_times_vec2(cur_w.vector, -x[i]/_lambda, A_1, w_dim);
494  }
495 
496  bool calc_gap = false;
497  if (calc_gap)
498  {
499  SGVector<float64_t> scores(ncbm.nCP);
500  cp_ptr=CPList_head;
501 
502  for (uint32_t i=0; i < ncbm.nCP; ++i)
503  {
504  float64_t* a_1 = get_cutting_plane(cp_ptr);
505  cp_ptr = cp_ptr->next;
506  scores[i] = cur_w.dot(cur_w.vector, a_1, w_dim);
507  }
508  scores.vec1_plus_scalar_times_vec2(scores.vector, -1.0, bias.vector, scores.vlen);
509 
510  float64_t w_norm = cur_w.dot(cur_w.vector, cur_w.vector, cur_w.vlen);
511  float64_t PO = 0.5*_lambda*w_norm + scores.max(scores.vector, scores.vlen);
512  float64_t QP_gap = PO - ncbm.Fd;
513 
514  SG_SPRINT("%4d: primal:%f dual:%f QP_gap:%f\n", ncbm.nIter, PO, ncbm.Fd, QP_gap)
515  }
516 
517  /* Stopping conditions */
518  if ((best_Fp - ncbm.Fd) <= TolRel*LIBBMRM_ABS(best_Fp))
519  ncbm.exitflag = 1;
520 
521  if ((best_Fp - ncbm.Fd) <= TolAbs)
522  ncbm.exitflag = 2;
523 
524  if (ncbm.nCP >= maxCPs)
525  ncbm.exitflag = -1;
526 
527  // next CP would exceed BufSize/maxCPs
528  if (ncbm.nCP+3 >= maxCPs)
529  ncbm.exitflag=-1;
530 
531  tstop=ttime.cur_time_diff(false);
532 
533  /* Verbose output */
534  if (verbose)
535  SG_SPRINT("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, (Fp-Fd)=%lf, (Fp-Fd)/Fp=%lf, R=%lf, nCP=%d, nzA=%d, QPexitflag=%d, best_fp=%f, gap=%f\n",
536  ncbm.nIter, tstop-tstart, ncbm.Fp, ncbm.Fd, ncbm.Fp-ncbm.Fd,
537  (ncbm.Fp-ncbm.Fd)/ncbm.Fp, cur_risk, ncbm.nCP, ncbm.nzA, qp_exitflag.exitflag, best_Fp, (best_Fp-ncbm.Fd)/best_Fp);
538 
539  if (ncbm.exitflag!=0)
540  break;
541 
542  std::vector<line_search_res> wbest_candidates;
543  if (!line_search)
544  {
545  cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector);
546 
547  add_cutting_plane(&CPList_tail, map, A.matrix,
548  find_free_idx(map, maxCPs), cur_subgrad.vector, w_dim);
549 
550  bias[ncbm.nCP] = cur_w.dot(cur_w.vector, cur_subgrad.vector, cur_w.vlen) - cur_risk;
551 
552  update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
553 
554  // add as a new wbest candidate
555  line_search_res ls;
556  ls.fval = cur_risk+0.5*_lambda*cur_w.dot(cur_w.vector, cur_w.vector, cur_w.vlen);
557  ls.solution = cur_w;
558  ls.gradient = cur_subgrad;
559 
560  wbest_candidates.push_back(ls);
561  }
562  else
563  {
564  tstart=ttime.cur_time_diff(false);
565  /* do line searching */
566  SGVector<float64_t> search_dir(w_dim);
567  search_dir.add(search_dir.vector, 1.0, cur_w.vector, -1.0, best_w.vector, w_dim);
568 
569  float64_t norm_dir = search_dir.twonorm(search_dir.vector, search_dir.vlen);
570  float64_t astart;
571  uint32_t cp_min_approx = 0;
572  if (cp_min_approx || (ncbm.nIter == 1))
573  {
574  astart = 1.0;
575  }
576  else
577  {
578  astart = CMath::min(astar/norm_dir,1.0);
579  if (astart == 0)
580  astart = 1.0;
581  }
582 
583  /* line search */
584  std::vector<line_search_res> ls_res
585  = line_search_with_strong_wolfe(machine, _lambda, best_Fp, best_w, best_subgrad, search_dir, astart);
586 
587  if (ls_res[0].fval != ls_res[1].fval)
588  {
589  ls_res[0].gradient.vec1_plus_scalar_times_vec2(ls_res[0].gradient.vector, -_lambda, ls_res[0].solution.vector, w_dim);
590 
591  add_cutting_plane(&CPList_tail, map, A.matrix,
592  find_free_idx(map, maxCPs), ls_res[0].gradient, w_dim);
593 
594  bias[ncbm.nCP]
595  = SGVector<float64_t>::dot(ls_res[0].solution.vector, ls_res[0].gradient, w_dim)
596  - (ls_res[0].fval - ls_res[0].reg);
597 
598  update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
599 
600  wbest_candidates.push_back(ls_res[0]);
601  }
602 
603  ls_res[1].gradient.vec1_plus_scalar_times_vec2(ls_res[1].gradient.vector, -_lambda, ls_res[1].solution.vector, w_dim);
604 
605  add_cutting_plane(&CPList_tail, map, A.matrix,
606  find_free_idx(map, maxCPs), ls_res[1].gradient.vector, w_dim);
607 
608  bias[ncbm.nCP]
609  = ls_res[1].solution.dot(ls_res[1].solution.vector, ls_res[1].gradient.vector, w_dim)
610  - (ls_res[1].fval - ls_res[1].reg);
611 
612  update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
613 
614  wbest_candidates.push_back(ls_res[1]);
615 
616  if ((best_Fp <= ls_res[1].fval) && (astart != 1))
617  {
618  cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector);
619 
620  add_cutting_plane(&CPList_tail, map, A.matrix,
621  find_free_idx(map, maxCPs), cur_subgrad.vector, w_dim);
622 
623  bias[ncbm.nCP]
624  = cur_w.dot(cur_w.vector, cur_subgrad.vector, cur_w.vlen) - cur_risk;
625 
626  update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
627 
628  /* add as a new wbest candidate */
629  line_search_res ls;
630  ls.fval = cur_risk+0.5*_lambda*cur_w.dot(cur_w.vector, cur_w.vector, cur_w.vlen);
631  ls.solution = cur_w;
632  ls.gradient = cur_subgrad;
633  SG_SPRINT("%lf\n", ls.fval)
634 
635  wbest_candidates.push_back(ls);
636  }
637 
638  astar = ls_res[1].a * norm_dir;
639 
640  tstop=ttime.cur_time_diff(false);
641  SG_SPRINT("\t\tline search time: %.5lf\n", tstop-tstart)
642  }
643 
644  /* search for the best w among the new candidates */
645  if (verbose)
646  SG_SPRINT("\t searching for the best Fp:\n")
647  for (size_t i = 0; i < wbest_candidates.size(); i++)
648  {
649  if (verbose)
650  SG_SPRINT("\t\t %d fcurrent: %.16lf\n", i, wbest_candidates[i].fval)
651 
652  if (wbest_candidates[i].fval < best_Fp)
653  {
654  best_Fp = wbest_candidates[i].fval;
655  best_risk = wbest_candidates[i].fval - wbest_candidates[i].reg;
656  memcpy(best_w, wbest_candidates[i].solution.vector, sizeof(float64_t)*w_dim);
657  memcpy(best_subgrad.vector, wbest_candidates[i].gradient.vector, sizeof(float64_t)*w_dim);
658 
659  ncbm.Fp = best_Fp;
660 
661  if (verbose)
662  SG_SPRINT("\t\t new best norm: %f\n",
663  best_w.twonorm(best_w.vector, w_dim));
664  }
665 
666  if (!is_convex)
667  {
668  index_t cp_idx = ncbm.nCP-(wbest_candidates.size()-i);
669 
670  /* conflict */
671  float64_t score
673  wbest_candidates[i].gradient.vector, w_dim)
674  + (-1.0*bias[cp_idx]);
675  if (score > best_risk)
676  {
677  float64_t U
678  = best_risk
680  wbest_candidates[i].gradient.vector, w_dim);
681 
682  float64_t L
683  = best_Fp - wbest_candidates[i].reg
684  - SGVector<float64_t>::dot(wbest_candidates[i].solution.vector,
685  wbest_candidates[i].gradient.vector, w_dim);
686 
687  if (verbose)
688  SG_SPRINT("CONFLICT Rbest=%.6lg score=%g L=%.6lg U=%.6lg\n", best_risk, score, L, U)
689  if (L <= U)
690  {
691  if (verbose)
692  SG_SPRINT("%.6lf < %.6lf => changing bias[%d]=%g\n", L, U, cp_idx, L)
693  bias[cp_idx]= -L;
694  }
695  else
696  {
697  wbest_candidates[i].gradient.zero();
698  SGVector<float64_t>::vec1_plus_scalar_times_vec2(wbest_candidates[i].gradient.vector, -_lambda, best_w.vector, w_dim);
699 
700  cp_ptr = CPList_tail;
701  for (size_t j = wbest_candidates.size()-1; i < j; --j)
702  {
703  cp_ptr = cp_ptr->prev;
704  SG_SPRINT("tail - %d\n (%d)", j, i)
705  }
706 
707  float64_t* cp = get_cutting_plane(cp_ptr);
708  LIBBMRM_MEMCPY(cp, wbest_candidates[i].gradient.vector, w_dim*sizeof(float64_t));
709 
710  /* update the corresponding column and row in H */
711  cp_ptr = CPList_head;
712  for (uint32_t j = 0; j < ncbm.nCP-1; ++j)
713  {
714  float64_t* a = get_cutting_plane(cp_ptr);
715  cp_ptr = cp_ptr->next;
716  float64_t dot_val
717  = SGVector<float64_t>::dot(a, wbest_candidates[i].gradient.vector, w_dim);
718 
719  H.matrix[LIBBMRM_INDEX(cp_idx, j, maxCPs)]
720  = H.matrix[LIBBMRM_INDEX(j, cp_idx, maxCPs)]
721  = dot_val/_lambda;
722  }
723 
724  diag_H[LIBBMRM_INDEX(cp_idx, cp_idx, maxCPs)]
725  = SGVector<float64_t>::dot(wbest_candidates[i].gradient.vector,
726  wbest_candidates[i].gradient.vector, w_dim);
727 
728 
729  bias[cp_idx]
730  = best_Fp - wbest_candidates[i].reg
731  - SGVector<float64_t>::dot(wbest_candidates[i].solution.vector,
732  wbest_candidates[i].gradient.vector, w_dim);
733 
734  if (verbose)
735  SG_SPRINT("solved by changing nCP=%d bias:%g (%g)\n", cp_idx, bias[cp_idx], L)
736  }
737  }
738  }
739  }
740 
741  /* Inactive Cutting Planes (ICP) removal
742  if (cleanICP)
743  {
744  clean_icp(&icp_stats, ncbm, &CPList_head, &CPList_tail,
745  H.matrix, diag_H.vector, x.vector,
746  map.vector, cleanAfter, bias.vector, I.vector);
747  }
748  */
749  }
750 
751  memcpy(w, best_w.vector, sizeof(float64_t)*w_dim);
752 
753  /* free ICP_stats variables */
754  LIBBMRM_FREE(icp_stats.ICPcounter);
755  LIBBMRM_FREE(icp_stats.ICPs);
756  LIBBMRM_FREE(icp_stats.ACPs);
757  LIBBMRM_FREE(icp_stats.H_buff);
758 
759  return ncbm;
760 }
761 
762 } /* namespace shogun */
#define LIBBMRM_CALLOC(x, y)
Definition: libbmrm.h:23
Class Time that implements a stopwatch based on either cpu time or wall clock time.
Definition: Time.h:46
static float64_t dot(const bool *v1, const bool *v2, int32_t n)
compute dot product between v1 and v2 (blas optimized)
Definition: SGVector.h:344
static float64_t * H
Definition: libbmrm.cpp:27
static const double * get_col(uint32_t j)
float64_t * H_buff
Definition: libbmrm.h:63
Class DualLibQPBMSOSVM that uses Bundle Methods for Regularized Risk Minimization algorithms for stru...
float64_t * get_cutting_plane(bmrm_ll *ptr)
Definition: libbmrm.h:118
uint32_t idx
Definition: libbmrm.h:44
int32_t index_t
Definition: common.h:60
static const float64_t INFTY
infinity
Definition: Math.h:1393
static T max(T *vec, int32_t len)
Definition: SGVector.cpp:981
void update_H(BmrmStatistics &ncbm, bmrm_ll *head, bmrm_ll *tail, SGMatrix< float64_t > &H, SGVector< float64_t > &diag_H, float64_t lambda, uint32_t maxCP, int32_t w_dim)
Definition: libncbm.cpp:274
bmrm_ll * next
Definition: libbmrm.h:40
uint32_t * ACPs
Definition: libbmrm.h:60
virtual int32_t get_dim() const =0
uint32_t find_free_idx(bool *map, uint32_t size)
Definition: libbmrm.h:126
static line_search_res zoom(CDualLibQPBMSOSVM *machine, float64_t lambda, float64_t a_lo, float64_t a_hi, float64_t initial_fval, SGVector< float64_t > &initial_solution, SGVector< float64_t > &search_dir, float64_t wolfe_c1, float64_t wolfe_c2, float64_t init_lgrad, float64_t f_lo, float64_t g_lo, float64_t f_hi, float64_t g_hi)
Definition: libncbm.cpp:42
void add(const SGVector< T > x)
Definition: SGVector.cpp:329
SGVector< float64_t > solution
Definition: libncbm.cpp:37
static const float64_t epsilon
Definition: libbmrm.cpp:25
float64_t cur_time_diff(bool verbose=false)
Definition: Time.cpp:67
#define SG_SPRINT(...)
Definition: SGIO.h:182
#define LIBBMRM_FREE(x)
Definition: libbmrm.h:25
#define LIBBMRM_ABS(A)
Definition: libbmrm.h:29
static const uint32_t QPSolverMaxIter
Definition: libbmrm.cpp:24
bmrm_ll * prev
Definition: libbmrm.h:38
double float64_t
Definition: common.h:48
static float64_t * HMatrix
Definition: libncbm.cpp:22
virtual float64_t risk(float64_t *subgrad, float64_t *W, TMultipleCPinfo *info=0, EStructRiskType rtype=N_SLACK_MARGIN_RESCALING)
#define LIBBMRM_MEMCPY(x, y, z)
Definition: libbmrm.h:26
float64_t ** ICPs
Definition: libbmrm.h:57
std::vector< line_search_res > line_search_with_strong_wolfe(CDualLibQPBMSOSVM *machine, float64_t lambda, float64_t initial_val, SGVector< float64_t > &initial_solution, SGVector< float64_t > &initial_grad, SGVector< float64_t > &search_dir, float64_t astart, float64_t amax=1.1, float64_t wolfe_c1=1E-4, float64_t wolfe_c2=0.9, float64_t max_iter=5)
Definition: libncbm.cpp:159
void set_const(T const_elem)
Definition: SGVector.cpp:124
uint32_t maxCPs
Definition: libbmrm.h:51
static uint32_t maxCPs
Definition: libncbm.cpp:23
#define LIBBMRM_INDEX(ROW, COL, NUM_ROWS)
Definition: libbmrm.h:28
#define IGNORE_IN_CLASSLIST
Definition: CPLEXSVM.h:21
void add_cutting_plane(bmrm_ll **tail, bool *map, float64_t *A, uint32_t free_idx, float64_t *cp_data, uint32_t dim)
Definition: libbmrm.cpp:30
uint32_t * ICPcounter
Definition: libbmrm.h:54
static T min(T a, T b)
return the minimum of two integers
Definition: Math.h:153
float64_t * address
Definition: libbmrm.h:42
BmrmStatistics svm_ncbm_solver(CDualLibQPBMSOSVM *machine, float64_t *w, float64_t TolRel, float64_t TolAbs, float64_t _lambda, uint32_t _BufSize, bool cleanICP, uint32_t cleanAfter, bool is_convex, bool line_search, bool verbose)
Definition: libncbm.cpp:308
CStructuredModel * get_model() const
SGVector< float64_t > gradient
Definition: libncbm.cpp:38
void resize_vector(int32_t n)
Definition: SGVector.cpp:307
static float32_t sqrt(float32_t x)
x^0.5
Definition: Math.h:308
#define LIBBMRM_PLUS_INF
Definition: libbmrm.h:22
void clean_icp(ICP_stats *icp_stats, BmrmStatistics &bmrm, bmrm_ll **head, bmrm_ll **tail, float64_t *&Hmat, float64_t *&diag_H, float64_t *&beta, bool *&map, uint32_t cleanAfter, float64_t *&b, uint32_t *&I, uint32_t cp_models)
Definition: libbmrm.cpp:93
index_t vlen
Definition: SGVector.h:706
static void vec1_plus_scalar_times_vec2(T *vec1, const T scalar, const T *vec2, int32_t n)
x=x+alpha*y
Definition: SGVector.cpp:580
static T abs(T a)
return the absolute value of a number
Definition: Math.h:179
static T twonorm(const T *x, int32_t len)
|| x ||_2
uint32_t BufSize
Definition: libbmrm.cpp:28

SHOGUN Machine Learning Toolbox - Documentation