SHOGUN  6.0.0
KMeansMiniBatch.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2014 Parijat Mazumdar
8  */
9 
14 
15 #ifdef _WIN32
16 #undef far
17 #undef near
18 #endif
19 
20 using namespace shogun;
21 
22 namespace shogun
23 {
25 {
26  init_mb_params();
27 }
28 
29 CKMeansMiniBatch::CKMeansMiniBatch(int32_t k_i, CDistance* d_i, bool use_kmpp_i):CKMeansBase(k_i, d_i, use_kmpp_i)
30 {
31  init_mb_params();
32 }
33 
34 CKMeansMiniBatch::CKMeansMiniBatch(int32_t k_i, CDistance* d_i, SGMatrix<float64_t> centers_i):CKMeansBase(k_i, d_i, centers_i)
35 {
36  init_mb_params();
37 }
38 
40 {
41 }
42 
44 {
45  REQUIRE(b>0, "Parameter bach size should be > 0");
46  batch_size=b;
47 }
48 
50 {
51  return batch_size;
52 }
53 
55 {
56  REQUIRE(i>0, "Parameter number of iterations should be > 0");
57  minib_iter=i;
58 }
59 
61 {
62  return minib_iter;
63 }
64 
65 void CKMeansMiniBatch::set_mb_params(int32_t b, int32_t t)
66 {
67  REQUIRE(b>0, "Parameter bach size should be > 0");
68  REQUIRE(t>0, "Parameter number of iterations should be > 0");
69  batch_size=b;
70  minib_iter=t;
71 }
72 
74 {
76  "batch size not set to positive value. Current batch size %d \n", batch_size);
78  "number of iterations not set to positive value. Current iterations %d \n", minib_iter);
79 
83  CFeatures* rhs_cache=distance->replace_rhs(rhs_mus);
84  int32_t XSize=lhs->get_num_vectors();
85  int32_t dims=lhs->get_num_features();
86 
88  v.zero();
89 
90  for (int32_t i=0; i<minib_iter; i++)
91  {
92  SGVector<int32_t> M=mbchoose_rand(batch_size,XSize);
94  for (int32_t j=0; j<batch_size; j++)
95  {
97  for (int32_t p=0; p<k; p++)
98  dists[p]=distance->distance(M[j],p);
99 
100  int32_t imin=0;
101  float64_t min=dists[0];
102  for (int32_t p=1; p<k; p++)
103  {
104  if (dists[p]<min)
105  {
106  imin=p;
107  min=dists[p];
108  }
109  }
110  ncent[j]=imin;
111  }
112  for (int32_t j=0; j<batch_size; j++)
113  {
114  int32_t near=ncent[j];
115  SGVector<float64_t> c_alive=rhs_mus->get_feature_vector(near);
117  v[near]+=1.0;
118  float64_t eta=1.0/v[near];
119  for (int32_t c=0; c<dims; c++)
120  {
121  c_alive[c]=(1.0-eta)*c_alive[c]+eta*x[c];
122  }
123  }
124  }
125  SG_UNREF(lhs);
126  distance->replace_rhs(rhs_cache);
127  delete rhs_mus;
128 }
129 
130 SGVector<int32_t> CKMeansMiniBatch::mbchoose_rand(int32_t b, int32_t num)
131 {
134  chosen.zero();
135  int32_t ch=0;
136  while (ch<b)
137  {
138  const int32_t n=CMath::random(0,num-1);
139  if (chosen[n]==0)
140  {
141  chosen[n]+=1;
142  ret[ch]=n;
143  ch++;
144  }
145  }
146  return ret;
147 }
148 
149 void CKMeansMiniBatch::init_mb_params()
150 {
151  batch_size=-1;
152  minib_iter=-1;
153 }
154 
156 {
157  initialize_training(data);
160  return true;
161 }
162 
163 }
int32_t get_batch_size() const
Class Distance, a base class for all the distances used in the Shogun toolbox.
Definition: Distance.h:87
CFeatures * get_lhs()
Definition: Distance.h:218
#define REQUIRE(x,...)
Definition: SGIO.h:205
SGMatrix< float64_t > mus
Definition: KMeansBase.h:199
ST * get_feature_vector(int32_t num, int32_t &len, bool &dofree)
static uint64_t random()
Definition: Math.h:1014
void compute_cluster_variances()
Definition: KMeansBase.cpp:90
void initialize_training(CFeatures *data=NULL)
Definition: KMeansBase.cpp:138
void set_batch_size(int32_t b)
double float64_t
Definition: common.h:60
virtual bool train_machine(CFeatures *data=NULL)
virtual CFeatures * replace_rhs(CFeatures *rhs)
Definition: Distance.cpp:147
void set_mb_params(int32_t b, int32_t t)
int32_t get_num_features() const
virtual float64_t distance(int32_t idx_a, int32_t idx_b)
Definition: Distance.cpp:183
#define SG_UNREF(x)
Definition: SGObject.h:53
virtual int32_t get_num_vectors() const
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
static CDenseFeatures * obtain_from_generic(CFeatures *const base_features)
The class Features is the base class of all feature objects.
Definition: Features.h:68

SHOGUN Machine Learning Toolbox - Documentation