Tapkee
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
spe.hpp
Go to the documentation of this file.
1 /* This software is distributed under BSD 3-clause license (see LICENSE file).
2  *
3  * Copyright (c) 2012-2013 Sergey Lisitsyn, Fernando J. Iglesias Garcia
4  */
5 
6 #ifndef TAPKEE_SPE_H_
7 #define TAPKEE_SPE_H_
8 
9 /* Tapkee includes */
10 #include <tapkee/defines.hpp>
11 #include <tapkee/utils/time.hpp>
12 /* End of Tapkee includes */
13 
14 namespace tapkee
15 {
16 namespace tapkee_internal
17 {
18 
19 template <class RandomAccessIterator, class PairwiseCallback>
20 DenseMatrix spe_embedding(RandomAccessIterator begin, RandomAccessIterator end,
21  PairwiseCallback callback, const Neighbors& neighbors,
22  IndexType target_dimension, bool global_strategy,
23  ScalarType tolerance, int nupdates, IndexType max_iter)
24 {
25  timed_context context("SPE embedding computation");
26  IndexType k = 0;
27  if (!global_strategy)
28  k = neighbors[0].size();
29 
30  // The number of data points
31  int N = end-begin;
32  while (nupdates > N/2)
33  nupdates = N/2;
34 
35  // Look for the maximum distance
36  ScalarType max = 0.0;
37  for (RandomAccessIterator i_iter=begin; i_iter!=end; ++i_iter)
38  {
39  for (RandomAccessIterator j_iter=i_iter+1; j_iter!=end; ++j_iter)
40  {
41  max = std::max(max, callback.distance(*i_iter,*j_iter));
42  }
43  }
44 
45  // Distances normalizer used in global strategy
46  ScalarType alpha = 0.0;
47  if (global_strategy)
48  alpha = 1.0 / max * std::sqrt(2.0);
49 
50  // Random embedding initialization, Y is the short for embedding_feature_matrix
51  DenseMatrix Y = (DenseMatrix::Random(target_dimension,N)
52  + DenseMatrix::Ones(target_dimension,N)) / 2;
53  // Auxiliary diffference embedding feature matrix
54  DenseMatrix Yd(target_dimension,nupdates);
55 
56  // SPE's main loop
57 
58  typedef std::vector<int> Indices;
59  typedef std::vector<int>::iterator IndexIterator;
60 
61  // Maximum number of iterations
62  if (max_iter == 0)
63  {
64  max_iter = 2000 + static_cast<IndexType>(floor(0.04 * N*N));
65  if (!global_strategy)
66  max_iter *= 3;
67  }
68 
69  // Learning parameter
70  ScalarType lambda = 1.0;
71  // Vector of indices used for shuffling
72  Indices indices(N);
73  for (int i=0; i<N; ++i)
74  indices[i] = i;
75  // Vector with distances in the original space of the points to update
76  DenseVector Rt(nupdates);
77  DenseVector scale(nupdates);
78  DenseVector D(nupdates);
79  // Pointers to the indices of the elements to update
80  IndexIterator ind1;
81  IndexIterator ind2;
82  // Helper used in local strategy
83  Indices ind1Neighbors;
84  if (!global_strategy)
85  ind1Neighbors.resize(k*nupdates);
86 
87  for (IndexType i=0; i<max_iter; ++i)
88  {
89  // Shuffle to select the vectors to update in this iteration
90  tapkee::random_shuffle(indices.begin(),indices.end());
91 
92  ind1 = indices.begin();
93  ind2 = indices.begin()+nupdates;
94 
95  // With local strategy, the seecond set of indices is selected among
96  // neighbors of the first set
97  if (!global_strategy)
98  {
99  // Neighbors of interest
100  for(int j=0; j<nupdates; ++j)
101  {
102  const LocalNeighbors& current_neighbors =
103  neighbors[*ind1++];
104 
105  for(IndexType kk=0; kk<k; ++kk)
106  ind1Neighbors[kk + j*k] = current_neighbors[kk];
107  }
108  // Restore ind1
109  ind1 = indices.begin();
110 
111  // Generate pseudo-random indices and select final indices
112  for(int j=0; j<nupdates; ++j)
113  {
114  IndexType r = static_cast<IndexType>(floor(tapkee::uniform_random()*(k-1)) + k*j);
115  indices[nupdates+j] = ind1Neighbors[r];
116  }
117  }
118 
119 
120  // Compute distances between the selected points in the embedded space
121  for(int j=0; j<nupdates; ++j)
122  {
123  //FIXME it seems that here Euclidean distance is forced
124  D[j] = (Y.col(*ind1) - Y.col(*ind2)).norm();
125  ++ind1, ++ind2;
126  }
127 
128  // Get the corresponding distances in the original space
129  if (global_strategy)
130  Rt.fill(alpha);
131  else // local_strategy
132  Rt.fill(1);
133 
134  ind1 = indices.begin();
135  ind2 = indices.begin()+nupdates;
136  for (int j=0; j<nupdates; ++j)
137  Rt[j] *= callback.distance(*(begin + *ind1++), *(begin + *ind2++));
138 
139  // Compute some terms for update
140 
141  // Scale factor
142  D.array() += tolerance;
143  scale = (Rt-D).cwiseQuotient(D);
144 
145  ind1 = indices.begin();
146  ind2 = indices.begin()+nupdates;
147  // Difference matrix
148  for (int j=0; j<nupdates; ++j)
149  {
150  Yd.col(j).noalias() = Y.col(*ind1) - Y.col(*ind2);
151 
152  ++ind1, ++ind2;
153  }
154 
155  ind1 = indices.begin();
156  ind2 = indices.begin()+nupdates;
157  // Update the location of the vectors in the embedded space
158  for (int j=0; j<nupdates; ++j)
159  {
160  Y.col(*ind1) += lambda / 2 * scale[j] * Yd.col(j);
161  Y.col(*ind2) -= lambda / 2 * scale[j] * Yd.col(j);
162 
163  ++ind1, ++ind2;
164  }
165 
166  // Update the learning parameter
167  lambda = lambda - ( lambda / max_iter );
168  }
169 
170  return Y.transpose();
171 }
172 
173 } // End of namespace tapkee_internal
174 } // End of namespace tapkee
175 
176 #endif /* TAPKEE_SPE_H_ */
void random_shuffle(RAI first, RAI last)
Definition: random.hpp:58
Eigen::Matrix< tapkee::ScalarType, Eigen::Dynamic, Eigen::Dynamic > DenseMatrix
dense matrix type (non-overridable)
Definition: types.hpp:23
ScalarType uniform_random()
Definition: random.hpp:30
double ScalarType
default scalar value (can be overrided with TAPKEE_CUSTOM_INTERNAL_NUMTYPE define) ...
Definition: types.hpp:15
Eigen::Matrix< tapkee::ScalarType, Eigen::Dynamic, 1 > DenseVector
dense vector type (non-overridable)
Definition: types.hpp:21
TAPKEE_INTERNAL_VECTOR< tapkee::IndexType > LocalNeighbors
Definition: synonyms.hpp:39
int IndexType
indexing type (non-overridable) set to int for compatibility with OpenMP 2.0
Definition: types.hpp:19
TAPKEE_INTERNAL_VECTOR< tapkee::tapkee_internal::LocalNeighbors > Neighbors
Definition: synonyms.hpp:40
DenseMatrix spe_embedding(RandomAccessIterator begin, RandomAccessIterator end, PairwiseCallback callback, const Neighbors &neighbors, IndexType target_dimension, bool global_strategy, ScalarType tolerance, int nupdates, IndexType max_iter)
Definition: spe.hpp:20