Tapkee
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
external/barnes_hut_sne/vptree.hpp
Go to the documentation of this file.
1 
32 #include <stdlib.h>
33 #include <algorithm>
34 #include <vector>
35 #include <stdio.h>
36 #include <queue>
37 #include <limits>
38 
39 #ifndef VPTREE_H
40 #define VPTREE_H
41 
42 namespace tsne
43 {
44 
45 class DataPoint
46 {
47  int _D;
48  int _ind;
49  double* _x;
50 
51 public:
52  DataPoint() : _D(1), _ind(-1), _x(NULL) { }
53  DataPoint(int Dv, int indv, double* xv) : _D(Dv), _ind(indv), _x(NULL)
54  {
55  _x = (double*) malloc(_D * sizeof(double));
56  for(int d = 0; d < _D; d++) _x[d] = xv[d];
57  }
58  DataPoint(const DataPoint& other) : _D(), _ind(0), _x(NULL) // this makes a deep copy -- should not free anything
59  {
60  if(this != &other) {
61  _D = other.dimensionality();
62  _ind = other.index();
63  _x = (double*) malloc(_D * sizeof(double));
64  for(int d = 0; d < _D; d++) _x[d] = other.x(d);
65  }
66  }
67  ~DataPoint() { if(_x != NULL) free(_x); }
68  DataPoint& operator= (const DataPoint& other) { // asignment should free old object
69  if(this != &other) {
70  if(_x != NULL) free(_x);
71  _D = other.dimensionality();
72  _ind = other.index();
73  _x = (double*) malloc(_D * sizeof(double));
74  for(int d = 0; d < _D; d++) _x[d] = other.x(d);
75  }
76  return *this;
77  }
78  int index() const { return _ind; }
79  int dimensionality() const { return _D; }
80  double x(int d) const { return _x[d]; }
81 };
82 
83 
84 double euclidean_distance(const DataPoint &t1, const DataPoint &t2) {
85  double dd = .0;
86  for(int d = 0; d < t1.dimensionality(); d++) dd += (t1.x(d) - t2.x(d)) * (t1.x(d) - t2.x(d));
87  return dd;
88 }
89 
90 
91 template<typename T, double (*distance)( const T&, const T& )>
92 class VpTree
93 {
94 public:
95 
96  // Default constructor
97  VpTree() : _items(), _tau(0.0), _root(0) {}
98 
99  // Destructor
101  delete _root;
102  }
103 
104  // Function to create a new VpTree from data
105  void create(const std::vector<T>& items) {
106  delete _root;
107  _items = items;
108  _root = buildFromPoints(0, items.size());
109  }
110 
111  // Function that uses the tree to find the k nearest neighbors of target
112  void search(const T& target, int k, std::vector<T>* results, std::vector<double>* distances)
113  {
114 
115  // Use a priority queue to store intermediate results on
116  std::priority_queue<HeapItem> heap;
117 
118  // Variable that tracks the distance to the farthest point in our results
119  _tau = DBL_MAX;
120 
121  // Perform the searcg
122  search(_root, target, k, heap);
123 
124  // Gather final results
125  results->clear(); distances->clear();
126  while(!heap.empty()) {
127  results->push_back(_items[heap.top().index]);
128  distances->push_back(heap.top().dist);
129  heap.pop();
130  }
131 
132  // Results are in reverse order
133  std::reverse(results->begin(), results->end());
134  std::reverse(distances->begin(), distances->end());
135  }
136 
137 private:
138 
139  VpTree(const VpTree&);
140  VpTree& operator=(const VpTree&);
141 
142  std::vector<T> _items;
143  double _tau;
144 
145  // Single node of a VP tree (has a point and radius; left children are closer to point than the radius)
146  struct Node
147  {
148  int index; // index of point in node
149  double threshold; // radius(?)
150  Node* left; // points closer by than threshold
151  Node* right; // points farther away than threshold
152 
153  Node() : index(0), threshold(0.), left(0), right(0) {}
154 
155  ~Node()
156  {
157  delete left;
158  delete right;
159  }
160 
161  Node(const Node&);
162  Node& operator=(const Node&);
163 
164  }* _root;
165 
166 
167  // An item on the intermediate result queue
168  struct HeapItem {
169  HeapItem(int indexv, double distv) :
170  index(indexv), dist(distv) {}
171  int index;
172  double dist;
173  bool operator<(const HeapItem& o) const {
174  return dist < o.dist;
175  }
176  };
177 
178  // Distance comparator for use in std::nth_element
180  {
181  const T& item;
182  DistanceComparator(const T& itemv) : item(itemv) {}
183  bool operator()(const T& a, const T& b) {
184  return distance(item, a) < distance(item, b);
185  }
186  };
187 
188  // Function that (recursively) fills the tree
189  Node* buildFromPoints( int lower, int upper )
190  {
191  if (upper == lower) { // indicates that we're done here!
192  return NULL;
193  }
194 
195  // Lower index is center of current node
196  Node* node = new Node();
197  node->index = lower;
198 
199  if (upper - lower > 1) { // if we did not arrive at leaf yet
200 
201  // Choose an arbitrary point and move it to the start
202  int i = (int) (tapkee::uniform_random() * (upper - lower - 1)) + lower;
203  std::swap(_items[lower], _items[i]);
204 
205  // Partition around the median distance
206  int median = (upper + lower) / 2;
207  std::nth_element(_items.begin() + lower + 1,
208  _items.begin() + median,
209  _items.begin() + upper,
210  DistanceComparator(_items[lower]));
211 
212  // Threshold of the new node will be the distance to the median
213  node->threshold = distance(_items[lower], _items[median]);
214 
215  // Recursively build tree
216  node->index = lower;
217  node->left = buildFromPoints(lower + 1, median);
218  node->right = buildFromPoints(median, upper);
219  }
220 
221  // Return result
222  return node;
223  }
224 
225  // Helper function that searches the tree
226  void search(Node* node, const T& target, int k, std::priority_queue<HeapItem>& heap)
227  {
228  if(node == NULL) return; // indicates that we're done here
229 
230  // Compute distance between target and current node
231  double dist = distance(_items[node->index], target);
232 
233  // If current node within radius tau
234  if(dist < _tau) {
235  if(heap.size() == static_cast<size_t>(k)) heap.pop(); // remove furthest node from result list (if we already have k results)
236  heap.push(HeapItem(node->index, dist)); // add current node to result list
237  if(heap.size() == static_cast<size_t>(k)) _tau = heap.top().dist; // update value of tau (farthest point in result list)
238  }
239 
240  // Return if we arrived at a leaf
241  if(node->left == NULL && node->right == NULL) {
242  return;
243  }
244 
245  // If the target lies within the radius of ball
246  if(dist < node->threshold) {
247  if(dist - _tau <= node->threshold) { // if there can still be neighbors inside the ball, recursively search left child first
248  search(node->left, target, k, heap);
249  }
250 
251  if(dist + _tau >= node->threshold) { // if there can still be neighbors outside the ball, recursively search right child
252  search(node->right, target, k, heap);
253  }
254 
255  // If the target lies outsize the radius of the ball
256  } else {
257  if(dist + _tau >= node->threshold) { // if there can still be neighbors outside the ball, recursively search right child first
258  search(node->right, target, k, heap);
259  }
260 
261  if (dist - _tau <= node->threshold) { // if there can still be neighbors inside the ball, recursively search left child
262  search(node->left, target, k, heap);
263  }
264  }
265  }
266 };
267 
268 }
269 
270 #endif
ScalarType distance(Callback &cb, const CoverTreePoint< RandomAccessIterator > &l, const CoverTreePoint< RandomAccessIterator > &r, ScalarType upper_bound)
void search(const T &target, int k, std::vector< T > *results, std::vector< double > *distances)
bool operator<(const HeapItem &o) const
double euclidean_distance(const DataPoint &t1, const DataPoint &t2)
DataPoint(const DataPoint &other)
ScalarType uniform_random()
Definition: random.hpp:30
DataPoint(int Dv, int indv, double *xv)
struct tsne::VpTree::Node * _root
void search(Node *node, const T &target, int k, std::priority_queue< HeapItem > &heap)
Node & operator=(const Node &)
void create(const std::vector< T > &items)
VpTree & operator=(const VpTree &)
DataPoint & operator=(const DataPoint &other)
Node * buildFromPoints(int lower, int upper)