GeographicLib  1.47
NearestNeighbor.hpp
Go to the documentation of this file.
1 /**
2  * \file NearestNeighbor.hpp
3  * \brief Header for GeographicLib::NearestNeighbor class
4  *
5  * Copyright (c) Charles Karney (2016) <charles@karney.com> and licensed under
6  * the MIT/X11 License. For more information, see
7  * http://geographiclib.sourceforge.net/
8  **********************************************************************/
9 
10 #if !defined(GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP)
11 #define GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP 1
12 
13 #include <algorithm> // for nth_element, max_element, etc.
14 #include <vector>
15 #include <queue> // for priority_queue
16 #include <utility> // for swap + pair
17 #include <cstring>
18 #include <limits>
19 #include <cmath>
20 #include <iostream>
21 #include <sstream>
22 // Only for GEOGRAPHICLIB_STATIC_ASSERT and GeographicLib::GeographicErr
24 
25 #if !defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION)
26 #define GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION 0
27 #endif
28 #if GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
29 #include <boost/serialization/nvp.hpp>
30 #include <boost/serialization/split_member.hpp>
31 #include <boost/serialization/array.hpp>
32 #include <boost/serialization/vector.hpp>
33 #endif
34 
35 #if defined(_MSC_VER)
36 // Squelch warnings about constant conditional expressions
37 # pragma warning (push)
38 # pragma warning (disable: 4127)
39 #endif
40 
41 namespace GeographicLib {
42 
43  /**
44  * \brief Nearest-neighbor calculations
45  *
46  * This class implements nearest-neighbor calculations using the
47  * vantage-point tree described by
48  * - J. K. Uhlmann,
49  * <a href="doi:10.1016/0020-0190(91)90074-r">
50  * Satisfying general proximity/similarity queries with metric trees</a>,
51  * Information Processing Letters 40 175&ndash;179 (1991).
52  * - P. N. Yianilos,
53  * <a href="http://pnylab.com/pny/papers/vptree/vptree/">
54  * Data structures and algorithms for nearest neighbor search in general
55  * metric spaces</a>, Proc. 4th ACM-SIAM Symposium on Discrete Algorithms,
56  * (SIAM, 1993). pp. 311&ndash;321.
57  *
58  * Given a set of points in some space and a distance function \e d
59  * satisfying the metric conditions:
60  * \f[
61  * \begin{align}
62  * d(x,y) &\ge 0,\\
63  * d(x,y) &= 0, \ \text{iff $x = y$},\\
64  * d(x,y) &= d(y,x),\\
65  * d(x,z) &\ge d(x,y) + d(y,z),
66  * \end{align}
67  * \f]
68  * the vantage-point (VP) tree provides an efficient way of determining
69  * nearest neighbors. Typically the cost of constructing a VP tree of \e N
70  * points is \e N log \e N, while the cost of a query is log \e N. Thus a VP
71  * tree should be used in situations where \e N is large and at least log \e
72  * N queries are to be made. The condition, \e N is large, means that
73  * \f$ N \gg 2^D \f$, where \e D is the dimensionality of the space.
74  *
75  * - This implementation includes Yianilos' upper and lower bounds for the
76  * inside and outside sets. This helps limit the number of searches
77  * (compared to just using the median distance).
78  * - Rather than do a depth-first or breath-first search on the tree, the
79  * nodes to be processed are put on a priority queue with the nodes most
80  * likely to contain close points processed first. Frequently, this allows
81  * nodes lower down on the priority queue to be skipped.
82  * - This technique also allows non-exhaustive searchs to be performed (to
83  * answer questions such as "are there any points within 1km of the query
84  * point?).
85  * - When building the tree, the points furthest from the parent vantage
86  * point in both the inside and outside sets are selected as the children's
87  * vantage points. This information is already available from the
88  * computation of the upper and lower bounds of the children. This choice
89  * seems to lead to a reasonably optimized tree.
90  * - The leaf nodes can contain a bucket of points (instead of just a vantage
91  * point).
92  *
93  * This class is templated so that it can handle arbitrary metric spaces as
94  * follows:
95  *
96  * @tparam real the type used for measuring distances; it can be a real or
97  * signed integer type.
98  * @tparam position the type for specifying points.
99  * @tparam distance the type for a function object which takes takes two
100  * positions as arguments and returns the distance (of type \e real).
101  *
102  * The \e real type must support numeric_limits queries (specifically:
103  * is_signed, is_integer, max(), digits, and digits10).
104  *
105  * Example of use:
106  * \include example-NearestNeighbor.cpp
107  **********************************************************************/
108  template <typename real, typename position, class distance>
110  // For tracking changes to the I/O format
111  static const int version = 1;
112  // This is what we get "free"; but if sizeof(real) = 1 (unlikely), allow 4
113  // slots (and this accommodates the default value bucket = 4).
114  static const int maxbucket = (2 + ((4 * sizeof(real)) / sizeof(int) >= 2 ?
115  (4 * sizeof(real)) / sizeof(int) : 2));
116  public:
117 
118  /**
119  * Default constructor for NearestNeighbor.
120  *
121  * This is equivalent to specifying an empty set of points.
122  **********************************************************************/
123  NearestNeighbor() : _numpoints(0) {}
124 
125  /**
126  * Constructor for NearestNeighbor.
127  *
128  * @param[in] pts a vector of points to include in the set.
129  * @param[in] dist the distance function object.
130  * @param[in] bucket the size of the buckets at the leaf nodes; this must
131  * lie in [0, 2 + 4*sizeof(real)/sizeof(int)] (default 4).
132  * @exception GeographicErr if the value of \e bucket is out of bounds or
133  * the size of \e pts is too big for an int.
134  * @exception std::bad_alloc if memory for the tree can't be allocated.
135  *
136  * The distances computed by \e dist must satisfy the standard metric
137  * conditions. If not, the results are undefined. Neither the data in \e
138  * pts nor the query points should contain NaNs or infinities because such
139  * data violates the metric conditions.
140  *
141  * \e pts may contain coincident points (i.e., the distance between them
142  * vanishes); these are treated as distinct.
143  *
144  * The choice of \e bucket is a tradeoff between space and efficiency. A
145  * larger \e bucket decreases the size of the NearestNeigbor object which
146  * scales as pts.size() / max(1, bucket) and reduces the number of distance
147  * calculations to construct the object by log2(bucket) * pts.size().
148  * However each search then requires about bucket additional distance
149  * calculations.
150  *
151  * \warning The same arguments \e pts and \e dist must be provided
152  * to the Search() function.
153  **********************************************************************/
154  NearestNeighbor(const std::vector<position>& pts, const distance& dist,
155  int bucket = 4) {
156  Initialize(pts, dist, bucket);
157  }
158 
159  /**
160  * Initialize or re-initialize NearestNeighbor.
161  *
162  * @param[in] pts a vector of points to include in the tree.
163  * @param[in] dist the distance function object.
164  * @param[in] bucket the size of the buckets at the leaf nodes; this must
165  * lie in [0, 2 + 4*sizeof(real)/sizeof(int)] (default 4).
166  * @exception GeographicErr if the value of \e bucket is out of bounds or
167  * the size of \e pts is too big for an int.
168  * @exception std::bad_alloc if memory for the tree can't be allocated.
169  *
170  * See also the documentation on the constructor.
171  *
172  * If an exception is thrown, the state of the NearestNeighbor is
173  * unchanged.
174  **********************************************************************/
175  void Initialize(const std::vector<position>& pts, const distance& dist,
176  int bucket = 4) {
177  GEOGRAPHICLIB_STATIC_ASSERT(std::numeric_limits<real>::is_signed,
178  "real must be a signed type");
179  if (!( 0 <= bucket && bucket <= maxbucket ))
181  ("bucket must lie in [0, 2 + 4*sizeof(real)/sizeof(int)]");
182  if (pts.size() > size_t(std::numeric_limits<int>::max()))
183  throw GeographicLib::GeographicErr("pts array too big");
184  // the pair contains distance+id
185  std::vector<item> ids(pts.size());
186  for (int k = int(ids.size()); k--;)
187  ids[k] = std::make_pair(real(0), k);
188  int cost = 0;
189  std::vector<Node> tree;
190  init(pts, dist, bucket, tree, ids, cost,
191  0, int(ids.size()), int(ids.size()/2));
192  _tree.swap(tree);
193  _numpoints = int(pts.size());
194  _bucket = bucket;
195  _mc = _sc = 0;
196  _cost = cost; _c1 = _k = _cmax = 0;
197  _cmin = std::numeric_limits<int>::max();
198  }
199 
200  /**
201  * Search the NearestNeighbor.
202  *
203  * @param[in] pts the vector of points used for initialization.
204  * @param[in] dist the distance function object used for initialization.
205  * @param[in] query the query point.
206  * @param[out] ind a vector of indices to the closest points found.
207  * @param[in] k the number of points to search for (default = 1).
208  * @param[in] maxdist only return points with distances of \e maxdist or
209  * less from \e query (default is the maximum \e real).
210  * @param[in] mindist only return points with distances of more than
211  * \e mindist from \e query (default = &minus;1).
212  * @param[in] exhaustive whether to do an exhaustive search (default true).
213  * @param[in] tol the tolerance on the results (default 0).
214  * @return the distance to the closest point found (&minus;1 if no points
215  * are found).
216  *
217  * The indices returned in \e ind are sorted by distance from \e query
218  * (closest first).
219  *
220  * With \e exhaustive = true and \e tol = 0 (their default values), this
221  * finds the indices of \e k closest neighbors to \e query whose distances
222  * to \e query are in (\e mindist, \e maxdist]. If \e mindist and \e
223  * maxdist have their default values, then these bounds have no effect. If
224  * \e query is one of the points in the tree, then set \e mindist = 0 to
225  * prevent this point (and other coincident points) from being returned.
226  *
227  * If \e exhaustive = false, exit as soon as \e k results satisfying the
228  * distance criteria are found. If less than \e k results are returned
229  * then the search was exhaustive even if \e exhaustive = false.
230  *
231  * If \e tol is positive, do an approximate search; in this case the
232  * results are to be interpreted as follows: if the <i>k</i>'th distance is
233  * \e dk, then all results with distances less than or equal \e dk &minus;
234  * \e tol are correct; all others are suspect &mdash; there may be other
235  * closer results with distances greater or equal to \e dk &minus; \e tol.
236  * If less than \e k results are found, then the search is exact.
237  *
238  * \e mindist should be used to exclude a "small" neighborhood of the query
239  * point (relative to the average spacing of the data). If \e mindist is
240  * large, the efficiency of the search deteriorates.
241  *
242  * \warning The arguments \e pts and \e dist must be identical to
243  * those used to initialize the NearestNeighbor; if not, the behavior of
244  * this function is undefined (however, if the size of \e pts is wrong,
245  * this function exits with no results returned).
246  **********************************************************************/
247  real Search(const std::vector<position>& pts, const distance& dist,
248  const position& query,
249  std::vector<int>& ind,
250  int k = 1,
251  real maxdist = std::numeric_limits<real>::max(),
252  real mindist = -1,
253  bool exhaustive = true,
254  real tol = 0) const {
255  std::priority_queue<item> results;
256  if (_numpoints > 0 && _numpoints == int(pts.size()) &&
257  k > 0 && maxdist > mindist) {
258  // distance to the kth closest point so far
259  real tau = maxdist;
260  // first is negative of how far query is outside boundary of node
261  // +1 if on boundary or inside
262  // second is node index
263  std::priority_queue<item> todo;
264  todo.push(std::make_pair(real(1), int(_tree.size()) - 1));
265  int c = 0;
266  while (!todo.empty()) {
267  int n = todo.top().second;
268  real d = -todo.top().first;
269  todo.pop();
270  real tau1 = tau - tol;
271  // compare tau and d again since tau may have become smaller.
272  if (!( n >= 0 && tau1 >= d )) continue;
273  const Node& current = _tree[n];
274  real dst = 0; // to suppress warning about uninitialized variable
275  bool exitflag = false, leaf = current.index < 0;
276  for (int i = 0; i < (leaf ? _bucket : 1); ++i) {
277  int index = leaf ? current.leaves[i] : current.index;
278  if (index < 0) break;
279  dst = dist(pts[index], query);
280  ++c;
281 
282  if (dst > mindist && dst <= tau) {
283  if (int(results.size()) == k) results.pop();
284  results.push(std::make_pair(dst, index));
285  if (int(results.size()) == k) {
286  if (exhaustive)
287  tau = results.top().first;
288  else {
289  exitflag = true;
290  break;
291  }
292  if (tau <= tol) {
293  exitflag = true;
294  break;
295  }
296  }
297  }
298  }
299  if (exitflag) break;
300 
301  if (current.index < 0) continue;
302  tau1 = tau - tol;
303  for (int l = 0; l < 2; ++l) {
304  if (current.data.child[l] >= 0 &&
305  dst + current.data.upper[l] >= mindist) {
306  if (dst < current.data.lower[l]) {
307  d = current.data.lower[l] - dst;
308  if (tau1 >= d)
309  todo.push(std::make_pair(-d, current.data.child[l]));
310  } else if (dst > current.data.upper[l]) {
311  d = dst - current.data.upper[l];
312  if (tau1 >= d)
313  todo.push(std::make_pair(-d, current.data.child[l]));
314  } else
315  todo.push(std::make_pair(real(1), current.data.child[l]));
316  }
317  }
318  }
319  ++_k;
320  _c1 += c;
321  double omc = _mc;
322  _mc += (c - omc) / _k;
323  _sc += (c - omc) * (c - _mc);
324  if (c > _cmax) _cmax = c;
325  if (c < _cmin) _cmin = c;
326  }
327 
328  real d = -1;
329  ind.resize(results.size());
330 
331  for (int i = int(ind.size()); i--;) {
332  ind[i] = int(results.top().second);
333  if (i == 0) d = results.top().first;
334  results.pop();
335  }
336  return d;
337 
338  }
339 
340  /**
341  * @return the total number of points in the set.
342  **********************************************************************/
343  int NumPoints() const { return _numpoints; }
344 
345  /**
346  * Write the object to an I/O stream.
347  *
348  * @param[in,out] os the stream to write to.
349  * @param[in] bin if true (the default) save in binary mode.
350  * @exception std::bad_alloc if memory for the string representation of the
351  * object can't be allocated.
352  *
353  * The counters tracking the statistics of searches are not saved; however
354  * the initializtion cost is saved. The format of the binary saves is \e
355  * not portable.
356  *
357  * \note <a href="http://www.boost.org/libs/serialization/doc">
358  * Boost serialization</a> can also be used to save and restore a
359  * NearestNeighbor object. This requires that the
360  * GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION macro be defined.
361  **********************************************************************/
362  void Save(std::ostream& os, bool bin = true) const {
363  int realspec = std::numeric_limits<real>::digits *
364  (std::numeric_limits<real>::is_integer ? -1 : 1);
365  if (bin) {
366  char id[] = "NearestNeighbor_";
367  os.write(id, 16);
368  int buf[6];
369  buf[0] = version;
370  buf[1] = realspec;
371  buf[2] = _bucket;
372  buf[3] = _numpoints;
373  buf[4] = int(_tree.size());
374  buf[5] = _cost;
375  os.write(reinterpret_cast<const char *>(buf), 6 * sizeof(int));
376  for (int i = 0; i < int(_tree.size()); ++i) {
377  const Node& node = _tree[i];
378  os.write(reinterpret_cast<const char *>(&node.index), sizeof(int));
379  if (node.index >= 0) {
380  os.write(reinterpret_cast<const char *>(node.data.lower),
381  2 * sizeof(real));
382  os.write(reinterpret_cast<const char *>(node.data.upper),
383  2 * sizeof(real));
384  os.write(reinterpret_cast<const char *>(node.data.child),
385  2 * sizeof(int));
386  } else {
387  os.write(reinterpret_cast<const char *>(node.leaves),
388  _bucket * sizeof(int));
389  }
390  }
391  } else {
392  std::stringstream ostring;
393  // Ensure enough precision for type real. If real is actually a
394  // signed integer type, full precision is used anyway. With C++11,
395  // max_digits10 can be used instead.
396  ostring.precision(std::numeric_limits<real>::digits10 + 2);
397  ostring << version << " " << realspec << " " << _bucket << " "
398  << _numpoints << " " << _tree.size() << " " << _cost;
399  for (int i = 0; i < int(_tree.size()); ++i) {
400  const Node& node = _tree[i];
401  ostring << "\n" << node.index;
402  if (node.index >= 0) {
403  for (int l = 0; l < 2; ++l)
404  ostring << " " << node.data.lower[l] << " " << node.data.upper[l]
405  << " " << node.data.child[l];
406  } else {
407  for (int l = 0; l < _bucket; ++l)
408  ostring << " " << node.leaves[l];
409  }
410  }
411  os << ostring.str();
412  }
413  }
414 
415  /**
416  * Read the object from an I/O stream.
417  *
418  * @param[in,out] is the stream to read from
419  * @param[in] bin if true (the default) load in binary mode.
420  * @exception GeographicErr if the state read from \e is is illegal.
421  *
422  * The counters tracking the statistics of searches are reset by this
423  * operation. Binary data must have been saved on a machine with the same
424  * architecture. If an exception is thrown, the state of the
425  * NearestNeighbor is unchanged.
426  *
427  * \note <a href="http://www.boost.org/libs/serialization/doc">
428  * Boost serialization</a> can also be used to save and restore a
429  * NearestNeighbor object. This requires that the
430  * GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION macro be defined.
431  *
432  * \warning The same arguments \e pts and \e dist used for
433  * initialization must be provided to the Search() function.
434  **********************************************************************/
435  void Load(std::istream& is, bool bin = true) {
436  int version1, realspec, bucket, numpoints, treesize, cost;
437  if (bin) {
438  char id[17];
439  is.read(id, 16);
440  id[16] = '\0';
441  if (!(std::strcmp(id, "NearestNeighbor_") == 0))
442  throw GeographicLib::GeographicErr("Bad ID");
443  is.read(reinterpret_cast<char *>(&version1), sizeof(int));
444  is.read(reinterpret_cast<char *>(&realspec), sizeof(int));
445  is.read(reinterpret_cast<char *>(&bucket), sizeof(int));
446  is.read(reinterpret_cast<char *>(&numpoints), sizeof(int));
447  is.read(reinterpret_cast<char *>(&treesize), sizeof(int));
448  is.read(reinterpret_cast<char *>(&cost), sizeof(int));
449  } else {
450  if (!( is >> version1 >> realspec >> bucket >> numpoints >> treesize
451  >> cost ))
452  throw GeographicLib::GeographicErr("Bad header");
453  }
454  if (!( version1 == version ))
455  throw GeographicLib::GeographicErr("Incompatible version");
456  if (!( realspec == std::numeric_limits<real>::digits *
457  (std::numeric_limits<real>::is_integer ? -1 : 1) ))
458  throw GeographicLib::GeographicErr("Different real types");
459  if (!( 0 <= bucket && bucket <= maxbucket ))
460  throw GeographicLib::GeographicErr("Bad bucket size");
461  if (!( 0 <= treesize && treesize <= numpoints ))
462  throw GeographicLib::GeographicErr("Bad number of points or tree size");
463  std::vector<Node> tree;
464  tree.reserve(treesize);
465  for (int i = 0; i < treesize; ++i) {
466  Node node;
467  if (bin) {
468  is.read(reinterpret_cast<char *>(&node.index), sizeof(int));
469  if (node.index >= 0) {
470  is.read(reinterpret_cast<char *>(node.data.lower),
471  2 * sizeof(real));
472  is.read(reinterpret_cast<char *>(node.data.upper),
473  2 * sizeof(real));
474  is.read(reinterpret_cast<char *>(node.data.child),
475  2 * sizeof(int));
476  } else {
477  is.read(reinterpret_cast<char *>(node.leaves),
478  bucket * sizeof(int));
479  for (int l = bucket; l < maxbucket; ++l)
480  node.leaves[l] = 0;
481  }
482  } else {
483  if (!( is >> node.index ))
484  throw GeographicLib::GeographicErr("Bad index");
485  if (node.index >= 0) {
486  for (int l = 0; l < 2; ++l) {
487  if (!( is >> node.data.lower[l] >> node.data.upper[l]
488  >> node.data.child[l] ))
489  throw GeographicLib::GeographicErr("Bad node data");
490  }
491  } else {
492  // Must be at least one valid leaf followed by a sequence end
493  // markers (-1).
494  for (int l = 0; l < bucket; ++l) {
495  if (!( is >> node.leaves[l] ))
496  throw GeographicLib::GeographicErr("Bad leaf data");
497  }
498  for (int l = bucket; l < maxbucket; ++l)
499  node.leaves[l] = 0;
500  }
501  }
502  node.Check(numpoints, treesize, bucket);
503  tree.push_back(node);
504  }
505  _tree.swap(tree);
506  _numpoints = numpoints;
507  _bucket = bucket;
508  _mc = _sc = 0;
509  _cost = cost; _c1 = _k = _cmax = 0;
510  _cmin = std::numeric_limits<int>::max();
511  }
512 
513  /**
514  * The accumulated statistics on the searches so far.
515  *
516  * @param[out] setupcost the cost of initializing the NearestNeighbor.
517  * @param[out] numsearches the number of calls to Search().
518  * @param[out] searchcost the total cost of the calls to Search().
519  * @param[out] mincost the minimum cost of a Search().
520  * @param[out] maxcost the maximum cost of a Search().
521  * @param[out] mean the mean cost of a Search().
522  * @param[out] sd the standard deviation in the cost of a Search().
523  *
524  * Here "cost" measures the number of distance calculations needed. Note
525  * that the accumulation of statistics is \e not thread safe.
526  **********************************************************************/
527  void Statistics(int& setupcost, int& numsearches, int& searchcost,
528  int& mincost, int& maxcost,
529  double& mean, double& sd) const {
530  setupcost = _cost; numsearches = _k; searchcost = _c1;
531  mincost = _cmin; maxcost = _cmax;
532  mean = _mc; sd = std::sqrt(_sc / (_k - 1));
533  }
534 
535  /**
536  * Reset the counters for the accumulated statistics on the searches so
537  * far.
538  **********************************************************************/
539  void ResetStatistics() const {
540  _mc = _sc = 0;
541  _c1 = _k = _cmax = 0;
542  _cmin = std::numeric_limits<int>::max();
543  }
544 
545  private:
546  // Package up a real and an int. We will want to sort on the real so put
547  // it first.
548  typedef std::pair<real, int> item;
549  class Node {
550  public:
551  struct bounds {
552  real lower[2], upper[2]; // bounds on inner/outer distances
553  int child[2];
554  };
555  union {
556  bounds data;
557  int leaves[maxbucket];
558  };
559  int index;
560 
561  Node()
562  : index(-1)
563  {
564  for (int i = 0; i < 2; ++i) {
565  data.lower[i] = data.upper[i] = 0;
566  data.child[i] = -1;
567  }
568  }
569 
570  // Sanity check on a Node
571  void Check(int numpoints, int treesize, int bucket) const {
572  if (!( -1 <= index && index << numpoints ))
573  throw GeographicLib::GeographicErr("Bad index");
574  if (index >= 0) {
575  if (!( -1 <= data.child[0] && data.child[0] < treesize &&
576  -1 <= data.child[1] && data.child[1] < treesize ))
577  throw GeographicLib::GeographicErr("Bad child pointers");
578  if (!( 0 <= data.lower[0] && data.lower[0] <= data.upper[0] &&
579  data.upper[0] <= data.lower[1] &&
580  data.lower[1] <= data.upper[1] ))
581  throw GeographicLib::GeographicErr("Bad bounds");
582  } else {
583  // Must be at least one valid leaf followed by a sequence end markers
584  // (-1).
585  bool start = true;
586  for (int l = 0; l < bucket; ++l) {
587  if (!( (start ?
588  ((l == 0 ? 0 : -1) <= leaves[l] && leaves[l] < numpoints) :
589  leaves[l] == -1) ))
590  throw GeographicLib::GeographicErr("Bad leaf data");
591  start = leaves[l] >= 0;
592  }
593  for (int l = bucket; l < maxbucket; ++l) {
594  if (leaves[l] != 0)
595  throw GeographicLib::GeographicErr("Bad leaf data");
596  }
597  }
598  }
599 
600 #if GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
601  friend class boost::serialization::access;
602  template<class Archive> void save(Archive& ar, const unsigned int) const {
603  ar & boost::serialization::make_nvp("index", index);
604  if (index < 0)
605  ar & boost::serialization::make_nvp("leaves", leaves);
606  else
607  ar & boost::serialization::make_nvp("lower", data.lower)
608  & boost::serialization::make_nvp("upper", data.upper)
609  & boost::serialization::make_nvp("child", data.child);
610  }
611  template<class Archive> void load(Archive& ar, const unsigned int) {
612  ar & boost::serialization::make_nvp("index", index);
613  if (index < 0)
614  ar & boost::serialization::make_nvp("leaves", leaves);
615  else
616  ar & boost::serialization::make_nvp("lower", data.lower)
617  & boost::serialization::make_nvp("upper", data.upper)
618  & boost::serialization::make_nvp("child", data.child);
619  }
620  template<class Archive>
621  void serialize(Archive& ar, const unsigned int file_version)
622  { boost::serialization::split_member(ar, *this, file_version); }
623 #endif
624  };
625 #if GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
626  friend class boost::serialization::access;
627  template<class Archive> void save(Archive& ar, const unsigned) const {
628  int realspec = std::numeric_limits<real>::digits *
629  (std::numeric_limits<real>::is_integer ? -1 : 1);
630  // Need to use version1, otherwise load error in debug mode on Linux:
631  // undefined reference to GeographicLib::NearestNeighbor<...>::version.
632  int version1 = version;
633  ar & boost::serialization::make_nvp("version", version1)
634  & boost::serialization::make_nvp("realspec", realspec)
635  & boost::serialization::make_nvp("bucket", _bucket)
636  & boost::serialization::make_nvp("numpoints", _numpoints)
637  & boost::serialization::make_nvp("cost", _cost)
638  & boost::serialization::make_nvp("tree", _tree);
639  }
640  template<class Archive> void load(Archive& ar, const unsigned) {
641  int version1, realspec, bucket, numpoints, cost;
642  ar & boost::serialization::make_nvp("version", version1);
643  if (version1 != version)
644  throw GeographicLib::GeographicErr("Incompatible version");
645  std::vector<Node> tree;
646  ar & boost::serialization::make_nvp("realspec", realspec);
647  if (!( realspec == std::numeric_limits<real>::digits *
648  (std::numeric_limits<real>::is_integer ? -1 : 1) ))
649  throw GeographicLib::GeographicErr("Different real types");
650  ar & boost::serialization::make_nvp("bucket", bucket);
651  if (!( 0 <= bucket && bucket <= maxbucket ))
652  throw GeographicLib::GeographicErr("Bad bucket size");
653  ar & boost::serialization::make_nvp("numpoints", numpoints)
654  & boost::serialization::make_nvp("cost", cost)
655  & boost::serialization::make_nvp("tree", tree);
656  if (!( 0 <= int(tree.size()) && int(tree.size()) <= numpoints ))
657  throw GeographicLib::GeographicErr("Bad number of points or tree size");
658  for (int i = 0; i < int(tree.size()); ++i)
659  tree[i].Check(numpoints, int(tree.size()), bucket);
660  _tree.swap(tree);
661  _numpoints = numpoints;
662  _bucket = bucket;
663  _mc = _sc = 0;
664  _cost = cost; _c1 = _k = _cmax = 0;
665  _cmin = std::numeric_limits<int>::max();
666  }
667  template<class Archive>
668  void serialize(Archive& ar, const unsigned int file_version)
669  { boost::serialization::split_member(ar, *this, file_version); }
670 #endif
671 
672  int _numpoints, _bucket, _cost;
673  std::vector<Node> _tree;
674  // Counters to track stastistics on the cost of searches
675  mutable double _mc, _sc;
676  mutable int _c1, _k, _cmin, _cmax;
677 
678  int init(const std::vector<position>& pts, const distance& dist, int bucket,
679  std::vector<Node>& tree, std::vector<item>& ids, int& cost,
680  int l, int u, int vp) {
681 
682  if (u == l)
683  return -1;
684  Node node;
685 
686  if (u - l > (bucket == 0 ? 1 : bucket)) {
687 
688  // choose a vantage point and move it to the start
689  int i = vp;
690  std::swap(ids[l], ids[i]);
691 
692  int m = (u + l + 1) / 2;
693 
694  for (int k = l + 1; k < u; ++k) {
695  ids[k].first = dist(pts[ids[l].second], pts[ids[k].second]);
696  ++cost;
697  }
698  // partitian around the median distance
699  std::nth_element(ids.begin() + l + 1, ids.begin() + m, ids.begin() + u);
700  node.index = ids[l].second;
701  if (m > l + 1) { // node.child[0] is possibly empty
702  typename std::vector<item>::iterator
703  t = std::min_element(ids.begin() + l + 1, ids.begin() + m);
704  node.data.lower[0] = t->first;
705  t = std::max_element(ids.begin() + l + 1, ids.begin() + m);
706  node.data.upper[0] = t->first;
707  // Use point with max distance as vantage point; this point act as a
708  // "corner" point and leads to a good partition.
709  node.data.child[0] = init(pts, dist, bucket, tree, ids, cost,
710  l + 1, m, int(t - ids.begin()));
711  }
712  typename std::vector<item>::iterator
713  t = std::max_element(ids.begin() + m, ids.begin() + u);
714  node.data.lower[1] = ids[m].first;
715  node.data.upper[1] = t->first;
716  // Use point with max distance as vantage point here too
717  node.data.child[1] = init(pts, dist, bucket, tree, ids, cost,
718  m, u, int(t - ids.begin()));
719  } else {
720  if (bucket == 0)
721  node.index = ids[l].second;
722  else {
723  node.index = -1;
724  // Sort the bucket entries so that the tree is independent of the
725  // implementation of nth_element.
726  std::sort(ids.begin() + l, ids.begin() + u);
727  for (int i = l; i < u; ++i)
728  node.leaves[i-l] = ids[i].second;
729  for (int i = u - l; i < bucket; ++i)
730  node.leaves[i] = -1;
731  for (int i = bucket; i < maxbucket; ++i)
732  node.leaves[i] = 0;
733  }
734  }
735 
736  tree.push_back(node);
737  return int(tree.size()) - 1;
738  }
739 
740  };
741 
742 } // namespace GeographicLib
743 
744 #if defined(_MSC_VER)
745 # pragma warning (pop)
746 #endif
747 
748 #endif // GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP
GeographicLib::Math::real real
Definition: GeodSolve.cpp:31
void Load(std::istream &is, bool bin=true)
void Statistics(int &setupcost, int &numsearches, int &searchcost, int &mincost, int &maxcost, double &mean, double &sd) const
NearestNeighbor(const std::vector< position > &pts, const distance &dist, int bucket=4)
void Initialize(const std::vector< position > &pts, const distance &dist, int bucket=4)
void Save(std::ostream &os, bool bin=true) const
Namespace for GeographicLib.
Definition: Accumulator.cpp:12
Exception handling for GeographicLib.
Definition: Constants.hpp:388
Header for GeographicLib::Constants class.
real Search(const std::vector< position > &pts, const distance &dist, const position &query, std::vector< int > &ind, int k=1, real maxdist=std::numeric_limits< real >::max(), real mindist=-1, bool exhaustive=true, real tol=0) const
Nearest-neighbor calculations.