#ifndef __sampling_h #define __sampling_h #include #include #include "util/random.hh" // Utility functions for proper sub-sampling. // (c) 2007-2012 Ulrich Germann namespace Moses { // select a random sample of size /s/ without restitution from the // range of integers [0,N); template void randomSample(std::vector& v, size_t s, size_t N) { // see also Knuth: Art of Computer Programming Vol. 2, p. 142 s = std::min(s,N); v.resize(s); // the first option tries to be a bit more efficient than O(N) in // picking the samples. The threshold is an ad-hoc, off-the-cuff // guess. I still need to figure out the optimal break-even point // between a linear sweep and repeatedly picking random numbers with // the risk of hitting the same number many times. if (s*10 check(N,0); for (size_t i = 0; i < v.size(); i++) { size_t x = util::rand_excl(N); while (check[x]) x = util::rand_excl(N); check[x]=true; v[i] = x; } } else { size_t m=0; for (size_t t = 0; m <= s && t < N; t++) if (s==N || util::rand_excl(N-t) < s-m) v[m++] = t; } } }; #endif