tlx
Loading...
Searching...
No Matches
random_bipartition_shuffle.hpp
Go to the documentation of this file.
1/*******************************************************************************
2 * tlx/algorithm/random_bipartition_shuffle.hpp
3 *
4 * Part of tlx - http://panthema.net/tlx
5 *
6 * Copyright (C) 2018 Manuel Penschuck <tlx@manuel.jetzt>
7 *
8 * All rights reserved. Published under the Boost Software License, Version 1.0
9 ******************************************************************************/
10
11#ifndef TLX_ALGORITHM_RANDOM_BIPARTITION_SHUFFLE_HEADER
12#define TLX_ALGORITHM_RANDOM_BIPARTITION_SHUFFLE_HEADER
13
14#include <cassert>
15#include <iterator>
16#include <random>
17
18namespace tlx {
19
20//! \addtogroup tlx_algorithm
21//! \{
22
23/*!
24 * Similar to std::shuffle, but only generates a random bi-partition.
25 * In the result, the
26 * the left partition class is stored at positions 0 to size_left_partition-1
27 * the right partition class is stored at positions size_left_partition to n
28 * where n = std::distance(begin, end).
29 *
30 * Each element is moved to the left partition with probability
31 * (size_left_partition / n). There are no guarantees on the order WITHIN a
32 * partition (which makes this function considerable faster than std::shuffle
33 * especially if partition sizes are unbalanced). The runtime complexity is
34 * linear in the size of the smaller partition class.
35 *
36 * \param begin Iterator to the begin of the data frame
37 * \param end Iterator to the end of the data frame
38 * \param size_left_partition Number of elements to be put into the left
39 * partition: 0 <= size_left_partition <= n
40 * \param urng Random number generator compatible with STL
41 * interface, e.g. std::mt19937
42 */
43template <typename RandomAccessIt, typename RandomBits>
44void random_bipartition_shuffle(RandomAccessIt begin, RandomAccessIt end,
45 size_t size_left_partition, RandomBits& urng) {
46 const size_t size = std::distance(begin, end);
47 assert(size_left_partition <= size);
48
49 // ensure that both paritions are non-empty
50 const size_t size_right_partition = size - size_left_partition;
51 if (!size_left_partition || !size_right_partition) return;
52
53 // this idea is borrow from GNU stdlibc++ and generates two random
54 // variates uniform on [0, a) and [0, b) respectively.
55 auto two_random_variates =
56 [&urng](size_t a, size_t b) {
57 auto x = std::uniform_int_distribution<size_t>{ 0, (a * b) - 1 }(urng);
58 return std::make_pair(x / b, x % b);
59 };
60
61 using std::swap; // allow ADL
62
63 // Perform a Fisher-Yates shuffle, but iterate only over the positions
64 // of the smaller partition.
65 if (size_left_partition < size_right_partition) {
66 auto it = begin;
67
68 // To avoid wasted random bits, we draw two variates at once
69 // and shuffle two positions per iteration
70 for (size_t i = 1; i < size_left_partition; i += 2) {
71 auto rand = two_random_variates(size - i + 1, size - i);
72 swap(*it++, *std::next(begin, size - 1 - rand.first));
73 swap(*it++, *std::next(begin, size - 1 - rand.second));
74 }
75
76 // In case the partition contains an odd number of elements,
77 // there's a special case for the last element
78 if (size_left_partition % 2) {
79 auto x = std::uniform_int_distribution<size_t>{ size_left_partition - 1, size - 1 }(urng);
80 swap(*it, *std::next(begin, x));
81 }
82 }
83 else {
84 // Symmetric case to above, this time shuffling the right partition
85 auto it = std::next(begin, size - 1);
86
87 for (size_t i = size - 2; i >= size_left_partition; i -= 2) {
88 auto rand = two_random_variates(i + 2, i + 1);
89 swap(*it--, *std::next(begin, rand.first));
90 swap(*it--, *std::next(begin, rand.second));
91 }
92
93 if (size_right_partition % 2) {
94 auto x = std::uniform_int_distribution<size_t>{ 0, size_left_partition }(urng);
95 swap(*it--, *std::next(begin, x));
96 }
97 }
98}
99
100//! \}
101
102} // namespace tlx
103
104#endif // !TLX_ALGORITHM_RANDOM_BIPARTITION_SHUFFLE_HEADER
105
106/******************************************************************************/
void random_bipartition_shuffle(RandomAccessIt begin, RandomAccessIt end, size_t size_left_partition, RandomBits &urng)
Similar to std::shuffle, but only generates a random bi-partition.
void swap(CountingPtr< A, D > &a1, CountingPtr< A, D > &a2) noexcept
swap enclosed object with another counting pointer (no reference counts need change)