32 #ifndef _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H
33 #define _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H 1
40 namespace __gnu_parallel
51 template<
typename RandomAccessIterator>
55 typedef typename traits_type::value_type value_type;
56 typedef typename traits_type::difference_type difference_type;
90 template<
typename RandomAccessIterator,
typename RandomNumberGenerator>
113 template<
typename RandomNumberGenerator>
116 {
return rng.genrand_bits(logp); }
120 template<
typename RandomAccessIterator,
typename RandomNumberGenerator>
123 RandomNumberGenerator>* pus)
126 typedef typename traits_type::value_type value_type;
127 typedef typename traits_type::difference_type difference_type;
134 difference_type length = sd->
starts[iam + 1] - sd->
starts[iam];
136 difference_type* dist =
new difference_type[sd->
num_bins + 1];
138 value_type** temporaries =
new value_type*[d->
num_threads];
148 for (difference_type i = 0; i < length; ++i)
154 ++(dist[oracle + 1]);
158 sd->
dist[b][iam + 1] = dist[b];
167 __gnu_sequential::partial_sum(sd->
dist[s + 1],
175 for (
bin_index s = 0; s < d->bins_begin; ++s)
176 global_offset += sd->dist[s + 1][d->num_threads];
180 for (
bin_index s = d->bins_begin; s < d->bins_end; ++s)
182 for (
int t = 0; t < d->num_threads + 1; ++t)
183 sd->dist[s + 1][t] += offset;
184 offset = sd->dist[s + 1][d->num_threads];
187 sd->temporaries[iam] =
static_cast<value_type*
>(
188 ::operator
new(
sizeof(value_type) * offset));
193 for (
bin_index b = 0; b < sd->num_bins + 1; ++b)
194 dist[b] = sd->dist[b][iam];
195 for (
bin_index b = 0; b < sd->num_bins; ++b)
196 bin_proc[b] = sd->bin_proc[b];
198 temporaries[t] = sd->temporaries[t];
200 RandomAccessIterator source = sd->source;
201 difference_type start = sd->starts[iam];
204 for (difference_type i = 0; i < length; ++i)
210 ::new(&(temporaries[target_p][dist[target_bin + 1]++]))
211 value_type(*(source + i + start));
217 delete[] temporaries;
222 for (
bin_index b = d->bins_begin; b < d->bins_end; ++b)
225 sd->temporaries[iam] +
226 ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]),
228 sd->temporaries[iam] + sd->dist[b + 1][d->num_threads];
230 std::copy(begin, end, sd->source + global_offset +
231 ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]));
234 ::operator
delete(sd->temporaries[iam]);
246 return (T)1 << (
__log2(x - 1) + 1);
256 template<
typename RandomAccessIterator,
typename RandomNumberGenerator>
259 RandomAccessIterator end,
261 <RandomAccessIterator>::difference_type n,
263 RandomNumberGenerator& rng)
266 typedef typename traits_type::value_type value_type;
267 typedef typename traits_type::difference_type difference_type;
278 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
282 num_bins_cache = std::max<difference_type>(
283 1, n / (__s.L1_cache_size_lb /
sizeof(value_type)));
288 num_bins = std::min<difference_type>(n, num_bins_cache);
290 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
292 num_bins = std::min<difference_type>(__s.
TLB_size / 2, num_bins);
296 if (num_bins < num_bins_cache)
301 num_bins_cache =
static_cast<bin_index>(std::max<difference_type>(
307 std::min(n, static_cast<difference_type>(num_bins_cache)));
309 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
312 static_cast<difference_type>(__s.
TLB_size / 2), num_bins);
315 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
319 num_threads = std::min<bin_index>(num_threads, num_bins);
321 if (num_threads <= 1)
326 difference_type* starts;
328 # pragma omp parallel num_threads(num_threads)
337 sd.
dist =
new difference_type*[num_bins + 1];
339 for (
bin_index b = 0; b < num_bins + 1; ++b)
340 sd.
dist[b] =
new difference_type[num_threads + 1];
341 for (
bin_index b = 0; b < (num_bins + 1); ++b)
346 starts = sd.
starts =
new difference_type[num_threads + 1];
351 difference_type chunk_length = n / num_threads,
352 split = n % num_threads, start = 0;
353 difference_type bin_chunk_length = num_bins / num_threads,
354 bin_split = num_bins % num_threads;
358 start += (i < split) ? (chunk_length + 1) : chunk_length;
362 bin_cursor += (i < bin_split) ?
363 (bin_chunk_length + 1) : bin_chunk_length;
365 for (; j < bin_cursor; ++j)
371 starts[num_threads] = start;
379 for (
int s = 0; s < (num_bins + 1); ++s)
392 template<
typename RandomAccessIterator,
typename RandomNumberGenerator>
395 RandomAccessIterator end,
396 RandomNumberGenerator& rng)
399 typedef typename traits_type::value_type value_type;
400 typedef typename traits_type::difference_type difference_type;
402 difference_type n = end - begin;
407 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
410 std::max<difference_type>
411 (1, n / (__s.L1_cache_size_lb /
sizeof(value_type)));
416 num_bins =
std::min(n, (difference_type)num_bins_cache);
417 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
423 if (num_bins < num_bins_cache)
428 static_cast<bin_index>(std::max<difference_type>(
435 (
std::min(n, static_cast<difference_type>(num_bins_cache)));
437 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
440 std::min<difference_type>(__s.
TLB_size / 2, num_bins);
443 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
447 int num_bits =
__log2(num_bins);
451 value_type* target =
static_cast<value_type*
>(
452 ::operator
new(
sizeof(value_type) * n));
454 difference_type* dist0 =
new difference_type[num_bins + 1],
455 * dist1 =
new difference_type[num_bins + 1];
457 for (
int b = 0; b < num_bins + 1; ++b)
462 for (difference_type i = 0; i < n; ++i)
468 ++(dist0[oracle + 1]);
472 __gnu_sequential::partial_sum(dist0, dist0 + num_bins + 1, dist0);
474 for (
int b = 0; b < num_bins + 1; ++b)
478 for (difference_type i = 0; i < n; ++i)
479 ::
new(&(target[(dist0[oracles[i]])++])) value_type(*(begin + i));
481 for (
int b = 0; b < num_bins; ++b)
484 target + dist1[b + 1],
489 std::copy(target, target + n, begin);
494 ::operator
delete(target);
497 __gnu_sequential::random_shuffle(begin, end, rng);
505 template<
typename RandomAccessIterator,
typename RandomNumberGenerator>
508 RandomAccessIterator end,
512 typedef typename traits_type::difference_type difference_type;
513 difference_type n = end - begin;
static const _Settings & get()
Get the global settings.
unsigned int uint32
32-bit unsigned integer.
Local data for a thread participating in __gnu_parallel::parallel_random_shuffle().
difference_type * starts
Start indexes of the threads' chunks.
DRandomShufflingGlobalData(RandomAccessIterator &_source)
Constructor.
unsigned int TLB_size
Size of the Translation Lookaside Buffer (underestimation).
Random number generator based on the Mersenne twister. This file is a GNU parallel extension to the S...
void parallel_random_shuffle_drs_pu(DRSSorterPU< RandomAccessIterator, RandomNumberGenerator > *pus)
Random shuffle code executed by each thread.
uint16 thread_index_t
Unsigned integer to index a thread number. The maximum thread number (for each processor) must fit in...
Properties of fundamental types.
int random_number_pow2(int logp, RandomNumberGenerator &rng)
Generate a random number in [0,2^logp).
Data known to every thread participating in __gnu_parallel::parallel_random_shuffle().
thread_index_t * bin_proc
Number of the thread that will further process the corresponding bin.
T round_up_to_pow2(T x)
Round up to the next greater power of 2.
difference_type ** dist
Two-dimensional array to hold the thread-bin distribution.
int num_bins
Number of bins to distribute to.
value_type ** temporaries
Temporary arrays for each thread.
uint32 seed
Random seed for this thread.
uint64 sequence_index_t
Unsigned integer to index elements. The total number of elements for each algorithm must fit into thi...
End-user include file. Provides advanced settings and tuning options. This file is a GNU parallel ext...
Random number generator, based on the Mersenne twister.
RandomAccessIterator & source
Begin iterator of the source.
bin_index bins_begin
Begin index for bins taken care of by this thread.
const _Tp & min(const _Tp &, const _Tp &)
This does what you think it does.
unsigned short bin_index
Type to hold the index of a bin.
void sequential_random_shuffle(RandomAccessIterator begin, RandomAccessIterator end, RandomNumberGenerator &rng)
Sequential cache-efficient random shuffle.
int num_bits
Number of bits needed to address the bins.
class _Settings Run-time settings for the parallel mode, including all tunable parameters.
int num_threads
Number of threads participating in total.
void parallel_random_shuffle(RandomAccessIterator begin, RandomAccessIterator end, RandomNumberGenerator rng=random_number())
Parallel random public call.
unsigned long long L2_cache_size
Size of the L2 cache in bytes (underestimation).
#define _GLIBCXX_CALL(n)
Macro to produce log message when entering a function.
Size __log2(Size n)
Calculates the rounded-down logarithm of n for base 2.
DRandomShufflingGlobalData< RandomAccessIterator > * sd
Pointer to global data.
bin_index bins_end
End index for bins taken care of by this thread.
void parallel_random_shuffle_drs(RandomAccessIterator begin, RandomAccessIterator end, typename std::iterator_traits< RandomAccessIterator >::difference_type n, thread_index_t num_threads, RandomNumberGenerator &rng)
Main parallel random shuffle step.