libstdc++
multiway_mergesort.h
Go to the documentation of this file.
1 // -*- C++ -*-
2 
3 // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 3, or (at your option) any later
9 // version.
10 
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /** @file parallel/multiway_mergesort.h
26  * @brief Parallel multiway merge sort.
27  * This file is a GNU parallel extension to the Standard C++ Library.
28  */
29 
30 // Written by Johannes Singler.
31 
32 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
33 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
34 
35 #include <vector>
36 
38 #include <bits/stl_algo.h>
39 #include <parallel/parallel.h>
41 
42 namespace __gnu_parallel
43 {
44 
45 /** @brief Subsequence description. */
46 template<typename _DifferenceTp>
47  struct Piece
48  {
49  typedef _DifferenceTp difference_type;
50 
51  /** @brief Begin of subsequence. */
52  difference_type begin;
53 
54  /** @brief End of subsequence. */
55  difference_type end;
56  };
57 
58 /** @brief Data accessed by all threads.
59  *
60  * PMWMS = parallel multiway mergesort */
61 template<typename RandomAccessIterator>
63  {
65  typedef typename traits_type::value_type value_type;
66  typedef typename traits_type::difference_type difference_type;
67 
68  /** @brief Number of threads involved. */
70 
71  /** @brief Input begin. */
72  RandomAccessIterator source;
73 
74  /** @brief Start indices, per thread. */
75  difference_type* starts;
76 
77  /** @brief Storage in which to sort. */
78  value_type** temporary;
79 
80  /** @brief Samples. */
81  value_type* samples;
82 
83  /** @brief Offsets to add to the found positions. */
84  difference_type* offsets;
85 
86  /** @brief Pieces of data to merge @c [thread][sequence] */
88 };
89 
90 /**
91  * @brief Select samples from a sequence.
92  * @param sd Pointer to algorithm data. Result will be placed in
93  * @c sd->samples.
94  * @param num_samples Number of samples to select.
95  */
96 template<typename RandomAccessIterator, typename _DifferenceTp>
97  void
99  _DifferenceTp num_samples)
100  {
101  typedef std::iterator_traits<RandomAccessIterator> traits_type;
102  typedef typename traits_type::value_type value_type;
103  typedef _DifferenceTp difference_type;
104 
105  thread_index_t iam = omp_get_thread_num();
106 
107  difference_type* es = new difference_type[num_samples + 2];
108 
109  equally_split(sd->starts[iam + 1] - sd->starts[iam],
110  num_samples + 1, es);
111 
112  for (difference_type i = 0; i < num_samples; ++i)
113  ::new(&(sd->samples[iam * num_samples + i]))
114  value_type(sd->source[sd->starts[iam] + es[i + 1]]);
115 
116  delete[] es;
117  }
118 
119 /** @brief Split consistently. */
120 template<bool exact, typename RandomAccessIterator,
121  typename Comparator, typename SortingPlacesIterator>
123  {
124  };
125 
126 /** @brief Split by exact splitting. */
127 template<typename RandomAccessIterator, typename Comparator,
128  typename SortingPlacesIterator>
130  <true, RandomAccessIterator, Comparator, SortingPlacesIterator>
131  {
132  void operator()(
133  const thread_index_t iam,
135  Comparator& comp,
136  const typename
137  std::iterator_traits<RandomAccessIterator>::difference_type
138  num_samples)
139  const
140  {
141 # pragma omp barrier
142 
144  seqs(sd->num_threads);
145  for (thread_index_t s = 0; s < sd->num_threads; s++)
146  seqs[s] = std::make_pair(sd->temporary[s],
147  sd->temporary[s]
148  + (sd->starts[s + 1] - sd->starts[s]));
149 
151 
152  // if not last thread
153  if (iam < sd->num_threads - 1)
154  multiseq_partition(seqs.begin(), seqs.end(),
155  sd->starts[iam + 1], offsets.begin(), comp);
156 
157  for (int seq = 0; seq < sd->num_threads; seq++)
158  {
159  // for each sequence
160  if (iam < (sd->num_threads - 1))
161  sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first;
162  else
163  // very end of this sequence
164  sd->pieces[iam][seq].end =
165  sd->starts[seq + 1] - sd->starts[seq];
166  }
167 
168 # pragma omp barrier
169 
170  for (thread_index_t seq = 0; seq < sd->num_threads; seq++)
171  {
172  // For each sequence.
173  if (iam > 0)
174  sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end;
175  else
176  // Absolute beginning.
177  sd->pieces[iam][seq].begin = 0;
178  }
179  }
180  };
181 
182 /** @brief Split by sampling. */
183 template<typename RandomAccessIterator, typename Comparator,
184  typename SortingPlacesIterator>
185  struct split_consistently<false, RandomAccessIterator, Comparator,
186  SortingPlacesIterator>
187  {
188  void operator()(
189  const thread_index_t iam,
191  Comparator& comp,
192  const typename
193  std::iterator_traits<RandomAccessIterator>::difference_type
194  num_samples)
195  const
196  {
197  typedef std::iterator_traits<RandomAccessIterator> traits_type;
198  typedef typename traits_type::value_type value_type;
199  typedef typename traits_type::difference_type difference_type;
200 
201  determine_samples(sd, num_samples);
202 
203 # pragma omp barrier
204 
205 # pragma omp single
206  __gnu_sequential::sort(sd->samples,
207  sd->samples + (num_samples * sd->num_threads),
208  comp);
209 
210 # pragma omp barrier
211 
212  for (thread_index_t s = 0; s < sd->num_threads; ++s)
213  {
214  // For each sequence.
215  if (num_samples * iam > 0)
216  sd->pieces[iam][s].begin =
217  std::lower_bound(sd->temporary[s],
218  sd->temporary[s]
219  + (sd->starts[s + 1] - sd->starts[s]),
220  sd->samples[num_samples * iam],
221  comp)
222  - sd->temporary[s];
223  else
224  // Absolute beginning.
225  sd->pieces[iam][s].begin = 0;
226 
227  if ((num_samples * (iam + 1)) < (num_samples * sd->num_threads))
228  sd->pieces[iam][s].end =
229  std::lower_bound(sd->temporary[s],
230  sd->temporary[s]
231  + (sd->starts[s + 1] - sd->starts[s]),
232  sd->samples[num_samples * (iam + 1)],
233  comp)
234  - sd->temporary[s];
235  else
236  // Absolute end.
237  sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s];
238  }
239  }
240  };
241 
242 template<bool stable, typename RandomAccessIterator, typename Comparator>
243  struct possibly_stable_sort
244  {
245  };
246 
247 template<typename RandomAccessIterator, typename Comparator>
248  struct possibly_stable_sort<true, RandomAccessIterator, Comparator>
249  {
250  void operator()(const RandomAccessIterator& begin,
251  const RandomAccessIterator& end, Comparator& comp) const
252  {
253  __gnu_sequential::stable_sort(begin, end, comp);
254  }
255  };
256 
257 template<typename RandomAccessIterator, typename Comparator>
258  struct possibly_stable_sort<false, RandomAccessIterator, Comparator>
259  {
260  void operator()(const RandomAccessIterator begin,
261  const RandomAccessIterator end, Comparator& comp) const
262  {
263  __gnu_sequential::sort(begin, end, comp);
264  }
265  };
266 
267 template<bool stable, typename SeqRandomAccessIterator,
268  typename RandomAccessIterator, typename Comparator,
269  typename DiffType>
270  struct possibly_stable_multiway_merge
271  {
272  };
273 
274 template<typename SeqRandomAccessIterator, typename RandomAccessIterator,
275  typename Comparator, typename DiffType>
276  struct possibly_stable_multiway_merge
277  <true, SeqRandomAccessIterator, RandomAccessIterator, Comparator,
278  DiffType>
279  {
280  void operator()(const SeqRandomAccessIterator& seqs_begin,
281  const SeqRandomAccessIterator& seqs_end,
282  const RandomAccessIterator& target,
283  Comparator& comp,
284  DiffType length_am) const
285  {
286  stable_multiway_merge(seqs_begin, seqs_end, target, length_am, comp,
287  sequential_tag());
288  }
289  };
290 
291 template<typename SeqRandomAccessIterator, typename RandomAccessIterator,
292  typename Comparator, typename DiffType>
293  struct possibly_stable_multiway_merge
294  <false, SeqRandomAccessIterator, RandomAccessIterator, Comparator,
295  DiffType>
296  {
297  void operator()(const SeqRandomAccessIterator& seqs_begin,
298  const SeqRandomAccessIterator& seqs_end,
299  const RandomAccessIterator& target,
300  Comparator& comp,
301  DiffType length_am) const
302  {
303  multiway_merge(seqs_begin, seqs_end, target, length_am, comp,
304  sequential_tag());
305  }
306  };
307 
308 /** @brief PMWMS code executed by each thread.
309  * @param sd Pointer to algorithm data.
310  * @param comp Comparator.
311  */
312 template<bool stable, bool exact, typename RandomAccessIterator,
313  typename Comparator>
314  void
316  Comparator& comp)
317  {
318  typedef std::iterator_traits<RandomAccessIterator> traits_type;
319  typedef typename traits_type::value_type value_type;
320  typedef typename traits_type::difference_type difference_type;
321 
322  thread_index_t iam = omp_get_thread_num();
323 
324  // Length of this thread's chunk, before merging.
325  difference_type length_local = sd->starts[iam + 1] - sd->starts[iam];
326 
327  // Sort in temporary storage, leave space for sentinel.
328 
329  typedef value_type* SortingPlacesIterator;
330 
331  sd->temporary[iam] =
332  static_cast<value_type*>(
333  ::operator new(sizeof(value_type) * (length_local + 1)));
334 
335  // Copy there.
336  std::uninitialized_copy(sd->source + sd->starts[iam],
337  sd->source + sd->starts[iam] + length_local,
338  sd->temporary[iam]);
339 
340  possibly_stable_sort<stable, SortingPlacesIterator, Comparator>()
341  (sd->temporary[iam], sd->temporary[iam] + length_local, comp);
342 
343  // Invariant: locally sorted subsequence in sd->temporary[iam],
344  // sd->temporary[iam] + length_local.
345 
346  // No barrier here: Synchronization is done by the splitting routine.
347 
348  difference_type num_samples =
351  <exact, RandomAccessIterator, Comparator, SortingPlacesIterator>()
352  (iam, sd, comp, num_samples);
353 
354  // Offset from target begin, length after merging.
355  difference_type offset = 0, length_am = 0;
356  for (thread_index_t s = 0; s < sd->num_threads; s++)
357  {
358  length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin;
359  offset += sd->pieces[iam][s].begin;
360  }
361 
362  typedef std::vector<
364  seq_vector_type;
365  seq_vector_type seqs(sd->num_threads);
366 
367  for (int s = 0; s < sd->num_threads; ++s)
368  {
369  seqs[s] =
370  std::make_pair(sd->temporary[s] + sd->pieces[iam][s].begin,
371  sd->temporary[s] + sd->pieces[iam][s].end);
372  }
373 
374  possibly_stable_multiway_merge<
375  stable,
376  typename seq_vector_type::iterator,
377  RandomAccessIterator,
378  Comparator, difference_type>()
379  (seqs.begin(), seqs.end(),
380  sd->source + offset, comp,
381  length_am);
382 
383 # pragma omp barrier
384 
385  ::operator delete(sd->temporary[iam]);
386  }
387 
388 /** @brief PMWMS main call.
389  * @param begin Begin iterator of sequence.
390  * @param end End iterator of sequence.
391  * @param comp Comparator.
392  * @param n Length of sequence.
393  * @param num_threads Number of threads to use.
394  */
395 template<bool stable, bool exact, typename RandomAccessIterator,
396  typename Comparator>
397  void
398  parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end,
399  Comparator comp,
400  thread_index_t num_threads)
401  {
402  _GLIBCXX_CALL(end - begin)
403 
404  typedef std::iterator_traits<RandomAccessIterator> traits_type;
405  typedef typename traits_type::value_type value_type;
406  typedef typename traits_type::difference_type difference_type;
407 
408  difference_type n = end - begin;
409 
410  if (n <= 1)
411  return;
412 
413  // at least one element per thread
414  if (num_threads > n)
415  num_threads = static_cast<thread_index_t>(n);
416 
417  // shared variables
419  difference_type* starts;
420 
421 # pragma omp parallel num_threads(num_threads)
422  {
423  num_threads = omp_get_num_threads(); //no more threads than requested
424 
425 # pragma omp single
426  {
427  sd.num_threads = num_threads;
428  sd.source = begin;
429 
430  sd.temporary = new value_type*[num_threads];
431 
432  if (!exact)
433  {
434  difference_type size =
435  (_Settings::get().sort_mwms_oversampling * num_threads - 1)
436  * num_threads;
437  sd.samples = static_cast<value_type*>(
438  ::operator new(size * sizeof(value_type)));
439  }
440  else
441  sd.samples = NULL;
442 
443  sd.offsets = new difference_type[num_threads - 1];
444  sd.pieces = new std::vector<Piece<difference_type> >[num_threads];
445  for (int s = 0; s < num_threads; ++s)
446  sd.pieces[s].resize(num_threads);
447  starts = sd.starts = new difference_type[num_threads + 1];
448 
449  difference_type chunk_length = n / num_threads;
450  difference_type split = n % num_threads;
451  difference_type pos = 0;
452  for (int i = 0; i < num_threads; ++i)
453  {
454  starts[i] = pos;
455  pos += (i < split) ? (chunk_length + 1) : chunk_length;
456  }
457  starts[num_threads] = pos;
458  } //single
459 
460  // Now sort in parallel.
461  parallel_sort_mwms_pu<stable, exact>(&sd, comp);
462  } //parallel
463 
464  delete[] starts;
465  delete[] sd.temporary;
466 
467  if (!exact)
468  ::operator delete(sd.samples);
469 
470  delete[] sd.offsets;
471  delete[] sd.pieces;
472  }
473 } //namespace __gnu_parallel
474 
475 #endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */
void multiseq_partition(RanSeqs begin_seqs, RanSeqs end_seqs, RankType rank, RankIterator begin_offsets, Comparator comp=std::less< typename std::iterator_traits< typename std::iterator_traits< RanSeqs >::value_type::first_type >::value_type >())
Splits several sorted sequences at a certain global rank, resulting in a splitting point for each seq...
static const _Settings & get()
Get the global settings.
pair holds two objects of arbitrary type.
Definition: stl_pair.h:67
iterator begin()
Definition: stl_vector.h:425
RandomAccessIteratorOut multiway_merge(RandomAccessIteratorPairIterator seqs_begin, RandomAccessIteratorPairIterator seqs_end, RandomAccessIteratorOut target, _DifferenceTp length, Comparator comp, __gnu_parallel::sequential_tag)
Multiway Merge Frontend.
difference_type end
End of subsequence.
void determine_samples(PMWMSSortingData< RandomAccessIterator > *sd, _DifferenceTp num_samples)
Select samples from a sequence.
Subsequence description.
thread_index_t num_threads
Number of threads involved.
uint16 thread_index_t
Unsigned integer to index a thread number. The maximum thread number (for each processor) must fit in...
Definition: types.h:141
unsigned int sort_mwms_oversampling
Oversampling factor for parallel std::sort (MWMS).
Definition: settings.h:229
A standard container which offers fixed time access to individual elements in any order...
Definition: stl_vector.h:170
RandomAccessIterator source
Input begin.
Implementation of sequential and parallel multiway merge.
Data accessed by all threads.
_ForwardIterator uninitialized_copy(_InputIterator __first, _InputIterator __last, _ForwardIterator __result)
Copies the range [first,last) into result.
End-user include file. Provides advanced settings and tuning options. This file is a GNU parallel ext...
difference_type * offsets
Offsets to add to the found positions.
value_type ** temporary
Storage in which to sort.
void parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, thread_index_t num_threads)
PMWMS main call.
OutputIterator equally_split(difference_type n, thread_index_t num_threads, OutputIterator s)
Function to split a sequence into parts of almost equal size.
Definition: equally_split.h:48
difference_type * starts
Start indices, per thread.
iterator end()
Definition: stl_vector.h:443
Includes the original header files concerned with iterators except for stream iterators. This file is a GNU parallel extension to the Standard C++ Library.
difference_type begin
Begin of subsequence.
void parallel_sort_mwms_pu(PMWMSSortingData< RandomAccessIterator > *sd, Comparator &comp)
PMWMS code executed by each thread.
#define _GLIBCXX_CALL(n)
Macro to produce log message when entering a function.
std::vector< Piece< difference_type > > * pieces
Pieces of data to merge [thread][sequence].