Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
parallel_scan.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2005-2020 Intel Corporation
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 */
16 
17 #ifndef __TBB_parallel_scan_H
18 #define __TBB_parallel_scan_H
19 
20 #define __TBB_parallel_scan_H_include_area
22 
23 #include "task.h"
24 #include "aligned_space.h"
25 #include <new>
26 #include "partitioner.h"
27 
28 namespace tbb {
29 
31 
32 struct pre_scan_tag {
33  static bool is_final_scan() {return false;}
34  operator bool() {return is_final_scan();}
35 };
36 
38 
40  static bool is_final_scan() {return true;}
41  operator bool() {return is_final_scan();}
42 };
43 
45 namespace internal {
46 
48 
49  template<typename Range, typename Body>
50  class final_sum: public task {
51  public:
52  Body my_body;
53  private:
54  aligned_space<Range> my_range;
57  public:
58  final_sum( Body& body_ ) :
59  my_body(body_,split())
60  {
62  }
64  my_range.begin()->~Range();
65  }
66  void finish_construction( const Range& range_, Body* stuff_last_ ) {
67  new( my_range.begin() ) Range(range_);
68  my_stuff_last = stuff_last_;
69  }
70  private:
72  my_body( *my_range.begin(), final_scan_tag() );
73  if( my_stuff_last )
74  my_stuff_last->assign(my_body);
75  return NULL;
76  }
77  };
78 
80 
81  template<typename Range, typename Body>
82  class sum_node: public task {
84  public:
88  private:
93  Range my_range;
94  sum_node( const Range range_, bool left_is_final_ ) :
95  my_stuff_last(NULL),
96  my_left_sum(NULL),
97  my_left(NULL),
98  my_right(NULL),
99  my_left_is_final(left_is_final_),
100  my_range(range_)
101  {
102  // Poison fields that will be set by second pass.
105  }
106  task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) {
107  if( !n ) {
108  f.recycle_as_child_of( *this );
109  f.finish_construction( range_, stuff_last_ );
110  return &f;
111  } else {
112  n->my_body = &f;
113  n->my_incoming = incoming_;
114  n->my_stuff_last = stuff_last_;
115  return n;
116  }
117  }
119  if( my_body ) {
120  if( my_incoming )
121  my_left_sum->my_body.reverse_join( my_incoming->my_body );
123  sum_node& c = *this;
126  set_ref_count( (a!=NULL)+(b!=NULL) );
127  my_body = NULL;
128  if( a ) spawn(*b);
129  else a = b;
130  return a;
131  } else {
132  return NULL;
133  }
134  }
135  template<typename Range_,typename Body_,typename Partitioner_>
136  friend class start_scan;
137 
138  template<typename Range_,typename Body_>
139  friend class finish_scan;
140  };
141 
143 
144  template<typename Range, typename Body>
145  class finish_scan: public task {
150  public:
153 
155  __TBB_ASSERT( my_result.ref_count()==(my_result.my_left!=NULL)+(my_result.my_right!=NULL), NULL );
156  if( my_result.my_left )
157  my_result.my_left_is_final = false;
158  if( my_right_zombie && my_sum )
159  ((*my_sum)->my_body).reverse_join(my_result.my_left_sum->my_body);
160  __TBB_ASSERT( !my_return_slot, NULL );
163  } else {
164  destroy( my_result );
165  }
166  if( my_right_zombie && !my_sum && !my_result.my_right ) {
167  destroy(*my_right_zombie);
168  my_right_zombie = NULL;
169  }
170  return NULL;
171  }
172 
173  finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) :
174  my_sum(sum_),
175  my_return_slot(return_slot_),
176  my_right_zombie(NULL),
177  my_result(result_)
178  {
179  __TBB_ASSERT( !my_return_slot, NULL );
180  }
181  };
182 
184 
185  template<typename Range, typename Body, typename Partitioner=simple_partitioner>
186  class start_scan: public task {
197  Range my_range;
198  typename Partitioner::partition_type my_partition;
200  public:
201  start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) :
202  my_body(parent_.my_body),
203  my_sum(parent_.my_sum),
204  my_return_slot(&return_slot_),
205  my_parent_sum(parent_sum_),
206  my_is_final(parent_.my_is_final),
207  my_is_right_child(false),
208  my_range(parent_.my_range,split()),
209  my_partition(parent_.my_partition,split())
210  {
211  __TBB_ASSERT( !*my_return_slot, NULL );
212  }
213 
214  start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) :
215  my_body(&body_),
216  my_sum(NULL),
217  my_return_slot(&return_slot_),
218  my_parent_sum(NULL),
219  my_is_final(true),
220  my_is_right_child(false),
221  my_range(range_),
222  my_partition(partitioner_)
223  {
224  __TBB_ASSERT( !*my_return_slot, NULL );
225  }
226 
227  static void run( const Range& range_, Body& body_, const Partitioner& partitioner_ ) {
228  if( !range_.empty() ) {
229  typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
230  internal::sum_node<Range,Body>* root = NULL;
231  final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body_ );
232  start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
233  /*my_return_slot=*/root,
234  range_,
235  *temp_body,
236  partitioner_ );
237  temp_body->my_body.reverse_join(body_);
238  task::spawn_root_and_wait( pass1 );
239  if( root ) {
240  root->my_body = temp_body;
241  root->my_incoming = NULL;
242  root->my_stuff_last = &body_;
243  task::spawn_root_and_wait( *root );
244  } else {
245  body_.assign(temp_body->my_body);
246  temp_body->finish_construction( range_, NULL );
247  temp_body->destroy(*temp_body);
248  }
249  }
250  }
251  };
252 
253  template<typename Range, typename Body, typename Partitioner>
255  typedef internal::finish_scan<Range,Body> finish_pass1_type;
256  finish_pass1_type* p = my_parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
257  // Inspecting p->result.left_sum would ordinarily be a race condition.
258  // But we inspect it only if we are not a stolen task, in which case we
259  // know that task assigning to p->result.left_sum has completed.
260  bool treat_as_stolen = my_is_right_child && (is_stolen_task() || my_body!=p->my_result.my_left_sum);
261  if( treat_as_stolen ) {
262  // Invocation is for right child that has been really stolen or needs to be virtually stolen
263  p->my_right_zombie = my_body = new( allocate_root() ) final_sum_type(my_body->my_body);
264  my_is_final = false;
265  }
266  task* next_task = NULL;
267  if( (my_is_right_child && !treat_as_stolen) || !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
268  if( my_is_final )
269  (my_body->my_body)( my_range, final_scan_tag() );
270  else if( my_sum )
271  (my_body->my_body)( my_range, pre_scan_tag() );
272  if( my_sum )
273  *my_sum = my_body;
274  __TBB_ASSERT( !*my_return_slot, NULL );
275  } else {
276  sum_node_type* result;
277  if( my_parent_sum )
278  result = new(allocate_additional_child_of(*my_parent_sum)) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
279  else
280  result = new(task::allocate_root()) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
281  finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*my_return_slot,my_sum,*result);
282  // Split off right child
283  start_scan& b = *new( c.allocate_child() ) start_scan( /*my_return_slot=*/result->my_right, *this, result );
284  b.my_is_right_child = true;
285  // Left child is recycling of *this. Must recycle this before spawning b,
286  // otherwise b might complete and decrement c.ref_count() to zero, which
287  // would cause c.execute() to run prematurely.
288  recycle_as_child_of(c);
289  c.set_ref_count(2);
290  c.spawn(b);
291  my_sum = &result->my_left_sum;
292  my_return_slot = &result->my_left;
293  my_is_right_child = false;
294  next_task = this;
295  my_parent_sum = result;
296  __TBB_ASSERT( !*my_return_slot, NULL );
297  }
298  return next_task;
299  }
300 
301  template<typename Range, typename Value, typename Scan, typename ReverseJoin>
303  Value my_sum;
304  const Value& identity_element;
305  const Scan& my_scan;
306  const ReverseJoin& my_reverse_join;
307  public:
308  lambda_scan_body( const Value& identity, const Scan& scan, const ReverseJoin& rev_join)
309  : my_sum(identity)
310  , identity_element(identity)
311  , my_scan(scan)
312  , my_reverse_join(rev_join) {}
313 
317  , my_scan(b.my_scan)
319 
320  template<typename Tag>
321  void operator()( const Range& r, Tag tag ) {
322  my_sum = my_scan(r, my_sum, tag);
323  }
324 
327  }
328 
330  my_sum = b.my_sum;
331  }
332 
333  Value result() const {
334  return my_sum;
335  }
336  };
337 } // namespace internal
339 
340 // Requirements on Range concept are documented in blocked_range.h
341 
359 
361 
362 template<typename Range, typename Body>
363 void parallel_scan( const Range& range, Body& body ) {
365 }
366 
368 
369 template<typename Range, typename Body>
370 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
372 }
373 
375 
376 template<typename Range, typename Body>
377 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
379 }
380 
382 
383 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
384 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join ) {
385  internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
387  return body.result();
388 }
389 
391 
392 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
393 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const simple_partitioner& partitioner ) {
394  internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
395  tbb::parallel_scan(range,body,partitioner);
396  return body.result();
397 }
398 
400 
401 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
402 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const auto_partitioner& partitioner ) {
403  internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
404  tbb::parallel_scan(range,body,partitioner);
405  return body.result();
406 }
407 
409 
410 } // namespace tbb
411 
413 #undef __TBB_parallel_scan_H_include_area
414 
415 #endif /* __TBB_parallel_scan_H */
416 
__TBB_DEFAULT_PARTITIONER
#define __TBB_DEFAULT_PARTITIONER
Definition: tbb_config.h:596
tbb::internal::start_scan::run
static void run(const Range &range_, Body &body_, const Partitioner &partitioner_)
Definition: parallel_scan.h:227
tbb::pre_scan_tag
Used to indicate that the initial scan is being performed.
Definition: parallel_scan.h:32
tbb::task::recycle_as_continuation
void recycle_as_continuation()
Change this to be a continuation of its former self.
Definition: task.h:711
internal
Definition: _flow_graph_async_msg_impl.h:24
tbb::internal::sum_node::my_range
Range my_range
Definition: parallel_scan.h:93
tbb::internal::sum_node::sum_node
sum_node(const Range range_, bool left_is_final_)
Definition: parallel_scan.h:94
__TBB_ASSERT
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165
tbb::task::ref_count
int ref_count() const
The internal reference count.
Definition: task.h:915
tbb::internal::finish_scan::my_sum
final_sum_type **const my_sum
Definition: parallel_scan.h:148
tbb::internal::no_assign
Base class for types that should not be assigned.
Definition: tbb_stddef.h:322
tbb::internal::finish_scan::sum_node_type
sum_node< Range, Body > sum_node_type
Definition: parallel_scan.h:146
tbb::internal::sum_node::create_child
task * create_child(const Range &range_, final_sum_type &f, sum_node *n, final_sum_type *incoming_, Body *stuff_last_)
Definition: parallel_scan.h:106
tbb::parallel_scan
void parallel_scan(const Range &range, Body &body)
Parallel prefix with default partitioner.
Definition: parallel_scan.h:363
tbb::internal::poison_pointer
void poison_pointer(T *__TBB_atomic &)
Definition: tbb_stddef.h:305
tbb::internal::lambda_scan_body::my_sum
Value my_sum
Definition: parallel_scan.h:303
tbb::internal::sum_node::my_incoming
final_sum_type * my_incoming
Definition: parallel_scan.h:85
tbb
The graph class.
Definition: serial/tbb/parallel_for.h:46
tbb::task
Base class for user-defined tasks.
Definition: task.h:615
tbb::internal::lambda_scan_body::my_scan
const Scan & my_scan
Definition: parallel_scan.h:305
tbb::auto_partitioner
An auto partitioner.
Definition: partitioner.h:613
tbb::internal::sum_node::execute
task * execute() __TBB_override
Should be overridden by derived classes.
Definition: parallel_scan.h:118
tbb::task::allocate_root
static internal::allocate_root_proxy allocate_root()
Returns proxy for overloaded new that allocates a root task.
Definition: task.h:663
tbb::internal::start_scan::my_range
Range my_range
Definition: parallel_scan.h:197
tbb::internal::finish_scan
Combine partial results.
Definition: parallel_scan.h:145
tbb::internal::sum_node::my_left_is_final
bool my_left_is_final
Definition: parallel_scan.h:92
tbb::pre_scan_tag::is_final_scan
static bool is_final_scan()
Definition: parallel_scan.h:33
partitioner.h
tbb::internal::lambda_scan_body::my_reverse_join
const ReverseJoin & my_reverse_join
Definition: parallel_scan.h:306
tbb::internal::final_sum
Performs final scan for a leaf.
Definition: parallel_scan.h:50
tbb::internal::start_scan::my_is_right_child
bool my_is_right_child
Definition: parallel_scan.h:196
tbb::split
Dummy type that distinguishes splitting constructor from copy constructor.
Definition: tbb_stddef.h:416
tbb::internal::final_sum::my_body
Body my_body
Definition: parallel_scan.h:52
aligned_space.h
tbb::task::set_ref_count
void set_ref_count(int count)
Set reference count.
Definition: task.h:761
tbb::internal::sum_node::my_left
sum_node * my_left
Definition: parallel_scan.h:90
tbb::internal::start_scan::start_scan
start_scan(sum_node_type *&return_slot_, const Range &range_, final_sum_type &body_, const Partitioner &partitioner_)
Definition: parallel_scan.h:214
tbb::internal::lambda_scan_body::result
Value result() const
Definition: parallel_scan.h:333
tbb::internal::finish_scan::execute
task * execute() __TBB_override
Should be overridden by derived classes.
Definition: parallel_scan.h:154
task.h
tbb::internal::lambda_scan_body::lambda_scan_body
lambda_scan_body(lambda_scan_body &b, split)
Definition: parallel_scan.h:314
tbb::internal::start_scan::my_parent_sum
sum_node_type * my_parent_sum
Definition: parallel_scan.h:194
tbb::internal::lambda_scan_body::reverse_join
void reverse_join(lambda_scan_body &a)
Definition: parallel_scan.h:325
tbb::internal::lambda_scan_body
Definition: parallel_scan.h:302
tbb::internal::sum_node::my_body
final_sum_type * my_body
Definition: parallel_scan.h:86
tbb::internal::start_scan::final_sum_type
final_sum< Range, Body > final_sum_type
Definition: parallel_scan.h:188
tbb::internal::finish_scan::final_sum_type
final_sum< Range, Body > final_sum_type
Definition: parallel_scan.h:147
tbb::internal::start_scan::sum_node_type
sum_node< Range, Body > sum_node_type
Definition: parallel_scan.h:187
tbb::internal::finish_scan::my_right_zombie
final_sum_type * my_right_zombie
Definition: parallel_scan.h:151
tbb::internal::lambda_scan_body::identity_element
const Value & identity_element
Definition: parallel_scan.h:304
tbb::internal::final_sum::~final_sum
~final_sum()
Definition: parallel_scan.h:63
tbb::internal::start_scan::my_sum
final_sum_type ** my_sum
Definition: parallel_scan.h:191
tbb::internal::start_scan::my_return_slot
sum_node_type ** my_return_slot
Definition: parallel_scan.h:192
parent
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id parent
Definition: ittnotify_static.h:176
tbb::internal::final_sum::finish_construction
void finish_construction(const Range &range_, Body *stuff_last_)
Definition: parallel_scan.h:66
_warning_suppress_disable_notice.h
tbb::internal::finish_scan::finish_scan
finish_scan(sum_node_type *&return_slot_, final_sum_type **sum_, sum_node_type &result_)
Definition: parallel_scan.h:173
tbb::internal::start_scan
Initial task to split the work.
Definition: parallel_scan.h:186
__TBB_override
#define __TBB_override
Definition: tbb_stddef.h:240
tbb::simple_partitioner
A simple partitioner.
Definition: partitioner.h:586
tbb::internal::final_sum::execute
task * execute() __TBB_override
Should be overridden by derived classes.
Definition: parallel_scan.h:71
tbb::internal::sum_node
Split work to be done in the scan.
Definition: parallel_scan.h:82
tbb::internal::sum_node::my_stuff_last
Body * my_stuff_last
Definition: parallel_scan.h:87
tbb::task::spawn_root_and_wait
static void spawn_root_and_wait(task &root)
Spawn task allocated by allocate_root, wait for it to complete, and deallocate it.
Definition: task.h:808
tbb::internal::lambda_scan_body::lambda_scan_body
lambda_scan_body(const Value &identity, const Scan &scan, const ReverseJoin &rev_join)
Definition: parallel_scan.h:308
tbb::internal::finish_scan::my_result
sum_node_type & my_result
Definition: parallel_scan.h:152
tbb::internal::start_scan::my_body
final_sum_type * my_body
Definition: parallel_scan.h:189
tbb::internal::final_sum::my_stuff_last
Body * my_stuff_last
Where to put result of last subrange, or NULL if not last subrange.
Definition: parallel_scan.h:56
tbb::internal::lambda_scan_body::operator()
void operator()(const Range &r, Tag tag)
Definition: parallel_scan.h:321
tbb::internal::start_scan::my_is_final
bool my_is_final
Definition: parallel_scan.h:195
_warning_suppress_enable_notice.h
p
void const char const char int ITT_FORMAT __itt_group_sync p
Definition: ittnotify_static.h:91
tbb::internal::sum_node::my_right
sum_node * my_right
Definition: parallel_scan.h:91
tbb::final_scan_tag::is_final_scan
static bool is_final_scan()
Definition: parallel_scan.h:40
tbb::internal::start_scan::execute
task * execute() __TBB_override
Should be overridden by derived classes.
Definition: parallel_scan.h:254
tbb::internal::finish_scan::my_return_slot
sum_node_type *& my_return_slot
Definition: parallel_scan.h:149
tbb::internal::sum_node::my_left_sum
final_sum_type * my_left_sum
Definition: parallel_scan.h:89
tbb::internal::final_sum::final_sum
final_sum(Body &body_)
Definition: parallel_scan.h:58
tbb::final_scan_tag
Used to indicate that the final scan is being performed.
Definition: parallel_scan.h:39
tbb::internal::lambda_scan_body::assign
void assign(lambda_scan_body &b)
Definition: parallel_scan.h:329
tbb::internal::final_sum::my_range
aligned_space< Range > my_range
Definition: parallel_scan.h:54
tbb::internal::sum_node::final_sum_type
final_sum< Range, Body > final_sum_type
Definition: parallel_scan.h:83
tbb::task::recycle_as_child_of
void recycle_as_child_of(task &new_parent)
Change this to be a child of new_parent.
Definition: task.h:725
tbb::internal::start_scan::my_partition
Partitioner::partition_type my_partition
Definition: parallel_scan.h:198

Copyright © 2005-2020 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.