GetFEM  5.4.3
getfem_omp.h
Go to the documentation of this file.
1 /* -*- c++ -*- (enables emacs c++ mode) */
2 /*===========================================================================
3 
4  Copyright (C) 2012-2020 Andriy Andreykiv
5 
6  This file is a part of GetFEM
7 
8  GetFEM is free software; you can redistribute it and/or modify it
9  under the terms of the GNU Lesser General Public License as published
10  by the Free Software Foundation; either version 3 of the License, or
11  (at your option) any later version along with the GCC Runtime Library
12  Exception either version 3.1 or (at your option) any later version.
13  This program is distributed in the hope that it will be useful, but
14  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16  License and GCC Runtime Library Exception for more details.
17  You should have received a copy of the GNU Lesser General Public License
18  along with this program; if not, write to the Free Software Foundation,
19  Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
20 
21  As a special exception, you may use this file as it is a part of a free
22  software library without restriction. Specifically, if other files
23  instantiate templates or use macros or inline functions from this file,
24  or you compile this file and link it with other files to produce an
25  executable, this file does not by itself cause the resulting executable
26  to be covered by the GNU Lesser General Public License. This exception
27  does not however invalidate any other reasons why the executable file
28  might be covered by the GNU Lesser General Public License.
29 
30 ===========================================================================*/
31 
32 /**@file getfem_omp.h
33 @author Andriy Andreykiv <andriy.andreykiv@gmail.com>
34 @date May 14th, 2013.
35 @brief Tools for multithreaded, OpenMP and Boost based parallelization.
36 
37 This is the kernel of getfem.
38 */
39 #pragma once
40 
#include <atomic>
#include <functional>
#include <memory>
#include <set>
#include <type_traits>
#include <utility>
#include <vector>
45 
46 #include "bgeot_config.h"
47 
48 #ifdef GETFEM_HAS_OPENMP
49  #include <mutex>
50 #endif
51 
52 namespace getfem
53 {
54  using bgeot::size_type;
55 
56 #ifdef GETFEM_HAS_OPENMP
/**Parallel execution of a callable. Client code is expected to go through
   the GETFEM_OMP_PARALLEL / GETFEM_OMP_PARALLEL_NO_PARTITION macros, which
   pass true / false respectively for iterate_over_partitions.*/
void parallel_execution(std::function<void()> lambda,
                        bool iterate_over_partitions);
59 
/**Thread lock protecting multi-threaded accesses to asserts, traces and
   warnings. Every instance refers to the same global recursive mutex; the
   lock itself is held through plock (set up by the constructor, which is
   defined outside this header).*/
class omp_guard
{
  using lock_type = std::lock_guard<std::recursive_mutex>;

  std::unique_ptr<lock_type> plock;
  static std::recursive_mutex mutex;

public:
  omp_guard();
};
71 
/**Like std::lock_guard, but copyable: the underlying std::lock_guard is
   owned through a shared pointer, so copies share one and the same lock.*/
class local_guard
{
  using lock_type = std::lock_guard<std::recursive_mutex>;

  std::recursive_mutex& mutex;
  std::shared_ptr<lock_type> plock;

public:
  local_guard(std::recursive_mutex&);
};
82 
83  //produces scoped lock on the
84  //mutex, held in this class
85  class lock_factory
86  {
87  public:
88 
89  //get a lock object with RAII acquire/release semantics
90  //on the mutex from this factory
91  local_guard get_lock() const;
92  private:
93  mutable std::recursive_mutex mutex;
94  };
95 
// Declares an omp_guard named g in the current scope. The GMM_NOPERATION_
// statement only mentions g (presumably to keep unused-variable warnings
// quiet - to be confirmed against the definition of GMM_NOPERATION_).
#define GLOBAL_OMP_GUARD \
  getfem::omp_guard g;   \
  GMM_NOPERATION_(abs(&(g) != &(g)));
97 
98 #else
99 
// Build without OpenMP: the guards are empty placeholder types, get_lock()
// returns such a placeholder and GLOBAL_OMP_GUARD expands to nothing.
class omp_guard {};
class local_guard {};

struct lock_factory
{
  inline local_guard get_lock() const { return local_guard{}; }
};

#define GLOBAL_OMP_GUARD
107 
108 #endif
109 
110  /**set maximum number of OpenMP threads*/
111  void set_num_threads(int n);
112 
113  /**is the program running in the parallel section*/
115 
116  /**is the program running on a single thread*/
117  bool not_multithreaded();
118 
119  /**Maximum number of threads that can run concurrently*/
121 
122  /**Thread policy, where partitioning is based on true threads*/
124  static size_type this_thread();
125  static size_type num_threads();
126  };
127 
128  /** Thread policy, regulated by partition_master
129  (can be true thread- or partition-based)*/
131  static size_type this_thread();
132  static size_type num_threads();
133  };
134 
135  //implementation classes for omp_distribute
136  namespace detail{
137 
// Tags used to pick the matching omp_distribute_impl specialization
struct general_tag {};
struct vector_tag {};
struct bool_tag {};

/**Associates a type with its dispatch tag: std::vector<T> maps to
   vector_tag, bool maps to bool_tag, anything else to general_tag.*/
template <typename T>
struct distribute_traits { typedef general_tag type; };

template <typename T>
struct distribute_traits<std::vector<T>> { typedef vector_tag type; };

template <>
struct distribute_traits<bool> { typedef bool_tag type; };
159 
160  template<typename T, typename thread_policy, typename tag>
161  class omp_distribute_impl;
162 
163  template<class V>
164  inline auto safe_component(V &v, size_type i) -> decltype(v[i]){
165  GMM_ASSERT2(i < v.size(),
166  i << "-th partition is not available. "
167  "Probably on_thread_update "
168  "should have been called first");
169  return v[i];
170  }
171 
172  template <typename T, typename thread_policy>
173  class omp_distribute_impl<T, thread_policy, general_tag> {
174  private:
175  std::vector<T> thread_values;
176  friend struct all_values_proxy;
177 
178  struct all_values_proxy{
179  omp_distribute_impl& distro;
180  all_values_proxy(omp_distribute_impl& d)
181  : distro(d)
182  {}
183 
184  void operator = (const T& x){
185  for(auto it = distro.thread_values.begin();
186  it != distro.thread_values.end(); ++it){
187  *it=x;
188  }
189  }
190  };
191 
192  public:
193 
194  template <class... args>
195  explicit omp_distribute_impl(args&&... value){
196  thread_values.reserve(num_threads());
197  for (size_type i = 0; i != num_threads(); ++i){
198  thread_values.emplace_back(std::forward<args>(value)...);
199  }
200  }
201 
202  operator T& (){
203  return operator()(this_thread());
204  }
205 
206  operator const T& () const {
207  return operator()(this_thread());
208  }
209 
210  T& thrd_cast(){
211  return operator()(this_thread());
212  }
213 
214  const T& thrd_cast() const {
215  return operator()(this_thread());
216  }
217 
218  T& operator()(size_type i) {
219  return safe_component(thread_values, i);
220  }
221 
222  const T& operator()(size_type i) const {
223  return safe_component(thread_values, i);
224  }
225 
226  void on_thread_update() {
227  if (thread_values.size() == num_threads()) return;
228  GLOBAL_OMP_GUARD
229  if (thread_values.size() != num_threads()) {
230  thread_values.resize(num_threads());
231  }
232  }
233 
234  size_type num_threads() const {
235  return thread_policy::num_threads();
236  }
237 
238  size_type this_thread() const {
239  return thread_policy::this_thread();
240  }
241 
242  T& operator = (const T& x){
244  thrd_cast() = x;
245  }
246  else all_threads() = x;
247 
248  return *this;
249  }
250 
251  all_values_proxy all_threads(){
252  return all_values_proxy(*this);
253  }
254  };
255 
256  /**Specialization for std::vector<T>, adds vector indexing operator*/
257  template <typename T,
258  typename thread_policy>
259  class omp_distribute_impl<std::vector<T>, thread_policy, vector_tag>
260  : public omp_distribute_impl<std::vector<T>, thread_policy, general_tag>
261  {
262  public:
263  using base = omp_distribute_impl<std::vector<T>, thread_policy, general_tag>;
264 
265  template <class... args>
266  explicit omp_distribute_impl(args&&... value)
267  : base(std::forward<args>(value)...)
268  {}
269 
270  T& operator[](size_type i){
271  return base::thrd_cast()[i];
272  }
273  const T& operator[](size_type i) const{
274  return base::thrd_cast()[i];
275  }
276 
277  std::vector<T>& operator = (const std::vector<T>& x){
278  return base::operator=(x);
279  }
280  };
281 
282  /**Specialization for bool, to circumvent the shortcomings
283  of standards library's specialization for std::vector<bool>,
284  we use std::vector<int> instead*/
285  template <typename thread_policy>
286  class omp_distribute_impl<bool, thread_policy, bool_tag>
287  : public omp_distribute_impl<int, thread_policy, general_tag>
288  {
289  public:
290  using base = omp_distribute_impl<int, thread_policy, general_tag>;
291 
292  template <class... Args>
293  explicit omp_distribute_impl(Args&&... value)
294  : base(std::forward<Args>(value)...)
295  {}
296 
297  operator bool () const {
298  return base::operator const int&();
299  }
300 
301  bool operator = (const bool& x){
302  return base::operator=(x);
303  }
304  };
305 
306  } /* end of namespace detail. */
307 
308  template<typename T, typename thread_policy>
309  using od_base = typename detail::omp_distribute_impl
310  <T, thread_policy, typename detail::distribute_traits<T>::type>;
311 
312  /**
313  Use this template class for any object you want to
314  distribute to open_MP threads. The creation of this
315  object should happen in serial, while accessing the individual
316  thread local instances will take place in parallel.
317  Use thread_policy to either distribute the objects between physical
318  threads or a fixed number of partitions, independent of the number
319  of threads. If you change the default policy, remember to also
320  use this_thread() and num_threads() from the corresponding policy
321  for iterating over the thread-specific components.
322  */
323  template<typename T,
324  typename thread_policy = global_thread_policy>
325  class omp_distribute : public od_base<T, thread_policy>
326  {
327  public:
328  using base = od_base<T, thread_policy>;
329 
330  template <class... args>
331  explicit omp_distribute(args&&... value)
332  : base(std::forward<args>(value)...)
333  {}
334 
335  auto operator = (const T& x) -> decltype(std::declval<base>() = x){
336  return base::operator=(x);
337  }
338  };
339 
/* Storage specifier for function-local variables that have to behave like
   thread local storage, for any type of object including its
   initialization (more general and portable than __declspec(thread)).
   Expands to thread_local in OpenMP builds and to static otherwise.
   Use it in function local context only. */
#if defined(GETFEM_HAS_OPENMP)
  #define THREAD_SAFE_STATIC thread_local
#else
  #define THREAD_SAFE_STATIC static
#endif
349 
350  class partition_master;
351 
352  /**Iterator that runs over partitions on the current
353  thread and sets the global (but thread-specific)
354  partition during incrementation*/
356  {
357  public:
358 
359  partition_iterator operator ++();
360  bool operator==(const partition_iterator&) const;
361  bool operator!=(const partition_iterator&) const;
362  size_type operator*() const;
363 
364  private:
365 
366  friend class partition_master;
367 
368  /**Only partition_master can create one*/
370  std::set<size_type>::const_iterator it);
371 
372  partition_master &master;
373  std::set<size_type>::const_iterator it;
374  };
375 
/**Partitioning mode accepted by partition_master::set_behaviour()*/
enum class thread_behaviour {
  true_threads,
  partition_threads
};
377 
378  /**
379  A singleton that manages partitions on individual threads.
380  */
382  {
383  public:
384 
385  static partition_master &get();
386 
387  /**beginning of the partitions for the current thread*/
389 
390  /**end of the partitions for the current thread*/
392 
393  /**Sets the behaviour for the full program: either partitioning parallel loops
394  according to the number of true threads, specified by the user,
395  or to the number of the fixed partitions equal to the max concurrency of the system.
396  The latter makes the partitioning independent of the number of threads set*/
397  void set_behaviour(thread_behaviour);
398 
399  /**active partition on the thread. If number of threads is equal to the
400  max concurrency of the system, then it's also the index of the actual thread*/
402 
403  /**number of partitions or threads, depending on thread policy*/
405 
406  /**for thread_behaviour::partition_threads set the total number of partitions.
407  This call must be made before all the omp_distribute based classes are created.
408  Otherwise they become invalid*/
410 
411  void check_threads();
412 
413  private:
414 
415  void rewind_partitions();
416 
417  //Parallel execution of a lambda. Please use the macros below
418  friend void parallel_execution(std::function<void(void)> lambda, bool iterate_over_partitions);
419 
420  /**set current partition, which will be also returned in this_thread() call*/
421  void set_current_partition(size_type);
422 
423  friend partition_iterator;
424 
426 
427  void update_partitions();
428 
431  std::atomic<size_type> nb_user_threads;
432  thread_behaviour behaviour = thread_behaviour::partition_threads;
433  std::atomic<bool> partitions_updated{false};
434  size_type nb_partitions;
435  bool partitions_set_by_user = false;
436 
437  static partition_master instance;
438  };
439 
440  class standard_locale;
441  class thread_exception;
442 
443  /**Encapsulates OpenMP-related initialization and de-initialization*/
445  {
446  std::unique_ptr<standard_locale> plocale;
447  std::unique_ptr<thread_exception> pexception;
448 
449  public:
451  void run_lambda(std::function<void(void)> lambda);
453  };
454 
/* pragma_op(arg) issues `#pragma arg` from within a macro expansion.
   The _Pragma operator takes a string literal, so the argument must be
   stringized with #arg: a macro parameter is not substituted inside a
   string literal, hence _Pragma("arg") would always produce the directive
   `#pragma arg`, whatever was passed. The __pragma keyword of the other
   branch takes the tokens directly. */
#ifdef __GNUC__
  #define pragma_op(arg) _Pragma(#arg)
#else
  #define pragma_op(arg) __pragma(arg)
#endif
460 
/**
  Organizes a proper parallel omp section:
  - iteration on thread independent partitions
  - passing exceptions to the master thread
  - thread-safe locale

  GETFEM_OMP_PARALLEL(body)              executes body in parallel
  GETFEM_OMP_PARALLEL_NO_PARTITION(body) executes body in parallel, but does
                                         not iterate over partitions
  GETFEM_OMP_FOR(init, check, increment, body)
                                         executes a for loop in parallel,
                                         not iterating over partitions

  Without OpenMP the macros reduce to the plain statement / for loop. In
  both configurations the macros terminate body themselves, so that a body
  written without trailing semicolon compiles with and without OpenMP.
*/
#ifdef GETFEM_HAS_OPENMP
  #define GETFEM_OMP_PARALLEL(body) getfem::parallel_execution([&](){body;}, true);

  #define GETFEM_OMP_PARALLEL_NO_PARTITION(body) getfem::parallel_execution([&](){body;}, false);

  #define GETFEM_OMP_FOR(init, check, increment, body) {\
    auto boilerplate = getfem::parallel_boilerplate{};  \
    pragma_op(omp parallel for)                         \
    for (init; check; increment){                       \
      boilerplate.run_lambda([&](){body;});             \
    }                                                   \
  }

#else
  #define GETFEM_OMP_PARALLEL(body) body;

  #define GETFEM_OMP_PARALLEL_NO_PARTITION(body) body;

  #define GETFEM_OMP_FOR(init, check, increment, body) \
    for (init; check; increment) {                     \
      body;                                            \
    }

#endif
491 
492 } /* end of namespace getfem. */
defines and typedefs for namespace bgeot
Use this template class for any object you want to distribute to open_MP threads.
Definition: getfem_omp.h:326
Encapsulates open_mp-related initialization and de-initialization.
Definition: getfem_omp.h:445
Iterator that runs over partitions on the current thread and sets the global (but thread-specific) pa...
Definition: getfem_omp.h:356
A singleton that Manages partitions on individual threads.
Definition: getfem_omp.h:382
partition_iterator begin()
beginning of the partitions for the current thread
Definition: getfem_omp.cc:215
size_type get_current_partition() const
active partition on the thread.
Definition: getfem_omp.cc:243
size_type get_nb_partitions() const
number of partitions or threads, depending on thread policy
Definition: getfem_omp.cc:254
partition_iterator end()
end of the partitions for the current thread
Definition: getfem_omp.cc:223
void set_nb_partitions(size_type)
for thread_behaviour::partition_threads set the total number of partitions.
Definition: getfem_omp.cc:198
void set_behaviour(thread_behaviour)
Sets the behaviour for the full program: either partitioning parallel loops according to the number o...
Definition: getfem_omp.cc:227
Identical to gmm::standard_locale, but does not change std::locale in multi-threaded sections of the ...
Definition: getfem_locale.h:50
Allows to re-throw exceptions, generated in OpenMP parallel section.
Definition: getfem_omp.cc:120
size_t size_type
used as the common size type in the library
Definition: bgeot_poly.h:49
GEneric Tool for Finite Element Methods.
bool not_multithreaded()
is the program running on a single thread
Definition: getfem_omp.cc:110
size_type max_concurrency()
Maximum number of threads that can run concurrently.
Definition: getfem_omp.cc:112
bool me_is_multithreaded_now()
is the program running in the parallel section
Definition: getfem_omp.cc:106
void set_num_threads(int n)
set maximum number of OpenMP threads
Definition: getfem_omp.cc:108
Thread policy, regulated by partition_master (can be true thread- or partition-based)
Definition: getfem_omp.h:130
Thread policy, where partitioning is based on true threads.
Definition: getfem_omp.h:123