doc/getfem_reference/getfem__omp_8cc_source.html

 /*===========================================================================


  Copyright (C) 2012-2020 Andriy Andreykiv.


  This file is a part of GetFEM


  GetFEM  is  free software;  you  can  redistribute  it  and/or modify it

  under  the  terms  of the  GNU  Lesser General Public License as published

  by  the  Free Software Foundation;  either version 3 of the License,  or

  (at your option) any later version along with the GCC Runtime Library

  Exception either version 3.1 or (at your option) any later version.

  This program  is  distributed  in  the  hope  that it will be useful,  but

  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY

  or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public

  License and GCC Runtime Library Exception for more details.

  You  should  have received a copy of the GNU Lesser General Public License

  along  with  this program;  if not, write to the Free Software Foundation,

  Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.


 ===========================================================================*/


 #include "getfem/dal_singleton.h"

 #include "getfem/getfem_locale.h"

 #include "getfem/getfem_omp.h"


 #ifdef GETFEM_HAS_OPENMP

   #include <thread>

   #include <omp.h>

 #endif


 using bgeot::scalar_type;


 namespace getfem{


 #ifdef GETFEM_HAS_OPENMP


   std::recursive_mutex omp_guard::mutex;


   omp_guard::omp_guard()

     : plock{me_is_multithreaded_now() ?

        std::make_unique<std::lock_guard<std::recursive_mutex>>(mutex)

       : nullptr}

   {}


   local_guard::local_guard(std::recursive_mutex& m) :

     mutex{m},

     plock{me_is_multithreaded_now() ?

       std::make_shared<std::lock_guard<std::recursive_mutex>>(m)

       : nullptr}

   {}


   local_guard lock_factory::get_lock() const{

     return local_guard{mutex};

   }


   size_type global_thread_policy::this_thread() {

     return partition_master::get().get_current_partition();

   }


   size_type global_thread_policy::num_threads(){

     return partition_master::get().get_nb_partitions();

   }


   size_type true_thread_policy::this_thread() {

     return omp_get_thread_num();

   }


   size_type true_thread_policy::num_threads(){

     return omp_get_max_threads();

   }


   void set_num_threads(int n){

     omp_set_num_threads(n);

     partition_master::get().check_threads();

   }


   bool me_is_multithreaded_now(){

     // serial region

     if(omp_get_num_threads() == 1 && omp_get_level() == 0) return false;

     // parallel region with one thread

     if(omp_get_num_threads() == 1 && omp_get_level() == 1) return true;

     // parallel region with more than one thread

     if(omp_in_parallel() == 1) return true;


     return false;

   }


   bool not_multithreaded(){

     return omp_get_max_threads() == 1;

   }


   size_type max_concurrency() {

     return std::thread::hardware_concurrency();

   }


 #else


   size_type global_thread_policy::this_thread() {return 0;}


   size_type global_thread_policy::num_threads(){return 1;}


   size_type true_thread_policy::this_thread() {return 0;}


   size_type true_thread_policy::num_threads(){return 1;}


   bool me_is_multithreaded_now(){return false;}


   void set_num_threads(int /*n*/){}


   bool not_multithreaded(){return true;}


   size_type max_concurrency() {return 1;}


 #endif


   /** Allows to re-throw exceptions, generated in OpemMP parallel section.

       Collects exceptions from all threads and on destruction re-throws

       the first one, so that

       it can be again caught in the master thread. */

   class thread_exception {

     std::vector<std::exception_ptr> exceptions;


     void captureException(){

       exceptions[true_thread_policy::this_thread()] = std::current_exception();

     }


   public:

     thread_exception()

       : exceptions(true_thread_policy::num_threads(), nullptr)

     {}


     template <typename function, typename... parameters>

     void run(function f, parameters... params){

         try {f(params...);} catch (...) {captureException();}

     }


     std::vector<std::exception_ptr> caughtExceptions() const{

       std::vector<std::exception_ptr> non_empty_exceptions;

       for (auto &&pException : exceptions){

         if (pException != nullptr) non_empty_exceptions.push_back(pException);

       }

       return non_empty_exceptions;

     }


     void rethrow() {

       for (auto &&pException : exceptions){

         if (pException != nullptr) std::rethrow_exception(pException);

       }

     }

   };


   partition_iterator::partition_iterator(

     partition_master &m, std::set<size_type>::const_iterator it_from_set)

     : master{m}, it{it_from_set}

   {}


   partition_iterator partition_iterator::operator++(){

     ++it;

     if (*this != master.end()) master.set_current_partition(*it);

     return *this;

   }


   bool partition_iterator::operator==(const partition_iterator &it1) const {

     return it == it1.it;

   }


   bool partition_iterator::operator!=(const partition_iterator &it1) const {

     return !(*this == it1);

   }


   size_type partition_iterator::operator*() const{

     return *it;

   }


   partition_master partition_master::instance;


   partition_master& partition_master::get(){

     return instance;

   }


   void partition_master::check_threads(){

     GLOBAL_OMP_GUARD

     auto must_update = false;

     if (nb_user_threads != true_thread_policy::num_threads()){

       nb_user_threads = true_thread_policy::num_threads();

       must_update = true;

     }

     if (nb_partitions < nb_user_threads && !partitions_set_by_user){

       nb_partitions = nb_user_threads;

       must_update = true;

     }

     if (must_update){

       update_partitions();

       dal::singletons_manager::on_partitions_change();

     }

   }


   void partition_master::set_nb_partitions(size_type n){

     GMM_ASSERT1 (!partitions_set_by_user,

                  "Number of partitions can be set only once.");

     if (n > nb_partitions){

       nb_partitions = n;

       nb_user_threads = true_thread_policy::num_threads();

       update_partitions();

       dal::singletons_manager::on_partitions_change();

     }

     else if (n < nb_partitions){

       GMM_WARNING1("Not reducing number of partitions from "

                    << nb_partitions <<" to " << n <<

                    " as it might invalidate global storage.");

     }

     partitions_set_by_user = true;

   }


   partition_iterator partition_master::begin(){

     GMM_ASSERT1(nb_user_threads == true_thread_policy::num_threads(),

                 "The number of omp threads was changed outside partition_master."

                 "Please use getfem::set_num_threads for this.");

     current_partition = *(std::begin(partitions.thrd_cast()));

     return partition_iterator{*this, std::begin(partitions.thrd_cast())};

   }


   partition_iterator partition_master::end(){

     return partition_iterator{*this, std::end(partitions.thrd_cast())};

   }


   void partition_master::set_behaviour(thread_behaviour b){

     if (b != behaviour){

       GMM_ASSERT1(!me_is_multithreaded_now(),

                   "Cannot change thread policy in parallel section.");

       behaviour = b;

       check_threads();

     }

   }


   partition_master::partition_master()

     : nb_user_threads{1}, nb_partitions{1} {

         partitions_updated = false;

         set_num_threads(1);

         update_partitions();

   }


   size_type partition_master::get_current_partition() const {

     GMM_ASSERT2(behaviour == thread_behaviour::partition_threads ?

                 true_thread_policy::this_thread() < nb_partitions : true,

                 "Requesting current partition for thread " <<

                 true_thread_policy::this_thread() <<

                 " while number of partitions is " << nb_partitions

                 << ".");

     return behaviour == thread_behaviour::partition_threads ?

            current_partition : true_thread_policy::this_thread();

   }


   size_type partition_master::get_nb_partitions() const {

     return behaviour == thread_behaviour::partition_threads ?

            nb_partitions : true_thread_policy::num_threads();

   }


   void partition_master::set_current_partition(size_type p){

     if (behaviour == thread_behaviour::partition_threads){

       GMM_ASSERT2(partitions.thrd_cast().count(p) != 0, "Internal error: "

                   << p << " is not a valid partitions for thread "

                   << true_thread_policy::this_thread()

                   << ".");

       current_partition = p;

     }

   }


   void partition_master::rewind_partitions(){

     if (me_is_multithreaded_now()){

       current_partition = *(std::begin(partitions.thrd_cast()));

     }

     else{

       for (size_type t = 0; t != partitions.num_threads(); ++t){

         current_partition(t) = *(std::begin(partitions(t)));

       }

     }

   }


   void partition_master::update_partitions(){

     partitions_updated = false;


     GLOBAL_OMP_GUARD


     if (partitions_updated) return;


     partitions = decltype(partitions){};

     current_partition = decltype(current_partition){};


     auto n_threads = true_thread_policy::num_threads();

     if(n_threads > nb_partitions){

       GMM_WARNING0("Using " << n_threads <<

                    " threads which is above the maximum number of partitions :" <<

                    nb_partitions

                    << ".");

     }

     if (behaviour == thread_behaviour::partition_threads){

       for (size_type t = 0; t != n_threads; ++t){

         auto partition_size = static_cast<size_type>

                                 (std::ceil(static_cast<scalar_type>(nb_partitions) /

                                            static_cast<scalar_type >(n_threads)));

         auto partition_begin = partition_size * t;

         if (partition_begin >= nb_partitions) break;

         auto partition_end = std::min(partition_size * (t + 1), nb_partitions);

         auto hint_it = std::begin(partitions(t));

         for (size_type i = partition_begin; i != partition_end; ++i){

           hint_it = partitions(t).insert(hint_it, i);

         }

         current_partition(t) = partition_begin;

       }

     }

     else{

       for (size_type t = 0; t != n_threads; ++t){

         partitions(t).insert(t);

         current_partition(t) = t;

       }

     }


     partitions_updated = true;

   }


   #if defined _WIN32 && !defined (__GNUC__)

     #define GETFEM_ON_WIN

   #endif


   parallel_boilerplate::

   parallel_boilerplate()

   : plocale{std::make_unique<standard_locale>()},

     pexception{std::make_unique<thread_exception>()} {

     #ifdef GETFEM_ON_WIN

       _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);

     #endif

   }


   void parallel_boilerplate::run_lambda(std::function<void(void)> lambda){

     pexception->run(lambda);

   }


   parallel_boilerplate::~parallel_boilerplate(){

     #ifdef GETFEM_ON_WIN

     _configthreadlocale(_DISABLE_PER_THREAD_LOCALE);

     #endif

     pexception->rethrow();

   }


   void parallel_execution(std::function<void(void)> lambda,

                           bool iterate_over_partitions){

     if (me_is_multithreaded_now()) {

       lambda();

       return;

     }

     parallel_boilerplate boilerplate;

     auto &pm = partition_master::get();

     if (pm.get_nb_partitions() < true_thread_policy::num_threads()){

       pm.set_nb_partitions(true_thread_policy::num_threads());

     }

     #pragma omp parallel default(shared)

     {

       if (iterate_over_partitions) {

         for (auto &&partitions : partition_master::get()) {

           (void)partitions;

           boilerplate.run_lambda(lambda);

         }

       }

       else {

         boilerplate.run_lambda(lambda);

       }

     }

     if (iterate_over_partitions) partition_master::get().rewind_partitions();

   }


 #ifdef GETFEM_FORCE_SINGLE_THREAD_BLAS

 # include <dlfcn.h>

 # define BLAS_FORCE_SINGLE_THREAD                                       \

   int openblas_get_num_threads_res = 1;                                 \

   {                                                                     \

     typedef int (* ptrfunc1)();                                         \

     ptrfunc1 func1 = ptrfunc1(dlsym(NULL, "openblas_get_num_threads")); \

     if (func1) openblas_get_num_threads_res = (*func1)();               \

     typedef void (* ptrfunc2)(int);                                     \

     ptrfunc2 func2 = ptrfunc2(dlsym(NULL, "openblas_set_num_threads")); \

     if (func2) (*func2)(1);                                             \

   }

 # define BLAS_RESTORE_NUM_THREAD                                        \

   {                                                                     \

     typedef void (* ptrfunc2)(int);                                     \

     ptrfunc func2 = ptrfunc2(dlsym(NULL, "openblas_set_num_threads"));  \

     if (func2) (*func)(openblas_get_num_threads_res);                   \

   }

 #else

 # define BLAS_FORCE_SINGLE_THREAD

 # define BLAS_RESTORE_NUM_THREAD

 #endif


   struct dummy_class_for_blas_nbthread_init {

     dummy_class_for_blas_nbthread_init(void)

     { BLAS_FORCE_SINGLE_THREAD; }

   };


   static dummy_class_for_blas_nbthread_init dcfbnti;


 }  /* end of namespace getfem.                                             */

getfem::partition_iterator
Iterator that runs over partitions on the current thread and sets the global (but thread-specific) pa...
Definition: getfem_omp.h:356

getfem::partition_master
A singleton that Manages partitions on individual threads.
Definition: getfem_omp.h:382

getfem::partition_master::begin
partition_iterator begin()
beginning of the partitions for the current thread
Definition: getfem_omp.cc:215

getfem::partition_master::get_current_partition
size_type get_current_partition() const
active partition on the thread.
Definition: getfem_omp.cc:243

getfem::partition_master::get_nb_partitions
size_type get_nb_partitions() const
number of partitions or threads, depending on thread policy
Definition: getfem_omp.cc:254

getfem::partition_master::end
partition_iterator end()
end of the partitions for the current thread
Definition: getfem_omp.cc:223

getfem::partition_master::set_nb_partitions
void set_nb_partitions(size_type)
for thread_behaviour::partition_threads set the total number of partitions.
Definition: getfem_omp.cc:198

getfem::partition_master::set_behaviour
void set_behaviour(thread_behaviour)
Sets the behaviour for the full program: either partitioning parallel loops according to the number o...
Definition: getfem_omp.cc:227

getfem::thread_exception
Allows to re-throw exceptions, generated in OpemMP parallel section.
Definition: getfem_omp.cc:120

dal_singleton.h
A simple singleton implementation.

getfem_locale.h
thread safe standard locale with RAII semantics

getfem_omp.h
Tools for multithreaded, OpenMP and Boost based parallelization.

bgeot::size_type
size_t size_type
used as the common size type in the library
Definition: bgeot_poly.h:49

getfem
GEneric Tool for Finite Element Methods.
Definition: getfem_accumulated_distro.h:47

getfem::not_multithreaded
bool not_multithreaded()
is the program is running on a single thread
Definition: getfem_omp.cc:110

getfem::max_concurrency
size_type max_concurrency()
Maximum number of threads that can run concurrently.
Definition: getfem_omp.cc:112

getfem::me_is_multithreaded_now
bool me_is_multithreaded_now()
is the program running in the parallel section
Definition: getfem_omp.cc:106

getfem::set_num_threads
void set_num_threads(int n)
set maximum number of OpenMP threads
Definition: getfem_omp.cc:108