NDDEM/html/Parallelizer_8h_source.html

 // This file is part of Eigen, a lightweight C++ template library

 // for linear algebra.

 //

 // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>

 //

 // This Source Code Form is subject to the terms of the Mozilla

 // Public License v. 2.0. If a copy of the MPL was not distributed

 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.


 #ifndef EIGEN_PARALLELIZER_H

 #define EIGEN_PARALLELIZER_H


 #if EIGEN_HAS_CXX11_ATOMIC

 #include <atomic>

 #endif


 namespace Eigen {


 namespace internal {


 /** \internal
   * Reads or writes the thread-count setting kept in a function-local static.
   *
   * \param action  SetAction stores \c *v as the new setting; GetAction writes
   *                the effective thread count to \c *v. Any other value trips
   *                an internal assertion.
   * \param v       Non-null pointer to the value to store (SetAction) or to
   *                the location receiving the result (GetAction).
   *
   * For GetAction the result is the stored setting when it is strictly
   * positive, otherwise omp_get_max_threads(). When EIGEN_HAS_OPENMP is not
   * defined the result is always 1, whatever was stored.
   */
 inline void manage_multi_threading(Action action, int* v)
 {
   // Starts at -1; any non-positive value makes GetAction fall back to OpenMP's own maximum.
   static int threadLimit = -1;
   EIGEN_UNUSED_VARIABLE(threadLimit)

   if(action==GetAction)
   {
     eigen_internal_assert(v!=0);
 #ifdef EIGEN_HAS_OPENMP
     *v = (threadLimit>0) ? threadLimit : omp_get_max_threads();
 #else
     *v = 1;
 #endif
     return;
   }

   if(action==SetAction)
   {
     eigen_internal_assert(v!=0);
     threadLimit = *v;
     return;
   }

   // Neither a query nor an update: the caller passed an unsupported action.
   eigen_internal_assert(false);
 }


 }


 /** Issues one GetAction query for the thread-count setting and one for the
   * cache-size settings, discarding every returned value.
   *
   * The calls are made only for their side effects. NOTE(review): presumably
   * this forces initialization of the function-local statics behind those
   * settings before threads are spawned (manage_multi_threading holds one;
   * manage_caching_sizes is defined elsewhere - confirm).
   */
 inline void initParallel()
 {
   std::ptrdiff_t l1Size, l2Size, l3Size;
   int threadCount;

   internal::manage_multi_threading(GetAction, &threadCount);
   internal::manage_caching_sizes(GetAction, &l1Size, &l2Size, &l3Size);
 }


 /** \returns the thread count reported by
   * internal::manage_multi_threading(GetAction, ...).
   */
 inline int nbThreads()
 {
   int count;
   internal::manage_multi_threading(GetAction, &count);
   return count;
 }


 /** Stores \a v as the thread-count setting by forwarding it to
   * internal::manage_multi_threading(SetAction, ...).
   *
   * \param v  Requested thread count.
   */
 inline void setNbThreads(int v)
 {
   int* const requested = &v;
   internal::manage_multi_threading(SetAction, requested);
 }


 namespace internal {


 /** \internal
   * Per-thread record passed to the GEMM functor by parallelize_gemm.
   *
   * A default-constructed record has sync == -1, users == 0 and an empty
   * row range (lhs_start == 0, lhs_length == 0).
   */
 template<typename Index> struct GemmParallelInfo
 {
   GemmParallelInfo()
     : sync(-1),
       users(0),
       lhs_start(0),
       lhs_length(0)
   {}

   // Cross-thread signalling fields. Plain volatile does not guarantee on
   // every architecture (see bug 1572) that, once thread A tells thread B it
   // has finished packing a block, all of A's writes have actually been
   // carried out. The C++11 memory model with atomics does, so atomics are
   // used whenever they are available.
 #if EIGEN_HAS_CXX11_ATOMIC
   std::atomic<Index> sync;
   std::atomic<int> users;
 #else
   volatile Index sync;
   volatile int users;
 #endif

   // Row range assigned to this thread: first row and number of rows.
   Index lhs_start;
   Index lhs_length;
 };


 /** \internal
   * Runs the product functor \a func over a rows x cols result, either in a
   * single call or split across OpenMP threads.
   *
   * \tparam Condition  Compile-time switch; when false the single-call path is
   *                    always taken.
   * \param func        Functor invoked as func(rowStart, rowCount, colStart, colCount)
   *                    on the single-call path, and with a trailing
   *                    GemmParallelInfo array on the threaded path. Its
   *                    initParallelSession(threads) is called once before the
   *                    parallel region.
   * \param rows, cols  Dimensions handed to \a func.
   * \param depth       Third dimension; used only to estimate the amount of work.
   * \param transpose   When true, the split is taken along \a rows instead of
   *                    \a cols and the row/column arguments of \a func are
   *                    exchanged accordingly.
   *
   * The threaded path is compiled out entirely when OpenMP is unavailable,
   * when EIGEN_USE_BLAS is defined, or when C++11 atomics are missing on a
   * non-x86 target.
   */
 template<bool Condition, typename Functor, typename Index>

 void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose)

 {

   // TODO when EIGEN_USE_BLAS is defined,

   // we should still enable OMP for other scalar types

   // Without C++11, we have to disable GEMM's parallelization on

   // non x86 architectures because there volatile is not enough for our purpose.

   // See bug 1572.

 #if (! defined(EIGEN_HAS_OPENMP)) || defined(EIGEN_USE_BLAS) || ((!EIGEN_HAS_CXX11_ATOMIC) && !(EIGEN_ARCH_i386_OR_x86_64))

   // FIXME the transpose variable is only needed to properly split

   // the matrix product when multithreading is enabled. This is a temporary

   // fix to support row-major destination matrices. This whole

   // parallelizer mechanism has to be redesigned anyway.

   EIGEN_UNUSED_VARIABLE(depth);

   EIGEN_UNUSED_VARIABLE(transpose);

   // Single-call path: the functor handles the whole rows x cols range.
   func(0,rows, 0,cols);

 #else


   // Dynamically check whether we should enable or disable OpenMP.

   // The conditions are:

   // - the max number of threads we can create is greater than 1

   // - we are not already in a parallel code

   // - the sizes are large enough


   // compute the maximal number of threads from the size of the product:

   // This first heuristic takes into account that the product kernel is fully optimized when working with nr columns at once.

   Index size = transpose ? rows : cols;

   Index pb_max_threads = std::max<Index>(1,size / Functor::Traits::nr);


   // compute the maximal number of threads from the total amount of work:

   // The product is accumulated in double, not in Index.
   double work = static_cast<double>(rows) * static_cast<double>(cols) *

       static_cast<double>(depth);

   double kMinTaskSize = 50000;  // FIXME improve this heuristic.

   // Keep the smaller of the two bounds, but never less than one thread.
   pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, static_cast<Index>( work / kMinTaskSize ) ));


   // compute the number of threads we are going to use

   Index threads = std::min<Index>(nbThreads(), pb_max_threads);


   // if multi-threading is explicitly disabled, not useful, or if we already are in a parallel session,

   // then abort multi-threading

   // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?

   if((!Condition) || (threads==1) || (omp_get_num_threads()>1))

     return func(0,rows, 0,cols);


   Eigen::initParallel();

   func.initParallelSession(threads);


   // From here on 'cols' is the dimension that gets split across threads;
   // with transpose set it holds the original row count.
   if(transpose)

     std::swap(rows,cols);


   // Declares 'info' with 'threads' elements, one GemmParallelInfo per requested thread.
   ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);


   #pragma omp parallel num_threads(threads)

   {

     Index i = omp_get_thread_num();

     // Note that the actual number of threads might be lower than the number of requested ones.

     Index actual_threads = omp_get_num_threads();


     // Per-thread column count, rounded down to a multiple of 4.
     Index blockCols = (cols / actual_threads) & ~Index(0x3);

     // Per-thread row count, rounded down to a multiple of Functor::Traits::mr.
     Index blockRows = (rows / actual_threads);

     blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;


     // The last thread takes whatever the rounding left over, so the full range is covered.
     Index r0 = i*blockRows;

     Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;


     Index c0 = i*blockCols;

     Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;


     // Record this thread's row slice in its own info entry before calling the functor.
     info[i].lhs_start = r0;

     info[i].lhs_length = actualBlockRows;


     // Each thread processes its own slice [c0, c0+actualBlockCols) of the split
     // dimension and the full extent of the other one.
     if(transpose) func(c0, actualBlockCols, 0, rows, info);

     else          func(0, rows, c0, actualBlockCols, info);

   }

 #endif

 }


 } // end namespace internal


 } // end namespace Eigen


 #endif // EIGEN_PARALLELIZER_H

action
boost::random::uniform_int_distribution action(0, 100)

eigen_internal_assert
#define eigen_internal_assert(x)
Definition: Macros.h:1043

EIGEN_UNUSED_VARIABLE
#define EIGEN_UNUSED_VARIABLE(var)
Definition: Macros.h:1076

ei_declare_aligned_stack_constructed_variable
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER)
Definition: Memory.h:768

Action
Handle force and torque contact information.
Definition: ContactList.h:19

Eigen::internal::parallelize_gemm
void parallelize_gemm(const Functor &func, Index rows, Index cols, Index depth, bool transpose)
Definition: Parallelizer.h:100

Eigen::internal::manage_multi_threading
void manage_multi_threading(Action action, int *v)
Definition: Parallelizer.h:22

Eigen::internal::manage_caching_sizes
void manage_caching_sizes(Action action, std::ptrdiff_t *l1, std::ptrdiff_t *l2, std::ptrdiff_t *l3)
Definition: GeneralBlockPanelKernel.h:86

Eigen::internal::size
EIGEN_CONSTEXPR Index size(const T &x)
Definition: Meta.h:479

Eigen
Namespace containing all symbols from the Eigen library.
Definition: LDLT.h:16

Eigen::GetAction
@ GetAction
Definition: Constants.h:504

Eigen::SetAction
@ SetAction
Definition: Constants.h:504

Eigen::Index
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:74

Eigen::initParallel
void initParallel()
Definition: Parallelizer.h:53

Eigen::nbThreads
int nbThreads()
Definition: Parallelizer.h:63

Eigen::setNbThreads
void setNbThreads(int v)
Definition: Parallelizer.h:72

internal
Definition: document.h:416

std::swap
NLOHMANN_BASIC_JSON_TPL_DECLARATION void swap(nlohmann::NLOHMANN_BASIC_JSON_TPL &j1, nlohmann::NLOHMANN_BASIC_JSON_TPL &j2) noexcept(//NOLINT(readability-inconsistent-declaration-parameter-name, cert-dcl58-cpp) is_nothrow_move_constructible< nlohmann::NLOHMANN_BASIC_JSON_TPL >::value &&//NOLINT(misc-redundant-expression, cppcoreguidelines-noexcept-swap, performance-noexcept-swap) is_nothrow_move_assignable< nlohmann::NLOHMANN_BASIC_JSON_TPL >::value)
exchanges the values of two JSON objects
Definition: json.hpp:25399

Eigen::internal::GemmParallelInfo
Definition: Parallelizer.h:80

Eigen::internal::GemmParallelInfo::GemmParallelInfo
GemmParallelInfo()
Definition: Parallelizer.h:81

Eigen::internal::GemmParallelInfo::lhs_length
Index lhs_length
Definition: Parallelizer.h:96

Eigen::internal::GemmParallelInfo::lhs_start
Index lhs_start
Definition: Parallelizer.h:95

Eigen::internal::GemmParallelInfo::sync
Index volatile sync
Definition: Parallelizer.h:91

Eigen::internal::GemmParallelInfo::users
int volatile users
Definition: Parallelizer.h:92