Set Up Multi-Threaded Newton Method

multi_newton_setup

@(@\newcommand{\W}[1]{ \; #1 \; } \newcommand{\R}[1]{ {\rm #1} } \newcommand{\B}[1]{ {\bf #1} } \newcommand{\D}[2]{ \frac{\partial #1}{\partial #2} } \newcommand{\DD}[3]{ \frac{\partial^2 #1}{\partial #2 \partial #3} } \newcommand{\Dpow}[2]{ \frac{\partial^{#1}}{\partial {#2}^{#1}} } \newcommand{\dpow}[2]{ \frac{ {\rm d}^{#1}}{{\rm d}\, {#2}^{#1}} }@)@Set Up Multi-Threaded Newton Method
Syntax

ok = multi_newton_setup(

     num_sub, xlow, xup, epsilon, max_itr, num_threads

)

Purpose
These routine does the setup for splitting finding all the zeros in an interval into separate sub-intervals, one for each thread.

Thread
It is assumed that this function is called by thread zero, and all the other threads are blocked (waiting).

num_sub
See num_sub in multi_newton_run .

xlow
See xlow in multi_newton_run .

xup
See xup in multi_newton_run .

epsilon
See epsilon in multi_newton_run .

max_itr
See max_itr in multi_newton_run .

num_threads
See num_threads in multi_newton_run .

Source


namespace {
bool multi_newton_setup(
     size_t num_sub                              ,
     double xlow                                 ,
     double xup                                  ,
     double epsilon                              ,
     size_t max_itr                              ,
     size_t num_threads                          )
{
     num_threads  = std::max(num_threads_, size_t(1));
     bool ok      = num_threads == thread_alloc::num_threads();
     ok          &= thread_alloc::thread_num() == 0;

     // inputs that are same for all threads
     epsilon_ = epsilon;
     max_itr_ = max_itr;

     // resize the work vector to accomidate the number of threads
     ok &= work_all_.size() == 0;
     work_all_.resize(num_threads);

     // length of each sub interval
     sub_length_ = (xup - xlow) / double(num_sub);

     // determine values that are specific to each thread
     size_t num_min   = num_sub / num_threads; // minimum num_sub
     size_t num_more  = num_sub % num_threads; // number that have one more
     size_t sum_num   = 0;  // sum with respect to thread of num_sub
     size_t thread_num, num_sub_thread;
     for(thread_num = 0; thread_num < num_threads; thread_num++)
     {
# if  USE_THREAD_ALLOC_FOR_WORK_ALL
          // allocate separate memory for this thread to avoid false sharing
          size_t min_bytes(sizeof(work_one_t)), cap_bytes;
          void* v_ptr = thread_alloc::get_memory(min_bytes, cap_bytes);
          work_all_[thread_num] = static_cast<work_one_t*>(v_ptr);

          // thread_alloc is a raw memory allocator; i.e., it does not call
          // the constructor for the objects it creates. The vector
          // class requires it's constructor to be called so we do it here
          new(& (work_all_[thread_num]->x) ) vector<double>();
# else
          work_all_[thread_num] = new work_one_t;
# endif

          // number of sub-intervalse for this thread
          if( thread_num < num_more  )
               num_sub_thread = num_min + 1;
          else     num_sub_thread = num_min;

          // when thread_num == 0, xlow_thread == xlow
          double xlow_thread = xlow + double(sum_num) * sub_length_;

          // when thread_num == num_threads - 1, xup_thread = xup
          double xup_thread =
               xlow + double(sum_num + num_sub_thread) * sub_length_;
          if( thread_num == num_threads - 1 )
               xup_thread = xup;

          // update sum_num for next time through loop
          sum_num += num_sub_thread;

          // input information specific to this thread
          work_all_[thread_num]->num_sub = num_sub_thread;
          work_all_[thread_num]->xlow    = xlow_thread;
          work_all_[thread_num]->xup     = xup_thread;
          ok &= work_all_[thread_num]->x.size() == 0;

          // in case this thread does not get called
          work_all_[thread_num]->ok = false;
     }
     ok &= sum_num == num_sub;
     return ok;
}
}

Input File: example/multi_thread/multi_newton.cpp