/home/coin/SVN-release/OS-2.1.0/Bcp/src/TM/BCP_tm_main.cpp

Go to the documentation of this file.
00001 // Copyright (C) 2000, International Business Machines
00002 // Corporation and others.  All Rights Reserved.
00003 #include <cstdio>
00004 #include <cerrno>
00005 #include <cmath>
00006 #include <queue>
00007 #ifdef _MSC_VER
00008 #include <process.h>
00009 #endif 
00010 
00011 #include "CoinTime.hpp"
00012 
00013 #include "BcpConfig.h"
00014 #include "BCP_os.hpp"
00015 
00016 #include "BCP_USER.hpp"
00017 #include "BCP_string.hpp"
00018 #include "BCP_vector.hpp"
00019 #include "BCP_buffer.hpp"
00020 #include "BCP_message.hpp"
00021 #include "BCP_var.hpp"
00022 #include "BCP_cut.hpp"
00023 #include "BCP_node_change.hpp"
00024 #include "BCP_tm.hpp"
00025 #include "BCP_tm_functions.hpp"
00026 #include "BCP_main_fun.hpp"
00027 
00028 #include "BCP_tm_user.hpp"
00029 #include "BCP_lp_user.hpp"
00030 
00031 #include "BCP_message_single.hpp"
00032 #include "BCP_message_mpi.hpp"
00033 #include "BCP_message_pvm.hpp"
00034 
00035 //#############################################################################
00036 
00037 int bcp_main(int argc, char* argv[], USER_initialize* user_init)
00038 {
00039     BCP_message_environment* msg_env = user_init->msgenv_init(argc, argv);
00040     BCP_single_environment* single_env =
00041         dynamic_cast<BCP_single_environment*>(msg_env);
00042     if (single_env) {
00043         // this way when register_process takes over the execution the single
00044         // environment will have access to the command line arguments and can
00045         // parse it.
00046         single_env->set_arguments(argc, argv);
00047     }
00048 
00049     int my_id = msg_env->register_process(user_init);
00050 
00051     if (single_env) {
00052         // register process did everything. We just return.
00053         delete msg_env;
00054         return 0;
00055     }
00056 
00057     int parent = msg_env->parent_process();
00058 
00059     if (parent == -1) {
00060         //We compute the real numeber of arguments because Mpi can change
00061         //list of arguments
00062         int cnt;
00063         for (cnt = 0; cnt < argc; ++cnt) {
00064             if (argv[cnt] == NULL)
00065                 break;
00066         }
00067         BCP_tm_main(msg_env, user_init, cnt, argv);
00068     } else {
00069         // In MPI all processes get the argument list, so we must not check
00070         // this
00071 #if defined(COIN_HAS_MPI)
00072         BCP_mpi_environment* mpi_env =
00073             dynamic_cast<BCP_mpi_environment*>(msg_env);
00074         if (!mpi_env && argc != 1) {
00075             throw BCP_fatal_error("The slaves do not take any argument!\n");
00076         }
00077 #endif
00078         BCP_buffer msg_buf;
00079         msg_env->receive(parent, BCP_Msg_AnyMessage, msg_buf, -1);
00080         if (msg_buf.msgtag() != BCP_Msg_ProcessType) {
00081             throw BCP_fatal_error("The first message is not ProcessType!!!\n");
00082         }
00083         // got a new identity, act on it
00084         BCP_process_t ptype;
00085         double ub;
00086         msg_buf.unpack(ptype);
00087         msg_buf.unpack(ub);
00088         while (ptype != BCP_ProcessType_EndProcess) {
00089             const bool maxheap_set = false;
00090             switch (ptype) {
00091             case BCP_ProcessType_LP:
00092               if (maxheap_set) {
00093                  printf("usedheap before LP: %li\n", BCP_used_heap());
00094               }
00095               ptype = BCP_lp_main(msg_env, user_init, my_id, parent, ub);
00096               if (maxheap_set) {
00097                   printf("usedheap after LP: %li\n", BCP_used_heap());
00098               }
00099               break;
00100             case BCP_ProcessType_CP:
00101               // BCP_cp_main(msg_env, user_init, my_id, parent, ub);
00102               break;
00103             case BCP_ProcessType_VP:
00104               // BCP_vp_main(msg_env, user_init, my_id, parent, ub);
00105               break;
00106             case BCP_ProcessType_CG:
00107               ptype = BCP_cg_main(msg_env, user_init, my_id, parent, ub);
00108               break;
00109             case BCP_ProcessType_VG:
00110               ptype = BCP_vg_main(msg_env, user_init, my_id, parent, ub);
00111               break;
00112             case BCP_ProcessType_TS:
00113               if (maxheap_set) {
00114                   printf("usedheap before TS: %li\n", BCP_used_heap());
00115               }
00116               ptype = BCP_tmstorage_main(msg_env, user_init, my_id, parent, ub);
00117               if (maxheap_set) {
00118                   printf("usedheap after TS: %li\n", BCP_used_heap());
00119               }
00120               break;
00121             case BCP_ProcessType_Any:
00122               throw BCP_fatal_error("\
00123 New process identity is BCP_ProcessType_Any!\n");
00124             case BCP_ProcessType_TM:
00125               throw BCP_fatal_error("\
00126 New process identity is BCP_ProcessType_TM!\n");
00127             case BCP_ProcessType_EndProcess:
00128               break;
00129             }
00130         }
00131     }
00132     delete msg_env;
00133 
00134     return 0;
00135 }
00136 
00137 //#############################################################################
00138 
00139 void
00140 BCP_tm_main(BCP_message_environment* msg_env,
00141             USER_initialize* user_init,
00142             const int argnum, const char* const * arglist)
00143 {
00144     // If we ever get here then the environment is parallel
00145     
00146     // Start to create the universe... (we don't have a user universe yet).
00147     BCP_tm_prob p;
00148 
00149     // If we ever get here then the environment is parallel
00150     p.par.set_entry(BCP_tm_par::MessagePassingIsSerial,false);
00151     p.slave_pars.lp.set_entry(BCP_lp_par::MessagePassingIsSerial,false);
00152     p.slave_pars.cg.set_entry(BCP_cg_par::MessagePassingIsSerial,false);
00153     p.slave_pars.vg.set_entry(BCP_vg_par::MessagePassingIsSerial,false);
00154     /*
00155       p.slave_pars.cp.set_entry(BCP_cp_par::MessagePassingIsSerial,false);
00156       p.slave_pars.vp.set_entry(BCP_vp_par::MessagePassingIsSerial,false);
00157     */
00158 
00159     // this also reads in the parameters from a file
00160     BCP_tm_parse_command_line(p, argnum, arglist);
00161    
00162     BCP_buffer msg_buf;
00163     p.msg_env = msg_env;
00164 
00165     //We check if the number of BCP processes is the same as in MPI
00166 #if defined(COIN_HAS_MPI)
00167     BCP_mpi_environment* mpi_env = dynamic_cast<BCP_mpi_environment*>(msg_env);
00168     if (mpi_env) {
00169         const int n_proc =
00170             p.param(BCP_tm_par::LpProcessNum) +
00171             p.param(BCP_tm_par::CgProcessNum) +
00172             p.param(BCP_tm_par::VgProcessNum) +
00173             p.param(BCP_tm_par::CpProcessNum) +
00174             p.param(BCP_tm_par::VpProcessNum) + 1;
00175         if (p.msg_env->num_procs() < n_proc) {
00176             throw BCP_fatal_error("\
00177 Number of process in parameter file %d > n_proc in mpirun -np %d!\n",
00178                                   n_proc, p.msg_env->num_procs());
00179         }
00180     }
00181 #endif
00182 
00183     p.stat.set_num_lp(p.param(BCP_tm_par::LpProcessNum));
00184 
00185     p.start_time = CoinWallclockTime();
00186 
00187     FILE* logfile = 0;
00188 
00189     const BCP_string& log = p.par.entry(BCP_tm_par::LogFileName);
00190     if (! (p.par.entry(BCP_tm_par::LogFileName) == "")) {
00191         int len = log.length();
00192         char *logname = new char[len + 300];
00193         memcpy(logname, log.c_str(), len);
00194         memcpy(logname + len, "-tm-", 4);
00195         len += 4;
00196         gethostname(logname + len, 255);
00197         len = strlen(logname);
00198         logname[len++] = '-';
00199         sprintf(logname + len, "%i", static_cast<int>(GETPID));
00200         logfile = freopen(logname, "a", stdout);
00201         if (logfile == 0) {
00202             fprintf(stderr, "Error while redirecting stdout: %i\n", errno);
00203             abort();
00204         }
00205         setvbuf(logfile, NULL, _IOLBF, 0); // make it line buffered
00206         delete[] logname;
00207     } else {
00208         setvbuf(stdout, NULL, _IOLBF, 0); // make it line buffered
00209     }
00210 
00211     // BCP_tm_user_init() returns a BCP_tm_user* and that will be part of p.
00212     // Also, it should take care of every I/O, heuristic startup, etc. the user
00213     // wants to do. Wreak havoc in p if (s)he wants.
00214     // BUT: once this function returns, the processes designated to be part of
00215     // BCP must be idle and waiting for a message. See the
00216     // BCP_slave_process_stub() function below. 
00217     p.user = user_init->tm_init(p, argnum, arglist);
00218     p.user->setTmProblemPointer(&p);
00219     p.packer = user_init->packer_init(p.user);
00220     p.packer->user_class = p.user;
00221 
00222     // Set the core (variables & cuts)
00223     p.core = BCP_tm_create_core(p);
00224     p.core_as_change = new BCP_problem_core_change;
00225     *p.core_as_change = *p.core;
00226 
00227     // Fire up the LP/CG/CP/VG/VP processes
00228     // Actually, this is firing up enough copies of self.
00229     BCP_tm_start_processes(p);
00230     p.lp_scheduler.
00231       setParams(p.param(BCP_tm_par::LPscheduler_OverEstimationStatic),
00232                 p.param(BCP_tm_par::LPscheduler_SwitchToRateThreshold),
00233                 10.0, /* estimated root time */
00234                 p.param(BCP_tm_par::LPscheduler_FactorTimeHorizon),
00235                 p.param(BCP_tm_par::LPscheduler_OverEstimationRate),
00236                 p.param(BCP_tm_par::LPscheduler_MaxNodeIdRatio),
00237                 p.param(BCP_tm_par::LPscheduler_MaxNodeIdNum),
00238                 p.param(BCP_tm_par::LPscheduler_MaxSbIdNum),
00239                 p.param(BCP_tm_par::LPscheduler_MinSbIdNum));
00240 
00241     // Notify the LP/CG/CP/VG/VP processes about their identity. Also, send out
00242     // their parameters, core and user info.
00243     BCP_tm_notify_processes(p);
00244 
00245 #if ! defined(BCP_ONLY_LP_PROCESS_HANDLING_WORKS)
00246     // Initialize the number of leaves assigned to CP's and VP's as 0
00247     if (p.param(BCP_tm_par::CpProcessNum) > 0) {
00248       for (int i = p.slaves.cp->procs().size() - 1; i >= 0; --i)
00249         p.leaves_per_cp.push_back(std::make_pair(p.slaves.cp->procs()[i], 0));
00250     }
00251     if (p.param(BCP_tm_par::VpProcessNum) > 0) {
00252       for (int i = p.slaves.vp->procs().size() - 1; i >= 0; --i)
00253         p.leaves_per_vp.push_back(std::make_pair(p.slaves.vp->procs()[i], 0));
00254     }
00255 #endif
00256 
00257     // Initialize the root of the search tree (can't invoke directly
00258     // p.user->create_root(), b/c the root might contain extra vars/cuts and
00259     // it's better if we take care of inserting them into the appropriate data
00260     // structures.
00261     BCP_tm_node* root = BCP_tm_create_root(p);
00262 
00263     p.next_phase_nodes.push_back(root);
00264     p.search_tree.insert(root);
00265 
00266     BCP_sanity_checks(p);
00267 
00268     //-------------------------------------------------------------------------
00269     // The main loop
00270     //-------------------------------------------------------------------------
00271     bool something_died = false;
00272     for ( p.phase = 0; true ; ++p.phase) {
00273         // insert the nodes in next_phase_nodes into candidates, print out some
00274         // statistics about the previous phase (if there was one) and do some
00275         // other stuff, too.
00276         BCP_tm_tasks_before_new_phase(p);
00277         // do one phase (return true/false depending on success)
00278         something_died = BCP_tm_do_one_phase(p);
00279         // If nothing is left for the next phase or if something has died then
00280         // quit the infinite loop.
00281         if (p.next_phase_nodes.size() == 0 || something_died)
00282             break;
00283     }
00284 
00285     //-------------------------------------------------------------------------
00286     // Everything is done
00287     //-------------------------------------------------------------------------
00288     // first let the processes know that they're not needed in BCP any more,
00289     // they can start idling (this will be used when we'll loop around in TM).
00290     // The processes will respond by sending statistics.
00291     p.lp_scheduler.update_idle_times();
00292     BCP_tm_idle_processes(p);
00293 
00294     BCP_tm_wrapup(&p, 0, 0, 0, true);
00295 
00296     // Finally stop all the processes.
00297     BCP_tm_stop_processes(p);
00298 
00299     if (logfile)
00300         fclose(logfile);
00301 }
00302 
00303 //#############################################################################
00304 
00305 bool BCP_tm_do_one_phase(BCP_tm_prob& p)
00306 {
00307     BCP_buffer& buf = p.msg_buf;
00308     // While there are nodes waiting to be processed (or being processed) we
00309     // don't go to the next phase
00310     while (!p.candidate_list.empty() > 0 || p.lp_scheduler.numNodeIds() > 0){
00311         // Fill up as many free LP processes as we can
00312         if (BCP_tm_start_new_nodes(p) == BCP_NodeStart_Error)
00313             // Error indicates that something has died
00314             return true;
00315         buf.clear();
00316         // Check if need to balance data
00317         p.need_a_TS = ! BCP_tm_is_data_balanced(p);
00318         p.need_a_TS = BCP_tm_balance_data(p);
00319 
00320         // Process incoming messages. If there are no active nodes left then
00321         // timeout is set to 0, so we just check the queue, but not wait.
00322         const int numNodeIds = p.lp_scheduler.numNodeIds();
00323         const double t0 = CoinWallclockTime();
00324         const double timeout = (p.lp_scheduler.numNodeIds() == 0 ?
00325                                 0 : p.param(BCP_tm_par::TmTimeout));
00326         p.msg_env->receive(BCP_AnyProcess, BCP_Msg_AnyMessage, buf, timeout);
00327         const double t1 = CoinWallclockTime();
00328         p.stat.update_wait_time(numNodeIds, t1-t0);
00329 #ifdef COIN_HAS_MPI
00330         p.stat.update_queue_length(numNodeIds, MPIDI_BGLTS_get_num_messages());
00331 #endif
00332         p.stat.print(false /* not final */, t1 - p.start_time);
00333         try {
00334             p.process_message();
00335         }
00336         catch (BCP_fatal_error& err) {
00337             // something is baaaad... e.g. timeout
00338             return true;
00339         }
00340     }
00341     return false;
00342 }
00343 
00344 //#############################################################################
00345 
00346 BCP_problem_core* BCP_tm_create_core(BCP_tm_prob& p)
00347 {
00348     BCP_vec<BCP_var_core*> bvars;
00349     BCP_vec<BCP_cut_core*> bcuts;
00350     BCP_lp_relax* matrix = 0;
00351 
00352     p.user->initialize_core(bvars, bcuts, matrix);
00353 
00354     const int bvarnum = bvars.size();
00355     if (bvarnum > 0) {
00356         int i = p.next_var_index_set_start;
00357         for (i = 0; i < bvarnum; ++i) {
00358             BCP_var_core* var = bvars[i];
00359             // make certain that the bounds of integer vars is integer...
00360             if (var->var_type() != BCP_ContinuousVar) {
00361                 var->set_lb(ceil(var->lb()-1e-8));
00362                 var->set_ub(floor(var->ub()+1e-8));
00363             }
00364             var->set_bcpind(i);
00365             p.vars_local[i] = new BCP_var_core(*var);
00366         }
00367         p.next_var_index_set_start = i;
00368     }
00369 
00370     const int bcutnum = bcuts.size();
00371     if (bcutnum > 0) {
00372         int i = p.next_cut_index_set_start;
00373         for (i = 0; i < bcutnum; ++i) {
00374             BCP_cut_core* cut = bcuts[i];
00375             cut->set_bcpind(i);
00376             p.cuts_local[i] = new BCP_cut_core(*cut);
00377         }
00378         p.next_cut_index_set_start = i;
00379     }
00380 
00381     if (!matrix)
00382         matrix = new BCP_lp_relax;
00383 
00384     return new BCP_problem_core(bvars, bcuts, matrix);
00385 }
00386    
00387 //#############################################################################
00388 
00389 static inline BCP_cut*
00390 BCP_tm_unpack_root_cut(BCP_tm_prob& tm)
00391 {
00392     BCP_cut* cut;
00393     BCP_buffer& buf = tm.msg_buf;
00394     BCP_object_t obj_t;
00395     double lb, ub;
00396     BCP_obj_status stat;
00397     buf.unpack(obj_t).unpack(stat).unpack(lb).unpack(ub);
00398     switch (obj_t) {
00399     case BCP_CoreObj:
00400         cut = new BCP_cut_core(lb, ub);
00401         break;
00402     case BCP_AlgoObj:
00403         cut = tm.packer->unpack_cut_algo(buf);
00404         cut->change_bounds(lb, ub);
00405         break;
00406     default:
00407         throw BCP_fatal_error("BCP_tm_unpack_root_cut: unexpected obj_t.\n");
00408     }
00409     cut->set_bcpind(0);
00410     cut->set_status(stat);
00411 
00412     return cut;
00413 }
00414 
00415 //#############################################################################
00416 
00417 BCP_tm_node* BCP_tm_create_root(BCP_tm_prob& p)
00418 {
00419     BCP_vec<BCP_var*> added_vars;
00420     BCP_vec<BCP_cut*> added_cuts;
00421     BCP_user_data* user_data = 0;
00422 
00423     // If the root cuts are saved then read them in
00424     const BCP_string& cutfile = p.param(BCP_tm_par::ReadRootCutsFrom);
00425     if (cutfile.length() > 0) {
00426         BCP_buffer& buf = p.msg_buf;
00427         buf.clear();
00428         FILE* f = fopen(cutfile.c_str(), "r");
00429         size_t size;
00430         if (fread(&size, 1, sizeof(size), f) != sizeof(size))
00431             throw BCP_fatal_error("ReadRootCutsFrom read error.\n");
00432         char * data = new char[size];
00433         if (fread(data, 1, size, f) != size)
00434             throw BCP_fatal_error("ReadRootCutsFrom read error.\n");
00435         fclose(f);
00436         buf.set_content(data, size, 0, BCP_Msg_NoMessage);
00437         delete[] data;
00438       
00439         int num;
00440         buf.unpack(num);
00441         added_cuts.reserve(added_cuts.size() + num);
00442         for (int i = 0; i < num; ++i) {
00443             added_cuts.unchecked_push_back(BCP_tm_unpack_root_cut(p));
00444         }
00445     }
00446 
00447     p.user->create_root(added_vars, added_cuts, user_data);
00448 
00449     BCP_node_change* root_changes = new BCP_node_change;
00450     root_changes->core_change._storage = BCP_Storage_WrtCore;
00451 
00452     if (added_vars.size() > 0) {
00453         const int num = added_vars.size();
00454         BCP_obj_set_change& vc = root_changes->var_change;
00455         vc._change.reserve(num);
00456         vc._new_objs.reserve(num);
00457         int ind = p.next_var_index_set_start;
00458         for (int i = 0; i < num; ++i) {
00459             BCP_var* var = added_vars[i];
00460             vc._new_objs.unchecked_push_back(ind);
00461             p.vars_local[ind] = var;
00462             var->set_bcpind(ind++);
00463             vc._change.unchecked_push_back(BCP_obj_change(var->lb(), var->ub(),
00464                                                           var->status()));
00465         }
00466         p.next_var_index_set_start = ind;
00467     }
00468 
00469     if (added_cuts.size() > 0) {
00470         const int num = added_cuts.size();
00471         BCP_obj_set_change& cc = root_changes->cut_change;
00472         cc._change.reserve(num);
00473         cc._new_objs.reserve(num);
00474         int ind = p.next_cut_index_set_start;
00475         for (int i = 0; i < num; ++i) {
00476             BCP_cut* cut = added_cuts[i];
00477             cc._new_objs.unchecked_push_back(ind);
00478             p.cuts_local[ind] = cut;
00479             cut->set_bcpind(ind++);
00480             cc._change.unchecked_push_back(BCP_obj_change(cut->lb(), cut->ub(),
00481                                                           cut->status()));
00482         }
00483         p.next_cut_index_set_start = ind;
00484     }
00485 
00486     BCP_tm_node* root = new BCP_tm_node(0, root_changes);
00487 
00488     root->_data._user = user_data;
00489 
00490     root->_core_storage = root->_data._desc->core_change.storage();
00491     root->_var_storage = BCP_Storage_Explicit;
00492     root->_cut_storage = BCP_Storage_Explicit;
00493     root->_ws_storage = BCP_Storage_NoData;
00494 
00495     return root;
00496 }
00497 
00498 //#############################################################################
00499 
00500 void BCP_tm_tasks_before_new_phase(BCP_tm_prob& p)
00501 {
00502     if (p.param(BCP_tm_par::TmVerb_NewPhaseStart)) {
00503         printf("##########################################################\n");
00504         printf("TM: Starting phase %i\n", p.phase);
00505         printf("##########################################################\n");
00506     }
00507 
00508     if (p.phase > 0) {
00509         // Notify the LPs about the start of the new phase and get back the
00510         // timing data for the previous phase
00511         BCP_tm_notify_about_new_phase(p);
00512 
00513         // print statistics about the previous phase
00514         BCP_tm_wrapup(&p, 0, 0, 0, false); // false refers to not being final
00515 
00516         // trim the tree if needed and possible
00517         if (p.param(BCP_tm_par::TrimTreeBeforeNewPhase) && p.has_ub())
00518             BCP_tm_trim_tree_wrapper(p, true /* called between phases */);
00519     }
00520 
00521     if (p.candidate_list.getTree() && !p.candidate_list.empty()) {
00522         throw BCP_fatal_error("\
00523 BCP_tm_tasks_before_new_phase: candidate_list should be empty!\n");
00524     }
00525 
00526     // build up candidates. initialize the candidate list comparison function.
00527     p.current_phase_colgen = BCP_DoNotGenerateColumns_Fathom;
00528     p.candidate_list.setTree(NULL);
00529     CoinSearchTreeBase* candidates = NULL;
00530     p.user->init_new_phase(p.phase, p.current_phase_colgen, candidates);
00531     if (candidates == NULL) {
00532         candidates = new CoinSearchTree<CoinSearchTreeCompareBest>;
00533     }
00534     p.candidate_list.setTree(candidates);
00535    
00536     for (int i = p.next_phase_nodes.size() - 1; i >= 0; --i) {
00537         p.candidate_list.push(p.next_phase_nodes[i]);
00538     }
00539 
00540     p.next_phase_nodes.clear();
00541 }
00542 
00543 //#############################################################################
00544 
00545 

Generated on Tue Mar 30 03:04:33 2010 by  doxygen 1.4.7