/home/coin/SVN-release/OS-2.4.1/Bcp/src/TM/BCP_tm_main.cpp

Go to the documentation of this file.
00001 // Copyright (C) 2000, International Business Machines
00002 // Corporation and others.  All Rights Reserved.
00003 #include <cstdio>
00004 #include <cerrno>
00005 #include <cmath>
00006 #include <queue>
00007 #ifdef _MSC_VER
00008 #include <process.h>
00009 #endif 
00010 
00011 #include "CoinTime.hpp"
00012 
00013 #include "BcpConfig.h"
00014 #include "BCP_os.hpp"
00015 
00016 #include "BCP_USER.hpp"
00017 #include "BCP_string.hpp"
00018 #include "BCP_vector.hpp"
00019 #include "BCP_buffer.hpp"
00020 #include "BCP_message.hpp"
00021 #include "BCP_var.hpp"
00022 #include "BCP_cut.hpp"
00023 #include "BCP_node_change.hpp"
00024 #include "BCP_tm.hpp"
00025 #include "BCP_tm_functions.hpp"
00026 #include "BCP_main_fun.hpp"
00027 
00028 #include "BCP_tm_user.hpp"
00029 #include "BCP_lp_user.hpp"
00030 
00031 #include "BCP_message_single.hpp"
00032 #include "BCP_message_mpi.hpp"
00033 #include "BCP_message_pvm.hpp"
00034 
00035 //#############################################################################
00036 
00037 int bcp_main(int argc, char* argv[], USER_initialize* user_init)
00038 {
00039     BCP_message_environment* msg_env = user_init->msgenv_init(argc, argv);
00040     BCP_single_environment* single_env =
00041         dynamic_cast<BCP_single_environment*>(msg_env);
00042     if (single_env) {
00043         // this way when register_process takes over the execution the single
00044         // environment will have access to the command line arguments and can
00045         // parse it.
00046         single_env->set_arguments(argc, argv);
00047     }
00048 
00049     int my_id = msg_env->register_process(user_init);
00050 
00051     if (single_env) {
00052         // register process did everything. We just return.
00053         delete msg_env;
00054         return 0;
00055     }
00056 
00057     int parent = msg_env->parent_process();
00058 
00059     if (parent == -1) {
00060         //We compute the real numeber of arguments because Mpi can change
00061         //list of arguments
00062         int cnt;
00063         for (cnt = 0; cnt < argc; ++cnt) {
00064             if (argv[cnt] == NULL)
00065                 break;
00066         }
00067         BCP_tm_main(msg_env, user_init, cnt, argv);
00068     } else {
00069         // In MPI all processes get the argument list, so we must not check
00070         // this
00071 #if defined(COIN_HAS_MPI)
00072         BCP_mpi_environment* mpi_env =
00073             dynamic_cast<BCP_mpi_environment*>(msg_env);
00074         if (!mpi_env && argc != 1) {
00075             throw BCP_fatal_error("The slaves do not take any argument!\n");
00076         }
00077 #endif
00078         BCP_buffer msg_buf;
00079         msg_env->receive(parent, BCP_Msg_AnyMessage, msg_buf, -1);
00080         if (msg_buf.msgtag() != BCP_Msg_ProcessType) {
00081             throw BCP_fatal_error("The first message is not ProcessType!!!\n");
00082         }
00083         // got a new identity, act on it
00084         BCP_process_t ptype;
00085         double ub;
00086         msg_buf.unpack(ptype);
00087         msg_buf.unpack(ub);
00088         while (ptype != BCP_ProcessType_EndProcess) {
00089             const bool maxheap_set = false;
00090             switch (ptype) {
00091             case BCP_ProcessType_LP:
00092               if (maxheap_set) {
00093                  printf("usedheap before LP: %li\n", BCP_used_heap());
00094               }
00095               ptype = BCP_lp_main(msg_env, user_init, my_id, parent, ub);
00096               if (maxheap_set) {
00097                   printf("usedheap after LP: %li\n", BCP_used_heap());
00098               }
00099               break;
00100             case BCP_ProcessType_CP:
00101               // BCP_cp_main(msg_env, user_init, my_id, parent, ub);
00102               break;
00103             case BCP_ProcessType_VP:
00104               // BCP_vp_main(msg_env, user_init, my_id, parent, ub);
00105               break;
00106             case BCP_ProcessType_CG:
00107               ptype = BCP_cg_main(msg_env, user_init, my_id, parent, ub);
00108               break;
00109             case BCP_ProcessType_VG:
00110               ptype = BCP_vg_main(msg_env, user_init, my_id, parent, ub);
00111               break;
00112             case BCP_ProcessType_TS:
00113               if (maxheap_set) {
00114                   printf("usedheap before TS: %li\n", BCP_used_heap());
00115               }
00116               ptype = BCP_tmstorage_main(msg_env, user_init, my_id, parent, ub);
00117               if (maxheap_set) {
00118                   printf("usedheap after TS: %li\n", BCP_used_heap());
00119               }
00120               break;
00121             case BCP_ProcessType_Any:
00122               throw BCP_fatal_error("\
00123 New process identity is BCP_ProcessType_Any!\n");
00124             case BCP_ProcessType_TM:
00125               throw BCP_fatal_error("\
00126 New process identity is BCP_ProcessType_TM!\n");
00127             case BCP_ProcessType_EndProcess:
00128               break;
00129             }
00130         }
00131     }
00132     delete msg_env;
00133 
00134     return 0;
00135 }
00136 
00137 //#############################################################################
00138 
00139 void
00140 BCP_tm_main(BCP_message_environment* msg_env,
00141             USER_initialize* user_init,
00142             const int argnum, const char* const * arglist)
00143 {
00144     // If we ever get here then the environment is parallel
00145     
00146     // Start to create the universe... (we don't have a user universe yet).
00147     BCP_tm_prob p;
00148 
00149     // If we ever get here then the environment is parallel
00150     p.par.set_entry(BCP_tm_par::MessagePassingIsSerial,false);
00151     p.slave_pars.lp.set_entry(BCP_lp_par::MessagePassingIsSerial,false);
00152     p.slave_pars.cg.set_entry(BCP_cg_par::MessagePassingIsSerial,false);
00153     p.slave_pars.vg.set_entry(BCP_vg_par::MessagePassingIsSerial,false);
00154     /*
00155       p.slave_pars.cp.set_entry(BCP_cp_par::MessagePassingIsSerial,false);
00156       p.slave_pars.vp.set_entry(BCP_vp_par::MessagePassingIsSerial,false);
00157     */
00158 
00159     // this also reads in the parameters from a file
00160     BCP_tm_parse_command_line(p, argnum, arglist);
00161    
00162     BCP_buffer msg_buf;
00163     p.msg_env = msg_env;
00164 
00165     //We check if the number of BCP processes is the same as in MPI
00166 #if defined(COIN_HAS_MPI)
00167     BCP_mpi_environment* mpi_env = dynamic_cast<BCP_mpi_environment*>(msg_env);
00168     if (mpi_env) {
00169         const int n_proc =
00170             p.param(BCP_tm_par::LpProcessNum) +
00171             p.param(BCP_tm_par::CgProcessNum) +
00172             p.param(BCP_tm_par::VgProcessNum) +
00173             p.param(BCP_tm_par::CpProcessNum) +
00174             p.param(BCP_tm_par::VpProcessNum) + 1;
00175         if (p.msg_env->num_procs() < n_proc) {
00176             throw BCP_fatal_error("\
00177 Number of process in parameter file %d > n_proc in mpirun -np %d!\n",
00178                                   n_proc, p.msg_env->num_procs());
00179         }
00180     }
00181 #endif
00182 
00183     p.stat.set_num_lp(p.param(BCP_tm_par::LpProcessNum));
00184 
00185     p.start_time = CoinWallclockTime();
00186 
00187     FILE* logfile = 0;
00188 
00189     const BCP_string& log = p.par.entry(BCP_tm_par::LogFileName);
00190     if (! (p.par.entry(BCP_tm_par::LogFileName) == "")) {
00191         int len = log.length();
00192         char *logname = new char[len + 300];
00193         memcpy(logname, log.c_str(), len);
00194         memcpy(logname + len, "-tm-", 4);
00195         len += 4;
00196         gethostname(logname + len, 255);
00197         len = strlen(logname);
00198         logname[len++] = '-';
00199         sprintf(logname + len, "%i", static_cast<int>(GETPID));
00200         logfile = freopen(logname, "a", stdout);
00201         if (logfile == 0) {
00202             fprintf(stderr, "Error while redirecting stdout: %i\n", errno);
00203             abort();
00204         }
00205         setvbuf(logfile, NULL, _IOLBF, 0); // make it line buffered
00206         delete[] logname;
00207     } else {
00208         setvbuf(stdout, NULL, _IOLBF, 0); // make it line buffered
00209     }
00210 
00211     // BCP_tm_user_init() returns a BCP_tm_user* and that will be part of p.
00212     // Also, it should take care of every I/O, heuristic startup, etc. the user
00213     // wants to do. Wreak havoc in p if (s)he wants.
00214     // BUT: once this function returns, the processes designated to be part of
00215     // BCP must be idle and waiting for a message. See the
00216     // BCP_slave_process_stub() function below. 
00217     p.user = user_init->tm_init(p, argnum, arglist);
00218     p.user->setTmProblemPointer(&p);
00219     p.packer = user_init->packer_init(p.user);
00220     p.packer->user_class = p.user;
00221 
00222     // Set the core (variables & cuts)
00223     p.core = BCP_tm_create_core(p);
00224     p.core_as_change = new BCP_problem_core_change;
00225     *p.core_as_change = *p.core;
00226 
00227     // Fire up the LP/CG/CP/VG/VP processes
00228     // Actually, this is firing up enough copies of self.
00229     BCP_tm_start_processes(p);
00230     p.lp_scheduler.
00231       setParams(p.param(BCP_tm_par::LPscheduler_OverEstimationStatic),
00232                 p.param(BCP_tm_par::LPscheduler_SwitchToRateThreshold),
00233                 10.0, /* estimated root time */
00234                 p.param(BCP_tm_par::LPscheduler_FactorTimeHorizon),
00235                 p.param(BCP_tm_par::LPscheduler_OverEstimationRate),
00236                 p.param(BCP_tm_par::LPscheduler_MaxNodeIdRatio),
00237                 p.param(BCP_tm_par::LPscheduler_MaxNodeIdNum),
00238                 p.param(BCP_tm_par::LPscheduler_MaxSbIdNum),
00239                 p.param(BCP_tm_par::LPscheduler_MinSbIdNum));
00240 
00241     // Notify the LP/CG/CP/VG/VP processes about their identity. Also, send out
00242     // their parameters, core and user info.
00243     BCP_tm_notify_processes(p);
00244 
00245 #if ! defined(BCP_ONLY_LP_PROCESS_HANDLING_WORKS)
00246     // Initialize the number of leaves assigned to CP's and VP's as 0
00247     if (p.param(BCP_tm_par::CpProcessNum) > 0) {
00248       for (int i = p.slaves.cp->procs().size() - 1; i >= 0; --i)
00249         p.leaves_per_cp.push_back(std::make_pair(p.slaves.cp->procs()[i], 0));
00250     }
00251     if (p.param(BCP_tm_par::VpProcessNum) > 0) {
00252       for (int i = p.slaves.vp->procs().size() - 1; i >= 0; --i)
00253         p.leaves_per_vp.push_back(std::make_pair(p.slaves.vp->procs()[i], 0));
00254     }
00255 #endif
00256 
00257     // Initialize the root of the search tree (can't invoke directly
00258     // p.user->create_root(), b/c the root might contain extra vars/cuts and
00259     // it's better if we take care of inserting them into the appropriate data
00260     // structures.
00261     BCP_tm_node* root = BCP_tm_create_root(p);
00262 
00263     p.next_phase_nodes.push_back(root);
00264     p.search_tree.insert(root);
00265     p.lower_bounds.insert(root->getTrueLB());
00266 
00267     BCP_sanity_checks(p);
00268 
00269     //-------------------------------------------------------------------------
00270     // The main loop
00271     //-------------------------------------------------------------------------
00272     bool something_died = false;
00273     for ( p.phase = 0; true ; ++p.phase) {
00274         // insert the nodes in next_phase_nodes into candidates, print out some
00275         // statistics about the previous phase (if there was one) and do some
00276         // other stuff, too.
00277         BCP_tm_tasks_before_new_phase(p);
00278         // do one phase (return true/false depending on success)
00279         something_died = BCP_tm_do_one_phase(p);
00280         // If nothing is left for the next phase or if something has died then
00281         // quit the infinite loop.
00282         if (p.next_phase_nodes.size() == 0 || something_died)
00283             break;
00284     }
00285 
00286     //-------------------------------------------------------------------------
00287     // Everything is done
00288     //-------------------------------------------------------------------------
00289     // first let the processes know that they're not needed in BCP any more,
00290     // they can start idling (this will be used when we'll loop around in TM).
00291     // The processes will respond by sending statistics.
00292     p.lp_scheduler.update_idle_times();
00293     BCP_tm_idle_processes(p);
00294 
00295     BCP_tm_wrapup(&p, 0, 0, 0, true);
00296 
00297     // Finally stop all the processes.
00298     BCP_tm_stop_processes(p);
00299 
00300     if (logfile)
00301         fclose(logfile);
00302 }
00303 
00304 //#############################################################################
00305 
00306 bool BCP_tm_do_one_phase(BCP_tm_prob& p)
00307 {
00308     BCP_buffer& buf = p.msg_buf;
00309     // While there are nodes waiting to be processed (or being processed) we
00310     // don't go to the next phase
00311     while (!p.candidate_list.empty() > 0 || p.lp_scheduler.numNodeIds() > 0){
00312         // Fill up as many free LP processes as we can
00313         if (BCP_tm_start_new_nodes(p) == BCP_NodeStart_Error)
00314             // Error indicates that something has died
00315             return true;
00316         buf.clear();
00317         // Check if need to balance data
00318         p.need_a_TS = ! BCP_tm_is_data_balanced(p);
00319         p.need_a_TS = BCP_tm_balance_data(p);
00320 
00321         // Process incoming messages. If there are no active nodes left then
00322         // timeout is set to 0, so we just check the queue, but not wait.
00323         const int numNodeIds = p.lp_scheduler.numNodeIds();
00324         const double t0 = CoinWallclockTime();
00325         const double timeout = (p.lp_scheduler.numNodeIds() == 0 ?
00326                                 0 : p.param(BCP_tm_par::TmTimeout));
00327         p.msg_env->receive(BCP_AnyProcess, BCP_Msg_AnyMessage, buf, timeout);
00328         const double t1 = CoinWallclockTime();
00329         p.stat.update_wait_time(numNodeIds, t1-t0);
00330 #ifdef COIN_HAS_MPI
00331         p.stat.update_queue_length(numNodeIds, MPIDI_BGLTS_get_num_messages());
00332 #endif
00333         p.stat.print(false /* not final */, t1 - p.start_time);
00334         try {
00335             p.process_message();
00336         }
00337         catch (BCP_fatal_error& err) {
00338             // something is baaaad... e.g. timeout
00339             return true;
00340         }
00341     }
00342     return false;
00343 }
00344 
00345 //#############################################################################
00346 
00347 BCP_problem_core* BCP_tm_create_core(BCP_tm_prob& p)
00348 {
00349     BCP_vec<BCP_var_core*> bvars;
00350     BCP_vec<BCP_cut_core*> bcuts;
00351     BCP_lp_relax* matrix = 0;
00352 
00353     p.user->initialize_core(bvars, bcuts, matrix);
00354 
00355     const int bvarnum = bvars.size();
00356     if (bvarnum > 0) {
00357         int i = p.next_var_index_set_start;
00358         for (i = 0; i < bvarnum; ++i) {
00359             BCP_var_core* var = bvars[i];
00360             // make certain that the bounds of integer vars is integer...
00361             if (var->var_type() != BCP_ContinuousVar) {
00362                 var->set_lb(ceil(var->lb()-1e-8));
00363                 var->set_ub(floor(var->ub()+1e-8));
00364             }
00365             var->set_bcpind(i);
00366             p.vars_local[i] = new BCP_var_core(*var);
00367         }
00368         p.next_var_index_set_start = i;
00369     }
00370 
00371     const int bcutnum = bcuts.size();
00372     if (bcutnum > 0) {
00373         int i = p.next_cut_index_set_start;
00374         for (i = 0; i < bcutnum; ++i) {
00375             BCP_cut_core* cut = bcuts[i];
00376             cut->set_bcpind(i);
00377             p.cuts_local[i] = new BCP_cut_core(*cut);
00378         }
00379         p.next_cut_index_set_start = i;
00380     }
00381 
00382     if (!matrix)
00383         matrix = new BCP_lp_relax;
00384 
00385     return new BCP_problem_core(bvars, bcuts, matrix);
00386 }
00387    
00388 //#############################################################################
00389 
00390 static inline BCP_cut*
00391 BCP_tm_unpack_root_cut(BCP_tm_prob& tm)
00392 {
00393     BCP_cut* cut;
00394     BCP_buffer& buf = tm.msg_buf;
00395     BCP_object_t obj_t;
00396     double lb, ub;
00397     BCP_obj_status stat;
00398     buf.unpack(obj_t).unpack(stat).unpack(lb).unpack(ub);
00399     switch (obj_t) {
00400     case BCP_CoreObj:
00401         cut = new BCP_cut_core(lb, ub);
00402         break;
00403     case BCP_AlgoObj:
00404         cut = tm.packer->unpack_cut_algo(buf);
00405         cut->change_bounds(lb, ub);
00406         break;
00407     default:
00408         throw BCP_fatal_error("BCP_tm_unpack_root_cut: unexpected obj_t.\n");
00409     }
00410     cut->set_bcpind(0);
00411     cut->set_status(stat);
00412 
00413     return cut;
00414 }
00415 
00416 //#############################################################################
00417 
00418 BCP_tm_node* BCP_tm_create_root(BCP_tm_prob& p)
00419 {
00420     BCP_vec<BCP_var*> added_vars;
00421     BCP_vec<BCP_cut*> added_cuts;
00422     BCP_user_data* user_data = 0;
00423 
00424     // If the root cuts are saved then read them in
00425     const BCP_string& cutfile = p.param(BCP_tm_par::ReadRootCutsFrom);
00426     if (cutfile.length() > 0) {
00427         BCP_buffer& buf = p.msg_buf;
00428         buf.clear();
00429         FILE* f = fopen(cutfile.c_str(), "r");
00430         size_t size;
00431         if (fread(&size, 1, sizeof(size), f) != sizeof(size))
00432             throw BCP_fatal_error("ReadRootCutsFrom read error.\n");
00433         char * data = new char[size];
00434         if (fread(data, 1, size, f) != size)
00435             throw BCP_fatal_error("ReadRootCutsFrom read error.\n");
00436         fclose(f);
00437         buf.set_content(data, size, 0, BCP_Msg_NoMessage);
00438         delete[] data;
00439       
00440         int num;
00441         buf.unpack(num);
00442         added_cuts.reserve(added_cuts.size() + num);
00443         for (int i = 0; i < num; ++i) {
00444             added_cuts.unchecked_push_back(BCP_tm_unpack_root_cut(p));
00445         }
00446     }
00447 
00448     p.user->create_root(added_vars, added_cuts, user_data);
00449 
00450     BCP_node_change* root_changes = new BCP_node_change;
00451     root_changes->core_change._storage = BCP_Storage_WrtCore;
00452 
00453     if (added_vars.size() > 0) {
00454         const int num = added_vars.size();
00455         BCP_obj_set_change& vc = root_changes->var_change;
00456         vc._change.reserve(num);
00457         vc._new_objs.reserve(num);
00458         int ind = p.next_var_index_set_start;
00459         for (int i = 0; i < num; ++i) {
00460             BCP_var* var = added_vars[i];
00461             vc._new_objs.unchecked_push_back(ind);
00462             p.vars_local[ind] = var;
00463             var->set_bcpind(ind++);
00464             vc._change.unchecked_push_back(BCP_obj_change(var->lb(), var->ub(),
00465                                                           var->status()));
00466         }
00467         p.next_var_index_set_start = ind;
00468     }
00469 
00470     if (added_cuts.size() > 0) {
00471         const int num = added_cuts.size();
00472         BCP_obj_set_change& cc = root_changes->cut_change;
00473         cc._change.reserve(num);
00474         cc._new_objs.reserve(num);
00475         int ind = p.next_cut_index_set_start;
00476         for (int i = 0; i < num; ++i) {
00477             BCP_cut* cut = added_cuts[i];
00478             cc._new_objs.unchecked_push_back(ind);
00479             p.cuts_local[ind] = cut;
00480             cut->set_bcpind(ind++);
00481             cc._change.unchecked_push_back(BCP_obj_change(cut->lb(), cut->ub(),
00482                                                           cut->status()));
00483         }
00484         p.next_cut_index_set_start = ind;
00485     }
00486 
00487     BCP_tm_node* root = new BCP_tm_node(0, root_changes);
00488 
00489     root->_data._user = user_data;
00490 
00491     root->_core_storage = root->_data._desc->core_change.storage();
00492     root->_var_storage = BCP_Storage_Explicit;
00493     root->_cut_storage = BCP_Storage_Explicit;
00494     root->_ws_storage = BCP_Storage_NoData;
00495 
00496     return root;
00497 }
00498 
00499 //#############################################################################
00500 
00501 void BCP_tm_tasks_before_new_phase(BCP_tm_prob& p)
00502 {
00503     if (p.param(BCP_tm_par::TmVerb_NewPhaseStart)) {
00504         printf("##########################################################\n");
00505         printf("TM: Starting phase %i\n", p.phase);
00506         printf("##########################################################\n");
00507     }
00508 
00509     if (p.phase > 0) {
00510         // Notify the LPs about the start of the new phase and get back the
00511         // timing data for the previous phase
00512         BCP_tm_notify_about_new_phase(p);
00513 
00514         // print statistics about the previous phase
00515         BCP_tm_wrapup(&p, 0, 0, 0, false); // false refers to not being final
00516 
00517         // trim the tree if needed and possible
00518         if (p.param(BCP_tm_par::TrimTreeBeforeNewPhase) && p.has_ub())
00519             BCP_tm_trim_tree_wrapper(p, true /* called between phases */);
00520     }
00521 
00522     if (p.candidate_list.getTree() && !p.candidate_list.empty()) {
00523         throw BCP_fatal_error("\
00524 BCP_tm_tasks_before_new_phase: candidate_list should be empty!\n");
00525     }
00526 
00527     // build up candidates. initialize the candidate list comparison function.
00528     p.current_phase_colgen = BCP_DoNotGenerateColumns_Fathom;
00529     p.candidate_list.setTree(NULL);
00530     CoinSearchTreeBase* candidates = NULL;
00531     p.user->init_new_phase(p.phase, p.current_phase_colgen, candidates);
00532     if (candidates == NULL) {
00533         candidates = new CoinSearchTree<CoinSearchTreeCompareBest>;
00534     }
00535     p.candidate_list.setTree(candidates);
00536    
00537     for (int i = p.next_phase_nodes.size() - 1; i >= 0; --i) {
00538         p.candidate_list.push(p.next_phase_nodes[i]);
00539     }
00540 
00541     p.next_phase_nodes.clear();
00542 }
00543 
00544 //#############################################################################
00545 
00546 

Generated on Thu Nov 10 03:05:39 2011 by  doxygen 1.4.7