/**
 * Tell whether two NUL-terminated strings have identical contents.
 *
 * Serves as the equality predicate for keyword matching and for unique()
 * on the machine-name arrays.
 *
 * @param str0 first string; must not be null
 * @param str1 second string; must not be null
 * @return true when strcmp() reports the strings equal, false otherwise
 */
static bool str_eq(const char *str0, const char *str1)
{
    return strcmp(str0, str1) == 0;
}
/**
 * Strict "less than" ordering on NUL-terminated strings.
 *
 * Serves as the comparator for sort() and the set algorithms applied to
 * the machine-name arrays.
 *
 * @param str0 left-hand string; must not be null
 * @param str1 right-hand string; must not be null
 * @return non-zero when strcmp() orders str0 before str1, zero otherwise
 */
static int str_lt(const char *str0, const char *str1)
{
    return strcmp(str0, str1) < 0;
}
// Forward declaration of stop(); it is called with an error message on every
// fatal path in this file.
// NOTE(review): the body is not visible in this excerpt - presumably it reports
// msg and terminates the program; confirm.
35 stop(
const char * msg);
// Program entry point. Takes the name of a control file and, optionally, the
// task id of the tree manager (TM) written as "t<hex>".
37 int main(
int argc,
char** argv)
// Ask PVM for the task id of this process.
39 int pid = pvm_mytid();
// Only one or two command-line arguments are accepted.
41 if (argc != 2 && argc != 3)
42 stop(
"Usage: tm_driver <control_file> [TM_tid]\n");
45 char * control_file = strdup(argv[1]);
// NOTE(review): argv[2] only exists when argc == 3; the guard around this
// call is not visible in this excerpt - confirm it is there.
48 sscanf(argv[2],
"t%x", &tm_tid);
// Fetch the current virtual-machine configuration: host count, architecture
// count and the host table.
55 struct pvmhostinfo *hostp = 0;
56 info = pvm_config(&nhost, &narch, &hostp);
// Counters and arrays for the requests that will be read from the control file.
59 int to_delete_size = 0;
60 char ** to_delete = 0;
64 int delete_proc_num = 0;
// First pass over the control file: each line is split into a keyword and a
// value, and the keyword is matched against the known BCP_* entries.
// NOTE(review): the branch bodies are not visible in this excerpt - presumably
// they only count entries so the arrays can be sized afterwards; confirm.
81 ifstream ctl(control_file);
83 stop(
"Cannot open parameter file... Aborting.\n");
86 const int MAX_PARAM_LINE_LENGTH = 1024;
87 char line[MAX_PARAM_LINE_LENGTH+1], *end_of_line, *keyword, *value, *ctmp;
// Read one line into the fixed-size buffer.
90 ctl.get(line, MAX_PARAM_LINE_LENGTH);
// Over-long lines are treated as a fatal error.
94 printf(
"Too long (>= %i chars) line in the parameter file.\n",
95 MAX_PARAM_LINE_LENGTH);
96 stop(
"This is absurd. Aborting.\n");
99 end_of_line = line + strlen(line);
// Tokenize: keyword starts at the first printable character and ctmp marks the
// first whitespace after it; value and its end are located the same way.
// NOTE(review): isgraph/isspace are applied to plain char elements; negative
// char values are undefined behavior for the <ctype.h> classifiers.
101 keyword = find_if(line, end_of_line, isgraph);
102 if (keyword == end_of_line)
104 ctmp = find_if(keyword, end_of_line, isspace);
105 if (ctmp == end_of_line)
110 value = find_if(ctmp, end_of_line, isgraph);
111 if (value == end_of_line)
114 ctmp = find_if(value, end_of_line, isspace);
// Dispatch on the keyword.
// NOTE(review): str_eq compares whole strings, so the keyword must have been
// NUL-terminated at its end; that step is not visible in this excerpt.
118 if (
str_eq(keyword,
"BCP_delete_machine")) {
120 }
else if (
str_eq(keyword,
"BCP_add_machine")) {
122 }
else if (
str_eq(keyword,
"BCP_delete_proc")) {
124 }
else if (
str_eq(keyword,
"BCP_lp_process")) {
126 }
else if (
str_eq(keyword,
"BCP_cg_process")) {
128 }
else if (
str_eq(keyword,
"BCP_vg_process")) {
130 }
else if (
str_eq(keyword,
"BCP_cp_process")) {
132 }
else if (
str_eq(keyword,
"BCP_vp_process")) {
// Allocate the request arrays, sized by the counters gathered so far. The
// machine and process-deletion arrays are only allocated when non-empty.
138 if (to_delete_size > 0) {
139 to_delete =
new char*[to_delete_size];
142 if (to_add_size > 0) {
143 to_add =
new char*[to_add_size];
146 if (delete_proc_num > 0) {
147 tid_delete =
new int[delete_proc_num];
// One machine-name array per process type (lp, cg, vg, cp, vp).
// NOTE(review): any guards around these five allocations are not visible in
// this excerpt.
151 lp_mach =
new char*[lp_num];
155 cg_mach =
new char*[cg_num];
159 vg_mach =
new char*[vg_num];
163 cp_mach =
new char*[cp_num];
167 vp_mach =
new char*[vp_num];
// Second pass over the control file: same tokenization as the first pass, but
// this time every value is stored in the array belonging to its keyword.
// NOTE(review): the counters are reused as write indices below, so they must
// have been reset to zero before this pass; that reset (and closing the stream
// before re-opening it) is not visible in this excerpt - confirm.
171 ctl.open(control_file);
173 ctl.get(line, MAX_PARAM_LINE_LENGTH);
// Over-long lines are treated as a fatal error.
177 printf(
"Too long (>= %i chars) line in the parameter file.\n",
178 MAX_PARAM_LINE_LENGTH);
179 stop(
"This is absurd. Aborting.\n");
182 end_of_line = line + strlen(line);
// Locate the keyword token and the value token on the line.
// NOTE(review): isgraph/isspace are applied to plain char elements; negative
// char values are undefined behavior for the <ctype.h> classifiers.
184 keyword = find_if(line, end_of_line, isgraph);
185 if (keyword == end_of_line)
187 ctmp = find_if(keyword, end_of_line, isspace);
188 if (ctmp == end_of_line)
193 value = find_if(ctmp, end_of_line, isgraph);
194 if (value == end_of_line)
197 ctmp = find_if(value, end_of_line, isspace);
// Store a private copy of the value under the matching keyword; each counter
// is post-incremented so it ends up holding the number of stored entries.
201 if (
str_eq(keyword,
"BCP_delete_machine")) {
202 to_delete[to_delete_size++] = strdup(value);
203 }
else if (
str_eq(keyword,
"BCP_add_machine")) {
204 to_add[to_add_size++] = strdup(value);
205 }
else if (
str_eq(keyword,
"BCP_delete_proc")) {
// Process ids are written as "t<hex>".
// NOTE(review): %x expects an unsigned int*, while tid_delete holds int.
206 sscanf(value,
"t%x", &tid_delete[delete_proc_num++]);
207 }
else if (
str_eq(keyword,
"BCP_lp_process")) {
208 lp_mach[lp_num++] = strdup(value);
209 }
else if (
str_eq(keyword,
"BCP_cg_process")) {
210 cg_mach[cg_num++] = strdup(value);
211 }
else if (
str_eq(keyword,
"BCP_vg_process")) {
212 vg_mach[vg_num++] = strdup(value);
213 }
else if (
str_eq(keyword,
"BCP_cp_process")) {
214 cp_mach[cp_num++] = strdup(value);
215 }
else if (
str_eq(keyword,
"BCP_vp_process")) {
216 vp_mach[vp_num++] = strdup(value);
// Reject duplicate entries: after sorting, unique() would shorten the range
// if any machine name occurred more than once.
226 if (to_delete_size > 0) {
227 sort(to_delete, to_delete + to_delete_size,
str_lt);
228 last = unique(to_delete, to_delete + to_delete_size,
str_eq);
229 if (to_delete_size != last - to_delete)
230 stop(
"A machine to be deleted is listed twice... Aborting.\n");
234 if (to_add_size > 0) {
235 sort(to_add, to_add + to_add_size,
str_lt);
236 last = unique(to_add, to_add + to_add_size,
str_eq);
237 if (to_add_size != last - to_add)
238 stop(
"A machine to be added is listed twice... Aborting.\n");
// Sorted copy of the host names currently in the virtual machine, with room
// left for the machines that are going to be added.
242 char ** mach_list =
new char*[nhost + to_add_size];
243 for (i = 0; i < nhost; ++i)
244 mach_list[i] = strdup(hostp[i].hi_name);
245 sort(mach_list, mach_list + nhost,
str_lt);
// Scratch array of the same capacity, used as the output of the set operations.
247 char ** current_list =
new char*[nhost + to_add_size];
// Deletions: every machine to delete must currently exist, i.e. the difference
// (to_delete minus current hosts) must be empty. Then the deleted machines are
// removed from the list and the two arrays trade roles.
// NOTE(review): the trailing arguments of these calls (output iterator and
// comparator) are not visible in this excerpt.
250 if (to_delete_size > 0) {
251 last = set_difference(to_delete, to_delete + to_delete_size,
252 mach_list, mach_list + nhost,
254 if (last != current_list)
255 stop(
"A nonexisting machine is to be deleted... Aborting.\n");
256 last = set_difference(mach_list, mach_list + nhost,
257 to_delete, to_delete + to_delete_size,
259 ::swap(mach_list, current_list);
// Additions: none of the machines to add may already be present, i.e. the
// intersection must be empty. Then both sorted ranges are merged.
// NOTE(review): the second range still ends at mach_list + nhost here although
// deletions may have shortened the list; lines are missing from this excerpt,
// so check the original before acting on this.
263 if (to_add_size > 0) {
264 last = set_intersection(to_add, to_add + to_add_size,
265 mach_list, mach_list + nhost,
267 if (last != current_list)
268 stop(
"A machine to be added is already there... Aborting.\n");
269 last = merge(to_add, to_add + to_add_size,
270 mach_list, mach_list + nhost,
272 ::swap(mach_list, current_list);
// Number of machines in the final configuration.
275 const int mach_num = nhost - to_delete_size + to_add_size;
// For each process type: sort the requested machine names and verify that all
// of them appear in the final machine list. set_difference() writes the names
// missing from mach_list into current_list, so a returned iterator different
// from current_list means at least one name is missing.
281 sort(lp_mach, lp_mach + lp_num,
str_lt);
282 if (set_difference(lp_mach, lp_mach + lp_num,
283 mach_list, mach_list + mach_num,
284 current_list,
str_lt) != current_list)
285 stop(
"An lp machine is not in the final machine list... Aborting.\n");
// Same check for the cg processes.
288 sort(cg_mach, cg_mach + cg_num,
str_lt);
289 if (set_difference(cg_mach, cg_mach + cg_num,
290 mach_list, mach_list + mach_num,
291 current_list,
str_lt) != current_list)
292 stop(
"An cg machine is not in the final machine list... Aborting.\n");
// Same check for the vg processes.
295 sort(vg_mach, vg_mach + vg_num,
str_lt);
296 if (set_difference(vg_mach, vg_mach + vg_num,
297 mach_list, mach_list + mach_num,
298 current_list,
str_lt) != current_list)
299 stop(
"An vg machine is not in the final machine list... Aborting.\n");
// Same check for the cp processes.
302 sort(cp_mach, cp_mach + cp_num,
str_lt);
303 if (set_difference(cp_mach, cp_mach + cp_num,
304 mach_list, mach_list + mach_num,
305 current_list,
str_lt) != current_list)
306 stop(
"An cp machine is not in the final machine list... Aborting.\n");
// Same check for the vp processes.
309 sort(vp_mach, vp_mach + vp_num,
str_lt);
310 if (set_difference(vp_mach, vp_mach + vp_num,
311 mach_list, mach_list + mach_num,
312 current_list,
str_lt) != current_list)
313 stop(
"An vp machine is not in the final machine list... Aborting.\n");
// Safety check: refuse to delete the host that the TM itself runs on. dtid is
// the host id PVM reports for the TM's task id; the matching host entry's name
// is compared against every machine scheduled for deletion.
320 if (to_delete_size > 0) {
321 const int dtid = pvm_tidtohost(tm_tid);
322 for (i = 0; i < nhost; ++i) {
323 if (hostp[i].hi_tid == dtid)
324 for (
int j = 0;
j < to_delete_size; ++
j) {
325 if (
str_eq(hostp[i].hi_name, to_delete[
j]))
326 stop(
"Can't delete the machine the TM is on. Aborting.\n");
// Safety check: the TM must not be among the processes to kill.
332 if (delete_proc_num > 0) {
333 if (find(tid_delete, tid_delete + delete_proc_num, tm_tid) !=
334 tid_delete + delete_proc_num)
335 stop(
"Can't delete the TM... Aborting.\n");
// Apply the host changes. infos receives the per-host status codes and is
// sized for the larger of the two requests so it can serve both calls.
// NOTE(review): no release of infos is visible in this excerpt.
339 if (to_delete_size > 0 || to_add_size > 0) {
340 int * infos =
new int[max(to_delete_size, to_add_size)];
341 if (to_delete_size > 0)
342 if (pvm_delhosts(to_delete, to_delete_size, infos) < 0) {
343 printf(
"Failed to delete all specified machines...\n");
344 stop(
"Please check the situation manually... Aborting.\n");
// NOTE(review): the guard for the add call (presumably to_add_size > 0) is not
// visible in this excerpt.
347 if (pvm_addhosts(to_add, to_add_size, infos) < 0) {
348 printf(
"Failed to add all specified machines...\n");
349 stop(
"Please check the situation manually... Aborting.\n");
// Kill every process that was listed for deletion.
354 for (i = 0; i < delete_proc_num; ++i)
355 pvm_kill(tid_delete[i]);
// Compute the size of the message for the TM: one int length prefix per
// machine name, ...
359 int len = (lp_num + cg_num + vg_num + cp_num + vp_num) *
sizeof(
int);
// ... one int count for each of the five process types, ...
361 len += 5 *
sizeof(
int);
// ... and the characters of every name, without NUL terminators.
362 for (i = 0; i < lp_num; ++i) len += strlen(lp_mach[i]);
363 for (i = 0; i < cg_num; ++i) len += strlen(cg_mach[i]);
364 for (i = 0; i < vg_num; ++i) len += strlen(vg_mach[i]);
365 for (i = 0; i < cp_num; ++i) len += strlen(cp_mach[i]);
366 for (i = 0; i < vp_num; ++i) len += strlen(vp_mach[i]);
368 char * buf =
new char[len];
// Serialize per process type: the count, then for each machine the name length
// followed by the name bytes.
// NOTE(review): every memcpy below targets buf; the statements that advance
// the write position are not visible in this excerpt - confirm they exist,
// otherwise each copy overwrites the previous one.
370 memcpy(buf, &lp_num,
sizeof(
int));
372 for (i = 0; i < lp_num; ++i) {
373 const int l = strlen(lp_mach[i]);
374 memcpy(buf, &l,
sizeof(
int));
376 memcpy(buf, lp_mach[i], l);
// cg section.
380 memcpy(buf, &cg_num,
sizeof(
int));
382 for (i = 0; i < cg_num; ++i) {
383 const int l = strlen(cg_mach[i]);
384 memcpy(buf, &l,
sizeof(
int));
386 memcpy(buf, cg_mach[i], l);
// vg section.
390 memcpy(buf, &vg_num,
sizeof(
int));
392 for (i = 0; i < vg_num; ++i) {
393 const int l = strlen(vg_mach[i]);
394 memcpy(buf, &l,
sizeof(
int));
396 memcpy(buf, vg_mach[i], l);
// cp section.
400 memcpy(buf, &cp_num,
sizeof(
int));
402 for (i = 0; i < cp_num; ++i) {
403 const int l = strlen(cp_mach[i]);
404 memcpy(buf, &l,
sizeof(
int));
406 memcpy(buf, cp_mach[i], l);
// vp section.
410 memcpy(buf, &vp_num,
sizeof(
int));
412 for (i = 0; i < vp_num; ++i) {
413 const int l = strlen(vp_mach[i]);
414 memcpy(buf, &l,
sizeof(
int));
416 memcpy(buf, vp_mach[i], l);
// Pack the serialized bytes into a fresh raw-encoded PVM send buffer.
// NOTE(review): the send call itself is not visible in this excerpt.
422 pvm_initsend(PvmDataRaw);
423 pvm_pkbyte(buf, len, 1);
// Wait for a message from the TM with any tag (-1) and read its metadata.
426 int bufid = pvm_recv(tm_tid, -1);
427 int bytes = 0, msgtag = 0;
428 pvm_bufinfo(bufid, &bytes, &msgtag, &tm_tid);
// NOTE(review): the condition that leads to this failure exit is not visible
// in this excerpt.
430 stop(
"TM had difficulties. Please check the situation manually.\n");
// Body fragment that locates the tree manager among the running PVM tasks; it
// reads my_tid and reads/updates tm_tid.
// NOTE(review): the function header and several statements are not visible in
// this excerpt.
// Fetch the list of all tasks in the virtual machine.
443 struct pvmtaskinfo *taskp = 0;
445 pvm_tasks(0, &ntask, &taskp);
446 int * tids =
new int[ntask];
// Collect candidate tids into tids[0..k). The two tests below look at the
// parent tid and at this process's own tid.
// NOTE(review): the actions taken by the tests are not visible - presumably
// tasks that have a parent, and this driver itself, are skipped; confirm.
449 for (i = 0, k = 0; i < ntask; ++i) {
450 if (taskp[i].ti_ptid != 0)
452 if (taskp[i].ti_tid == my_tid)
455 tids[k++] = taskp[i].ti_tid;
// Look for the requested TM tid among the candidates.
460 for (i = 0; i <
k; ++i)
461 if (tids[i] == tm_tid)
464 stop(
"No TM candidate has the given tid... Aborting.\n");
// Start a fresh raw-encoded send buffer for querying the candidates.
467 pvm_initsend(PvmDataRaw);
// NOTE(review): the timeout is 15 seconds but the message below says 30; the
// receive call is not visible in this excerpt - confirm which is intended.
470 struct timeval tout = {15, 0};
473 stop(
"No TM candidates replied within 30 seconds... Aborting.\n");
// Read the reply's metadata; the sender's tid is written into tm_tid.
474 int bytes = 0, msgtag = 0;
475 pvm_bufinfo(bufid, &bytes, &msgtag, &tm_tid);
static bool str_eq(const char *str0, const char *str1)
void fint fint fint real fint real real real real real real real real real fint real fint fint fint real fint fint fint fint * info
static int str_lt(const char *str0, const char *str1)
int main(int argc, char *argv[])
static void find_tree_manager(const int my_tid, int &tm_tid)
static void stop(const char *msg)