BCP_netconfig_pvm.cpp
Go to the documentation of this file.
1 // Copyright (C) 2000, International Business Machines
2 // Corporation and others. All Rights Reserved.
#include <algorithm>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>

#include <pvm3.h>
10 
11 enum messages {
17 };
18 
// Equality predicate on C strings: true iff strcmp() reports no difference
// between str0 and str1.
static inline bool
str_eq(const char * str0, const char * str1)
{
  const int cmp = strcmp(str0, str1);
  return cmp == 0;
}
24 
// Strict "less than" ordering on C strings, as decided by strcmp().
// Returns 1 when str0 orders before str1 and 0 otherwise.
static inline int
str_lt(const char * str0, const char * str1)
{
  const int cmp = strcmp(str0, str1);
  return cmp < 0 ? 1 : 0;
}
30 
31 static void
32 find_tree_manager(const int my_tid, int &tm_tid);
33 
34 static void
35 stop(const char * msg);
36 
37 int main(int argc, char** argv)
38 {
39  int pid = pvm_mytid();
40 
41  if (argc != 2 && argc != 3)
42  stop("Usage: tm_driver <control_file> [TM_tid]\n");
43 
44  int tm_tid = 0;
45  char * control_file = strdup(argv[1]);
46 
47  if (argc == 3)
48  sscanf(argv[2], "t%x", &tm_tid);
49 
50  int info = 0;
51 
52  // Get the machine configuration
53  int nhost = 0;
54  int narch = 0;
55  struct pvmhostinfo *hostp = 0;
56  info = pvm_config(&nhost, &narch, &hostp);
57 
58  // Parse the control file
59  int to_delete_size = 0; // # of machsto delete
60  char ** to_delete = 0; // names of machs to delete
61  int to_add_size = 0; // # of machsto add
62  char ** to_add = 0; // names of machs to add
63 
64  int delete_proc_num = 0; // # of procs to delete
65  int * tid_delete = 0; // the tids of procs to delete
66 
67  // # of various procs to start
68  int lp_num = 0;
69  int cg_num = 0;
70  int vg_num = 0;
71  int cp_num = 0;
72  int vp_num = 0;
73  // the mach names where the procs shoud be started
74  char ** lp_mach = 0;
75  char ** cg_mach = 0;
76  char ** vg_mach = 0;
77  char ** cp_mach = 0;
78  char ** vp_mach = 0;
79 
80  // Do the parsing. First count
81  ifstream ctl(control_file);
82  if (!ctl)
83  stop("Cannot open parameter file... Aborting.\n");
84  // Get the lines of the parameter file one-by-one and if a line contains a
85  // (keyword, value) pair then interpret it.
86  const int MAX_PARAM_LINE_LENGTH = 1024;
87  char line[MAX_PARAM_LINE_LENGTH+1], *end_of_line, *keyword, *value, *ctmp;
88  char ch;
89  while (ctl) {
90  ctl.get(line, MAX_PARAM_LINE_LENGTH);
91  if (ctl) {
92  ctl.get(ch);
93  if (ch != '\n') {
94  printf("Too long (>= %i chars) line in the parameter file.\n",
95  MAX_PARAM_LINE_LENGTH);
96  stop("This is absurd. Aborting.\n");
97  }
98  }
99  end_of_line = line + strlen(line);
100  //-------------------------- First separate the keyword and value ------
101  keyword = find_if(line, end_of_line, isgraph);
102  if (keyword == end_of_line) // empty line
103  continue;
104  ctmp = find_if(keyword, end_of_line, isspace);
105  if (ctmp == end_of_line) // line is just one word. must be a comment
106  continue;
107  *ctmp = 0; // terminate the keyword with a 0 character
108  ++ctmp;
109 
110  value = find_if(ctmp, end_of_line, isgraph);
111  if (value == end_of_line) // line is just one word. must be a comment
112  continue;
113 
114  ctmp = find_if(value, end_of_line, isspace);
115  *ctmp = 0; // terminate the value with a 0 character. this is good even
116  // if ctmp == end_ofline
117 
118  if (str_eq(keyword, "BCP_delete_machine")) {
119  ++to_delete_size;
120  } else if (str_eq(keyword, "BCP_add_machine")) {
121  ++to_add_size;
122  } else if (str_eq(keyword, "BCP_delete_proc")) {
123  ++delete_proc_num;
124  } else if (str_eq(keyword, "BCP_lp_process")) {
125  ++lp_num;
126  } else if (str_eq(keyword, "BCP_cg_process")) {
127  ++cg_num;
128  } else if (str_eq(keyword, "BCP_vg_process")) {
129  ++vg_num;
130  } else if (str_eq(keyword, "BCP_cp_process")) {
131  ++cp_num;
132  } else if (str_eq(keyword, "BCP_vp_process")) {
133  ++vp_num;
134  }
135  }
136  ctl.close();
137 
138  if (to_delete_size > 0) {
139  to_delete = new char*[to_delete_size];
140  to_delete_size = 0;
141  }
142  if (to_add_size > 0) {
143  to_add = new char*[to_add_size];
144  to_add_size = 0;
145  }
146  if (delete_proc_num > 0) {
147  tid_delete = new int[delete_proc_num];
148  delete_proc_num = 0;
149  }
150  if (lp_num) {
151  lp_mach = new char*[lp_num];
152  lp_num = 0;
153  }
154  if (cg_num) {
155  cg_mach = new char*[cg_num];
156  cg_num = 0;
157  }
158  if (vg_num) {
159  vg_mach = new char*[vg_num];
160  vg_num = 0;
161  }
162  if (cp_num) {
163  cp_mach = new char*[cp_num];
164  cp_num = 0;
165  }
166  if (vp_num) {
167  vp_mach = new char*[vp_num];
168  vp_num = 0;
169  }
170 
171  ctl.open(control_file);
172  while (ctl) {
173  ctl.get(line, MAX_PARAM_LINE_LENGTH);
174  if (ctl) {
175  ctl.get(ch);
176  if (ch != '\n') {
177  printf("Too long (>= %i chars) line in the parameter file.\n",
178  MAX_PARAM_LINE_LENGTH);
179  stop("This is absurd. Aborting.\n");
180  }
181  }
182  end_of_line = line + strlen(line);
183  //-------------------------- First separate the keyword and value ------
184  keyword = find_if(line, end_of_line, isgraph);
185  if (keyword == end_of_line) // empty line
186  continue;
187  ctmp = find_if(keyword, end_of_line, isspace);
188  if (ctmp == end_of_line) // line is just one word. must be a comment
189  continue;
190  *ctmp = 0; // terminate the keyword with a 0 character
191  ++ctmp;
192 
193  value = find_if(ctmp, end_of_line, isgraph);
194  if (value == end_of_line) // line is just one word. must be a comment
195  continue;
196 
197  ctmp = find_if(value, end_of_line, isspace);
198  *ctmp = 0; // terminate the value with a 0 character. this is good even
199  // if ctmp == end_ofline
200 
201  if (str_eq(keyword, "BCP_delete_machine")) {
202  to_delete[to_delete_size++] = strdup(value);
203  } else if (str_eq(keyword, "BCP_add_machine")) {
204  to_add[to_add_size++] = strdup(value);
205  } else if (str_eq(keyword, "BCP_delete_proc")) {
206  sscanf(value, "t%x", &tid_delete[delete_proc_num++]);
207  } else if (str_eq(keyword, "BCP_lp_process")) {
208  lp_mach[lp_num++] = strdup(value);
209  } else if (str_eq(keyword, "BCP_cg_process")) {
210  cg_mach[cg_num++] = strdup(value);
211  } else if (str_eq(keyword, "BCP_vg_process")) {
212  vg_mach[vg_num++] = strdup(value);
213  } else if (str_eq(keyword, "BCP_cp_process")) {
214  cp_mach[cp_num++] = strdup(value);
215  } else if (str_eq(keyword, "BCP_vp_process")) {
216  vp_mach[vp_num++] = strdup(value);
217  }
218  }
219  ctl.close();
220 
221  // Check that machine deletions and additions are correct
222 
223  char ** last = 0;
224 
225  // Are there duplicates on the to be deleted list ?
226  if (to_delete_size > 0) {
227  sort(to_delete, to_delete + to_delete_size, str_lt);
228  last = unique(to_delete, to_delete + to_delete_size, str_eq);
229  if (to_delete_size != last - to_delete)
230  stop("A machine to be deleted is listed twice... Aborting.\n");
231  }
232 
233  // Are there duplicates on the to be added list?
234  if (to_add_size > 0) {
235  sort(to_add, to_add + to_add_size, str_lt);
236  last = unique(to_add, to_add + to_add_size, str_eq);
237  if (to_add_size != last - to_add)
238  stop("A machine to be added is listed twice... Aborting.\n");
239  }
240 
241  int i;
242  char ** mach_list = new char*[nhost + to_add_size];
243  for (i = 0; i < nhost; ++i)
244  mach_list[i] = strdup(hostp[i].hi_name);
245  sort(mach_list, mach_list + nhost, str_lt);
246 
247  char ** current_list = new char*[nhost + to_add_size];
248 
249  // Is there a nonexisting machine to be deleted?
250  if (to_delete_size > 0) {
251  last = set_difference(to_delete, to_delete + to_delete_size,
252  mach_list, mach_list + nhost,
253  current_list, str_lt);
254  if (last != current_list)
255  stop("A nonexisting machine is to be deleted... Aborting.\n");
256  last = set_difference(mach_list, mach_list + nhost,
257  to_delete, to_delete + to_delete_size,
258  current_list, str_lt);
259  ::swap(mach_list, current_list);
260  }
261 
262  // Is there an already existing machine to be added?
263  if (to_add_size > 0) {
264  last = set_intersection(to_add, to_add + to_add_size,
265  mach_list, mach_list + nhost,
266  current_list, str_lt);
267  if (last != current_list)
268  stop("A machine to be added is already there... Aborting.\n");
269  last = merge(to_add, to_add + to_add_size,
270  mach_list, mach_list + nhost,
271  current_list, str_lt);
272  ::swap(mach_list, current_list);
273  }
274 
275  const int mach_num = nhost - to_delete_size + to_add_size;
276 
277  // Check that the machines the new processes are supposed to be started on
278  // really exist.
279 
280  if (lp_num > 0) {
281  sort(lp_mach, lp_mach + lp_num, str_lt);
282  if (set_difference(lp_mach, lp_mach + lp_num,
283  mach_list, mach_list + mach_num,
284  current_list, str_lt) != current_list)
285  stop("An lp machine is not in the final machine list... Aborting.\n");
286  }
287  if (cg_num > 0) {
288  sort(cg_mach, cg_mach + cg_num, str_lt);
289  if (set_difference(cg_mach, cg_mach + cg_num,
290  mach_list, mach_list + mach_num,
291  current_list, str_lt) != current_list)
292  stop("An cg machine is not in the final machine list... Aborting.\n");
293  }
294  if (vg_num > 0) {
295  sort(vg_mach, vg_mach + vg_num, str_lt);
296  if (set_difference(vg_mach, vg_mach + vg_num,
297  mach_list, mach_list + mach_num,
298  current_list, str_lt) != current_list)
299  stop("An vg machine is not in the final machine list... Aborting.\n");
300  }
301  if (cp_num > 0) {
302  sort(cp_mach, cp_mach + cp_num, str_lt);
303  if (set_difference(cp_mach, cp_mach + cp_num,
304  mach_list, mach_list + mach_num,
305  current_list, str_lt) != current_list)
306  stop("An cp machine is not in the final machine list... Aborting.\n");
307  }
308  if (vp_num > 0) {
309  sort(vp_mach, vp_mach + vp_num, str_lt);
310  if (set_difference(vp_mach, vp_mach + vp_num,
311  mach_list, mach_list + mach_num,
312  current_list, str_lt) != current_list)
313  stop("An vp machine is not in the final machine list... Aborting.\n");
314  }
315 
316  // Find the tree manager
317  find_tree_manager(pid, tm_tid);
318 
319  // Check that the TM is not on one of the machines to be deleted.
320  if (to_delete_size > 0) {
321  const int dtid = pvm_tidtohost(tm_tid);
322  for (i = 0; i < nhost; ++i) {
323  if (hostp[i].hi_tid == dtid)
324  for (int j = 0; j < to_delete_size; ++j) {
325  if (str_eq(hostp[i].hi_name, to_delete[j]))
326  stop("Can't delete the machine the TM is on. Aborting.\n");
327  }
328  }
329  }
330 
331  // Check that the TM is not one of the processes to be deleted
332  if (delete_proc_num > 0) {
333  if (find(tid_delete, tid_delete + delete_proc_num, tm_tid) !=
334  tid_delete + delete_proc_num)
335  stop("Can't delete the TM... Aborting.\n");
336  }
337 
338  // Modify the machine configuration
339  if (to_delete_size > 0 || to_add_size > 0) {
340  int * infos = new int[max(to_delete_size, to_add_size)];
341  if (to_delete_size > 0)
342  if (pvm_delhosts(to_delete, to_delete_size, infos) < 0) {
343  printf("Failed to delete all specified machines...\n");
344  stop("Please check the situation manually... Aborting.\n");
345  }
346  if (to_add_size > 0)
347  if (pvm_addhosts(to_add, to_add_size, infos) < 0) {
348  printf("Failed to add all specified machines...\n");
349  stop("Please check the situation manually... Aborting.\n");
350  }
351  }
352 
353  // Kill the processes to be killed
354  for (i = 0; i < delete_proc_num; ++i)
355  pvm_kill(tid_delete[i]);
356 
357  // Put together a message to be sent to the TM that contains the machine
358  // names on which the new processes should be spawned
359  int len = (lp_num + cg_num + vg_num + cp_num + vp_num) * sizeof(int);
360  if (len > 0) {
361  len += 5 * sizeof(int);
362  for (i = 0; i < lp_num; ++i) len += strlen(lp_mach[i]);
363  for (i = 0; i < cg_num; ++i) len += strlen(cg_mach[i]);
364  for (i = 0; i < vg_num; ++i) len += strlen(vg_mach[i]);
365  for (i = 0; i < cp_num; ++i) len += strlen(cp_mach[i]);
366  for (i = 0; i < vp_num; ++i) len += strlen(vp_mach[i]);
367 
368  char * buf = new char[len];
369 
370  memcpy(buf, &lp_num, sizeof(int));
371  buf += sizeof(int);
372  for (i = 0; i < lp_num; ++i) {
373  const int l = strlen(lp_mach[i]);
374  memcpy(buf, &l, sizeof(int));
375  buf += sizeof(int);
376  memcpy(buf, lp_mach[i], l);
377  buf += l;
378  }
379 
380  memcpy(buf, &cg_num, sizeof(int));
381  buf += sizeof(int);
382  for (i = 0; i < cg_num; ++i) {
383  const int l = strlen(cg_mach[i]);
384  memcpy(buf, &l, sizeof(int));
385  buf += sizeof(int);
386  memcpy(buf, cg_mach[i], l);
387  buf += l;
388  }
389 
390  memcpy(buf, &vg_num, sizeof(int));
391  buf += sizeof(int);
392  for (i = 0; i < vg_num; ++i) {
393  const int l = strlen(vg_mach[i]);
394  memcpy(buf, &l, sizeof(int));
395  buf += sizeof(int);
396  memcpy(buf, vg_mach[i], l);
397  buf += l;
398  }
399 
400  memcpy(buf, &cp_num, sizeof(int));
401  buf += sizeof(int);
402  for (i = 0; i < cp_num; ++i) {
403  const int l = strlen(cp_mach[i]);
404  memcpy(buf, &l, sizeof(int));
405  buf += sizeof(int);
406  memcpy(buf, cp_mach[i], l);
407  buf += l;
408  }
409 
410  memcpy(buf, &vp_num, sizeof(int));
411  buf += sizeof(int);
412  for (i = 0; i < vp_num; ++i) {
413  const int l = strlen(vp_mach[i]);
414  memcpy(buf, &l, sizeof(int));
415  buf += sizeof(int);
416  memcpy(buf, vp_mach[i], l);
417  buf += l;
418  }
419 
420  buf -= len;
421 
422  pvm_initsend(PvmDataRaw);
423  pvm_pkbyte(buf, len, 1);
424  pvm_send(tm_tid, BCP_CONFIG_CHANGE);
425 
426  int bufid = pvm_recv(tm_tid, -1);
427  int bytes = 0, msgtag = 0;
428  pvm_bufinfo(bufid, &bytes, &msgtag, &tm_tid);
429  if (msgtag == BCP_CONFIG_ERROR)
430  stop("TM had difficulties. Please check the situation manually.\n");
431  }
432 
433  pvm_exit();
434  return 0;
435 }
436 
437 //#############################################################################
438 // Find the TreeManager
439 
440 static void
441 find_tree_manager(const int my_tid, int &tm_tid)
442 {
443  struct pvmtaskinfo *taskp = 0;
444  int ntask = 0;
445  pvm_tasks(0, &ntask, &taskp);
446  int * tids = new int[ntask];
447  int i, k;
448 
449  for (i = 0, k = 0; i < ntask; ++i) {
450  if (taskp[i].ti_ptid != 0)
451  continue; // has a parent, can't be the TM
452  if (taskp[i].ti_tid == my_tid)
453  continue; // self
454  // Otherwise it could be the TM (might be a console...)
455  tids[k++] = taskp[i].ti_tid;
456  }
457 
458  if (tm_tid != 0) {
459  // Check that the given tid is among the candidates
460  for (i = 0; i < k; ++i)
461  if (tids[i] == tm_tid)
462  break;
463  if (i == k)
464  stop("No TM candidate has the given tid... Aborting.\n");
465  } else {
466  // Broadcast a query to the candidates
467  pvm_initsend(PvmDataRaw);
468  pvm_mcast(tids, k, BCP_ARE_YOU_TREEMANAGER);
469  // Wait for an answer
470  struct timeval tout = {15, 0};
471  int bufid = pvm_trecv(-1, BCP_I_AM_TREEMANAGER, &tout);
472  if (bufid == 0)
473  stop("No TM candidates replied within 30 seconds... Aborting.\n");
474  int bytes = 0, msgtag = 0;
475  pvm_bufinfo(bufid, &bytes, &msgtag, &tm_tid);
476  }
477 
478  delete[] tids;
479 }
480 
481 //#############################################################################
482 
483 static void
484 stop(const char * msg)
485 {
486  printf("%s", msg);
487  pvm_exit();
488  abort();
489 }
static bool str_eq(const char *str0, const char *str1)
void fint fint fint real fint real real real real real real real real real fint real fint fint fint real fint fint fint fint * info
static int str_lt(const char *str0, const char *str1)
int main(int argc, char *argv[])
Definition: BB_tm.cpp:32
static void find_tree_manager(const int my_tid, int &tm_tid)
static char * j
Definition: OSdtoa.cpp:3622
void fint fint * k
static int
Definition: OSdtoa.cpp:2173
static void stop(const char *msg)