pacemaker  1.1.15-e174ec8
Scalable High-Availability cluster resource manager
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
membership.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 #include <crm_internal.h>
19 
20 #ifndef _GNU_SOURCE
21 # define _GNU_SOURCE
22 #endif
23 
24 #include <sys/param.h>
25 #include <sys/types.h>
26 #include <stdio.h>
27 #include <unistd.h>
28 #include <string.h>
29 #include <glib.h>
30 #include <crm/common/ipc.h>
31 #include <crm/cluster/internal.h>
32 #include <crm/msg_xml.h>
33 #include <crm/stonith-ng.h>
34 
35 /* The peer cache remembers cluster nodes that have been seen.
36  * This is managed mostly automatically by libcluster, based on
37  * cluster membership events.
38  *
39  * Because cluster nodes can have conflicting names or UUIDs,
40  * the hash table key is a uniquely generated ID.
41  */
42 GHashTable *crm_peer_cache = NULL;
43 
44 /*
45  * The remote peer cache tracks pacemaker_remote nodes. While the
46  * value has the same type as the peer cache's, it is tracked separately for
47  * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
48  * so the name (which is also the UUID) is used as the hash table key; there
49  * is no equivalent of membership events, so management is not automatic; and
50  * most users of the peer cache need to exclude pacemaker_remote nodes.
51  *
52  * That said, using a single cache would be more logical and less error-prone,
53  * so it would be a good idea to merge them one day.
54  *
55  * libcluster provides two avenues for populating the cache:
56  * crm_remote_peer_get(), crm_remote_peer_cache_add() and
57  * crm_remote_peer_cache_remove() directly manage it,
58  * while crm_remote_peer_cache_refresh() populates it via the CIB.
59  */
60 GHashTable *crm_remote_peer_cache = NULL;
61 
/* NOTE(review): not referenced elsewhere in this file; presumably the most
 * recent cluster membership sequence number -- confirm against callers */
unsigned long long crm_peer_seq = 0;

/* NOTE(review): not referenced elsewhere in this file; presumably whether the
 * local partition currently has quorum -- confirm against callers */
gboolean crm_have_quorum = FALSE;

/* Whether nodes that stop being members are purged from the peer cache
 * automatically when their state or processes change (see crm_set_autoreap())
 */
static gboolean crm_autoreap = TRUE;
65 
66 int
68 {
69  if (crm_remote_peer_cache == NULL) {
70  return 0;
71  }
72  return g_hash_table_size(crm_remote_peer_cache);
73 }
74 
86 crm_node_t *
87 crm_remote_peer_get(const char *node_name)
88 {
89  crm_node_t *node;
90 
91  if (node_name == NULL) {
92  errno = -EINVAL;
93  return NULL;
94  }
95 
96  /* Return existing cache entry if one exists */
97  node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
98  if (node) {
99  return node;
100  }
101 
102  /* Allocate a new entry */
103  node = calloc(1, sizeof(crm_node_t));
104  if (node == NULL) {
105  return NULL;
106  }
107 
108  /* Populate the essential information */
109  node->flags = crm_remote_node;
110  node->uuid = strdup(node_name);
111  if (node->uuid == NULL) {
112  free(node);
113  errno = -ENOMEM;
114  return NULL;
115  }
116 
117  /* Add the new entry to the cache */
118  g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
119  crm_trace("added %s to remote cache", node_name);
120 
121  /* Update the entry's uname, ensuring peer status callbacks are called */
122  crm_update_peer_uname(node, node_name);
123  return node;
124 }
125 
/*!
 * \brief Add a node to the remote peer cache
 *
 * \param[in] node_name  Name of remote node
 *
 * \note This is a thin wrapper around crm_remote_peer_get() that asserts the
 *       entry could be found or created, and discards the entry pointer.
 */
void
crm_remote_peer_cache_add(const char *node_name)
{
    CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
}
139 
140 void
141 crm_remote_peer_cache_remove(const char *node_name)
142 {
143  if (g_hash_table_remove(crm_remote_peer_cache, node_name)) {
144  crm_trace("removed %s from remote peer cache", node_name);
145  }
146 }
147 
159 static const char *
160 remote_state_from_cib(xmlNode *node_state)
161 {
162  const char *status;
163 
164  status = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
165  if (status && !crm_is_true(status)) {
166  status = CRM_NODE_LOST;
167  } else {
168  status = CRM_NODE_MEMBER;
169  }
170  return status;
171 }
172 
/* User data for looping through remote node xpath searches; one instance is
 * handed to remote_cache_refresh_helper() for each search result
 */
struct refresh_data {
    const char *field;  /* XML attribute to check for node name */
    gboolean has_state; /* whether to update node state based on XML */
};
178 
186 static void
187 remote_cache_refresh_helper(xmlNode *result, void *user_data)
188 {
189  struct refresh_data *data = user_data;
190  const char *remote = crm_element_value(result, data->field);
191  const char *state = NULL;
192  crm_node_t *node;
193 
194  CRM_CHECK(remote != NULL, return);
195 
196  /* Determine node's state, if the result has it */
197  if (data->has_state) {
198  state = remote_state_from_cib(result);
199  }
200 
201  /* Check whether cache already has entry for node */
202  node = g_hash_table_lookup(crm_remote_peer_cache, remote);
203 
204  if (node == NULL) {
205  /* Node is not in cache, so add a new entry for it */
206  node = crm_remote_peer_get(remote);
207  CRM_ASSERT(node);
208  if (state) {
209  crm_update_peer_state(__FUNCTION__, node, state, 0);
210  }
211 
212  } else if (is_set(node->flags, crm_node_dirty)) {
213  /* Node is in cache and hasn't been updated already, so mark it clean */
215  if (state) {
216  crm_update_peer_state(__FUNCTION__, node, state, 0);
217  }
218  }
219 }
220 
221 static void
222 mark_dirty(gpointer key, gpointer value, gpointer user_data)
223 {
224  set_bit(((crm_node_t*)value)->flags, crm_node_dirty);
225 }
226 
227 static gboolean
228 is_dirty(gpointer key, gpointer value, gpointer user_data)
229 {
230  return is_set(((crm_node_t*)value)->flags, crm_node_dirty);
231 }
232 
233 /* search string to find CIB resources entries for guest nodes */
234 #define XPATH_GUEST_NODE_CONFIG \
235  "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \
236  "//" XML_TAG_META_SETS "//" XML_CIB_TAG_NVPAIR \
237  "[@name='" XML_RSC_ATTR_REMOTE_NODE "']"
238 
239 /* search string to find CIB resources entries for remote nodes */
240 #define XPATH_REMOTE_NODE_CONFIG \
241  "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \
242  "[@type='remote'][@provider='pacemaker']"
243 
244 /* search string to find CIB node status entries for pacemaker_remote nodes */
245 #define XPATH_REMOTE_NODE_STATUS \
246  "//" XML_TAG_CIB "//" XML_CIB_TAG_STATUS "//" XML_CIB_TAG_STATE \
247  "[@" XML_NODE_IS_REMOTE "='true']"
248 
254 void
256 {
257  struct refresh_data data;
258 
259  /* First, we mark all existing cache entries as dirty,
260  * so that later we can remove any that weren't in the CIB.
261  * We don't empty the cache, because we need to detect changes in state.
262  */
263  g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);
264 
265  /* Look for guest nodes and remote nodes in the status section */
266  data.field = "id";
267  data.has_state = TRUE;
269  remote_cache_refresh_helper, &data);
270 
271  /* Look for guest nodes and remote nodes in the configuration section,
272  * because they may have just been added and not have a status entry yet.
273  * In that case, the cached node state will be left NULL, so that the
274  * peer status callback isn't called until we're sure the node started
275  * successfully.
276  */
277  data.field = "value";
278  data.has_state = FALSE;
280  remote_cache_refresh_helper, &data);
281  data.field = "id";
282  data.has_state = FALSE;
284  remote_cache_refresh_helper, &data);
285 
286  /* Remove all old cache entries that weren't seen in the CIB */
287  g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
288 }
289 
290 gboolean
292 {
293  if(node == NULL) {
294  return FALSE;
295  }
296 
297  if (is_set(node->flags, crm_remote_node)) {
298  /* remote nodes are never considered active members. This
299  * guarantees they will never be considered for DC membership.*/
300  return FALSE;
301  }
302 #if SUPPORT_COROSYNC
303  if (is_openais_cluster()) {
304  return crm_is_corosync_peer_active(node);
305  }
306 #endif
307 #if SUPPORT_HEARTBEAT
308  if (is_heartbeat_cluster()) {
309  return crm_is_heartbeat_peer_active(node);
310  }
311 #endif
312  crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type()));
313  return FALSE;
314 }
315 
316 static gboolean
317 crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data)
318 {
319  crm_node_t *node = value;
320  crm_node_t *search = user_data;
321 
322  if (search == NULL) {
323  return FALSE;
324 
325  } else if (search->id && node->id != search->id) {
326  return FALSE;
327 
328  } else if (search->id == 0 && safe_str_neq(node->uname, search->uname)) {
329  return FALSE;
330 
331  } else if (crm_is_peer_active(value) == FALSE) {
332  crm_info("Removing node with name %s and id %u from membership cache",
333  (node->uname? node->uname : "unknown"), node->id);
334  return TRUE;
335  }
336  return FALSE;
337 }
338 
347 guint
348 reap_crm_member(uint32_t id, const char *name)
349 {
350  int matches = 0;
351  crm_node_t search;
352 
353  if (crm_peer_cache == NULL) {
354  crm_trace("Membership cache not initialized, ignoring purge request");
355  return 0;
356  }
357 
358  search.id = id;
359  search.uname = name ? strdup(name) : NULL;
360  matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search);
361  if(matches) {
362  crm_notice("Purged %d peers with id=%u%s%s from the membership cache",
363  matches, search.id,
364  (search.uname? " and/or uname=" : ""),
365  (search.uname? search.uname : ""));
366 
367  } else {
368  crm_info("No peers with id=%u%s%s to purge from the membership cache",
369  search.id, (search.uname? " and/or uname=" : ""),
370  (search.uname? search.uname : ""));
371  }
372 
373  free(search.uname);
374  return matches;
375 }
376 
377 static void
378 crm_count_peer(gpointer key, gpointer value, gpointer user_data)
379 {
380  guint *count = user_data;
381  crm_node_t *node = value;
382 
383  if (crm_is_peer_active(node)) {
384  *count = *count + 1;
385  }
386 }
387 
388 guint
390 {
391  guint count = 0;
392 
393  if (crm_peer_cache) {
394  g_hash_table_foreach(crm_peer_cache, crm_count_peer, &count);
395  }
396  return count;
397 }
398 
399 static void
400 destroy_crm_node(gpointer data)
401 {
402  crm_node_t *node = data;
403 
404  crm_trace("Destroying entry for node %u: %s", node->id, node->uname);
405 
406  free(node->addr);
407  free(node->uname);
408  free(node->state);
409  free(node->uuid);
410  free(node->expected);
411  free(node);
412 }
413 
414 void
416 {
417  if (crm_peer_cache == NULL) {
418  crm_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, destroy_crm_node);
419  }
420 
421  if (crm_remote_peer_cache == NULL) {
422  crm_remote_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, destroy_crm_node);
423  }
424 }
425 
426 void
428 {
429  if (crm_peer_cache != NULL) {
430  crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache));
431  g_hash_table_destroy(crm_peer_cache);
432  crm_peer_cache = NULL;
433  }
434 
435  if (crm_remote_peer_cache != NULL) {
436  crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache));
437  g_hash_table_destroy(crm_remote_peer_cache);
438  crm_remote_peer_cache = NULL;
439  }
440 }
441 
/* Client function invoked on peer status changes (NULL if none is set) */
void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL;

/*!
 * \brief Set a client function that will be called after peer status changes
 *
 * \param[in] dispatch  Pointer to function to use as callback, or NULL to
 *                      stop callbacks from being made
 *
 * \note Within this file, the callback's third argument is NULL for a name
 *       change and the previous value for state and process changes.
 */
void
crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
{
    crm_status_callback = dispatch;
}
459 
/*!
 * \brief Tell the library whether to automatically reap lost nodes
 *
 * \param[in] autoreap  TRUE to have the peer cache purge nodes when they stop
 *                      being members, FALSE to leave purging to the caller
 *
 * \note The flag is consulted when a peer's state or processes are updated.
 */
void
crm_set_autoreap(gboolean autoreap)
{
    crm_autoreap = autoreap;
}
476 
477 static void crm_dump_peer_hash(int level, const char *caller)
478 {
479  GHashTableIter iter;
480  const char *id = NULL;
481  crm_node_t *node = NULL;
482 
483  g_hash_table_iter_init(&iter, crm_peer_cache);
484  while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
485  do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
486  }
487 }
488 
489 static gboolean crm_hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
490 {
491  if(value == user_data) {
492  return TRUE;
493  }
494  return FALSE;
495 }
496 
497 crm_node_t *
498 crm_find_peer_full(unsigned int id, const char *uname, int flags)
499 {
500  crm_node_t *node = NULL;
501 
502  CRM_ASSERT(id > 0 || uname != NULL);
503 
504  crm_peer_init();
505 
506  if (flags & CRM_GET_PEER_REMOTE) {
507  node = g_hash_table_lookup(crm_remote_peer_cache, uname);
508  }
509 
510  if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
511  node = crm_find_peer(id, uname);
512  }
513  return node;
514 }
515 
516 crm_node_t *
517 crm_get_peer_full(unsigned int id, const char *uname, int flags)
518 {
519  crm_node_t *node = NULL;
520 
521  CRM_ASSERT(id > 0 || uname != NULL);
522 
523  crm_peer_init();
524 
525  if (flags & CRM_GET_PEER_REMOTE) {
526  node = g_hash_table_lookup(crm_remote_peer_cache, uname);
527  }
528 
529  if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
530  node = crm_get_peer(id, uname);
531  }
532  return node;
533 }
534 
/*!
 * \brief Find a node in the cluster peer cache by ID and/or name
 *
 * The cache is searched once by name (case-insensitively) and once by ID.
 * When the two searches disagree, the conflict is logged and resolved as
 * described in the comments below; resolving it may remove the name-matched
 * entry from the cache.
 *
 * \param[in] id     Cluster node ID to search for (0 if unknown)
 * \param[in] uname  Node name to search for (NULL if unknown)
 *
 * \return Matching cache entry, or NULL if there is none (or if the only
 *         match by name belongs to a node with a different known ID)
 *
 * \note At least one of id and uname must be given.
 */
crm_node_t *
crm_find_peer(unsigned int id, const char *uname)
{
    GHashTableIter iter;
    crm_node_t *node = NULL;
    crm_node_t *by_id = NULL;
    crm_node_t *by_name = NULL;

    CRM_ASSERT(id > 0 || uname != NULL);

    crm_peer_init();

    /* First pass: first entry whose name matches, ignoring case */
    if (uname != NULL) {
        g_hash_table_iter_init(&iter, crm_peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if(node->uname && strcasecmp(node->uname, uname) == 0) {
                crm_trace("Name match: %s = %p", node->uname, node);
                by_name = node;
                break;
            }
        }
    }

    /* Second pass: first entry whose ID matches */
    if (id > 0) {
        g_hash_table_iter_init(&iter, crm_peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if(node->id == id) {
                crm_trace("ID match: %u = %p", node->id, node);
                by_id = node;
                break;
            }
        }
    }

    node = by_id; /* Good default */
    if(by_id == by_name) {
        /* Nothing to do if they match (both NULL counts) */
        crm_trace("Consistent: %p for %u/%s", by_id, id, uname);

    } else if(by_id == NULL && by_name) {
        crm_trace("Only one: %p for %u/%s", by_name, id, uname);

        if(id && by_name->id) {
            /* The name is already cached under a different, known ID */
            crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
            crm_crit("Node %u and %u share the same name '%s'",
                     id, by_name->id, uname);
            node = NULL; /* Create a new one */

        } else {
            node = by_name;
        }

    } else if(by_name == NULL && by_id) {
        crm_trace("Only one: %p for %u/%s", by_id, id, uname);

        if(uname && by_id->uname) {
            /* The ID is already cached under a different name; the entry
             * found by ID is still the one returned
             */
            crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
            crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
                     uname, by_id->uname, id, uname);
        }

    /* From here on, both searches succeeded but found different entries */
    } else if(uname && by_id->uname) {
        if(safe_str_eq(uname, by_id->uname)) {
            /* Both entries carry the requested name: drop the stale one */
            crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
            g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);

        } else {
            crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
            crm_dump_peer_hash(LOG_INFO, __FUNCTION__);
            crm_abort(__FILE__, __FUNCTION__, __LINE__, "member weirdness", TRUE, TRUE);
        }

    } else if(id && by_name->id) {
        crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);

    } else {
        /* Simple merge */

        /* Only corosync based clusters use nodeids
         *
         * The functions that call crm_update_peer_state() only know nodeid
         * so 'by_id' is authoritative when merging
         *
         * Same for crm_update_peer_proc()
         */
        crm_dump_peer_hash(LOG_DEBUG, __FUNCTION__);

        crm_info("Merging %p into %p", by_name, by_id);
        g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
    }

    return node;
}
628 
629 #if SUPPORT_COROSYNC
630 static guint
631 crm_remove_conflicting_peer(crm_node_t *node)
632 {
633  int matches = 0;
634  GHashTableIter iter;
635  crm_node_t *existing_node = NULL;
636 
637  if (node->id == 0 || node->uname == NULL) {
638  return 0;
639  }
640 
641 # if !SUPPORT_PLUGIN
642  if (corosync_cmap_has_config("nodelist") != 0) {
643  return 0;
644  }
645 # endif
646 
647  g_hash_table_iter_init(&iter, crm_peer_cache);
648  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
649  if (existing_node->id > 0
650  && existing_node->id != node->id
651  && existing_node->uname != NULL
652  && strcasecmp(existing_node->uname, node->uname) == 0) {
653 
654  if (crm_is_peer_active(existing_node)) {
655  continue;
656  }
657 
658  crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
659  existing_node->id, existing_node->uname, node->id);
660 
661  g_hash_table_iter_remove(&iter);
662  matches++;
663  }
664  }
665 
666  return matches;
667 }
668 #endif
669 
/*!
 * \brief Get a cluster peer cache entry, creating it if necessary
 *
 * \param[in] id     Cluster node ID (0 if unknown)
 * \param[in] uname  Node name (NULL if unknown)
 *
 * \return Existing or newly created cache entry (never NULL; allocation
 *         failure trips an assertion)
 *
 * \note At least one of id and uname must be given. An ID or name that the
 *       entry lacks is filled in from the arguments; values the entry
 *       already has are not overwritten.
 */
/* coverity[-alloc] Memory is referenced in one or both hashtables */
crm_node_t *
crm_get_peer(unsigned int id, const char *uname)
{
    crm_node_t *node = NULL;
    char *uname_lookup = NULL;

    CRM_ASSERT(id > 0 || uname != NULL);

    crm_peer_init();

    node = crm_find_peer(id, uname);

    /* if uname wasn't provided, and find_peer did not turn up a uname based on id.
     * we need to do a lookup of the node name using the id in the cluster membership. */
    if ((node == NULL || node->uname == NULL) && (uname == NULL)) {
        uname_lookup = get_node_name(id);
    }

    if (uname_lookup) {
        /* uname now points into uname_lookup, which is freed before return */
        uname = uname_lookup;
        crm_trace("Inferred a name of '%s' for node %u", uname, id);

        /* try to turn up the node one more time now that we know the uname. */
        if (node == NULL) {
            node = crm_find_peer(id, uname);
        }
    }


    if (node == NULL) {
        /* The generated ID becomes the hash table key (see the comment on
         * crm_peer_cache for why neither name nor UUID is used)
         */
        char *uniqueid = crm_generate_uuid();

        node = calloc(1, sizeof(crm_node_t));
        CRM_ASSERT(node);

        crm_info("Created entry %s/%p for node %s/%u (%d total)",
                 uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
        g_hash_table_replace(crm_peer_cache, uniqueid, node);
    }

    if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
        crm_info("Node %u is now known as %s", id, uname);
    }

    if(id > 0 && node->id == 0) {
        node->id = id;
    }

    if (uname && (node->uname == NULL)) {
        crm_update_peer_uname(node, uname);
    }

    if(node->uuid == NULL) {
        /* NOTE(review): crm_peer_uuid() presumably also stores the UUID in
         * the node, since the result is only logged here -- confirm
         */
        const char *uuid = crm_peer_uuid(node);

        if (uuid) {
            crm_info("Node %u has uuid %s", id, uuid);

        } else {
            crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
        }
    }

    free(uname_lookup);

    return node;
}
738 
/*!
 * \brief Update all of a node's information (process list, state, etc.)
 *
 * \param[in] source    Caller's function name (for log messages)
 * \param[in] id        Node ID (passed through get_corosync_id() with uuid)
 * \param[in] born      If nonzero, stored as the node's born value (only
 *                      when built with heartbeat or plugin support)
 * \param[in] seen      Passed as the membership ID when updating node state
 * \param[in] votes     If positive, stored as the node's votes (only when
 *                      built with plugin support)
 * \param[in] children  If nonzero, passed as the process flags to
 *                      crm_update_peer_proc()
 * \param[in] uuid      Node UUID (ignored for openais clusters, where
 *                      crm_peer_uuid() is called instead)
 * \param[in] uname     Node name
 * \param[in] addr      Node address (only stored when built with plugin
 *                      support)
 * \param[in] state     If not NULL, new state for the node
 *
 * \return NULL if updating processes or state returned NULL, otherwise the
 *         updated node
 */
crm_node_t *
crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes,
                uint32_t children, const char *uuid, const char *uname, const char *addr,
                const char *state)
{
#if SUPPORT_PLUGIN
    gboolean addr_changed = FALSE;
    gboolean votes_changed = FALSE;
#endif
    crm_node_t *node = NULL;

    id = get_corosync_id(id, uuid);
    node = crm_get_peer(id, uname);

    CRM_ASSERT(node != NULL);

    if (node->uuid == NULL) {
        if (is_openais_cluster()) {
            /* Yes, overrule whatever was passed in */
            crm_peer_uuid(node);

        } else if (uuid != NULL) {
            node->uuid = strdup(uuid);
        }
    }

    if (children > 0) {
        if (crm_update_peer_proc(source, node, children, state) == NULL) {
            return NULL;
        }
    }

    if (state != NULL) {
        if (crm_update_peer_state(source, node, state, seen) == NULL) {
            return NULL;
        }
    }
#if SUPPORT_HEARTBEAT
    if (born != 0) {
        node->born = born;
    }
#endif

#if SUPPORT_PLUGIN
    /* These were only used by the plugin */
    if (born != 0) {
        node->born = born;
    }

    if (votes > 0 && node->votes != votes) {
        votes_changed = TRUE;
        node->votes = votes;
    }

    if (addr != NULL) {
        if (node->addr == NULL || crm_str_eq(node->addr, addr, FALSE) == FALSE) {
            addr_changed = TRUE;
            free(node->addr);
            node->addr = strdup(addr);
        }
    }
    if (addr_changed || votes_changed) {
        crm_info("%s: Node %s: id=%u state=%s addr=%s%s votes=%d%s born=" U64T " seen=" U64T
                 " proc=%.32x", source, node->uname, node->id, node->state,
                 node->addr, addr_changed ? " (new)" : "", node->votes,
                 votes_changed ? " (new)" : "", node->born, node->last_seen, node->processes);
    }
#endif

    return node;
}
821 
833 void
834 crm_update_peer_uname(crm_node_t *node, const char *uname)
835 {
836  int i, len = strlen(uname);
837 
838  for (i = 0; i < len; i++) {
839  if (uname[i] >= 'A' && uname[i] <= 'Z') {
840  crm_warn("Node names with capitals are discouraged, consider changing '%s'",
841  uname);
842  break;
843  }
844  }
845 
846  free(node->uname);
847  node->uname = strdup(uname);
848  if (crm_status_callback) {
850  }
851 
852 #if SUPPORT_COROSYNC
853  if (is_openais_cluster() && !is_set(node->flags, crm_remote_node)) {
854  crm_remove_conflicting_peer(node);
855  }
856 #endif
857 }
858 
875 crm_node_t *
876 crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
877 {
878  uint32_t last = 0;
879  gboolean changed = FALSE;
880 
881  CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
882  source, peer2text(flag), status); return NULL);
883 
884  /* Pacemaker doesn't spawn processes on remote nodes */
885  if (is_set(node->flags, crm_remote_node)) {
886  return node;
887  }
888 
889  last = node->processes;
890  if (status == NULL) {
891  node->processes = flag;
892  if (node->processes != last) {
893  changed = TRUE;
894  }
895 
896  } else if (safe_str_eq(status, ONLINESTATUS)) {
897  if ((node->processes & flag) != flag) {
898  set_bit(node->processes, flag);
899  changed = TRUE;
900  }
901 #if SUPPORT_PLUGIN
902  } else if (safe_str_eq(status, CRM_NODE_MEMBER)) {
903  if (flag > 0 && node->processes != flag) {
904  node->processes = flag;
905  changed = TRUE;
906  }
907 #endif
908 
909  } else if (node->processes & flag) {
910  clear_bit(node->processes, flag);
911  changed = TRUE;
912  }
913 
914  if (changed) {
915  if (status == NULL && flag <= crm_proc_none) {
916  crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
917  node->id);
918  } else {
919  crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
920  peer2text(flag), status);
921  }
922 
923  /* Call the client callback first, then update the peer state,
924  * in case the node will be reaped
925  */
926  if (crm_status_callback) {
928  }
929 
930  /* The client callback shouldn't touch the peer caches,
931  * but as a safety net, bail if the peer cache was destroyed.
932  */
933  if (crm_peer_cache == NULL) {
934  return NULL;
935  }
936 
937  if (crm_autoreap) {
938  node = crm_update_peer_state(__FUNCTION__, node,
939  is_set(node->processes, crm_get_cluster_proc())?
941  }
942  } else {
943  crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
944  peer2text(flag), status);
945  }
946  return node;
947 }
948 
949 void
950 crm_update_peer_expected(const char *source, crm_node_t * node, const char *expected)
951 {
952  char *last = NULL;
953  gboolean changed = FALSE;
954 
955  CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
956  return);
957 
958  /* Remote nodes don't participate in joins */
959  if (is_set(node->flags, crm_remote_node)) {
960  return;
961  }
962 
963  last = node->expected;
964  if (expected != NULL && safe_str_neq(node->expected, expected)) {
965  node->expected = strdup(expected);
966  changed = TRUE;
967  }
968 
969  if (changed) {
970  crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
971  expected, last);
972  free(last);
973  } else {
974  crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
975  node->id, expected);
976  }
977 }
978 
995 static crm_node_t *
996 crm_update_peer_state_iter(const char *source, crm_node_t * node, const char *state, int membership, GHashTableIter *iter)
997 {
998  gboolean is_member;
999 
1000  CRM_CHECK(node != NULL,
1001  crm_err("Could not set state for unknown host to %s"
1002  CRM_XS " source=%s", state, source);
1003  return NULL);
1004 
1005  is_member = safe_str_eq(state, CRM_NODE_MEMBER);
1006  if (membership && is_member) {
1007  node->last_seen = membership;
1008  }
1009 
1010  if (state && safe_str_neq(node->state, state)) {
1011  char *last = node->state;
1012  enum crm_status_type status_type = is_set(node->flags, crm_remote_node)?
1014 
1015  node->state = strdup(state);
1016  crm_notice("Node %s state is now %s " CRM_XS
1017  " nodeid=%u previous=%s source=%s", node->uname, state,
1018  node->id, (last? last : "unknown"), source);
1019  if (crm_status_callback) {
1020  crm_status_callback(status_type, node, last);
1021  }
1022  free(last);
1023 
1024  if (crm_autoreap && !is_member && !is_set(node->flags, crm_remote_node)) {
1025  /* We only autoreap from the peer cache, not the remote peer cache,
1026  * because the latter should be managed only by
1027  * crm_remote_peer_cache_refresh().
1028  */
1029  if(iter) {
1030  crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname);
1031  g_hash_table_iter_remove(iter);
1032 
1033  } else {
1034  reap_crm_member(node->id, node->uname);
1035  }
1036  node = NULL;
1037  }
1038 
1039  } else {
1040  crm_trace("Node %s state is unchanged (%s) " CRM_XS
1041  " nodeid=%u source=%s", node->uname, state, node->id, source);
1042  }
1043  return node;
1044 }
1045 
/*!
 * \brief Update a node's state and membership information
 *
 * \param[in]     source      Caller's function name (for log messages)
 * \param[in,out] node        Node object to update
 * \param[in]     state       Node's new state
 * \param[in]     membership  Node's new membership ID
 *
 * \return NULL if node is NULL or was reaped, the value of node otherwise
 *
 * \note This delegates to crm_update_peer_state_iter() without an iterator,
 *       so any reaping goes through reap_crm_member().
 */
crm_node_t *
crm_update_peer_state(const char *source, crm_node_t * node, const char *state, int membership)
{
    return crm_update_peer_state_iter(source, node, state, membership, NULL);
}
1066 
1073 void
1074 crm_reap_unseen_nodes(uint64_t membership)
1075 {
1076  GHashTableIter iter;
1077  crm_node_t *node = NULL;
1078 
1079  crm_trace("Reaping unseen nodes...");
1080  g_hash_table_iter_init(&iter, crm_peer_cache);
1081  while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
1082  if (node->last_seen != membership) {
1083  if (node->state) {
1084  /*
1085  * Calling crm_update_peer_state_iter() allows us to
1086  * remove the node from crm_peer_cache without
1087  * invalidating our iterator
1088  */
1089  crm_update_peer_state_iter(__FUNCTION__, node, CRM_NODE_LOST, membership, &iter);
1090 
1091  } else {
1092  crm_info("State of node %s[%u] is still unknown",
1093  node->uname, node->id);
1094  }
1095  }
1096  }
1097 }
1098 
/*!
 * \brief Request that a node be fenced
 *
 * \param[in] nodeid  ID of node to fence
 * \param[in] uname   Name of node to fence
 * \param[in] unused  Ignored
 *
 * \return Return value of stonith_api_kick()
 */
int
crm_terminate_member(int nodeid, const char *uname, void *unused)
{
    /* Always use the synchronous, non-mainloop version */
    /* 120 is the timeout argument, TRUE the "off" argument */
    return stonith_api_kick(nodeid, uname, 120, TRUE);
}
1105 
/*!
 * \brief Request that a node be fenced, without using a mainloop
 *
 * \param[in] nodeid      ID of node to fence
 * \param[in] uname       Name of node to fence
 * \param[in] connection  Ignored
 *
 * \return Return value of stonith_api_kick()
 *
 * \note This makes exactly the same call as crm_terminate_member().
 */
int
crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
{
    /* 120 is the timeout argument, TRUE the "off" argument */
    return stonith_api_kick(nodeid, uname, 120, TRUE);
}
uint32_t votes
Definition: internal.h:50
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:164
void crm_remote_peer_cache_add(const char *node_name)
Add a node to the remote peer cache.
Definition: membership.c:135
void crm_reap_unseen_nodes(uint64_t membership)
Definition: membership.c:1074
#define crm_notice(fmt, args...)
Definition: logging.h:250
#define CRM_NODE_LOST
Definition: cluster.h:43
#define XPATH_REMOTE_NODE_STATUS
Definition: membership.c:245
GHashTable * crm_peer_cache
Definition: membership.c:42
gboolean is_openais_cluster(void)
Definition: cluster.c:630
#define crm_crit(fmt, args...)
Definition: logging.h:247
gboolean safe_str_neq(const char *a, const char *b)
Definition: utils.c:696
char * crm_generate_uuid(void)
Definition: utils.c:2361
uint64_t flags
Definition: cluster.h:76
void crm_peer_destroy(void)
Definition: membership.c:427
uint32_t id
Definition: cluster.h:73
gboolean is_heartbeat_cluster(void)
Definition: cluster.c:645
uint64_t born
Definition: cluster.h:74
char * uuid
Definition: cluster.h:83
int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off)
Definition: st_client.c:2562
crm_node_t * crm_find_peer(unsigned int id, const char *uname)
Definition: membership.c:536
int get_corosync_id(int id, const char *uuid)
Definition: cluster.c:96
gboolean crm_have_quorum
Definition: membership.c:63
crm_node_t * crm_find_peer_full(unsigned int id, const char *uname, int flags)
Definition: membership.c:498
GHashTable * crm_remote_peer_cache
Definition: membership.c:60
char * addr
Definition: cluster.h:87
#define clear_bit(word, bit)
Definition: crm_internal.h:200
unsigned long long crm_peer_seq
Definition: membership.c:62
char * get_node_name(uint32_t nodeid)
Definition: cluster.c:301
void crm_set_autoreap(gboolean autoreap)
Tell the library whether to automatically reap lost nodes.
Definition: membership.c:472
void crm_peer_init(void)
Definition: membership.c:415
void crm_remote_peer_cache_remove(const char *node_name)
Definition: membership.c:141
gboolean crm_is_corosync_peer_active(const crm_node_t *node)
Definition: corosync.c:468
char uname[MAX_NAME]
Definition: internal.h:53
int crm_remote_peer_cache_size(void)
Definition: membership.c:67
#define crm_warn(fmt, args...)
Definition: logging.h:249
#define set_bit(word, bit)
Definition: crm_internal.h:199
uint32_t processes
Definition: cluster.h:79
crm_node_t * crm_get_peer_full(unsigned int id, const char *uname, int flags)
Definition: membership.c:517
crm_node_t * crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes, uint32_t children, const char *uuid, const char *uname, const char *addr, const char *state)
Definition: membership.c:751
guint reap_crm_member(uint32_t id, const char *name)
Remove all peer cache entries matching a node ID and/or uname.
Definition: membership.c:348
gboolean crm_is_peer_active(const crm_node_t *node)
Definition: membership.c:291
uint32_t id
Definition: internal.h:48
guint crm_strcase_hash(gconstpointer v)
Definition: utils.c:2289
#define XPATH_GUEST_NODE_CONFIG
Definition: membership.c:234
crm_status_type
Definition: cluster.h:198
void crm_update_peer_expected(const char *source, crm_node_t *node, const char *expected)
Definition: membership.c:950
#define crm_trace(fmt, args...)
Definition: logging.h:254
#define do_crm_log(level, fmt, args...)
Log a message.
Definition: logging.h:129
const char * crm_element_value(xmlNode *data, const char *name)
Definition: xml.c:5842
int corosync_cmap_has_config(const char *prefix)
Definition: corosync.c:597
#define CRM_NODE_MEMBER
Definition: cluster.h:44
void crm_update_peer_uname(crm_node_t *node, const char *uname)
Definition: membership.c:834
void(* crm_status_callback)(enum crm_status_type, crm_node_t *, const void *)
Definition: membership.c:442
gboolean crm_str_eq(const char *a, const char *b, gboolean use_case)
Definition: utils.c:1441
void crm_set_status_callback(void(*dispatch)(enum crm_status_type, crm_node_t *, const void *))
Set a client function that will be called after peer status changes.
Definition: membership.c:455
const char * name_for_cluster_type(enum cluster_type_e type)
Definition: cluster.c:468
int crm_terminate_member(int nodeid, const char *uname, void *unused)
Definition: membership.c:1100
char * expected
Definition: cluster.h:85
#define CRM_XS
Definition: logging.h:42
void crm_remote_peer_cache_refresh(xmlNode *cib)
Repopulate the remote peer cache based on CIB XML.
Definition: membership.c:255
guint crm_active_peers(void)
Definition: membership.c:389
crm_node_t * crm_remote_peer_get(const char *node_name)
Get a remote node peer cache entry, creating it if necessary.
Definition: membership.c:87
#define crm_err(fmt, args...)
Definition: logging.h:248
Fencing aka. STONITH.
#define uint32_t
Definition: stdint.in.h:158
int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
Definition: membership.c:1107
#define CRM_ASSERT(expr)
Definition: error.h:35
char data[0]
Definition: internal.h:58
char * state
Definition: cluster.h:84
void crm_foreach_xpath_result(xmlNode *xml, const char *xpath, void(*helper)(xmlNode *, void *), void *user_data)
Run a supplied function for each result of an xpath search.
Definition: xpath.c:179
#define U64T
Definition: config.h:635
Wrappers for and extensions to libqb IPC.
crm_node_t * crm_update_peer_proc(const char *source, crm_node_t *node, uint32_t flag, const char *status)
Definition: membership.c:876
int32_t votes
Definition: cluster.h:78
char * uname
Definition: cluster.h:82
uint64_t last_seen
Definition: cluster.h:75
#define XML_NODE_IN_CLUSTER
Definition: msg_xml.h:251
gboolean crm_is_true(const char *s)
Definition: utils.c:711
#define safe_str_eq(a, b)
Definition: util.h:74
#define ONLINESTATUS
Definition: util.h:48
void crm_abort(const char *file, const char *function, int line, const char *condition, gboolean do_core, gboolean do_fork)
Definition: utils.c:1154
crm_node_t * crm_get_peer(unsigned int id, const char *uname)
Definition: membership.c:672
#define XPATH_REMOTE_NODE_CONFIG
Definition: membership.c:240
#define crm_info(fmt, args...)
Definition: logging.h:251
const char * crm_peer_uuid(crm_node_t *node)
Definition: cluster.c:135
uint64_t flags
Definition: remote.c:121
#define int32_t
Definition: stdint.in.h:157
enum cluster_type_e get_cluster_type(void)
Definition: cluster.c:513
crm_node_t * crm_update_peer_state(const char *source, crm_node_t *node, const char *state, int membership)
Update a node's state and membership information.
Definition: membership.c:1062