Cluster code should look at cman_cluster_t.ci_generation whenever it gets notification of a quorum state change. This is the closest thing to a "ring number" that we can get from the libcman interface. When we get a quorum update, check the ring number on it. If it is less than the ring number that we have already seen, then the update is stale and should be ignored. Another one will be coming shortly, if appropriate.
Here's what I have for this so far -- untested yet. ( I have to switch to a high-priority bug for a bit. ) Index: src/qpid/cluster/Quorum_cman.h =================================================================== --- src/qpid/cluster/Quorum_cman.h (revision 1362472) +++ src/qpid/cluster/Quorum_cman.h (working copy) @@ -45,18 +45,20 @@ Quorum(boost::function<void ()> onError); ~Quorum(); void start(boost::shared_ptr<sys::Poller>); + bool setGenerationNumber ( uint32_t ); private: void dispatch(sys::DispatchHandle&); void disconnect(sys::DispatchHandle&); int getFd(); void watch(int fd); - + cman_handle_t cman; int cmanFd; std::auto_ptr<sys::PosixIOHandle> ioHandle; std::auto_ptr<sys::DispatchHandleRef> dispatchHandle; boost::shared_ptr<sys::Poller> poller; + uint32_t generationNumber; }; Index: src/qpid/cluster/Quorum_cman.cpp =================================================================== --- src/qpid/cluster/Quorum_cman.cpp (revision 1362472) +++ src/qpid/cluster/Quorum_cman.cpp (working copy) @@ -34,15 +34,24 @@ boost::function<void()> errorFn; void cmanCallbackFn(cman_handle_t handle, void */*privdata*/, int reason, int /*arg*/) { - if (reason == CMAN_REASON_STATECHANGE && !cman_is_quorate(handle)) { - QPID_LOG(critical, "Lost contact with cluster quorum."); - if (errorFn) errorFn(); - cman_stop_notification(handle); + cman_cluster_t clinfo; + cman_get_cluster ( handle, & clinfo ); + void * privdata; + cman_getprivdata ( handle, & privdata ); + Quorum * quorum = static_cast<Quorum *>(privdata); + + // If the generation number is wrong, ignore this + if ( quorum->setGenerationNumber ( clinfo.ci_generation ) ) { + if (reason == CMAN_REASON_STATECHANGE && !cman_is_quorate(handle)) { + QPID_LOG(critical, "Lost contact with cluster quorum."); + if (errorFn) errorFn(); + cman_stop_notification(handle); + } } } } -Quorum::Quorum(boost::function<void()> err) : cman(0), cmanFd(0) { +Quorum::Quorum(boost::function<void()> err) : cman(0), cmanFd(0), generationNumber(0) 
{ errorFn = err; } @@ -61,7 +70,13 @@ QPID_LOG(notice, "Waiting for cluster quorum."); while(!cman_is_quorate(cman)) sys::sleep(5); } - int err = cman_start_notification(cman, cmanCallbackFn); + + int err; + // We will need this pointer in the callback, to store generation info in this object. + if ( 0 != ( err = cman_setprivdata ( cman, static_cast<void *>(this)))) + throw ErrnoException("Can't set private cman data."); + + err = cman_start_notification(cman, cmanCallbackFn); if (err != 0) throw ErrnoException("Can't register for cman notifications"); watch(getFd()); } @@ -102,4 +117,15 @@ errorFn(); } +bool Quorum::setGenerationNumber ( uint32_t newGenerationNumber ) { + /* + If we receive a generation number that is less than one we + have already seen, we should ignore the associated statement. + */ + if ( newGenerationNumber < generationNumber ) + return false; + generationNumber = newGenerationNumber; + return true; +} + }} // namespace qpid::cluster