~ne02ptzero/libfloat

9890efc69513d419fcf9a17903b4ed8e0fa15f03 — Michael Bonfils 30 days ago 6cfcf0e
Check quorum is ok in periodic check

Stopped followers did not cause the leader to step down
when quorum was no longer reached.

Patch: https://lists.sr.ht/~ne02ptzero/libfloat/patches/27509

Signed-off-by: Michael Bonfils <mbonfils@scaleway.com>
Acked-by      : Patrik Cyvoct <pcyvoct@scaleway.com>
Acked-by      : Louis Solofrizzo <lsolofrizzo@scaleway.com>
1 files changed, 10 insertions(+), 6 deletions(-)

M periodic.c
M periodic.c => periodic.c +10 -6
@@ 10,25 10,29 @@ void libfloat_periodic(libfloat_ctx_t *ctx, uint32_t time)
        {
            if (ctx->conf.sanity_timeout != 0 && ctx->n_nodes > 1)
            {
                bool            node_reachable = false;
                int             nodes_reachable = 0;
                libfloat_node_t *node;

                for_every_node(ctx, node, {
                    if (node->last_update + ctx->conf.sanity_timeout > ctx->time(NULL))
                    if (node->id == ctx->me->id)
                    {
                        node_reachable = true;
                        break;
                        nodes_reachable++;
                    }
                    else if (node->last_update + ctx->conf.sanity_timeout > ctx->time(NULL))
                    {
                        nodes_reachable++;
                    }
                });

                if (!node_reachable)
                if (nodes_reachable < ctx->n_nodes / 2 + 1)
                {
                    /**
                    * Hmmm, we might be partitionned, or at the very least we don't have a quorum anymore.
                    * Time to step down!
                    */
                    libfloat_become_follower(ctx);
                    ERROR(ctx, "Sanity timeout has been reached (%d seconds), stepping down from leader position", ctx->conf.sanity_timeout);
                    ERROR(ctx, "Sanity timeout has been reached (%d seconds), stepping down from leader position: reachable %d / quorum %lu (%lu)",
                          ctx->conf.sanity_timeout, nodes_reachable, ctx->n_nodes / 2 + 1, ctx->n_nodes);
                    return;
                }
            }