~ne02ptzero/libfloat

639a13ae9054dfd29a894b2176fabc24f9d43d8f — Louis Solofrizzo 14 days ago 6269352
election: Do not spam elections if one is already ongoing

And more debug here and there

Patch: https://lists.sr.ht/~ne02ptzero/libfloat/patches/36926

Signed-off-by: Louis Solofrizzo <lsolofrizzo@scaleway.com>
Acked-by     : Florian Florensa <fflorensa@scaleway.com>

 __________________________________
/ To iterate is human, to recurse, \
\ divine. -- Robert Heller         /
 ----------------------------------
        \   ^__^
         \  (oo)\_______
            (__)\       )\/\
                ||----w |
                ||     ||
3 files changed, 19 insertions(+), 5 deletions(-)

M election.c
M libfloat.h
M periodic.c
M election.c => election.c +16 -3
@@ 24,6 24,9 @@ void __libfloat_election_start(libfloat_ctx_t *ctx, libfloat_elections_args_t *a
    libfloat_term_t     last_term = 0;
    libfloat_entry_id_t last_id = 0;

    if (ctx->state == RAFT_STATE_CANDIDATE && args->force == false)
        return;

    DEBUG(ctx, "Election starting! reason=%s", args->reason);
    /* First, reset the vote of everyone */
    for_every_node(ctx, node, {


@@ 81,12 84,15 @@ static bool libfloat_can_i_grant_vote(libfloat_ctx_t *ctx, libfloat_rpc_request_
    {
        if (ctx->persistent.voted_for == req->candidate_id)
            return true;

        DEBUG(ctx, "Cannot grant vote to node %d: I have already voted for %d", req->candidate_id, ctx->persistent.voted_for);
        return false;
    }

    if (ctx->persistent.commit_index > 0 && libfloat_get_last_term(ctx, NULL, &last_term) == false)
    {
        /* We have failed to retrieve last_ferm from log */
        /* We have failed to retrieve last_term from log */
        DEBUG(ctx, "Cannot grant vote to node %d: Cannot get last term", req->candidate_id);
        return false;
    }



@@ 94,6 100,7 @@ static bool libfloat_can_i_grant_vote(libfloat_ctx_t *ctx, libfloat_rpc_request_
    if (last_term > req->last_log_term && req->last_log_index <= ctx->persistent.commit_index)
    {
        /* We have a superior term and a superior log, we can't grant our vote */
        DEBUG(ctx, "Cannot grant vote to node %d: I have a greater term and a greater log", req->candidate_id);
        return false;
    }



@@ 103,10 110,12 @@ static bool libfloat_can_i_grant_vote(libfloat_ctx_t *ctx, libfloat_rpc_request_
        /* We have a superior log, we can't grant our vote */

        /* There's an election in progress, and we have a superior log, let's try to become leader */
        libfloat_election_start(ctx, .reason = "I have a greater log");
        DEBUG(ctx, "Cannot grant vote to node %d: I have a greater log", req->candidate_id);
        libfloat_election_start(ctx, .reason = "I have a greater log", .force = true);
        return false;
    }

    DEBUG(ctx, "Granted vote to %d", req->candidate_id);
    /* All good! */
    return true;
}


@@ 140,7 149,11 @@ void libfloat_request_vote_receive(libfloat_ctx_t *ctx, libfloat_rpc_request_vot
    {
        libfloat_set_current_term(ctx, req->term);

        libfloat_become_follower(ctx, .reason = "request_vote_receive: our term is lower than remote term");
        if (ctx->state == RAFT_STATE_LEADER)
            libfloat_become_follower(ctx, .reason = "request_vote_receive: our term is lower than remote term");

        /* Vote reset */
        libfloat_vote_for(ctx, 0);
        ctx->timeout_elapsed = 0;
    }


M libfloat.h => libfloat.h +1 -0
@@ 292,6 292,7 @@ struct libfloat_ctx_s {
 */
typedef struct {
    const char  *reason;        /*!< Logic reason for calling a function, for debug purposes */
    bool        force;          /*!< When false, an election start is skipped if the node is already in RAFT_STATE_CANDIDATE; when true, the election is started regardless */
} libfloat_elections_args_t;

/*!

M periodic.c => periodic.c +2 -2
@@ 93,7 93,7 @@ void libfloat_periodic(libfloat_ctx_t *ctx, uint32_t time)
            {
                /* DynamoDB-like elections are not implemented, let's simply start an election */
                DEBUG(ctx, "New election: timeout elapsed %d", ctx->timeout_elapsed);
                libfloat_election_start(ctx, .reason = "Election timeout, start a new one");
                libfloat_election_start(ctx, .reason = "Election timeout, start a new one", .force = true);
                ctx->lost_leader_time -= (ctx->conf.election_timeout / 1000);
            }
            else


@@ 162,7 162,7 @@ void libfloat_periodic(libfloat_ctx_t *ctx, uint32_t time)
                        {
                            /* We did not receive any responses, and the election timeout has expired twice, let's launch an election */
                            ctx->gray_failures.checking = false;
                            libfloat_election_start(ctx, .reason = "Gray failures: Complete timeout");
                            libfloat_election_start(ctx, .reason = "Gray failures: Complete timeout", .force = true);

                            /* XXX: In this specific case, I'm not sure that triggering an election actually does something:
                             * - Either every node in the cluster is down / unreachable, and a quorum will never be reached anyway