~ne02ptzero/libfloat

c626a3a56ed4ccd30392690339b27c35772e816b — Louis Solofrizzo 16 days ago 9890efc master
log: Add metric on orphaned logs

Add an exposed counter for logs that are committed on the leader but
nowhere else in the cluster, so that a possible network split can be
detected and alerted on.

Patch: https://lists.sr.ht/~ne02ptzero/libfloat/patches/27934

Signed-off-by : Louis Solofrizzo <lsolofrizzo@scaleway.com>
Acked-by      : Patrick Cyvoct <pcyvoct@scaleway.com>
Acked-by      : Michael Bonfils <mbonfils@scaleway.com>

 ________________________________________
/ "Been through Hell? Whaddya bring back \
\ for me?" -- A. Brilliant               /
 ----------------------------------------
        \   ^__^
         \  (oo)\_______
            (__)\       )\/\
                ||----w |
                ||     ||
2 files changed, 7 insertions(+), 0 deletions(-)

M libfloat.h
M periodic.c
M libfloat.h => libfloat.h +1 -0
@@ 45,6 45,7 @@ struct libfloat_ctx_s {

    /* Monotonic counters kept on the context; each one is only ever incremented. */
    struct {
        uint64_t        leader_election;                /*!< Count of leader elections for this cluster */
        uint64_t        orphans_logs;                   /*!< Count of logs that timed out with a single node ack (presumably the leader's own); a rising value hints at a possible network split */
    } stat;

    struct {

M periodic.c => periodic.c +6 -0
@@ 64,6 64,12 @@ void libfloat_periodic(libfloat_ctx_t *ctx, uint32_t time)
                        ctx->leader == ctx->me
                    );

                    if (libfloat_list_count(&entry->node_acks) == 1)
                    {
                        ERROR(ctx, "Log %d timeout with only leader commited, possible loss of synchronization", entry->id);
                        ctx->stat.orphans_logs++;
                    }

                    entry->commit(entry->udata, LIBFLOAT_ENTRY_TIMEOUT);
                    entry->commit = NULL;
                    libfloat_list_del(&entry->next);