M config/certmon.sample.config => config/certmon.sample.config +1 -0
@@ 1,5 1,6 @@
{port, 9101}.
{scan_interval, 30000}.
+{max_error_count, 10}.
{targets, [
{"neo.sinyax.net", 443}
]
M src/cert_scanner.erl => src/cert_scanner.erl +26 -5
@@ 18,7 18,11 @@
hostname,
port,
timer,
- validity
+ validity,
+ % count of errors since last success
+ errorCount,
+ % total count of errors
+ totalErrors
}).
%% API.
@@ 38,11 42,16 @@ init([{Hostname, Port}]) ->
hostname = Hostname,
port = Port,
timer = Timer,
- validity = unknown
+ validity = unknown,
+ errorCount = 0,
+ totalErrors = 0
}}.
handle_call({get_status}, _From, State) ->
- {reply, State#state.validity, State};
+ % Status reply: the validity term followed by {errors, total error count}.
+ Validity = State#state.validity,
+ Status = [Validity, {errors, State#state.totalErrors}],
+ {reply, Status, State};
handle_call(_Request, _From, State) ->
{reply, ignored, State}.
@@ 83,10 92,22 @@ do_scan(State) ->
RemainingDays = (NotAfterSeconds - NowSeconds) / (24 * 3600),
logger:debug("peer cert ~s valid until: ~p (unix epoch: ~p, remaining days: ~p)~n",
[Hostname, NotAfter, NotAfterEpoch, RemainingDays]),
- {noreply, State#state{validity = {valid, NotAfterEpoch}}};
+ {noreply, State#state{validity = {valid, NotAfterEpoch}, errorCount = 0}};
{error, Reason} ->
logger:warning("failed to connect to ~p: ~p", [Hostname, Reason]),
- {noreply, State#state{validity = {error, Reason}}}
+ {ok, MaxErrorCount} = application:get_env(max_error_count),
+
+ % if max error count is not reached, we keep the old validity
+ ReportedValidity = if
+ State#state.errorCount < MaxErrorCount ->
+ State#state.validity;
+ true -> {error, Reason}
+ end,
+ {noreply, State#state{
+ validity = ReportedValidity,
+ errorCount = State#state.errorCount + 1,
+ totalErrors = State#state.totalErrors + 1
+ }}
end.
%% utilities
M src/metrics_handler.erl => src/metrics_handler.erl +7 -5
@@ 6,16 6,18 @@
format_metric({Hostname, Port}) ->
Target = {Hostname, Port},
{ok, ScannerPid} = certmon_sup:get_scanner_pid(Target),
- Valid = try gen_server:call(ScannerPid, {get_status}) of
- {valid, Epoch} -> Epoch;
- _ -> 0
+ Response = try gen_server:call(ScannerPid, {get_status})
catch
exit:Reason ->
logger:error("failed to get status for ~p: Reason: ~p", [{Hostname, Port}, Reason]),
0
end,
- [<<"tls_cert_expiry{hostname=\"">>, Hostname, <<"\", port=\"">>,
- io_lib:format("~p", [Port]), <<"\"} ">>, io_lib:format("~p\n", [Valid])
+ % A failed call leaves Response = 0 (see catch above), which proplists cannot
+ % read; treat any non-list as an empty status. ~s keeps Hostname unquoted.
+ Status = if is_list(Response) -> Response; true -> [] end,
+ Valid = proplists:get_value(valid, Status, 0),
+ Errors = proplists:get_value(errors, Status, 0),
+ [io_lib:format("tls_cert_expiry{hostname=\"~s\", port=\"~p\"} ~p\n", [Hostname, Port, Valid]),
+ io_lib:format("tls_cert_errors{hostname=\"~s\", port=\"~p\"} ~p\n", [Hostname, Port, Errors])
].
init(Req0, State) ->