using System.Diagnostics;
using Microsoft.AspNetCore.SignalR;
using Microsoft.EntityFrameworkCore;
using OTSSignsOrchestrator.Server.Clients;
using OTSSignsOrchestrator.Server.Data;
using OTSSignsOrchestrator.Server.Data.Entities;
using OTSSignsOrchestrator.Server.Hubs;
namespace OTSSignsOrchestrator.Server.Health.Checks;
/// <summary>
/// Probes the central Authentik instance at GET /api/v3/-/health/ready/.
/// Measures latency and writes an <see cref="AuthentikMetrics"/> row.
/// If down: Severity = Critical, message "Central Authentik is DOWN — all customer web UI logins failing".
/// This is a fleet-wide P1 alert. Runs every 2 minutes on a separate schedule.
/// </summary>
/// <remarks>
/// This check is NOT per-instance — it runs once globally. The engine skips it for
/// per-instance checks. Instead it is scheduled independently as a Quartz job.
/// </remarks>
public sealed class AuthentikGlobalHealthCheck : IHealthCheck
{
    // NOTE(review): in this view the generic type arguments on ILogger, on the Task return
    // types and on both GetRequiredService calls are absent. They are kept exactly as found;
    // confirm them against the real file before building.

    // Single definition of the outage text so the HTTP-failure path and the exception path
    // cannot drift apart.
    private const string DownMessage = "Central Authentik is DOWN — all customer web UI logins failing";

    private readonly IAuthentikClient _authentikClient;
    private readonly IServiceProvider _services;
    private readonly ILogger _logger;

    /// <summary>Name reported for this check; always "AuthentikGlobal".</summary>
    public string CheckName => "AuthentikGlobal";

    /// <summary>Always <c>false</c> for this check.</summary>
    public bool AutoRemediate => false;

    /// <summary>
    /// Creates the check.
    /// </summary>
    /// <param name="authentikClient">Client used to call the Authentik health endpoint.</param>
    /// <param name="services">Root provider from which a scope is created for each run.</param>
    /// <param name="logger">Logger that receives probe failures.</param>
    public AuthentikGlobalHealthCheck(
        IAuthentikClient authentikClient,
        IServiceProvider services,
        ILogger logger)
    {
        _authentikClient = authentikClient;
        _services = services;
        _logger = logger;
    }

    /// <summary>
    /// Per-instance entry point. Delegates to <see cref="RunGlobalAsync"/>.
    /// </summary>
    /// <param name="instance">Ignored — this check reports global Authentik health.</param>
    /// <param name="ct">Token forwarded to <see cref="RunGlobalAsync"/>.</param>
    /// <returns>The result produced by <see cref="RunGlobalAsync"/>.</returns>
    public async Task RunAsync(Instance instance, CancellationToken ct)
    {
        // This check doesn't use the instance parameter — it checks global Authentik health.
        return await RunGlobalAsync(ct);
    }

    /// <summary>
    /// Core logic — callable from the Quartz job without an instance context.
    /// Probes Authentik, records an <see cref="AuthentikMetrics"/> row with the measured
    /// latency, and broadcasts a "Critical" alert when the probe failed.
    /// </summary>
    /// <param name="ct">Token passed to the metrics save.</param>
    /// <returns>
    /// A Healthy result when the health endpoint answered with a success status code;
    /// otherwise a Critical result carrying the outage message and the failure detail.
    /// </returns>
    public async Task RunGlobalAsync(CancellationToken ct)
    {
        var sw = Stopwatch.StartNew();
        AuthentikMetricsStatus metricsStatus;
        string? errorMessage = null;
        HealthCheckResult result;

        try
        {
            var response = await _authentikClient.CheckHealthAsync();
            sw.Stop();

            if (response.IsSuccessStatusCode)
            {
                metricsStatus = AuthentikMetricsStatus.Healthy;
                result = new HealthCheckResult(HealthStatus.Healthy,
                    $"Authentik healthy (latency: {sw.ElapsedMilliseconds}ms)");
            }
            else
            {
                metricsStatus = AuthentikMetricsStatus.Critical;
                errorMessage = $"HTTP {response.StatusCode}";
                result = new HealthCheckResult(HealthStatus.Critical,
                    DownMessage,
                    $"Health endpoint returned {response.StatusCode}");

                _logger.LogError(
                    "Authentik health endpoint returned {StatusCode} after {LatencyMs}ms",
                    response.StatusCode,
                    sw.ElapsedMilliseconds);
            }
        }
        catch (Exception ex)
        {
            // Any failure to reach the endpoint is reported as an outage rather than rethrown.
            sw.Stop();
            metricsStatus = AuthentikMetricsStatus.Critical;
            errorMessage = ex.Message;
            result = new HealthCheckResult(HealthStatus.Critical,
                DownMessage,
                ex.Message);

            // Only ex.Message is persisted; log the full exception so the stack trace is not lost.
            _logger.LogError(ex,
                "Authentik health probe threw after {LatencyMs}ms",
                sw.ElapsedMilliseconds);
        }

        await using var scope = _services.CreateAsyncScope();
        try
        {
            // Write metrics row
            var db = scope.ServiceProvider.GetRequiredService();
            db.AuthentikMetrics.Add(new AuthentikMetrics
            {
                Id = Guid.NewGuid(),
                CheckedAt = DateTime.UtcNow,
                Status = metricsStatus,
                LatencyMs = (int)sw.ElapsedMilliseconds,
                ErrorMessage = errorMessage,
            });
            await db.SaveChangesAsync(ct);
        }
        finally
        {
            // Broadcast alert if critical. This sits in a finally block so that a failure
            // while persisting metrics cannot suppress the outage alert; the persistence
            // exception still propagates to the caller afterwards.
            if (result.Status == HealthStatus.Critical)
            {
                var hub = scope.ServiceProvider.GetRequiredService>();
                await hub.Clients.All.SendAlertRaised("Critical", result.Message);
            }
        }

        return result;
    }
}