using System.Diagnostics;
using Microsoft.AspNetCore.SignalR;
using Microsoft.EntityFrameworkCore;
using OTSSignsOrchestrator.Server.Clients;
using OTSSignsOrchestrator.Server.Data;
using OTSSignsOrchestrator.Server.Data.Entities;
using OTSSignsOrchestrator.Server.Hubs;

namespace OTSSignsOrchestrator.Server.Health.Checks;

// NOTE(review): this copy of the file appears to have lost everything that was written
// inside angle brackets — the ILogger field, the Task return types and both
// GetRequiredService calls have no generic type arguments (one call is left with a
// dangling '>'). The code tokens below are reproduced exactly as found; restore the
// type arguments from version control before building.

/// <summary>
/// Probes the central Authentik instance at GET /api/v3/-/health/ready/.
/// Measures latency and writes an AuthentikMetrics row for every probe.
/// If down: Severity = Critical, message "Central Authentik is DOWN — all customer web UI logins failing".
/// This is a fleet-wide P1 alert. Runs every 2 minutes on a separate schedule.
/// </summary>
/// <remarks>
/// This check is NOT per-instance — it runs once globally. The engine skips it for
/// per-instance checks. Instead it is scheduled independently as a Quartz job.
/// </remarks>
public sealed class AuthentikGlobalHealthCheck : IHealthCheck
{
    // Client whose CheckHealthAsync() call performs the actual probe.
    private readonly IAuthentikClient _authentikClient;

    // Provider from which a new async scope is created on every run to resolve
    // the database context and the hub context.
    private readonly IServiceProvider _services;

    // NOTE(review): injected and stored, but never referenced in the visible code.
    private readonly ILogger _logger;

    /// <summary>Name of this check; always "AuthentikGlobal".</summary>
    public string CheckName => "AuthentikGlobal";

    /// <summary>Always <c>false</c> for this check.</summary>
    public bool AutoRemediate => false;

    /// <summary>
    /// Stores the injected dependencies; performs no other work.
    /// </summary>
    /// <param name="authentikClient">Client used to query Authentik's health endpoint.</param>
    /// <param name="services">Provider used to create a service scope per run.</param>
    /// <param name="logger">Logger instance (currently unused by this class).</param>
    public AuthentikGlobalHealthCheck(
        IAuthentikClient authentikClient,
        IServiceProvider services,
        ILogger logger)
    {
        _authentikClient = authentikClient;
        _services = services;
        _logger = logger;
    }

    /// <summary>
    /// Instance-based entry point (presumably required by IHealthCheck — confirm against
    /// the interface). Delegates straight to <see cref="RunGlobalAsync"/>.
    /// </summary>
    /// <param name="instance">Ignored; the result does not depend on it.</param>
    /// <param name="ct">Cancellation token forwarded to <see cref="RunGlobalAsync"/>.</param>
    /// <returns>The result produced by <see cref="RunGlobalAsync"/>.</returns>
    public async Task RunAsync(Instance instance, CancellationToken ct)
    {
        // The instance argument is deliberately unused: Authentik health is global,
        // so the outcome is the same regardless of which instance was passed in.
        return await RunGlobalAsync(ct);
    }

    /// <summary>
    /// Core logic — callable from the Quartz job without an instance context.
    /// Probes Authentik, records one AuthentikMetrics row with the outcome and latency,
    /// and broadcasts an alert to all hub clients when the outcome is Critical.
    /// </summary>
    /// <param name="ct">
    /// Cancellation token; in the visible code it is passed only to SaveChangesAsync.
    /// </param>
    /// <returns>
    /// A Healthy result when the probe response reports success; otherwise a Critical
    /// result carrying the HTTP status code or the exception message.
    /// </returns>
    public async Task RunGlobalAsync(CancellationToken ct)
    {
        var sw = Stopwatch.StartNew();
        AuthentikMetricsStatus metricsStatus;
        string? errorMessage = null;
        HealthCheckResult result;

        try
        {
            // NOTE(review): ct is not passed to the probe call — confirm whether
            // IAuthentikClient.CheckHealthAsync offers a cancellable overload.
            var response = await _authentikClient.CheckHealthAsync();
            // Stop timing immediately so the latency covers only the probe call.
            sw.Stop();

            if (response.IsSuccessStatusCode)
            {
                metricsStatus = AuthentikMetricsStatus.Healthy;
                result = new HealthCheckResult(HealthStatus.Healthy,
                    $"Authentik healthy (latency: {sw.ElapsedMilliseconds}ms)");
            }
            else
            {
                // A non-success status code is treated as a full outage.
                metricsStatus = AuthentikMetricsStatus.Critical;
                errorMessage = $"HTTP {response.StatusCode}";
                result = new HealthCheckResult(HealthStatus.Critical,
                    "Central Authentik is DOWN — all customer web UI logins failing",
                    $"Health endpoint returned {response.StatusCode}");
            }
        }
        catch (Exception ex)
        {
            // Any exception thrown by the probe is reported as an outage rather than
            // propagated to the caller.
            // NOTE(review): only ex.Message is kept; the exception itself is not logged,
            // so the stack trace is lost.
            sw.Stop();
            metricsStatus = AuthentikMetricsStatus.Critical;
            errorMessage = ex.Message;
            result = new HealthCheckResult(HealthStatus.Critical,
                "Central Authentik is DOWN — all customer web UI logins failing",
                ex.Message);
        }

        // Persist one metrics row per probe, healthy or not. The scope is disposed
        // asynchronously when this method exits.
        await using var scope = _services.CreateAsyncScope();
        var db = scope.ServiceProvider.GetRequiredService();
        db.AuthentikMetrics.Add(new AuthentikMetrics
        {
            Id = Guid.NewGuid(),
            CheckedAt = DateTime.UtcNow,
            Status = metricsStatus,
            LatencyMs = (int)sw.ElapsedMilliseconds,
            ErrorMessage = errorMessage,
        });
        // NOTE(review): if this save throws, the exception propagates and the alert
        // broadcast below never runs — confirm that coupling is intended.
        await db.SaveChangesAsync(ct);

        // Notify every connected hub client when the probe outcome is Critical.
        if (result.Status == HealthStatus.Critical)
        {
            var hub = scope.ServiceProvider.GetRequiredService>();
            await hub.Clients.All.SendAlertRaised("Critical", result.Message);
        }

        return result;
    }
}