Files
OTSSignsOrchestrator/OTSSignsOrchestrator.Server/Health/Checks/AuthentikGlobalHealthCheck.cs
Matt Batchelder c6d46098dd feat: Implement provisioning pipelines for subscription management
- Add ReactivatePipeline to handle subscription reactivation, including scaling Docker services, health verification, status updates, audit logging, and broadcasting status changes.
- Introduce RotateCredentialsPipeline for OAuth2 credential rotation, managing the deletion of old apps, creation of new ones, credential storage, access verification, and audit logging.
- Create StepRunner to manage job step execution, including lifecycle management and progress broadcasting via SignalR.
- Implement SuspendPipeline for subscription suspension, scaling down services, updating statuses, logging audits, and broadcasting changes.
- Add UpdateScreenLimitPipeline to update Xibo CMS screen limits and record snapshots.
- Introduce XiboFeatureManifests for hardcoded feature ACLs per role.
- Add docker-compose.dev.yml for local development with PostgreSQL setup.
2026-03-18 10:27:26 -04:00

108 lines
4.0 KiB
C#

using System.Diagnostics;
using Microsoft.AspNetCore.SignalR;
using Microsoft.EntityFrameworkCore;
using OTSSignsOrchestrator.Server.Clients;
using OTSSignsOrchestrator.Server.Data;
using OTSSignsOrchestrator.Server.Data.Entities;
using OTSSignsOrchestrator.Server.Hubs;
namespace OTSSignsOrchestrator.Server.Health.Checks;
/// <summary>
/// Probes the central Authentik instance at <c>GET /api/v3/-/health/ready/</c>.
/// Measures latency and writes an <see cref="AuthentikMetrics"/> row.
/// If down: <c>Severity = Critical</c>, message "Central Authentik is DOWN — all customer web UI logins failing".
/// This is a fleet-wide P1 alert. Runs every 2 minutes on a separate schedule.
///
/// This check is NOT per-instance — it runs once globally. The engine skips it for
/// per-instance checks. Instead it is scheduled independently as a Quartz job.
/// </summary>
/// <remarks>
/// Writing the metrics row is best-effort: a failure to persist it is logged and does not
/// prevent the critical alert from being broadcast or the result from being returned.
/// </remarks>
public sealed class AuthentikGlobalHealthCheck : IHealthCheck
{
    /// <summary>Message used for the result and the broadcast alert whenever the probe fails.</summary>
    private const string DownMessage = "Central Authentik is DOWN — all customer web UI logins failing";

    private readonly IAuthentikClient _authentikClient;
    private readonly IServiceProvider _services;
    private readonly ILogger<AuthentikGlobalHealthCheck> _logger;

    /// <summary>Name under which this check is reported.</summary>
    public string CheckName => "AuthentikGlobal";

    /// <summary>Always <c>false</c>: this check never requests automatic remediation.</summary>
    public bool AutoRemediate => false;

    /// <summary>
    /// Creates the check.
    /// </summary>
    /// <param name="authentikClient">Client used to call the Authentik health endpoint.</param>
    /// <param name="services">Root provider from which a scope is created for each run.</param>
    /// <param name="logger">Logger for probe and persistence failures.</param>
    public AuthentikGlobalHealthCheck(
        IAuthentikClient authentikClient,
        IServiceProvider services,
        ILogger<AuthentikGlobalHealthCheck> logger)
    {
        _authentikClient = authentikClient;
        _services = services;
        _logger = logger;
    }

    /// <summary>
    /// <see cref="IHealthCheck"/> entry point. The <paramref name="instance"/> argument is
    /// ignored; the call is forwarded to <see cref="RunGlobalAsync"/>.
    /// </summary>
    /// <param name="instance">Unused — this check is global, not per-instance.</param>
    /// <param name="ct">Token observed while saving the metrics row.</param>
    /// <returns>The result of the global Authentik probe.</returns>
    public async Task<HealthCheckResult> RunAsync(Instance instance, CancellationToken ct)
    {
        // This check doesn't use the instance parameter — it checks global Authentik health.
        return await RunGlobalAsync(ct);
    }

    /// <summary>
    /// Core logic — callable from the Quartz job without an instance context.
    /// Probes Authentik, records one <see cref="AuthentikMetrics"/> row, and broadcasts an
    /// alert to all connected fleet clients when the outcome is critical.
    /// </summary>
    /// <param name="ct">Token observed while saving the metrics row.</param>
    /// <returns>
    /// A healthy result including the measured latency, or a critical result carrying either
    /// the HTTP status code or the exception message as detail.
    /// </returns>
    /// <exception cref="OperationCanceledException">
    /// Propagated if <paramref name="ct"/> is cancelled while the metrics row is being saved.
    /// </exception>
    public async Task<HealthCheckResult> RunGlobalAsync(CancellationToken ct)
    {
        var sw = Stopwatch.StartNew();
        AuthentikMetricsStatus metricsStatus;
        string? errorMessage = null;
        HealthCheckResult result;

        try
        {
            var response = await _authentikClient.CheckHealthAsync();
            sw.Stop();

            if (response.IsSuccessStatusCode)
            {
                metricsStatus = AuthentikMetricsStatus.Healthy;
                result = new HealthCheckResult(HealthStatus.Healthy,
                    $"Authentik healthy (latency: {sw.ElapsedMilliseconds}ms)");
            }
            else
            {
                metricsStatus = AuthentikMetricsStatus.Critical;
                errorMessage = $"HTTP {response.StatusCode}";
                result = new HealthCheckResult(HealthStatus.Critical,
                    DownMessage,
                    $"Health endpoint returned {response.StatusCode}");

                _logger.LogCritical(
                    "Authentik health endpoint returned {StatusCode} after {LatencyMs}ms",
                    response.StatusCode, sw.ElapsedMilliseconds);
            }
        }
        catch (Exception ex)
        {
            // No cancellation token is passed to the client call above, so every exception
            // raised by the probe is reported as an outage rather than as a cancellation.
            sw.Stop();
            metricsStatus = AuthentikMetricsStatus.Critical;
            errorMessage = ex.Message;
            result = new HealthCheckResult(HealthStatus.Critical,
                DownMessage,
                ex.Message);

            _logger.LogCritical(ex,
                "Authentik health probe threw after {LatencyMs}ms", sw.ElapsedMilliseconds);
        }

        await using var scope = _services.CreateAsyncScope();

        // Write metrics row (best-effort — must not block the alert below).
        await PersistMetricsAsync(
            scope.ServiceProvider, metricsStatus, (int)sw.ElapsedMilliseconds, errorMessage, ct);

        // Broadcast alert if critical
        if (result.Status == HealthStatus.Critical)
        {
            var hub = scope.ServiceProvider.GetRequiredService<IHubContext<FleetHub, IFleetClient>>();
            await hub.Clients.All.SendAlertRaised("Critical", result.Message);
        }

        return result;
    }

    /// <summary>
    /// Adds one <see cref="AuthentikMetrics"/> row and saves it. Any failure other than
    /// cancellation is logged and swallowed so the caller can still raise the alert.
    /// </summary>
    /// <param name="provider">Scoped provider used to resolve the <see cref="OrchestratorDbContext"/>.</param>
    /// <param name="status">Outcome of the probe.</param>
    /// <param name="latencyMs">Elapsed probe time in milliseconds.</param>
    /// <param name="errorMessage">Failure detail, or <c>null</c> when the probe succeeded.</param>
    /// <param name="ct">Token passed to <c>SaveChangesAsync</c>.</param>
    private async Task PersistMetricsAsync(
        IServiceProvider provider,
        AuthentikMetricsStatus status,
        int latencyMs,
        string? errorMessage,
        CancellationToken ct)
    {
        try
        {
            var db = provider.GetRequiredService<OrchestratorDbContext>();
            db.AuthentikMetrics.Add(new AuthentikMetrics
            {
                Id = Guid.NewGuid(),
                CheckedAt = DateTime.UtcNow,
                Status = status,
                LatencyMs = latencyMs,
                ErrorMessage = errorMessage,
            });
            await db.SaveChangesAsync(ct);
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            _logger.LogError(ex,
                "Failed to persist Authentik metrics row (status {Status}, latency {LatencyMs}ms)",
                status, latencyMs);
        }
    }
}