- Add ReactivatePipeline to handle subscription reactivation, including scaling Docker services, health verification, status updates, audit logging, and broadcasting status changes.
- Introduce RotateCredentialsPipeline for OAuth2 credential rotation, managing the deletion of old apps, creation of new ones, credential storage, access verification, and audit logging.
- Create StepRunner to manage job step execution, including lifecycle management and progress broadcasting via SignalR.
- Implement SuspendPipeline for subscription suspension, scaling down services, updating statuses, logging audits, and broadcasting changes.
- Add UpdateScreenLimitPipeline to update Xibo CMS screen limits and record snapshots.
- Introduce XiboFeatureManifests for hardcoded feature ACLs per role.
- Add docker-compose.dev.yml for local development with PostgreSQL setup.
108 lines
4.0 KiB
C#
108 lines
4.0 KiB
C#
using System.Diagnostics;
|
|
using Microsoft.AspNetCore.SignalR;
|
|
using Microsoft.EntityFrameworkCore;
|
|
using OTSSignsOrchestrator.Server.Clients;
|
|
using OTSSignsOrchestrator.Server.Data;
|
|
using OTSSignsOrchestrator.Server.Data.Entities;
|
|
using OTSSignsOrchestrator.Server.Hubs;
|
|
|
|
namespace OTSSignsOrchestrator.Server.Health.Checks;
|
|
|
|
/// <summary>
/// Probes the central Authentik instance at <c>GET /api/v3/-/health/ready/</c>.
/// Measures latency and writes an <see cref="AuthentikMetrics"/> row.
/// If down: <c>Severity = Critical</c>, message "Central Authentik is DOWN — all customer web UI logins failing".
/// This is a fleet-wide P1 alert. Runs every 2 minutes on a separate schedule.
///
/// This check is NOT per-instance — it runs once globally. The engine skips it for
/// per-instance checks. Instead it is scheduled independently as a Quartz job.
/// </summary>
public sealed class AuthentikGlobalHealthCheck : IHealthCheck
{
    /// <summary>
    /// Alert text used for every "Authentik is down" result, whether the probe
    /// returned a non-success status or threw.
    /// </summary>
    private const string DownMessage =
        "Central Authentik is DOWN — all customer web UI logins failing";

    private readonly IAuthentikClient _authentikClient;
    private readonly IServiceProvider _services;
    private readonly ILogger<AuthentikGlobalHealthCheck> _logger;

    /// <summary>Name under which this check is identified.</summary>
    public string CheckName => "AuthentikGlobal";

    /// <summary>Always <c>false</c> for this check.</summary>
    public bool AutoRemediate => false;

    /// <summary>
    /// Creates the check.
    /// </summary>
    /// <param name="authentikClient">Client used to call the Authentik health endpoint.</param>
    /// <param name="services">
    /// Root provider; a fresh async scope is created from it on every run to resolve
    /// the <see cref="OrchestratorDbContext"/> and the SignalR hub context.
    /// </param>
    /// <param name="logger">Logger used to record probe failures.</param>
    public AuthentikGlobalHealthCheck(
        IAuthentikClient authentikClient,
        IServiceProvider services,
        ILogger<AuthentikGlobalHealthCheck> logger)
    {
        _authentikClient = authentikClient;
        _services = services;
        _logger = logger;
    }

    /// <summary>
    /// <see cref="IHealthCheck"/> entry point. Delegates to <see cref="RunGlobalAsync"/>.
    /// </summary>
    /// <param name="instance">Ignored — the check is global, not per-instance.</param>
    /// <param name="ct">Cancellation token forwarded to <see cref="RunGlobalAsync"/>.</param>
    /// <returns>The result produced by <see cref="RunGlobalAsync"/>.</returns>
    public async Task<HealthCheckResult> RunAsync(Instance instance, CancellationToken ct)
    {
        // This check doesn't use the instance parameter — it checks global Authentik health.
        return await RunGlobalAsync(ct);
    }

    /// <summary>
    /// Core logic — callable from the Quartz job without an instance context.
    /// Calls the Authentik health endpoint, times the call, stores one
    /// <see cref="AuthentikMetrics"/> row, and broadcasts an alert to all hub
    /// clients when the outcome is critical.
    /// </summary>
    /// <param name="ct">
    /// Cancellation token passed to the metrics save. The health probe itself is
    /// invoked without a token.
    /// </param>
    /// <returns>
    /// <see cref="HealthStatus.Healthy"/> with the measured latency when the endpoint
    /// answers with a success status; otherwise <see cref="HealthStatus.Critical"/>
    /// with the status code or exception message as detail.
    /// </returns>
    /// <remarks>
    /// Exceptions thrown by the probe are converted into a critical result (and logged).
    /// Exceptions from the metrics write or the SignalR broadcast are not caught here.
    /// </remarks>
    public async Task<HealthCheckResult> RunGlobalAsync(CancellationToken ct)
    {
        var sw = Stopwatch.StartNew();
        AuthentikMetricsStatus metricsStatus;
        string? errorMessage = null;
        HealthCheckResult result;

        try
        {
            var response = await _authentikClient.CheckHealthAsync();
            sw.Stop();

            if (response.IsSuccessStatusCode)
            {
                metricsStatus = AuthentikMetricsStatus.Healthy;
                result = new HealthCheckResult(HealthStatus.Healthy,
                    $"Authentik healthy (latency: {sw.ElapsedMilliseconds}ms)");
            }
            else
            {
                // Previously silent: surface the failing status in the server log too.
                _logger.LogError(
                    "Authentik health endpoint returned {StatusCode} after {ElapsedMs}ms",
                    response.StatusCode,
                    sw.ElapsedMilliseconds);

                metricsStatus = AuthentikMetricsStatus.Critical;
                errorMessage = $"HTTP {response.StatusCode}";
                result = new HealthCheckResult(HealthStatus.Critical,
                    DownMessage,
                    $"Health endpoint returned {response.StatusCode}");
            }
        }
        catch (Exception ex)
        {
            sw.Stop();

            // Only ex.Message is kept in the result/metrics row, so log the full
            // exception here to preserve its type and stack trace.
            _logger.LogError(
                ex,
                "Authentik health probe failed after {ElapsedMs}ms",
                sw.ElapsedMilliseconds);

            metricsStatus = AuthentikMetricsStatus.Critical;
            errorMessage = ex.Message;
            result = new HealthCheckResult(HealthStatus.Critical,
                DownMessage,
                ex.Message);
        }

        // Write metrics row
        await using var scope = _services.CreateAsyncScope();
        var db = scope.ServiceProvider.GetRequiredService<OrchestratorDbContext>();
        db.AuthentikMetrics.Add(new AuthentikMetrics
        {
            Id = Guid.NewGuid(),
            CheckedAt = DateTime.UtcNow,
            Status = metricsStatus,
            LatencyMs = (int)sw.ElapsedMilliseconds,
            ErrorMessage = errorMessage,
        });
        await db.SaveChangesAsync(ct);

        // Broadcast alert if critical
        if (result.Status == HealthStatus.Critical)
        {
            var hub = scope.ServiceProvider.GetRequiredService<IHubContext<FleetHub, IFleetClient>>();
            await hub.Clients.All.SendAlertRaised("Critical", result.Message);
        }

        return result;
    }
}
|