- Add ReactivatePipeline to handle subscription reactivation, including scaling Docker services, health verification, status updates, audit logging, and broadcasting status changes. - Introduce RotateCredentialsPipeline for OAuth2 credential rotation, managing the deletion of old apps, creation of new ones, credential storage, access verification, and audit logging. - Create StepRunner to manage job step execution, including lifecycle management and progress broadcasting via SignalR. - Implement SuspendPipeline for subscription suspension, scaling down services, updating statuses, logging audits, and broadcasting changes. - Add UpdateScreenLimitPipeline to update Xibo CMS screen limits and record snapshots. - Introduce XiboFeatureManifests for hardcoded feature ACLs per role. - Add docker-compose.dev.yml for local development with PostgreSQL setup.
128 lines
5.2 KiB
C#
128 lines
5.2 KiB
C#
using Renci.SshNet;
|
|
using OTSSignsOrchestrator.Server.Data.Entities;
|
|
|
|
namespace OTSSignsOrchestrator.Server.Health.Checks;
|
|
|
|
/// <summary>
/// Verifies the Docker stack is healthy by running <c>docker stack ps {stackName}</c>
/// via SSH and checking that every task whose desired state is Running is actually running.
/// </summary>
/// <remarks>
/// SSH connection details are read from the <c>Ssh.Swarm*</c> settings on every run.
/// Any failure while connecting or running the command is logged and reported as a
/// <see cref="HealthStatus.Critical"/> result rather than thrown.
/// </remarks>
public sealed class StackHealthCheck : IHealthCheck
{
    // Go template handed to `docker stack ps --format`; yields one "name|current|desired" row per task.
    private const string TaskRowFormat = "{{.Name}}|{{.CurrentState}}|{{.DesiredState}}";

    private readonly IServiceProvider _services;
    private readonly ILogger<StackHealthCheck> _logger;

    /// <summary>Name under which this check reports its results.</summary>
    public string CheckName => "StackHealth";

    /// <summary>Always <c>false</c> for this check.</summary>
    public bool AutoRemediate => false;

    /// <summary>Creates the check.</summary>
    /// <param name="services">Provider used to resolve the settings service on each run.</param>
    /// <param name="logger">Logger that receives details of failed SSH checks.</param>
    public StackHealthCheck(
        IServiceProvider services,
        ILogger<StackHealthCheck> logger)
    {
        _services = services;
        _logger = logger;
    }

    /// <summary>
    /// Runs <c>docker stack ps</c> for the instance's stack over SSH and evaluates the task states.
    /// </summary>
    /// <param name="instance">Instance whose <c>DockerStackName</c> identifies the stack to inspect.</param>
    /// <param name="ct">Token checked before the blocking SSH work starts.</param>
    /// <returns>
    /// <see cref="HealthStatus.Healthy"/> when no task with desired state Running is in another state;
    /// <see cref="HealthStatus.Critical"/> when the stack name is missing, when at least one such task
    /// is not running, or when the SSH check itself fails.
    /// </returns>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<HealthCheckResult> RunAsync(Instance instance, CancellationToken ct)
    {
        var stackName = instance.DockerStackName;
        if (string.IsNullOrEmpty(stackName))
        {
            return new HealthCheckResult(HealthStatus.Critical, "No Docker stack name configured");
        }

        ct.ThrowIfCancellationRequested();

        try
        {
            var settings = _services.GetRequiredService<Core.Services.SettingsService>();
            var sshInfo = await GetSwarmSshHostAsync(settings);

            // Connect/RunCommand below block the calling thread, so bail out first if already cancelled.
            ct.ThrowIfCancellationRequested();

            using var sshClient = CreateSshClient(sshInfo);
            sshClient.Connect();

            string output;
            try
            {
                // Get task status for all services in the stack. The stack name is shell-quoted
                // because the whole string is interpreted by the remote shell.
                output = RunSshCommand(sshClient,
                    $"docker stack ps {QuoteForShell(stackName)} --format '{TaskRowFormat}'");
            }
            finally
            {
                sshClient.Disconnect();
            }

            // NOTE: when no task has desired state Running the list is empty and the stack is
            // reported as Healthy.
            var notRunning = FindTasksNotRunning(output);

            if (notRunning.Count == 0)
            {
                return new HealthCheckResult(HealthStatus.Healthy,
                    $"All services in {stackName} are Running");
            }

            return new HealthCheckResult(HealthStatus.Critical,
                $"{notRunning.Count} service(s) not running in {stackName}",
                string.Join("\n", notRunning));
        }
        // Only a cancellation requested through the caller's token propagates; every other
        // failure (including unrelated timeouts) is still reported as a Critical result.
        catch (Exception ex) when (!(ex is OperationCanceledException && ct.IsCancellationRequested))
        {
            _logger.LogError(ex, "Stack health check failed for {StackName}", stackName);
            return new HealthCheckResult(HealthStatus.Critical,
                $"SSH check failed for {stackName}: {ex.Message}");
        }
    }

    /// <summary>
    /// Parses <c>name|currentState|desiredState</c> rows and collects the tasks that should be
    /// running but are not.
    /// </summary>
    /// <param name="output">Raw standard output of the <c>docker stack ps</c> command.</param>
    /// <returns>One <c>"name: currentState"</c> entry per offending task; empty when there is none.</returns>
    private static List<string> FindTasksNotRunning(string output)
    {
        var notRunning = new List<string>();

        foreach (var line in output.Split('\n', StringSplitOptions.RemoveEmptyEntries))
        {
            var parts = line.Split('|');
            if (parts.Length < 3)
            {
                // Not a task row in the expected format; ignore it.
                continue;
            }

            var name = parts[0].Trim();
            var currentState = parts[1].Trim();
            var desiredState = parts[2].Trim();

            // Only check tasks whose desired state is Running. The current state is matched by
            // prefix, so any value that merely starts with "Running" counts as running.
            var shouldRun = desiredState.Equals("Running", StringComparison.OrdinalIgnoreCase);
            var isRunning = currentState.StartsWith("Running", StringComparison.OrdinalIgnoreCase);

            if (shouldRun && !isRunning)
            {
                notRunning.Add($"{name}: {currentState}");
            }
        }

        return notRunning;
    }

    /// <summary>
    /// Wraps <paramref name="value"/> in single quotes for a POSIX-style shell, escaping any
    /// embedded single quote, so the value is passed to the command as one literal argument.
    /// </summary>
    private static string QuoteForShell(string value) =>
        "'" + value.Replace("'", "'\\''") + "'";

    /// <summary>Reads the Swarm SSH connection details from the settings store.</summary>
    /// <param name="settings">Settings service queried for the <c>Ssh.Swarm*</c> keys.</param>
    /// <returns>Connection details; the port falls back to 22 when the stored value is not an integer.</returns>
    /// <exception cref="InvalidOperationException">The <c>Ssh.SwarmHost</c> setting is missing.</exception>
    private static async Task<SshConnectionInfo> GetSwarmSshHostAsync(Core.Services.SettingsService settings)
    {
        var host = await settings.GetAsync("Ssh.SwarmHost")
            ?? throw new InvalidOperationException("SSH Swarm host not configured.");
        var portStr = await settings.GetAsync("Ssh.SwarmPort", "22");
        var user = await settings.GetAsync("Ssh.SwarmUser", "root");
        var keyPath = await settings.GetAsync("Ssh.SwarmKeyPath");
        var password = await settings.GetAsync("Ssh.SwarmPassword");

        if (!int.TryParse(portStr, out var port))
        {
            port = 22;
        }

        return new SshConnectionInfo(host, port, user, keyPath, password);
    }

    /// <summary>
    /// Builds an unconnected <see cref="SshClient"/> using the configured key and/or password,
    /// falling back to <c>~/.ssh/id_rsa</c> when neither is configured.
    /// </summary>
    /// <param name="info">Host, port, user name and optional credentials.</param>
    /// <exception cref="InvalidOperationException">
    /// No credentials are configured and the default key file does not exist.
    /// </exception>
    private static SshClient CreateSshClient(SshConnectionInfo info)
    {
        // NOTE(review): the PrivateKeyFile / authentication method objects created here are never
        // disposed explicitly; confirm whether that matters for how often this check runs.
        var authMethods = new List<AuthenticationMethod>();

        if (!string.IsNullOrEmpty(info.KeyPath))
        {
            authMethods.Add(new PrivateKeyAuthenticationMethod(info.Username, new PrivateKeyFile(info.KeyPath)));
        }

        if (!string.IsNullOrEmpty(info.Password))
        {
            authMethods.Add(new PasswordAuthenticationMethod(info.Username, info.Password));
        }

        if (authMethods.Count == 0)
        {
            // Nothing configured: try the current user's default RSA key.
            var defaultKeyPath = Path.Combine(
                Environment.GetFolderPath(Environment.SpecialFolder.UserProfile), ".ssh", "id_rsa");

            if (!File.Exists(defaultKeyPath))
            {
                throw new InvalidOperationException($"No SSH auth method for {info.Host}:{info.Port}.");
            }

            authMethods.Add(new PrivateKeyAuthenticationMethod(info.Username, new PrivateKeyFile(defaultKeyPath)));
        }

        var connInfo = new Renci.SshNet.ConnectionInfo(info.Host, info.Port, info.Username, authMethods.ToArray());
        return new SshClient(connInfo);
    }

    /// <summary>Runs a command on the connected client and returns its standard output.</summary>
    /// <param name="client">Connected SSH client.</param>
    /// <param name="command">Command line to execute on the remote host.</param>
    /// <exception cref="InvalidOperationException">The command exited with a non-zero status.</exception>
    private static string RunSshCommand(SshClient client, string command)
    {
        using var cmd = client.RunCommand(command);
        if (cmd.ExitStatus != 0)
        {
            throw new InvalidOperationException($"SSH command failed (exit {cmd.ExitStatus}): {cmd.Error}");
        }

        return cmd.Result;
    }

    /// <summary>SSH endpoint and credentials used to reach the Swarm host.</summary>
    internal sealed record SshConnectionInfo(string Host, int Port, string Username, string? KeyPath, string? Password);
}
|