- Introduced `docker-compose.prod.yml` for production deployment. - Configured service to connect to an external PostgreSQL instance. - Set environment variables for JWT and database connection strings. - Defined network and volume for data protection keys.
75 lines
2.7 KiB
C#
75 lines
2.7 KiB
C#
using OTSSignsOrchestrator.Data.Entities;
|
|
using OTSSignsOrchestrator.Services;
|
|
|
|
namespace OTSSignsOrchestrator.Health.Checks;
|
|
|
|
/// <summary>
/// Verifies the Docker stack is healthy by running <c>docker stack ps {stackName}</c>
/// via SSH and checking that every task whose desired state is Running is
/// currently reported as Running.
/// </summary>
public sealed class StackHealthCheck : IHealthCheck
{
    private readonly IServiceProvider _services;
    private readonly ILogger<StackHealthCheck> _logger;

    /// <summary>Name of this check; always <c>"StackHealth"</c>.</summary>
    public string CheckName => "StackHealth";

    /// <summary>Always <c>false</c> for this check.</summary>
    public bool AutoRemediate => false;

    /// <summary>
    /// Creates the check.
    /// </summary>
    /// <param name="services">Provider used to resolve a <see cref="SwarmShellService"/> on each run.</param>
    /// <param name="logger">Logger that receives the full exception when the shell command fails.</param>
    public StackHealthCheck(
        IServiceProvider services,
        ILogger<StackHealthCheck> logger)
    {
        _services = services;
        _logger = logger;
    }

    /// <summary>
    /// Lists the tasks of the instance's Docker stack and reports the stack as
    /// Critical when the stack name is missing, when no task rows are returned,
    /// when any task that should be Running is not, or when the command fails.
    /// </summary>
    /// <param name="instance">Instance whose <c>DockerStackName</c> identifies the stack to inspect.</param>
    /// <param name="ct">
    /// Cancellation token. NOTE(review): it is not forwarded to the shell call because the
    /// overloads of <c>RunCommandAsync</c> are not visible from this file - confirm and pass it through if supported.
    /// </param>
    /// <returns>
    /// A Healthy result when every task with desired state Running is currently Running;
    /// otherwise a Critical result whose message (and, for stopped tasks, detail text)
    /// describes the problem. Failures are reported as a result rather than thrown.
    /// </returns>
    public async Task<HealthCheckResult> RunAsync(Instance instance, CancellationToken ct)
    {
        var stackName = instance.DockerStackName;
        if (string.IsNullOrEmpty(stackName))
        {
            return new HealthCheckResult(HealthStatus.Critical, "No Docker stack name configured");
        }

        try
        {
            // NOTE(review): the shell is resolved from the injected provider and disposed here;
            // confirm SwarmShellService is registered so that per-call disposal is intended.
            await using var shell = _services.GetRequiredService<SwarmShellService>();

            // One row per task: name|current state|desired state.
            // The stack name is shell-quoted so that unexpected characters in it
            // cannot be interpreted by the remote shell.
            var output = await shell.RunCommandAsync(
                $"docker stack ps {QuoteForShell(stackName)} --format '{{{{.Name}}}}|{{{{.CurrentState}}}}|{{{{.DesiredState}}}}'");

            var taskCount = 0;
            var notRunning = new List<string>();

            foreach (var line in output.Split('\n', StringSplitOptions.RemoveEmptyEntries))
            {
                var parts = line.Split('|');
                if (parts.Length < 3)
                {
                    // Not a task row in the expected three-column format; ignore it.
                    continue;
                }

                taskCount++;

                var name = parts[0].Trim();
                var currentState = parts[1].Trim();
                var desiredState = parts[2].Trim();

                // Only tasks that are supposed to be Running count against the stack.
                // The current state is matched by prefix because the column carries
                // more than the bare state word.
                if (desiredState.Equals("Running", StringComparison.OrdinalIgnoreCase) &&
                    !currentState.StartsWith("Running", StringComparison.OrdinalIgnoreCase))
                {
                    notRunning.Add($"{name}: {currentState}");
                }
            }

            // No task rows at all means there is nothing to vouch for; reporting
            // "all services are Running" here would hide a missing or undeployed stack.
            if (taskCount == 0)
            {
                return new HealthCheckResult(HealthStatus.Critical,
                    $"No tasks found for stack {stackName}");
            }

            if (notRunning.Count == 0)
            {
                return new HealthCheckResult(HealthStatus.Healthy,
                    $"All services in {stackName} are Running");
            }

            return new HealthCheckResult(HealthStatus.Critical,
                $"{notRunning.Count} service(s) not running in {stackName}",
                string.Join("\n", notRunning));
        }
        catch (Exception ex)
        {
            // Keep the full exception (type and stack trace) in the log; the
            // returned result only carries the message text.
            _logger.LogError(ex, "Stack health check failed for {StackName}", stackName);

            return new HealthCheckResult(HealthStatus.Critical,
                $"Shell check failed for {stackName}: {ex.Message}");
        }
    }

    /// <summary>
    /// Wraps <paramref name="value"/> in single quotes for use as one shell argument,
    /// escaping any embedded single quote as <c>'\''</c>.
    /// </summary>
    /// <remarks>
    /// Assumes the remote shell honors POSIX single-quote rules; the command above
    /// already relies on single quotes for its <c>--format</c> argument.
    /// </remarks>
    private static string QuoteForShell(string value)
    {
        return "'" + value.Replace("'", "'\\''") + "'";
    }
}
|