Some checks failed
Build and Publish Docker Image / build-and-push (push) Has been cancelled
513 lines
20 KiB
C#
513 lines
20 KiB
C#
using System.Diagnostics;
|
|
using Microsoft.Extensions.Logging;
|
|
using Microsoft.Extensions.Options;
|
|
using MySqlConnector;
|
|
using OTSSignsOrchestrator.Core.Configuration;
|
|
using OTSSignsOrchestrator.Core.Models.DTOs;
|
|
using OTSSignsOrchestrator.Core.Models.Entities;
|
|
using OTSSignsOrchestrator.Core.Services;
|
|
|
|
namespace OTSSignsOrchestrator.Desktop.Services;
|
|
|
|
/// <summary>
|
|
/// Docker CLI service that executes docker commands on a remote host over SSH.
|
|
/// Requires an SshHost to be set before use via SetHost().
|
|
/// </summary>
|
|
public class SshDockerCliService : IDockerCliService
|
|
{
|
|
/// <summary>SSH transport used to run commands and open port forwards on the remote host.</summary>
private readonly SshConnectionService _ssh;

/// <summary>Value of the injected <see cref="IOptions{TOptions}"/> captured at construction.</summary>
private readonly DockerOptions _options;

/// <summary>Logger for this service.</summary>
private readonly ILogger<SshDockerCliService> _logger;

/// <summary>Host that commands are sent to; <c>null</c> until <see cref="SetHost"/> is called.</summary>
private SshHost? _currentHost;

/// <summary>
/// Creates the service from its injected dependencies. No host is selected yet.
/// </summary>
/// <param name="ssh">SSH connection service used for all remote execution.</param>
/// <param name="options">Options wrapper; its <c>Value</c> is read once here.</param>
/// <param name="logger">Logger for this service.</param>
public SshDockerCliService(
    SshConnectionService ssh,
    IOptions<DockerOptions> options,
    ILogger<SshDockerCliService> logger)
{
    (_ssh, _options, _logger) = (ssh, options.Value, logger);
}
|
|
|
|
/// <summary>
/// Selects the SSH host that all subsequent Docker commands are sent to.
/// </summary>
/// <param name="host">Host to target.</param>
public void SetHost(SshHost host) => _currentHost = host;
|
|
|
|
/// <summary>
/// The host most recently passed to <see cref="SetHost"/>, or <c>null</c> when none has been set.
/// </summary>
public SshHost? CurrentHost
{
    get { return _currentHost; }
}
|
|
|
|
/// <summary>
/// Deploys a swarm stack on the current host by running
/// <c>docker stack deploy --compose-file -</c> over SSH, with the compose YAML supplied on the
/// command's standard input.
/// </summary>
/// <param name="stackName">Name of the stack to deploy.</param>
/// <param name="composeYaml">Compose file content written to the command's stdin.</param>
/// <param name="resolveImage">When <c>true</c>, <c>--resolve-image changed</c> is added to the command.</param>
/// <returns>
/// A result carrying the exit code, captured stdout/stderr and elapsed milliseconds;
/// <c>Success</c> is <c>true</c> only for exit code 0.
/// </returns>
/// <exception cref="InvalidOperationException">No host has been set via <see cref="SetHost"/>.</exception>
public async Task<DeploymentResultDto> DeployStackAsync(string stackName, string composeYaml, bool resolveImage = false)
{
    EnsureHost();
    var host = _currentHost!;
    var sw = Stopwatch.StartNew();

    // The command string is interpreted by the remote shell, so the stack name is passed as one
    // single-quoted word (embedded ' rewritten as '\''). Plain names produce the same command as before.
    static string ShQuote(string value) => "'" + value.Replace("'", "'\\''") + "'";

    var command = "docker stack deploy --compose-file -";
    if (resolveImage)
    {
        command += " --resolve-image changed";
    }
    command += " " + ShQuote(stackName);

    _logger.LogInformation("Deploying stack via SSH: {StackName} on {Host}", stackName, host.Label);

    var (exitCode, stdout, stderr) = await _ssh.RunCommandWithStdinAsync(host, command, composeYaml);
    sw.Stop();

    var succeeded = exitCode == 0;
    var result = new DeploymentResultDto
    {
        StackName = stackName,
        Success = succeeded,
        ExitCode = exitCode,
        Output = stdout,
        ErrorMessage = stderr,
        Message = succeeded ? "Success" : "Failed",
        DurationMs = sw.ElapsedMilliseconds
    };

    if (result.Success)
    {
        _logger.LogInformation("Stack deployed via SSH: {StackName} | duration={DurationMs}ms", stackName, result.DurationMs);
    }
    else
    {
        _logger.LogError("Stack deploy failed via SSH: {StackName} | error={Error}", stackName, result.ErrorMessage);
    }

    return result;
}
|
|
|
|
/// <summary>
/// Removes a swarm stack on the current host by running <c>docker stack rm</c> over SSH.
/// </summary>
/// <param name="stackName">Name of the stack to remove.</param>
/// <returns>
/// A result carrying the exit code, captured stdout/stderr and elapsed milliseconds;
/// <c>Success</c> is <c>true</c> only for exit code 0.
/// </returns>
/// <exception cref="InvalidOperationException">No host has been set via <see cref="SetHost"/>.</exception>
public async Task<DeploymentResultDto> RemoveStackAsync(string stackName)
{
    EnsureHost();
    var host = _currentHost!;
    var sw = Stopwatch.StartNew();

    // The command string is interpreted by the remote shell, so the stack name is passed as one
    // single-quoted word (embedded ' rewritten as '\''). Plain names produce the same command as before.
    static string ShQuote(string value) => "'" + value.Replace("'", "'\\''") + "'";

    _logger.LogInformation("Removing stack via SSH: {StackName} on {Host}", stackName, host.Label);

    var (exitCode, stdout, stderr) = await _ssh.RunCommandAsync(host, "docker stack rm " + ShQuote(stackName));
    sw.Stop();

    var succeeded = exitCode == 0;
    var result = new DeploymentResultDto
    {
        StackName = stackName,
        Success = succeeded,
        ExitCode = exitCode,
        Output = stdout,
        ErrorMessage = stderr,
        Message = succeeded ? "Success" : "Failed",
        DurationMs = sw.ElapsedMilliseconds
    };

    if (result.Success)
    {
        _logger.LogInformation("Stack removed via SSH: {StackName}", stackName);
    }
    else
    {
        _logger.LogError("Stack remove failed via SSH: {StackName} | error={Error}", stackName, result.ErrorMessage);
    }

    return result;
}
|
|
|
|
/// <summary>
/// Lists the stacks reported by <c>docker stack ls</c> on the current host.
/// </summary>
/// <returns>
/// One entry per non-empty output line. The list is empty when the command exits non-zero or
/// prints nothing. A service count that is missing or not an integer is reported as 0.
/// </returns>
/// <exception cref="InvalidOperationException">No host has been set via <see cref="SetHost"/>.</exception>
public async Task<List<StackInfo>> ListStacksAsync()
{
    EnsureHost();

    // The format argument carries a literal backslash-t; the output is then split on TAB,
    // so docker is expected to render that sequence as a tab character.
    var (exitCode, stdout, _) = await _ssh.RunCommandAsync(
        _currentHost!, "docker stack ls --format '{{.Name}}\\t{{.Services}}'");

    var stacks = new List<StackInfo>();
    if (exitCode != 0 || string.IsNullOrWhiteSpace(stdout))
    {
        return stacks;
    }

    foreach (var row in stdout.Split('\n', StringSplitOptions.RemoveEmptyEntries))
    {
        var columns = row.Split('\t', 2);

        var serviceCount = 0;
        if (columns.Length > 1 && int.TryParse(columns[1].Trim(), out var parsed))
        {
            serviceCount = parsed;
        }

        stacks.Add(new StackInfo
        {
            Name = columns[0].Trim(),
            ServiceCount = serviceCount
        });
    }

    return stacks;
}
|
|
|
|
/// <summary>
/// Lists the services of one stack (name, image, replicas) via <c>docker stack services</c>
/// on the current host.
/// </summary>
/// <param name="stackName">Name of the stack to inspect.</param>
/// <returns>
/// One entry per non-empty output line. The list is empty when the command exits non-zero or
/// prints nothing. Columns missing from a line are reported as empty strings.
/// </returns>
/// <exception cref="InvalidOperationException">No host has been set via <see cref="SetHost"/>.</exception>
public async Task<List<ServiceInfo>> InspectStackServicesAsync(string stackName)
{
    EnsureHost();

    // The command string is interpreted by the remote shell, so the stack name is passed as one
    // single-quoted word (embedded ' rewritten as '\''). Plain names produce the same command as before.
    static string ShQuote(string value) => "'" + value.Replace("'", "'\\''") + "'";

    var command = "docker stack services " + ShQuote(stackName)
        + " --format '{{.Name}}\\t{{.Image}}\\t{{.Replicas}}'";

    var (exitCode, stdout, _) = await _ssh.RunCommandAsync(_currentHost!, command);

    if (exitCode != 0 || string.IsNullOrWhiteSpace(stdout))
    {
        return new List<ServiceInfo>();
    }

    return stdout
        .Split('\n', StringSplitOptions.RemoveEmptyEntries)
        .Select(line =>
        {
            var parts = line.Split('\t', 3);
            return new ServiceInfo
            {
                Name = parts.Length > 0 ? parts[0].Trim() : "",
                Image = parts.Length > 1 ? parts[1].Trim() : "",
                Replicas = parts.Length > 2 ? parts[2].Trim() : ""
            };
        })
        .ToList();
}
|
|
|
|
/// <summary>
/// Runs <c>mkdir -p</c> for the given path on the current host.
/// </summary>
/// <param name="path">
/// Directory path, inserted into the command as-is.
/// NOTE(review): the path is not shell-quoted, so spaces or shell metacharacters in it are
/// interpreted by the remote shell — confirm callers only pass trusted paths.
/// </param>
/// <returns><c>true</c> when the command exits with code 0; otherwise <c>false</c>.</returns>
/// <exception cref="InvalidOperationException">No host has been set via <see cref="SetHost"/>.</exception>
public async Task<bool> EnsureDirectoryAsync(string path)
{
    EnsureHost();

    var (exitCode, _, stderr) = await _ssh.RunCommandAsync(_currentHost!, $"mkdir -p {path}");
    var created = exitCode == 0;

    if (created)
    {
        _logger.LogInformation("Ensured directory exists on {Host}: {Path}", _currentHost!.Label, path);
    }
    else
    {
        _logger.LogWarning("Failed to create directory {Path} on {Host}: {Error}", path, _currentHost!.Label, stderr);
    }

    return created;
}
|
|
|
|
/// <summary>
/// Ensures the given folders exist on an NFS export by temporarily mounting it on the current
/// host. This is the boolean-only form of <see cref="EnsureNfsFoldersWithErrorAsync"/> and
/// delegates to it, so both entry points always run the same script and emit the same log lines.
/// </summary>
/// <param name="nfsServer">NFS server address.</param>
/// <param name="nfsExport">Export path on the server.</param>
/// <param name="folderNames">Folder names to create beneath the export (and optional sub-folder).</param>
/// <param name="nfsExportFolder">Optional sub-folder of the export under which the folders are created.</param>
/// <returns><c>true</c> when the remote script exits with code 0; otherwise <c>false</c>.</returns>
/// <exception cref="InvalidOperationException">No host has been set via <see cref="SetHost"/>.</exception>
public async Task<bool> EnsureNfsFoldersAsync(
    string nfsServer,
    string nfsExport,
    IEnumerable<string> folderNames,
    string? nfsExportFolder = null)
{
    var (success, _) = await EnsureNfsFoldersWithErrorAsync(nfsServer, nfsExport, folderNames, nfsExportFolder);
    return success;
}
|
|
|
|
/// <summary>
/// Ensures the given folders exist on an NFS export. A single remote script creates a temporary
/// mount point, mounts the export, runs <c>mkdir -p</c> for every folder, unmounts and removes
/// the mount point.
/// </summary>
/// <param name="nfsServer">NFS server address; also used for the <c>addr=</c> mount option.</param>
/// <param name="nfsExport">Export path on the server; leading and trailing slashes are ignored.</param>
/// <param name="folderNames">Folder names to create beneath the export (and optional sub-folder).</param>
/// <param name="nfsExportFolder">Optional sub-folder of the export under which the folders are created.</param>
/// <returns>
/// <c>(true, null)</c> when the script exits with code 0; otherwise <c>false</c> together with the
/// trimmed stderr (or stdout when stderr is blank, or "unknown error" when both are blank).
/// </returns>
/// <exception cref="InvalidOperationException">No host has been set via <see cref="SetHost"/>.</exception>
public async Task<(bool Success, string? Error)> EnsureNfsFoldersWithErrorAsync(
    string nfsServer,
    string nfsExport,
    IEnumerable<string> folderNames,
    string? nfsExportFolder = null)
{
    EnsureHost();
    var host = _currentHost!;

    // Values are spliced into a shell script, so each one is passed as a single-quoted word
    // (embedded ' rewritten as '\''). Plain values produce the same words as before.
    static string ShQuote(string value) => "'" + value.Replace("'", "'\\''") + "'";

    var exportPath = (nfsExport ?? string.Empty).Trim('/');
    var subFolder = (nfsExportFolder ?? string.Empty).Trim('/');

    // Sub-path beneath the mount point where the folders are created.
    var subPath = string.IsNullOrEmpty(subFolder) ? string.Empty : $"/{subFolder}";

    // Each target is "$MNT" (expanded by the shell) immediately followed by the quoted remainder.
    var folderList = folderNames.Select(f => "\"$MNT\"" + ShQuote($"{subPath}/{f}")).ToList();
    var mkdirTargets = string.Join(" ", folderList);

    // addr= pins the server address (kept from the original command line).
    var mountOptions = ShQuote($"addr={nfsServer},nfsvers=4,proto=tcp,soft,timeo=50,retrans=2");
    var mountSource = ShQuote($"{nfsServer}:/{exportPath}");

    // mount and mkdir record their status in RC instead of tripping `set -e`, so the export is
    // always unmounted and the temp dir removed even when a step fails. The first failing
    // step's exit code is what the script returns.
    var script = $"""
        set -e
        MNT=$(mktemp -d)
        RC=0
        sudo mount -t nfs -o {mountOptions} {mountSource} "$MNT" || RC=$?
        if [ "$RC" -ne 0 ]; then
          rmdir "$MNT" || true
          exit "$RC"
        fi
        sudo mkdir -p {mkdirTargets} || RC=$?
        sudo umount "$MNT"
        rmdir "$MNT"
        exit "$RC"
        """;

    _logger.LogInformation(
        "Mounting NFS export {Server}:/{Export} on Docker host {Host} to create {Count} folders",
        nfsServer, exportPath, host.Label, folderList.Count);

    var (exitCode, stdout, stderr) = await _ssh.RunCommandAsync(host, script, TimeSpan.FromSeconds(30));

    if (exitCode == 0)
    {
        _logger.LogInformation(
            "NFS export folders ensured via mount on {Host}: {Server}:/{Export}{Sub} ({Count} folders)",
            host.Label, nfsServer, exportPath, subPath, folderList.Count);
        return (true, null);
    }

    // `??` alone would keep an empty stderr and never reach stdout, so test for blank instead.
    string error;
    if (!string.IsNullOrWhiteSpace(stderr))
    {
        error = stderr.Trim();
    }
    else if (!string.IsNullOrWhiteSpace(stdout))
    {
        error = stdout.Trim();
    }
    else
    {
        error = "unknown error";
    }

    _logger.LogWarning(
        "Failed to create NFS export folders on {Host}: {Error}",
        host.Label, error);
    return (false, error);
}
|
|
|
|
/// <summary>
/// Runs <c>docker service update --force</c> for the given service on the current host.
/// </summary>
/// <param name="serviceName">Name of the service to update.</param>
/// <returns><c>true</c> when the command exits with code 0; otherwise <c>false</c>.</returns>
/// <exception cref="InvalidOperationException">No host has been set via <see cref="SetHost"/>.</exception>
public async Task<bool> ForceUpdateServiceAsync(string serviceName)
{
    EnsureHost();
    var host = _currentHost!;

    // The command string is interpreted by the remote shell, so the service name is passed as one
    // single-quoted word (embedded ' rewritten as '\''). Plain names produce the same command as before.
    static string ShQuote(string value) => "'" + value.Replace("'", "'\\''") + "'";

    _logger.LogInformation("Force-updating service {ServiceName} on {Host}", serviceName, host.Label);

    var (exitCode, _, stderr) = await _ssh.RunCommandAsync(
        host, "docker service update --force " + ShQuote(serviceName));

    if (exitCode != 0)
    {
        _logger.LogWarning("Force-update failed for {ServiceName}: {Error}", serviceName, stderr);
    }

    return exitCode == 0;
}
|
|
|
|
/// <summary>
/// Opens a MySQL connection to <paramref name="mysqlHost"/> through an SSH port forward on the
/// current host. The client connects to 127.0.0.1 on the tunnel's locally bound port.
/// </summary>
/// <param name="mysqlHost">MySQL host as reachable from the SSH host.</param>
/// <param name="port">MySQL port on <paramref name="mysqlHost"/>.</param>
/// <param name="adminUser">User to authenticate as.</param>
/// <param name="adminPassword">Password for <paramref name="adminUser"/>.</param>
/// <returns>
/// The open connection and the tunnel it runs through. The caller owns both and should dispose
/// the connection before the tunnel.
/// </returns>
/// <exception cref="InvalidOperationException">No host has been set via <see cref="SetHost"/>.</exception>
public async Task<(MySqlConnection Connection, IDisposable Tunnel)> OpenMySqlConnectionAsync(
    string mysqlHost, int port,
    string adminUser, string adminPassword)
{
    EnsureHost();
    _logger.LogInformation(
        "Opening tunnelled MySQL connection to {MysqlHost}:{Port} via SSH",
        mysqlHost, port);

    var tunnel = _ssh.OpenForwardedPort(_currentHost!, mysqlHost, (uint)port);

    // Everything after the tunnel is opened runs inside the try, so the tunnel is released on
    // any failure, not only when OpenAsync throws.
    MySqlConnection? connection = null;
    try
    {
        var localPort = (int)tunnel.BoundPort;

        var csb = new MySqlConnectionStringBuilder
        {
            Server = "127.0.0.1",
            Port = (uint)localPort,
            UserID = adminUser,
            Password = adminPassword,
            ConnectionTimeout = 15,
            SslMode = MySqlSslMode.Disabled,
            // The connection string is tied to this tunnel's local port, so a pooled connection
            // could not be reused once the tunnel is gone; disable pooling so disposing the
            // connection really closes it.
            Pooling = false,
        };

        connection = new MySqlConnection(csb.ConnectionString);
        await connection.OpenAsync();
        return (connection, tunnel);
    }
    catch
    {
        if (connection is not null)
        {
            await connection.DisposeAsync();
        }
        tunnel.Dispose();
        throw;
    }
}
|
|
|
|
/// <summary>
/// Changes the password of <c>'targetUser'@'%'</c> by running <c>ALTER USER</c> over a MySQL
/// connection tunnelled through SSH.
/// </summary>
/// <param name="mysqlHost">MySQL host as reachable from the SSH host.</param>
/// <param name="port">MySQL port.</param>
/// <param name="adminUser">User that performs the change.</param>
/// <param name="adminPassword">Password for <paramref name="adminUser"/>.</param>
/// <param name="targetUser">User whose password is changed.</param>
/// <param name="newPassword">New password, passed as a command parameter.</param>
/// <returns>
/// <c>(true, "")</c> on success; <c>(false, message)</c> when a <see cref="MySqlException"/> is
/// thrown. Any other exception type propagates to the caller.
/// </returns>
public async Task<(bool Success, string Error)> AlterMySqlUserPasswordAsync(
    string mysqlHost, int port,
    string adminUser, string adminPassword,
    string targetUser, string newPassword)
{
    _logger.LogInformation(
        "Altering MySQL password for user {User} on {MysqlHost}:{Port} via SSH tunnel",
        targetUser, mysqlHost, port);

    try
    {
        var (connection, tunnel) = await OpenMySqlConnectionAsync(mysqlHost, port, adminUser, adminPassword);

        // The tunnel is the outer scope so the connection is closed first, while the forward
        // it runs through is still open.
        using (tunnel)
        {
            await using (connection)
            {
                // The account name sits inside a quoted SQL string literal: double any backslash
                // (an escape character in MySQL's default SQL mode) and any single quote.
                var escapedUser = targetUser.Replace("\\", "\\\\").Replace("'", "''");

                await using var cmd = connection.CreateCommand();
                cmd.CommandText = $"ALTER USER '{escapedUser}'@'%' IDENTIFIED BY @pwd";
                cmd.Parameters.AddWithValue("@pwd", newPassword);
                await cmd.ExecuteNonQueryAsync();
            }
        }

        _logger.LogInformation("MySQL password updated for user {User} via SSH tunnel", targetUser);
        return (true, string.Empty);
    }
    catch (MySqlException ex)
    {
        _logger.LogError(ex, "MySQL ALTER USER failed via SSH tunnel for user {User}", targetUser);
        return (false, ex.Message);
    }
}
|
|
|
|
/// <summary>
/// Replaces a secret on a service with a single <c>docker service update</c> that removes the
/// old secret and adds the new one.
/// </summary>
/// <param name="serviceName">Service to update.</param>
/// <param name="oldSecretName">Secret to detach.</param>
/// <param name="newSecretName">Secret to attach.</param>
/// <param name="targetAlias">
/// Target name for the new secret; defaults to <paramref name="oldSecretName"/> when <c>null</c>.
/// </param>
/// <returns><c>true</c> when the command exits with code 0; otherwise <c>false</c>.</returns>
/// <exception cref="InvalidOperationException">No host has been set via <see cref="SetHost"/>.</exception>
public async Task<bool> ServiceSwapSecretAsync(string serviceName, string oldSecretName, string newSecretName, string? targetAlias = null)
{
    EnsureHost();

    // The command string is interpreted by the remote shell, so every caller-supplied value is
    // passed as one single-quoted word (embedded ' rewritten as '\'').
    static string ShQuote(string value) => "'" + value.Replace("'", "'\\''") + "'";

    var target = targetAlias ?? oldSecretName;
    var cmd = "docker service update"
        + " --secret-rm " + ShQuote(oldSecretName)
        + " --secret-add " + ShQuote($"source={newSecretName},target={target}")
        + " " + ShQuote(serviceName);

    _logger.LogInformation(
        "Swapping secret on {ServiceName}: {OldSecret} → {NewSecret} (target={Target})",
        serviceName, oldSecretName, newSecretName, target);

    var (exitCode, _, stderr) = await _ssh.RunCommandAsync(_currentHost!, cmd);

    if (exitCode != 0)
    {
        _logger.LogError("Secret swap failed for {ServiceName}: {Error}", serviceName, stderr);
    }

    return exitCode == 0;
}
|
|
|
|
/// <summary>
/// Lists the swarm nodes known to the current host. Node IDs come from <c>docker node ls</c>;
/// a single <c>docker node inspect</c> over all IDs then supplies hostname, state, availability,
/// addresses, engine version and role.
/// </summary>
/// <returns>One entry per inspected node; empty when either command prints nothing.</returns>
/// <exception cref="InvalidOperationException">
/// No host has been set, or <c>docker node ls</c> / <c>docker node inspect</c> exited non-zero.
/// </exception>
public async Task<List<NodeInfo>> ListNodesAsync()
{
    EnsureHost();
    var host = _currentHost!;

    // First non-blank of stderr/stdout, trimmed. `??` alone would keep an empty stderr.
    static string Describe(string? stderr, string? stdout)
    {
        if (!string.IsNullOrWhiteSpace(stderr))
        {
            return stderr.Trim();
        }
        return !string.IsNullOrWhiteSpace(stdout) ? stdout.Trim() : "unknown error";
    }

    _logger.LogInformation("Listing swarm nodes via SSH on {Host}", host.Label);

    // 'docker node ls' does not expose the node address, so it is only used to collect IDs.
    var (lsExit, lsOut, lsErr) = await _ssh.RunCommandAsync(
        host, "docker node ls --format '{{.ID}}'");

    if (lsExit != 0)
    {
        var msg = Describe(lsErr, lsOut);
        _logger.LogWarning("docker node ls failed on {Host} (exit {Code}): {Error}",
            host.Label, lsExit, msg);
        throw new InvalidOperationException(
            $"Failed to list swarm nodes on {host.Label}: {msg}");
    }

    if (string.IsNullOrWhiteSpace(lsOut))
    {
        return new List<NodeInfo>();
    }

    var nodeIds = lsOut.Split('\n', StringSplitOptions.RemoveEmptyEntries)
        .Select(id => id.Trim())
        .Where(id => id.Length > 0)
        .ToList();

    if (nodeIds.Count == 0)
    {
        return new List<NodeInfo>();
    }

    // Inspect all nodes in one call. The separators in this format string are real TAB characters.
    var ids = string.Join(" ", nodeIds);
    var format = "'{{.ID}}\t{{.Description.Hostname}}\t{{.Status.State}}\t{{.Spec.Availability}}\t{{.ManagerStatus.Addr}}\t{{.Status.Addr}}\t{{.Description.Engine.EngineVersion}}\t{{.Spec.Role}}'";
    var (exitCode, stdout, stderr) = await _ssh.RunCommandAsync(
        host, $"docker node inspect --format {format} {ids}");

    if (exitCode != 0)
    {
        var msg = Describe(stderr, stdout);
        _logger.LogWarning("docker node inspect failed on {Host} (exit {Code}): {Error}",
            host.Label, exitCode, msg);
        throw new InvalidOperationException(
            $"Failed to inspect swarm nodes on {host.Label}: {msg}");
    }

    if (string.IsNullOrWhiteSpace(stdout))
    {
        return new List<NodeInfo>();
    }

    return stdout
        .Split('\n', StringSplitOptions.RemoveEmptyEntries)
        .Select(ParseNodeLine)
        .ToList();
}

/// <summary>
/// Converts one TAB-separated <c>docker node inspect</c> row (ID, hostname, state, availability,
/// manager address, status address, engine version, role) into a <see cref="NodeInfo"/>.
/// Columns missing from the row are treated as empty strings.
/// </summary>
private static NodeInfo ParseNodeLine(string line)
{
    var parts = line.Split('\t', 8);
    string Column(int index) => index < parts.Length ? parts[index].Trim() : "";

    // Empty, or a template rendering artefact such as "<no value>".
    static bool IsBlankOrArtefact(string value) =>
        value.Length == 0 || value[0] == '<' || value[0] == '{';

    var managerAddr = Column(4);
    var statusAddr = Column(5);

    // Prefer Status.Addr (a bare IP). Fall back to ManagerStatus.Addr, which carries a port
    // (e.g. "10.0.0.1:2377"), when Status.Addr is unusable.
    // NOTE(review): 0.0.0.0 is treated as unusable because Docker has been seen reporting it as
    // Status.Addr on manager nodes — confirm against the target Docker version.
    var ip = statusAddr;
    if (IsBlankOrArtefact(ip) || ip == "0.0.0.0")
    {
        var colon = managerAddr.LastIndexOf(':');
        var fallback = colon >= 0 ? managerAddr[..colon] : managerAddr;

        if (!IsBlankOrArtefact(fallback))
        {
            ip = fallback;
        }
        else if (IsBlankOrArtefact(ip))
        {
            // Nothing usable from either field; a literal 0.0.0.0 is kept as reported.
            ip = "";
        }
    }

    // For managers, "Reachable" only records that a manager address was rendered;
    // it does not identify the leader.
    var role = Column(7);
    var managerStatus = "";
    if (string.Equals(role, "manager", StringComparison.OrdinalIgnoreCase)
        && managerAddr.Length > 0
        && managerAddr[0] != '<')
    {
        managerStatus = "Reachable";
    }

    return new NodeInfo
    {
        Id = Column(0),
        Hostname = Column(1),
        Status = Column(2),
        Availability = Column(3),
        ManagerStatus = managerStatus,
        IpAddress = ip,
        EngineVersion = Column(6)
    };
}
|
|
|
|
/// <summary>
/// Guard used at the start of every remote operation: fails fast when no host has been selected.
/// </summary>
/// <exception cref="InvalidOperationException"><see cref="SetHost"/> has not been called.</exception>
private void EnsureHost()
{
    if (_currentHost is not null)
    {
        return;
    }

    throw new InvalidOperationException("No SSH host configured. Call SetHost() before using Docker commands.");
}
|
|
|
|
/// <summary>
/// Removes a stack and then deletes the Docker volumes whose names start with
/// <c>{stackName}_</c>: first on the current host, then on every swarm node through a
/// short-lived global service that mounts the Docker socket.
/// </summary>
/// <remarks>
/// Every step is best-effort: failures are logged and the method carries on. The waits are
/// fixed delays (5 s after stack removal, 10 s for the cleanup tasks), not completion checks.
/// </remarks>
/// <param name="stackName">Name of the stack whose volumes are removed.</param>
/// <returns>Always <c>true</c>.</returns>
/// <exception cref="InvalidOperationException">No host has been set via <see cref="SetHost"/>.</exception>
public async Task<bool> RemoveStackVolumesAsync(string stackName)
{
    EnsureHost();
    var host = _currentHost!;

    // Commands are interpreted by the remote shell, so caller-supplied values are passed as
    // single-quoted words (embedded ' rewritten as '\'').
    static string ShQuote(string value) => "'" + value.Replace("'", "'\\''") + "'";

    var quotedPrefix = ShQuote(stackName + "_");

    // docker's name= volume filter matches anywhere in the name, so on its own it would also
    // select volumes of other stacks (e.g. "myapp_data" when removing "app"). awk keeps only
    // names that really begin with the prefix; index() is a plain-text match, not a regex.
    var listStackVolumes =
        $"docker volume ls -q --filter name={quotedPrefix} | awk -v p={quotedPrefix} 'index($0, p) == 1'";

    // ── 1. Remove the stack first so containers release the volumes ─────
    // No `2>&1 || true` here: the exit code and stderr must survive for the warning below.
    _logger.LogInformation("Removing stack {StackName} before volume cleanup", stackName);
    var (rmExit, rmOut, rmErr) = await _ssh.RunCommandAsync(host, "docker stack rm " + ShQuote(stackName));
    if (rmExit != 0)
    {
        _logger.LogWarning("Stack rm returned non-zero for {StackName}: {Err}",
            stackName, string.IsNullOrWhiteSpace(rmErr) ? rmOut : rmErr);
    }

    // Give Swarm a moment to tear down containers on all nodes
    await Task.Delay(5000);

    // ── 2. Clean volumes on the local (manager) node ────────────────────
    var localCmd = $"{listStackVolumes} | xargs -r docker volume rm 2>&1 || true";
    var (_, localOut, _) = await _ssh.RunCommandAsync(host, localCmd);
    if (!string.IsNullOrWhiteSpace(localOut))
    {
        _logger.LogInformation("Volume cleanup (manager): {Output}", localOut.Trim());
    }

    // ── 3. Clean volumes on ALL swarm nodes via a temporary global service ──
    // A short-lived container on every node mounts the Docker socket and removes matching
    // volumes, which covers nodes this service never connects to over SSH.
    var cleanupSvcName = $"vol-cleanup-{stackName}".Replace("_", "-");
    var quotedSvcName = ShQuote(cleanupSvcName);

    // Remove leftover cleanup service from a previous run (if any)
    await _ssh.RunCommandAsync(host, $"docker service rm {quotedSvcName} 2>/dev/null || true");

    // The container script is handed to `sh -c` as one argument, so it is quoted as a whole.
    var cleanupScript = $"{listStackVolumes} | xargs -r docker volume rm 2>&1; echo done";
    var createCmd = string.Join(" ",
        "docker service create",
        "--detach",
        "--mode global",
        "--restart-condition none",
        $"--name {quotedSvcName}",
        "--mount type=bind,source=/var/run/docker.sock,target=/var/run/docker.sock",
        "docker:cli",
        "sh", "-c",
        ShQuote(cleanupScript));

    _logger.LogInformation("Deploying global volume-cleanup service on all swarm nodes for {StackName}", stackName);
    var (svcExit, _, svcErr) = await _ssh.RunCommandAsync(host, createCmd);

    if (svcExit != 0)
    {
        _logger.LogWarning("Global volume cleanup service creation failed: {Err}", svcErr);
    }
    else
    {
        // Fixed wait for the cleanup tasks; completion is not verified.
        _logger.LogInformation("Waiting for volume cleanup tasks to complete on all nodes...");
        await Task.Delay(10000);
    }

    // Remove the cleanup service
    await _ssh.RunCommandAsync(host, $"docker service rm {quotedSvcName} 2>/dev/null || true");

    _logger.LogInformation("Volume cleanup complete for stack {StackName}", stackName);
    return true;
}
|
|
}
|