using System.Diagnostics;
using System.Globalization;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MySqlConnector;
using OTSSignsOrchestrator.Core.Configuration;
using OTSSignsOrchestrator.Core.Models.DTOs;
using OTSSignsOrchestrator.Core.Models.Entities;
using OTSSignsOrchestrator.Core.Services;
using ServiceLogEntry = OTSSignsOrchestrator.Core.Models.DTOs.ServiceLogEntry;

namespace OTSSignsOrchestrator.Desktop.Services;

/// <summary>
/// Docker CLI service that executes docker commands on a remote host over SSH.
/// Requires an SshHost to be set before use via SetHost().
/// </summary>
public class SshDockerCliService : IDockerCliService
{
    private const string UnknownError = "unknown error";

    /// <summary>Timeout applied to the NFS mount/mkdir/umount script.</summary>
    private static readonly TimeSpan NfsCommandTimeout = TimeSpan.FromSeconds(30);

    /// <summary>Timeout applied to each <c>docker service logs</c> call.</summary>
    private static readonly TimeSpan LogCommandTimeout = TimeSpan.FromSeconds(15);

    /// <summary>Pause after <c>docker stack rm</c> before volumes are removed.</summary>
    private static readonly TimeSpan StackTeardownDelay = TimeSpan.FromSeconds(5);

    /// <summary>Pause after creating the global cleanup service before it is removed again.</summary>
    private static readonly TimeSpan VolumeCleanupDelay = TimeSpan.FromSeconds(10);

    private readonly SshConnectionService _ssh;
    private readonly DockerOptions _options;
    private readonly ILogger<SshDockerCliService> _logger;
    private SshHost? _currentHost;

    public SshDockerCliService(
        SshConnectionService ssh,
        IOptions<DockerOptions> options,
        ILogger<SshDockerCliService> logger)
    {
        _ssh = ssh;
        _options = options.Value;
        _logger = logger;
    }

    /// <summary>
    /// Set the SSH host to use for Docker commands.
    /// </summary>
    public void SetHost(SshHost host)
    {
        _currentHost = host;
    }

    /// <summary>The host most recently passed to <see cref="SetHost"/>, or null if none was set.</summary>
    public SshHost? CurrentHost => _currentHost;

    /// <summary>
    /// Runs <c>docker stack deploy</c> on the current host, feeding the compose YAML via stdin.
    /// </summary>
    /// <param name="stackName">Name of the stack to deploy.</param>
    /// <param name="composeYaml">Compose file content passed to the command's stdin.</param>
    /// <param name="resolveImage">When true, appends <c>--resolve-image changed</c>.</param>
    /// <returns>A result carrying exit code, stdout, stderr and elapsed time.</returns>
    /// <exception cref="InvalidOperationException">No host has been set.</exception>
    public async Task<DeploymentResultDto> DeployStackAsync(string stackName, string composeYaml, bool resolveImage = false)
    {
        var host = RequireHost();
        var sw = Stopwatch.StartNew();

        var args = "docker stack deploy --compose-file -";
        if (resolveImage)
        {
            args += " --resolve-image changed";
        }
        args += $" {ShellQuote(stackName)}";

        _logger.LogInformation("Deploying stack via SSH: {StackName} on {Host}", stackName, host.Label);
        var (exitCode, stdout, stderr) = await _ssh.RunCommandWithStdinAsync(host, args, composeYaml);
        sw.Stop();

        var result = BuildResult(stackName, exitCode, stdout, stderr, sw.ElapsedMilliseconds);
        if (result.Success)
        {
            _logger.LogInformation("Stack deployed via SSH: {StackName} | duration={DurationMs}ms", stackName, result.DurationMs);
        }
        else
        {
            _logger.LogError("Stack deploy failed via SSH: {StackName} | error={Error}", stackName, result.ErrorMessage);
        }

        return result;
    }

    /// <summary>
    /// Runs <c>docker stack rm</c> for the given stack on the current host.
    /// </summary>
    /// <param name="stackName">Name of the stack to remove.</param>
    /// <returns>A result carrying exit code, stdout, stderr and elapsed time.</returns>
    /// <exception cref="InvalidOperationException">No host has been set.</exception>
    public async Task<DeploymentResultDto> RemoveStackAsync(string stackName)
    {
        var host = RequireHost();
        var sw = Stopwatch.StartNew();

        _logger.LogInformation("Removing stack via SSH: {StackName} on {Host}", stackName, host.Label);
        var (exitCode, stdout, stderr) = await _ssh.RunCommandAsync(host, $"docker stack rm {ShellQuote(stackName)}");
        sw.Stop();

        var result = BuildResult(stackName, exitCode, stdout, stderr, sw.ElapsedMilliseconds);
        if (result.Success)
        {
            _logger.LogInformation("Stack removed via SSH: {StackName}", stackName);
        }
        else
        {
            _logger.LogError("Stack remove failed via SSH: {StackName} | error={Error}", stackName, result.ErrorMessage);
        }

        return result;
    }

    /// <summary>
    /// Lists stacks via <c>docker stack ls</c>. Returns an empty list when the command
    /// fails or prints nothing.
    /// </summary>
    /// <exception cref="InvalidOperationException">No host has been set.</exception>
    public async Task<List<StackInfo>> ListStacksAsync()
    {
        var host = RequireHost();

        // Not an interpolated string: the braces and the backslash-t reach docker literally.
        var (exitCode, stdout, _) = await _ssh.RunCommandAsync(
            host, "docker stack ls --format '{{.Name}}\\t{{.Services}}'");

        if (exitCode != 0 || string.IsNullOrWhiteSpace(stdout))
        {
            return new List<StackInfo>();
        }

        return stdout
            .Split('\n', StringSplitOptions.RemoveEmptyEntries)
            .Select(line =>
            {
                var parts = line.Split('\t', 2);
                var hasCount = parts.Length > 1
                    && int.TryParse(parts[1].Trim(), NumberStyles.Integer, CultureInfo.InvariantCulture, out var parsed);
                return new StackInfo
                {
                    Name = parts[0].Trim(),
                    ServiceCount = hasCount
                        ? int.Parse(parts[1].Trim(), NumberStyles.Integer, CultureInfo.InvariantCulture)
                        : 0
                };
            })
            .ToList();
    }

    /// <summary>
    /// Lists the services of a stack (name, image, replicas) via <c>docker stack services</c>.
    /// Returns an empty list when the command fails or prints nothing.
    /// </summary>
    /// <param name="stackName">Name of the stack to inspect.</param>
    /// <exception cref="InvalidOperationException">No host has been set.</exception>
    public async Task<List<ServiceInfo>> InspectStackServicesAsync(string stackName)
    {
        var host = RequireHost();

        const string format = "'{{.Name}}\\t{{.Image}}\\t{{.Replicas}}'";
        var (exitCode, stdout, _) = await _ssh.RunCommandAsync(
            host, $"docker stack services {ShellQuote(stackName)} --format {format}");

        if (exitCode != 0 || string.IsNullOrWhiteSpace(stdout))
        {
            return new List<ServiceInfo>();
        }

        return stdout
            .Split('\n', StringSplitOptions.RemoveEmptyEntries)
            .Select(line =>
            {
                var parts = line.Split('\t', 3);
                return new ServiceInfo
                {
                    Name = FieldAt(parts, 0),
                    Image = FieldAt(parts, 1),
                    Replicas = FieldAt(parts, 2)
                };
            })
            .ToList();
    }

    /// <summary>
    /// Runs <c>mkdir -p</c> for <paramref name="path"/> on the current host.
    /// </summary>
    /// <param name="path">Directory to create. It is passed as one quoted shell word,
    /// so no tilde or variable expansion is applied to it.</param>
    /// <returns>True when the command exits with code 0.</returns>
    /// <exception cref="InvalidOperationException">No host has been set.</exception>
    public async Task<bool> EnsureDirectoryAsync(string path)
    {
        var host = RequireHost();
        var (exitCode, _, stderr) = await _ssh.RunCommandAsync(host, $"mkdir -p {ShellQuote(path)}");

        if (exitCode != 0)
        {
            _logger.LogWarning("Failed to create directory {Path} on {Host}: {Error}", path, host.Label, stderr);
        }
        else
        {
            _logger.LogInformation("Ensured directory exists on {Host}: {Path}", host.Label, path);
        }

        return exitCode == 0;
    }

    /// <summary>
    /// Same operation as <see cref="EnsureNfsFoldersWithErrorAsync"/>, reporting only success or failure.
    /// </summary>
    /// <exception cref="InvalidOperationException">No host has been set.</exception>
    public async Task<bool> EnsureNfsFoldersAsync(
        string nfsServer,
        string nfsExport,
        IEnumerable<string> folderNames,
        string? nfsExportFolder = null)
    {
        var (success, _) = await EnsureNfsFoldersWithErrorAsync(nfsServer, nfsExport, folderNames, nfsExportFolder);
        return success;
    }

    /// <summary>
    /// Temporarily mounts an NFS export on the current host and creates the given folders
    /// beneath it with <c>mkdir -p</c>, then unmounts and removes the temporary mount point.
    /// </summary>
    /// <param name="nfsServer">NFS server address; also used for the <c>addr=</c> mount option.</param>
    /// <param name="nfsExport">Export path; leading and trailing slashes are trimmed.</param>
    /// <param name="folderNames">Folder names to create under the export (and optional sub-folder).</param>
    /// <param name="nfsExportFolder">Optional sub-folder beneath the export in which the folders are created.</param>
    /// <returns>Success flag and, on failure, the trimmed stderr (or stdout) of the script.</returns>
    /// <exception cref="InvalidOperationException">No host has been set.</exception>
    public async Task<(bool Success, string? Error)> EnsureNfsFoldersWithErrorAsync(
        string nfsServer,
        string nfsExport,
        IEnumerable<string> folderNames,
        string? nfsExportFolder = null)
    {
        var host = RequireHost();

        var exportPath = (nfsExport ?? string.Empty).Trim('/');
        var subFolder = (nfsExportFolder ?? string.Empty).Trim('/');

        // Sub-path beneath the mount point where the volume folders will be created.
        var subPath = string.IsNullOrEmpty(subFolder) ? string.Empty : $"/{subFolder}";

        // mkdir targets relative to the temporary mount point. "$MNT" is expanded by the
        // remote shell; the caller-supplied remainder is single-quoted so it is taken literally.
        var folderList = folderNames
            .Select(f => "\"$MNT\"" + ShellQuote($"{subPath}/{f}"))
            .ToList();
        var mkdirTargets = string.Join(" ", folderList);

        // Use addr= to pin the server IP — avoids "Server address does not match proto= option"
        // errors when the hostname resolves to IPv6 but proto=tcp implies IPv4.
        var mountOptions = ShellQuote($"addr={nfsServer},nfsvers=4,proto=tcp,soft,timeo=50,retrans=2");
        var remoteExport = ShellQuote($"{nfsServer}:/{exportPath}");

        // Single SSH command: create temp dir, mount NFS, mkdir -p all folders, unmount, cleanup.
        // The EXIT trap releases the mount and temp dir when set -e aborts the script part-way;
        // after a clean run both trap commands fail silently because the work is already done.
        var script = $"""
            set -e
            MNT=$(mktemp -d)
            trap 'sudo umount "$MNT" 2>/dev/null || true; rmdir "$MNT" 2>/dev/null || true' EXIT
            sudo mount -t nfs -o {mountOptions} {remoteExport} "$MNT"
            sudo mkdir -p {mkdirTargets}
            sudo umount "$MNT"
            rmdir "$MNT"
            """;

        _logger.LogInformation(
            "Mounting NFS export {Server}:/{Export} on Docker host {Host} to create {Count} folders",
            nfsServer, exportPath, host.Label, folderList.Count);

        var (exitCode, stdout, stderr) = await _ssh.RunCommandAsync(host, script, NfsCommandTimeout);

        if (exitCode == 0)
        {
            _logger.LogInformation(
                "NFS export folders ensured via mount on {Host}: {Server}:/{Export}{Sub} ({Count} folders)",
                host.Label, nfsServer, exportPath, subPath, folderList.Count);
            return (true, null);
        }

        var error = DescribeFailure(stderr, stdout);
        _logger.LogWarning(
            "Failed to create NFS export folders on {Host}: {Error}",
            host.Label, error);
        return (false, error);
    }

    /// <summary>
    /// Runs <c>docker service update --force</c> for the given service.
    /// </summary>
    /// <param name="serviceName">Name of the service to update.</param>
    /// <returns>True when the command exits with code 0.</returns>
    /// <exception cref="InvalidOperationException">No host has been set.</exception>
    public async Task<bool> ForceUpdateServiceAsync(string serviceName)
    {
        var host = RequireHost();
        _logger.LogInformation("Force-updating service {ServiceName} on {Host}", serviceName, host.Label);

        var (exitCode, _, stderr) = await _ssh.RunCommandAsync(
            host, $"docker service update --force {ShellQuote(serviceName)}");

        if (exitCode != 0)
        {
            _logger.LogWarning("Force-update failed for {ServiceName}: {Error}", serviceName, stderr);
        }

        return exitCode == 0;
    }

    /// <summary>
    /// Opens a forwarded port through the current SSH host to the MySQL server and returns an
    /// open connection that goes through it.
    /// </summary>
    /// <param name="mysqlHost">MySQL host as passed to the SSH port forward.</param>
    /// <param name="port">MySQL port as passed to the SSH port forward.</param>
    /// <param name="adminUser">User ID for the connection.</param>
    /// <param name="adminPassword">Password for the connection.</param>
    /// <returns>The open connection and the tunnel; the caller disposes both.</returns>
    /// <exception cref="InvalidOperationException">No host has been set.</exception>
    public async Task<(MySqlConnection Connection, IDisposable Tunnel)> OpenMySqlConnectionAsync(
        string mysqlHost,
        int port,
        string adminUser,
        string adminPassword)
    {
        var host = RequireHost();

        _logger.LogInformation(
            "Opening tunnelled MySQL connection to {MysqlHost}:{Port} via SSH",
            mysqlHost, port);

        var tunnel = _ssh.OpenForwardedPort(host, mysqlHost, (uint)port);
        var localPort = (int)tunnel.BoundPort;

        // The client connects to the local end of the forwarded port, not to mysqlHost directly.
        var csb = new MySqlConnectionStringBuilder
        {
            Server = "127.0.0.1",
            Port = (uint)localPort,
            UserID = adminUser,
            Password = adminPassword,
            ConnectionTimeout = 15,
            SslMode = MySqlSslMode.Disabled,
        };

        var connection = new MySqlConnection(csb.ConnectionString);
        try
        {
            await connection.OpenAsync();
            return (connection, tunnel);
        }
        catch
        {
            // Do not leak the connection or the tunnel when the open fails.
            await connection.DisposeAsync();
            tunnel.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Executes <c>ALTER USER ... IDENTIFIED BY</c> for <paramref name="targetUser"/>@'%'
    /// over a tunnelled connection opened with the admin credentials.
    /// </summary>
    /// <returns>Success flag and, when a <see cref="MySqlException"/> occurs, its message.</returns>
    /// <exception cref="InvalidOperationException">No host has been set.</exception>
    public async Task<(bool Success, string Error)> AlterMySqlUserPasswordAsync(
        string mysqlHost,
        int port,
        string adminUser,
        string adminPassword,
        string targetUser,
        string newPassword)
    {
        _logger.LogInformation(
            "Altering MySQL password for user {User} on {MysqlHost}:{Port} via SSH tunnel",
            targetUser, mysqlHost, port);

        try
        {
            var (connection, tunnel) = await OpenMySqlConnectionAsync(mysqlHost, port, adminUser, adminPassword);
            await using (connection)
            using (tunnel)
            {
                // The user name is embedded in the statement text, so single quotes are doubled;
                // the password travels as a parameter.
                var escapedUser = targetUser.Replace("'", "''");
                await using var cmd = connection.CreateCommand();
                cmd.CommandText = $"ALTER USER '{escapedUser}'@'%' IDENTIFIED BY @pwd";
                cmd.Parameters.AddWithValue("@pwd", newPassword);
                await cmd.ExecuteNonQueryAsync();
            }

            _logger.LogInformation("MySQL password updated for user {User} via SSH tunnel", targetUser);
            return (true, string.Empty);
        }
        catch (MySqlException ex)
        {
            _logger.LogError(ex, "MySQL ALTER USER failed via SSH tunnel for user {User}", targetUser);
            return (false, ex.Message);
        }
    }

    /// <summary>
    /// Replaces a secret on a service with a single <c>docker service update</c> that removes
    /// the old secret and adds the new one.
    /// </summary>
    /// <param name="serviceName">Service to update.</param>
    /// <param name="oldSecretName">Secret passed to <c>--secret-rm</c>.</param>
    /// <param name="newSecretName">Secret used as <c>source</c> of <c>--secret-add</c>.</param>
    /// <param name="targetAlias">Value for <c>target</c>; defaults to <paramref name="oldSecretName"/>.</param>
    /// <returns>True when the command exits with code 0.</returns>
    /// <exception cref="InvalidOperationException">No host has been set.</exception>
    public async Task<bool> ServiceSwapSecretAsync(string serviceName, string oldSecretName, string newSecretName, string? targetAlias = null)
    {
        var host = RequireHost();
        var target = targetAlias ?? oldSecretName;

        var secretSpec = ShellQuote($"source={newSecretName},target={target}");
        var cmd = $"docker service update --secret-rm {ShellQuote(oldSecretName)} --secret-add {secretSpec} {ShellQuote(serviceName)}";

        _logger.LogInformation(
            "Swapping secret on {ServiceName}: {OldSecret} → {NewSecret} (target={Target})",
            serviceName, oldSecretName, newSecretName, target);

        var (exitCode, _, stderr) = await _ssh.RunCommandAsync(host, cmd);
        if (exitCode != 0)
        {
            _logger.LogError("Secret swap failed for {ServiceName}: {Error}", serviceName, stderr);
        }

        return exitCode == 0;
    }

    /// <summary>
    /// Lists swarm nodes by collecting IDs with <c>docker node ls</c> and then inspecting
    /// all of them in one <c>docker node inspect</c> call.
    /// </summary>
    /// <returns>One entry per line of inspect output; empty when there are no nodes or no output.</returns>
    /// <exception cref="InvalidOperationException">
    /// No host has been set, or either docker command exits with a non-zero code.
    /// </exception>
    public async Task<List<NodeInfo>> ListNodesAsync()
    {
        var host = RequireHost();
        _logger.LogInformation("Listing swarm nodes via SSH on {Host}", host.Label);

        // Use docker node inspect on all nodes to get IP addresses (Status.Addr)
        // that are not available from 'docker node ls'.
        // First, get all node IDs.
        var (lsExit, lsOut, lsErr) = await _ssh.RunCommandAsync(
            host, "docker node ls --format '{{.ID}}'");

        if (lsExit != 0)
        {
            var msg = DescribeFailure(lsErr, lsOut);
            _logger.LogWarning("docker node ls failed on {Host} (exit {Code}): {Error}",
                host.Label, lsExit, msg);
            throw new InvalidOperationException(
                $"Failed to list swarm nodes on {host.Label}: {msg}");
        }

        if (string.IsNullOrWhiteSpace(lsOut))
        {
            return new List<NodeInfo>();
        }

        var nodeIds = lsOut.Split('\n', StringSplitOptions.RemoveEmptyEntries)
            .Select(id => id.Trim())
            .Where(id => !string.IsNullOrEmpty(id))
            .ToList();

        if (nodeIds.Count == 0)
        {
            return new List<NodeInfo>();
        }

        // Inspect all nodes in a single call to get full details including IP address.
        // The IDs come from docker's own output; they are quoted anyway for uniformity.
        var ids = string.Join(" ", nodeIds.Select(ShellQuote));

        // Plain (non-interpolated) string: the braces are literal and \t is a real tab character.
        var format = "'{{.ID}}\t{{.Description.Hostname}}\t{{.Status.State}}\t{{.Spec.Availability}}\t{{.ManagerStatus.Addr}}\t{{.Status.Addr}}\t{{.Description.Engine.EngineVersion}}\t{{.Spec.Role}}'";

        var (exitCode, stdout, stderr) = await _ssh.RunCommandAsync(
            host, $"docker node inspect --format {format} {ids}");

        if (exitCode != 0)
        {
            var msg = DescribeFailure(stderr, stdout);
            _logger.LogWarning("docker node inspect failed on {Host} (exit {Code}): {Error}",
                host.Label, exitCode, msg);
            throw new InvalidOperationException(
                $"Failed to inspect swarm nodes on {host.Label}: {msg}");
        }

        if (string.IsNullOrWhiteSpace(stdout))
        {
            return new List<NodeInfo>();
        }

        return stdout
            .Split('\n', StringSplitOptions.RemoveEmptyEntries)
            .Select(ParseNodeLine)
            .ToList();
    }

    /// <summary>
    /// Collects timestamped logs for one service, or for every service of the stack when
    /// <paramref name="serviceName"/> is null or empty, sorted by timestamp.
    /// </summary>
    /// <param name="stackName">Stack whose services are queried; also used to strip the name prefix.</param>
    /// <param name="serviceName">Optional single service name to restrict the query to.</param>
    /// <param name="tailLines">Value passed to <c>--tail</c> for each service.</param>
    /// <returns>Parsed entries; services whose log fetch fails or returns nothing are skipped.</returns>
    /// <exception cref="InvalidOperationException">No host has been set.</exception>
    public async Task<List<ServiceLogEntry>> GetServiceLogsAsync(string stackName, string? serviceName = null, int tailLines = 200)
    {
        var host = RequireHost();

        // Determine which services to fetch logs for
        List<string> serviceNames;
        if (!string.IsNullOrEmpty(serviceName))
        {
            serviceNames = new List<string> { serviceName };
        }
        else
        {
            var services = await InspectStackServicesAsync(stackName);
            serviceNames = services.Select(s => s.Name).ToList();
        }

        var allEntries = new List<ServiceLogEntry>();
        foreach (var svcName in serviceNames)
        {
            try
            {
                var cmd = $"docker service logs --timestamps --no-trunc --tail {tailLines} {ShellQuote(svcName)} 2>&1";
                var (exitCode, stdout, _) = await _ssh.RunCommandAsync(host, cmd, LogCommandTimeout);

                if (exitCode != 0 || string.IsNullOrWhiteSpace(stdout))
                {
                    _logger.LogDebug("No logs returned for service {Service} (exit={ExitCode})", svcName, exitCode);
                    continue;
                }

                // Parse each line. Docker service logs format with --timestamps:
                //   <timestamp> <service>.<replica>.<taskid>@<node>    | <message>
                // or sometimes just:
                //   <timestamp> <service>.<replica>.<taskid> <message>
                foreach (var line in stdout.Split('\n', StringSplitOptions.RemoveEmptyEntries))
                {
                    var entry = ParseLogLine(line, svcName, stackName);
                    if (entry != null)
                    {
                        allEntries.Add(entry);
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort: one failing service must not hide the logs of the others.
                _logger.LogWarning(ex, "Failed to fetch logs for service {Service}", svcName);
            }
        }

        return allEntries.OrderBy(e => e.Timestamp).ToList();
    }

    /// <summary>
    /// Removes a stack and then its volumes (those whose name matches <c>{stackName}_</c>):
    /// first on the host the commands run on, then on every swarm node through a short-lived
    /// global service that mounts the Docker socket.
    /// </summary>
    /// <param name="stackName">Stack whose volumes are removed.</param>
    /// <returns>Always true; individual step failures are logged, not returned.</returns>
    /// <exception cref="InvalidOperationException">No host has been set.</exception>
    public async Task<bool> RemoveStackVolumesAsync(string stackName)
    {
        var host = RequireHost();

        // ── 1. Remove the stack first so containers release the volumes ─────
        _logger.LogInformation("Removing stack {StackName} before volume cleanup", stackName);
        var (rmExit, rmOut, rmErr) = await _ssh.RunCommandAsync(
            host, $"docker stack rm {ShellQuote(stackName)} 2>&1 || true");
        if (rmExit != 0)
        {
            // stderr is redirected into stdout by the command, so fall back to stdout.
            _logger.LogWarning("Stack rm returned non-zero for {StackName}: {Err}",
                stackName, string.IsNullOrWhiteSpace(rmErr) ? rmOut : rmErr);
        }

        // Give Swarm a moment to tear down containers on all nodes
        await Task.Delay(StackTeardownDelay);

        // ── 2. Clean volumes on the local (manager) node ────────────────────
        var volumeFilter = ShellQuote($"name={stackName}_");
        var localCmd = $"docker volume ls --filter {volumeFilter} -q | xargs -r docker volume rm 2>&1 || true";
        var (_, localOut, _) = await _ssh.RunCommandAsync(host, localCmd);
        if (!string.IsNullOrWhiteSpace(localOut))
        {
            _logger.LogInformation("Volume cleanup (manager): {Output}", localOut.Trim());
        }

        // ── 3. Clean volumes on ALL swarm nodes via a temporary global service ──
        // This deploys a short-lived container on every node that mounts the Docker
        // socket and removes matching volumes. This handles worker nodes that the
        // orchestrator has no direct SSH access to.
        var cleanupSvcName = $"vol-cleanup-{stackName}".Replace("_", "-");
        var quotedSvcName = ShellQuote(cleanupSvcName);

        // Remove leftover cleanup service from a previous run (if any)
        await _ssh.RunCommandAsync(host, $"docker service rm {quotedSvcName} 2>/dev/null || true");

        // The inner script is quoted once for the container's sh and, as a whole,
        // once more for the remote login shell.
        var innerScript = $"docker volume ls -q --filter {volumeFilter} | xargs -r docker volume rm 2>&1; echo done";
        var createCmd = string.Join(" ",
            "docker service create",
            "--detach",
            "--mode global",
            "--restart-condition none",
            $"--name {quotedSvcName}",
            "--mount type=bind,source=/var/run/docker.sock,target=/var/run/docker.sock",
            "docker:cli",
            "sh", "-c", ShellQuote(innerScript));

        _logger.LogInformation("Deploying global volume-cleanup service on all swarm nodes for {StackName}", stackName);
        var (svcExit, _, svcErr) = await _ssh.RunCommandAsync(host, createCmd);

        if (svcExit != 0)
        {
            _logger.LogWarning("Global volume cleanup service creation failed: {Err}", svcErr);
        }
        else
        {
            // Wait for the cleanup tasks to finish on all nodes
            _logger.LogInformation("Waiting for volume cleanup tasks to complete on all nodes...");
            await Task.Delay(VolumeCleanupDelay);
        }

        // Remove the cleanup service
        await _ssh.RunCommandAsync(host, $"docker service rm {quotedSvcName} 2>/dev/null || true");

        _logger.LogInformation("Volume cleanup complete for stack {StackName}", stackName);
        return true;
    }

    /// <summary>
    /// Converts one tab-separated line of <c>docker node inspect</c> output into a NodeInfo.
    /// Field order: ID, hostname, state, availability, manager address, status address,
    /// engine version, role.
    /// </summary>
    private static NodeInfo ParseNodeLine(string line)
    {
        var parts = line.Split('\t', 8);

        // ManagerStatus.Addr includes port (e.g. "10.0.0.1:2377"); Status.Addr is just the IP.
        // Prefer Status.Addr; fall back to ManagerStatus.Addr (strip port) if Status.Addr is empty/template-error.
        var statusAddr = FieldAt(parts, 5);
        var managerAddr = FieldAt(parts, 4);

        var ip = statusAddr;
        if (string.IsNullOrEmpty(ip) || IsTemplateArtefact(ip))
        {
            // managerAddr may be "10.0.0.1:2377"
            ip = managerAddr.Contains(':')
                ? managerAddr[..managerAddr.LastIndexOf(':')]
                : managerAddr;
        }

        // Clean up template rendering artefacts like "<no value>"
        if (IsTemplateArtefact(ip))
        {
            ip = "";
        }

        var role = FieldAt(parts, 7);
        var managerStatus = "";
        if (string.Equals(role, "manager", StringComparison.OrdinalIgnoreCase))
        {
            // A manager that reports a usable ManagerStatus.Addr is shown as "Reachable";
            // this does not distinguish the leader from other managers.
            var hasManagerAddr = !string.IsNullOrEmpty(managerAddr) && !managerAddr.StartsWith('<');
            managerStatus = hasManagerAddr ? "Reachable" : "";
        }

        return new NodeInfo
        {
            Id = FieldAt(parts, 0),
            Hostname = FieldAt(parts, 1),
            Status = FieldAt(parts, 2),
            Availability = FieldAt(parts, 3),
            ManagerStatus = managerStatus,
            IpAddress = ip,
            EngineVersion = FieldAt(parts, 6)
        };
    }

    /// <summary>
    /// Parses a single line from <c>docker service logs --timestamps</c> output.
    /// Lines without a parseable leading timestamp are returned whole, stamped with the current UTC time.
    /// </summary>
    private static ServiceLogEntry? ParseLogLine(string line, string serviceName, string stackName)
    {
        if (string.IsNullOrWhiteSpace(line))
        {
            return null;
        }

        var shortName = StripStackPrefix(serviceName, stackName);

        // Used when no timestamp can be extracted: the whole line becomes the message.
        ServiceLogEntry Unparsed() => new()
        {
            Timestamp = DateTimeOffset.UtcNow,
            Source = serviceName,
            ServiceName = shortName,
            Message = line
        };

        // Format: "2026-02-25T14:30:45.123456789Z service.replica.taskid@node    | message"
        // The timestamp is always the first space-delimited token when --timestamps is used.
        var firstSpace = line.IndexOf(' ');
        if (firstSpace <= 0)
        {
            return Unparsed();
        }

        var timestampStr = line[..firstSpace];
        var rest = line[(firstSpace + 1)..].TrimStart();

        // Machine-generated timestamp: parse independently of the current culture.
        if (!DateTimeOffset.TryParse(timestampStr, CultureInfo.InvariantCulture, DateTimeStyles.None, out var timestamp))
        {
            return Unparsed();
        }

        // Split source and message on the pipe separator
        var source = serviceName;
        var message = rest;
        var pipeIndex = rest.IndexOf('|');
        if (pipeIndex >= 0)
        {
            source = rest[..pipeIndex].Trim();
            message = rest[(pipeIndex + 1)..].TrimStart();
        }

        return new ServiceLogEntry
        {
            Timestamp = timestamp,
            Source = source,
            ServiceName = shortName,
            Message = message
        };
    }

    /// <summary>
    /// Strips the stack name prefix from a fully-qualified service name.
    /// e.g. "acm-cms-stack_acm-web" → "acm-web"
    /// </summary>
    private static string StripStackPrefix(string serviceName, string stackName)
    {
        var prefix = stackName + "_";
        return serviceName.StartsWith(prefix, StringComparison.Ordinal)
            ? serviceName[prefix.Length..]
            : serviceName;
    }

    /// <summary>Returns the trimmed field at <paramref name="index"/>, or an empty string when the line has fewer fields.</summary>
    private static string FieldAt(string[] parts, int index) =>
        parts.Length > index ? parts[index].Trim() : "";

    /// <summary>True when a value starts with '&lt;' or '{', i.e. looks like unrendered template output rather than an address.</summary>
    private static bool IsTemplateArtefact(string value) =>
        value.StartsWith('<') || value.StartsWith('{');

    /// <summary>
    /// Picks the first non-blank of stderr and stdout (trimmed) as the failure description,
    /// or "unknown error" when both are blank.
    /// </summary>
    private static string DescribeFailure(string? stderr, string? stdout)
    {
        if (!string.IsNullOrWhiteSpace(stderr))
        {
            return stderr.Trim();
        }

        if (!string.IsNullOrWhiteSpace(stdout))
        {
            return stdout.Trim();
        }

        return UnknownError;
    }

    /// <summary>
    /// Wraps a value in single quotes for a POSIX shell so it is passed as exactly one literal
    /// word; embedded single quotes are emitted as <c>'\''</c>.
    /// </summary>
    private static string ShellQuote(string? value) =>
        "'" + (value ?? string.Empty).Replace("'", "'\\''") + "'";

    /// <summary>Builds the result DTO shared by stack deploy and stack remove.</summary>
    private static DeploymentResultDto BuildResult(string stackName, int exitCode, string stdout, string stderr, long elapsedMs)
    {
        var succeeded = exitCode == 0;
        return new DeploymentResultDto
        {
            StackName = stackName,
            Success = succeeded,
            ExitCode = exitCode,
            Output = stdout,
            ErrorMessage = stderr,
            Message = succeeded ? "Success" : "Failed",
            DurationMs = elapsedMs
        };
    }

    /// <summary>
    /// Returns the configured host, captured once so a whole operation talks to the same host.
    /// </summary>
    /// <exception cref="InvalidOperationException">No host has been set.</exception>
    private SshHost RequireHost() =>
        _currentHost
        ?? throw new InvalidOperationException("No SSH host configured. Call SetHost() before using Docker commands.");
}