Add WAL file for database and log instance deployment failures
Some checks failed
Build and Publish Docker Image / build-and-push (push) Has been cancelled

This commit is contained in:
Matt Batchelder
2026-02-19 08:27:54 -05:00
parent 4a903bfd2a
commit adf1a2e4db
41 changed files with 2789 additions and 1297 deletions

View File

@@ -93,6 +93,31 @@ public class SshConnectionService : IDisposable
});
}
/// <summary>
/// Executes <paramref name="command"/> on <paramref name="host"/>, bounding the execution by <paramref name="timeout"/>.
/// When the SSH library reports an operation timeout, the result carries exit code -1,
/// an empty stdout and a "timed out" message in stderr instead of throwing.
/// </summary>
public async Task<(int ExitCode, string Stdout, string Stderr)> RunCommandAsync(SshHost host, string command, TimeSpan timeout)
{
    // The SSH call is synchronous, so it is pushed onto the thread pool.
    (int ExitCode, string Stdout, string Stderr) ExecuteBounded()
    {
        var sshClient = GetClient(host);
        using var sshCommand = sshClient.CreateCommand(command);
        sshCommand.CommandTimeout = timeout;

        try
        {
            sshCommand.Execute();
            // A missing exit status is reported to the caller as -1.
            return (sshCommand.ExitStatus ?? -1, sshCommand.Result, sshCommand.Error);
        }
        catch (Renci.SshNet.Common.SshOperationTimeoutException)
        {
            // Only the first 120 characters of the command are written to the log.
            _logger.LogWarning("SSH command timed out after {Timeout}s: {Command}",
                timeout.TotalSeconds, command.Length > 120 ? command[..120] + "…" : command);
            return (-1, string.Empty, $"Command timed out after {timeout.TotalSeconds}s");
        }
    }

    return await Task.Run(() => ExecuteBounded());
}
/// <summary>
/// Run a command that requires stdin input (e.g., docker stack deploy --compose-file -).
/// </summary>
@@ -171,6 +196,23 @@ public class SshConnectionService : IDisposable
return new SshClient(connInfo);
}
/// <summary>
/// Opens an SSH local port-forward from 127.0.0.1:&lt;auto&gt; → <paramref name="remoteHost"/>:<paramref name="remotePort"/>
/// through the existing SSH connection for <paramref name="host"/>.
/// The caller must dispose the returned <see cref="ForwardedPortLocal"/> to close the tunnel.
/// </summary>
/// <param name="host">SSH host whose client connection carries the tunnel.</param>
/// <param name="remoteHost">Destination host name or address, as seen from the SSH server.</param>
/// <param name="remotePort">Destination port on <paramref name="remoteHost"/>.</param>
/// <returns>The started forwarded port; its <c>BoundPort</c> holds the local port that was assigned.</returns>
/// <remarks>
/// If the tunnel cannot be attached or started, it is detached from the client and disposed
/// before the original exception is rethrown, so a failed attempt leaves nothing behind.
/// </remarks>
public ForwardedPortLocal OpenForwardedPort(SshHost host, string remoteHost, uint remotePort)
{
    var client = GetClient(host);
    // Port 0 lets the OS assign a free local port; SSH.NET updates BoundPort after Start().
    var tunnel = new ForwardedPortLocal("127.0.0.1", 0, remoteHost, remotePort);
    var attached = false;
    try
    {
        client.AddForwardedPort(tunnel);
        attached = true;
        tunnel.Start();
    }
    catch
    {
        // Undo the partial setup: a port that failed to start must not stay registered on the shared client.
        if (attached)
        {
            try
            {
                client.RemoveForwardedPort(tunnel);
            }
            catch (Exception cleanupError)
            {
                // Best-effort cleanup; the original failure below is the one the caller needs to see.
                _logger.LogDebug(cleanupError, "Failed to detach SSH tunnel after start failure");
            }
        }
        tunnel.Dispose();
        throw;
    }
    _logger.LogDebug("SSH tunnel opened: 127.0.0.1:{LocalPort} → {RemoteHost}:{RemotePort}",
        tunnel.BoundPort, remoteHost, remotePort);
    return tunnel;
}
public void Dispose()
{
lock (_lock)

View File

@@ -1,6 +1,7 @@
using System.Diagnostics;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MySqlConnector;
using OTSSignsOrchestrator.Core.Configuration;
using OTSSignsOrchestrator.Core.Models.DTOs;
using OTSSignsOrchestrator.Core.Models.Entities;
@@ -162,66 +163,282 @@ public class SshDockerCliService : IDockerCliService
return exitCode == 0;
}
public async Task<bool> EnsureSmbFoldersAsync(
string cifsServer,
string cifsShareName,
string cifsUsername,
string cifsPassword,
public async Task<bool> EnsureNfsFoldersAsync(
string nfsServer,
string nfsExport,
IEnumerable<string> folderNames,
string? cifsShareFolder = null)
string? nfsExportFolder = null)
{
EnsureHost();
var allSucceeded = true;
var subFolder = (cifsShareFolder ?? string.Empty).Trim('/');
var exportPath = (nfsExport ?? string.Empty).Trim('/');
var subFolder = (nfsExportFolder ?? string.Empty).Trim('/');
// If a subfolder is specified, ensure it exists first
if (!string.IsNullOrEmpty(subFolder))
// Build the sub-path beneath the mount point where volume folders will be created
var subPath = string.IsNullOrEmpty(subFolder) ? string.Empty : $"/{subFolder}";
// Build mkdir targets relative to the temporary mount point
var folderList = folderNames.Select(f => $"\"$MNT{subPath}/{f}\"").ToList();
var mkdirTargets = string.Join(" ", folderList);
// Single SSH command: create temp dir, mount NFS, mkdir -p all folders, unmount, cleanup
// Use addr= to pin the server IP — avoids "Server address does not match proto= option"
// errors when the hostname resolves to IPv6 but proto=tcp implies IPv4.
var script = $"""
set -e
MNT=$(mktemp -d)
sudo mount -t nfs -o addr={nfsServer},nfsvers=4,proto=tcp,soft,timeo=50,retrans=2 {nfsServer}:/{exportPath} "$MNT"
sudo mkdir -p {mkdirTargets}
sudo umount "$MNT"
rmdir "$MNT"
""";
_logger.LogInformation(
"Mounting NFS export {Server}:/{Export} on Docker host {Host} to create {Count} folders",
nfsServer, exportPath, _currentHost!.Label, folderList.Count);
var (exitCode, stdout, stderr) = await _ssh.RunCommandAsync(_currentHost!, script, TimeSpan.FromSeconds(30));
if (exitCode == 0)
{
var mkdirCmd = $"smbclient //{cifsServer}/{cifsShareName} -U '{cifsUsername}%{cifsPassword}' -c 'mkdir {subFolder}' 2>&1";
var (_, mkdirOut, _) = await _ssh.RunCommandAsync(_currentHost!, mkdirCmd);
var mkdirOutput = mkdirOut ?? string.Empty;
var alreadyExists = mkdirOutput.Contains("NT_STATUS_OBJECT_NAME_COLLISION", StringComparison.OrdinalIgnoreCase)
|| mkdirOutput.Contains("already exists", StringComparison.OrdinalIgnoreCase);
var success = alreadyExists || !mkdirOutput.Contains("NT_STATUS_", StringComparison.OrdinalIgnoreCase);
if (success)
_logger.LogInformation("SMB subfolder ensured: //{Server}/{Share}/{Folder}", cifsServer, cifsShareName, subFolder);
else
{
_logger.LogWarning("Failed to create SMB subfolder //{Server}/{Share}/{Folder}: {Output}",
cifsServer, cifsShareName, subFolder, mkdirOutput.Trim());
allSucceeded = false;
}
_logger.LogInformation(
"NFS export folders ensured via mount on {Host}: {Server}:/{Export}{Sub} ({Count} folders)",
_currentHost.Label, nfsServer, exportPath, subPath, folderList.Count);
}
else
{
_logger.LogWarning(
"Failed to create NFS export folders on {Host}: {Error}",
_currentHost.Label, (stderr ?? stdout ?? "unknown error").Trim());
return false;
}
// Build the target path prefix for volume folders
var pathPrefix = string.IsNullOrEmpty(subFolder) ? string.Empty : $"{subFolder}/";
return true;
}
foreach (var folder in folderNames)
public async Task<(bool Success, string? Error)> EnsureNfsFoldersWithErrorAsync(
string nfsServer,
string nfsExport,
IEnumerable<string> folderNames,
string? nfsExportFolder = null)
{
EnsureHost();
var exportPath = (nfsExport ?? string.Empty).Trim('/');
var subFolder = (nfsExportFolder ?? string.Empty).Trim('/');
var subPath = string.IsNullOrEmpty(subFolder) ? string.Empty : $"/{subFolder}";
var folderList = folderNames.Select(f => $"\"$MNT{subPath}/{f}\"").ToList();
var mkdirTargets = string.Join(" ", folderList);
var script = $"""
set -e
MNT=$(mktemp -d)
sudo mount -t nfs -o addr={nfsServer},nfsvers=4,proto=tcp,soft,timeo=50,retrans=2 {nfsServer}:/{exportPath} "$MNT"
sudo mkdir -p {mkdirTargets}
sudo umount "$MNT"
rmdir "$MNT"
""";
_logger.LogInformation(
"Mounting NFS export {Server}:/{Export} on Docker host {Host} to create {Count} folders",
nfsServer, exportPath, _currentHost!.Label, folderList.Count);
var (exitCode, stdout, stderr) = await _ssh.RunCommandAsync(_currentHost!, script, TimeSpan.FromSeconds(30));
if (exitCode == 0)
{
var targetFolder = $"{pathPrefix}{folder}";
// Run smbclient on the remote Docker host to create the folder on the share.
// NT_STATUS_OBJECT_NAME_COLLISION means it already exists — treat as success.
var cmd = $"smbclient //{cifsServer}/{cifsShareName} -U '{cifsUsername}%{cifsPassword}' -c 'mkdir {targetFolder}' 2>&1";
var (_, stdout, _) = await _ssh.RunCommandAsync(_currentHost!, cmd);
var output = stdout ?? string.Empty;
var exists = output.Contains("NT_STATUS_OBJECT_NAME_COLLISION", StringComparison.OrdinalIgnoreCase)
|| output.Contains("already exists", StringComparison.OrdinalIgnoreCase);
var ok = exists || !output.Contains("NT_STATUS_", StringComparison.OrdinalIgnoreCase);
if (ok)
_logger.LogInformation("SMB folder ensured: //{Server}/{Share}/{Folder}", cifsServer, cifsShareName, targetFolder);
else
{
_logger.LogWarning("Failed to create SMB folder //{Server}/{Share}/{Folder}: {Output}",
cifsServer, cifsShareName, targetFolder, output.Trim());
allSucceeded = false;
}
_logger.LogInformation(
"NFS export folders ensured via mount on {Host}: {Server}:/{Export}{Sub} ({Count} folders)",
_currentHost.Label, nfsServer, exportPath, subPath, folderList.Count);
return (true, null);
}
return allSucceeded;
var error = (stderr ?? stdout ?? "unknown error").Trim();
_logger.LogWarning(
"Failed to create NFS export folders on {Host}: {Error}",
_currentHost.Label, error);
return (false, error);
}
/// <summary>
/// Runs <c>docker service update --force</c> for <paramref name="serviceName"/> on the current host.
/// </summary>
/// <returns><c>true</c> when the command exits with code 0; otherwise <c>false</c> (the stderr text is logged as a warning).</returns>
public async Task<bool> ForceUpdateServiceAsync(string serviceName)
{
    EnsureHost();
    _logger.LogInformation("Force-updating service {ServiceName} on {Host}", serviceName, _currentHost!.Label);

    var (status, _, errorOutput) = await _ssh.RunCommandAsync(_currentHost!, $"docker service update --force {serviceName}");
    if (status == 0)
    {
        return true;
    }

    _logger.LogWarning("Force-update failed for {ServiceName}: {Error}", serviceName, errorOutput);
    return false;
}
/// <summary>
/// Opens a MySQL connection to <paramref name="mysqlHost"/>:<paramref name="port"/> through an SSH
/// port-forward on the current host, connecting to the tunnel's local end at 127.0.0.1.
/// </summary>
/// <returns>
/// The open connection together with the tunnel that carries it. The caller owns both and must dispose them.
/// </returns>
/// <remarks>
/// If opening the connection fails, both the connection and the tunnel are disposed and the exception is rethrown.
/// </remarks>
public async Task<(MySqlConnection Connection, IDisposable Tunnel)> OpenMySqlConnectionAsync(
    string mysqlHost, int port,
    string adminUser, string adminPassword)
{
    EnsureHost();
    _logger.LogInformation(
        "Opening tunnelled MySQL connection to {MysqlHost}:{Port} via SSH",
        mysqlHost, port);

    var forwardedPort = _ssh.OpenForwardedPort(_currentHost!, mysqlHost, (uint)port);

    // The client talks to the local end of the tunnel, not to the MySQL host directly.
    var builder = new MySqlConnectionStringBuilder
    {
        Server = "127.0.0.1",
        Port = forwardedPort.BoundPort,
        UserID = adminUser,
        Password = adminPassword,
        ConnectionTimeout = 15,
        SslMode = MySqlSslMode.Disabled,
    };
    var mySql = new MySqlConnection(builder.ConnectionString);

    try
    {
        await mySql.OpenAsync();
    }
    catch
    {
        // Nothing is handed to the caller on failure, so release both resources here.
        await mySql.DisposeAsync();
        forwardedPort.Dispose();
        throw;
    }

    return (mySql, forwardedPort);
}
/// <summary>
/// Sets a new password for the MySQL account <paramref name="targetUser"/>@'%' by running
/// <c>ALTER USER</c> over a connection tunnelled through SSH.
/// </summary>
/// <param name="mysqlHost">MySQL host, as seen from the SSH host.</param>
/// <param name="port">MySQL port on <paramref name="mysqlHost"/>.</param>
/// <param name="adminUser">Account used to open the connection.</param>
/// <param name="adminPassword">Password for <paramref name="adminUser"/>.</param>
/// <param name="targetUser">Account whose password is changed.</param>
/// <param name="newPassword">New password; passed as a command parameter.</param>
/// <returns>
/// <c>(true, "")</c> on success; <c>(false, message)</c> when a <see cref="MySqlException"/> is raised.
/// Other exception types are not caught here and propagate to the caller.
/// </returns>
public async Task<(bool Success, string Error)> AlterMySqlUserPasswordAsync(
    string mysqlHost, int port,
    string adminUser, string adminPassword,
    string targetUser, string newPassword)
{
    _logger.LogInformation(
        "Altering MySQL password for user {User} on {MysqlHost}:{Port} via SSH tunnel",
        targetUser, mysqlHost, port);
    try
    {
        var (connection, tunnel) = await OpenMySqlConnectionAsync(mysqlHost, port, adminUser, adminPassword);
        await using (connection)
        using (tunnel)
        {
            // The account name is spliced into the statement as a single-quoted literal.
            // Backslash is an escape character inside MySQL string literals by default, so it
            // must be doubled (before the quotes are) or a name ending in '\' would swallow the
            // closing quote and break out of the literal.
            var escapedUser = targetUser
                .Replace("\\", "\\\\")
                .Replace("'", "''");
            await using var cmd = connection.CreateCommand();
            cmd.CommandText = $"ALTER USER '{escapedUser}'@'%' IDENTIFIED BY @pwd";
            cmd.Parameters.AddWithValue("@pwd", newPassword);
            await cmd.ExecuteNonQueryAsync();
        }
        _logger.LogInformation("MySQL password updated for user {User} via SSH tunnel", targetUser);
        return (true, string.Empty);
    }
    catch (MySqlException ex)
    {
        _logger.LogError(ex, "MySQL ALTER USER failed via SSH tunnel for user {User}", targetUser);
        return (false, ex.Message);
    }
}
/// <summary>
/// Replaces <paramref name="oldSecretName"/> with <paramref name="newSecretName"/> on
/// <paramref name="serviceName"/> using a single <c>docker service update</c> call.
/// The new secret is mounted under <paramref name="targetAlias"/>, or under the old secret's name when no alias is given.
/// </summary>
/// <returns><c>true</c> when the command exits with code 0; otherwise <c>false</c> (the stderr text is logged as an error).</returns>
public async Task<bool> ServiceSwapSecretAsync(string serviceName, string oldSecretName, string newSecretName, string? targetAlias = null)
{
    EnsureHost();

    // Without an explicit alias the new secret takes over the old secret's name as its target.
    var mountName = targetAlias ?? oldSecretName;
    var updateCommand = $"docker service update --secret-rm {oldSecretName} --secret-add \"source={newSecretName},target={mountName}\" {serviceName}";

    _logger.LogInformation(
        "Swapping secret on {ServiceName}: {OldSecret} → {NewSecret} (target={Target})",
        serviceName, oldSecretName, newSecretName, mountName);

    var (status, _, errorOutput) = await _ssh.RunCommandAsync(_currentHost!, updateCommand);
    if (status == 0)
    {
        return true;
    }

    _logger.LogError("Secret swap failed for {ServiceName}: {Error}", serviceName, errorOutput);
    return false;
}
/// <summary>
/// Lists the swarm nodes reported by docker on the current SSH host.
/// </summary>
/// <remarks>
/// Node IDs are collected with <c>docker node ls</c> and then inspected in a single
/// <c>docker node inspect</c> call, because the inspect output carries the node IP
/// (<c>Status.Addr</c>) that is not available from <c>docker node ls</c>.
/// </remarks>
/// <returns>One <see cref="NodeInfo"/> per inspected node, or an empty list when docker reports none.</returns>
/// <exception cref="InvalidOperationException">Thrown when either docker command exits with a non-zero code.</exception>
public async Task<List<NodeInfo>> ListNodesAsync()
{
    // Tab-separated fields, in order:
    // ID, hostname, state, availability, ManagerStatus.Addr, Status.Addr, engine version, role.
    const string InspectFormat = "'{{.ID}}\t{{.Description.Hostname}}\t{{.Status.State}}\t{{.Spec.Availability}}\t{{.ManagerStatus.Addr}}\t{{.Status.Addr}}\t{{.Description.Engine.EngineVersion}}\t{{.Spec.Role}}'";

    EnsureHost();
    _logger.LogInformation("Listing swarm nodes via SSH on {Host}", _currentHost!.Label);

    // First, get all node IDs.
    var (lsExit, lsOut, lsErr) = await _ssh.RunCommandAsync(
        _currentHost!, "docker node ls --format '{{.ID}}'");
    if (lsExit != 0)
    {
        var msg = DescribeFailure(lsErr, lsOut);
        _logger.LogWarning("docker node ls failed on {Host} (exit {Code}): {Error}",
            _currentHost!.Label, lsExit, msg);
        throw new InvalidOperationException(
            $"Failed to list swarm nodes on {_currentHost!.Label}: {msg}");
    }

    if (string.IsNullOrWhiteSpace(lsOut))
    {
        return new List<NodeInfo>();
    }

    var nodeIds = lsOut.Split('\n', StringSplitOptions.RemoveEmptyEntries)
        .Select(id => id.Trim())
        .Where(id => !string.IsNullOrEmpty(id))
        .ToList();
    if (nodeIds.Count == 0)
    {
        return new List<NodeInfo>();
    }

    // Inspect all nodes in a single call to get full details including IP address.
    var ids = string.Join(" ", nodeIds);
    var (exitCode, stdout, stderr) = await _ssh.RunCommandAsync(
        _currentHost!, $"docker node inspect --format {InspectFormat} {ids}");
    if (exitCode != 0)
    {
        var msg = DescribeFailure(stderr, stdout);
        _logger.LogWarning("docker node inspect failed on {Host} (exit {Code}): {Error}",
            _currentHost!.Label, exitCode, msg);
        throw new InvalidOperationException(
            $"Failed to inspect swarm nodes on {_currentHost!.Label}: {msg}");
    }

    if (string.IsNullOrWhiteSpace(stdout))
    {
        return new List<NodeInfo>();
    }

    return stdout
        .Split('\n', StringSplitOptions.RemoveEmptyEntries)
        // Skip whitespace-only lines (e.g. a stray "\r") so they do not become empty nodes.
        .Where(line => !string.IsNullOrWhiteSpace(line))
        .Select(ParseInspectLine)
        .ToList();

    // Picks the first non-blank stream so an empty (but non-null) stderr still falls back to stdout.
    static string DescribeFailure(string? errorStream, string? outputStream)
    {
        if (!string.IsNullOrWhiteSpace(errorStream))
        {
            return errorStream.Trim();
        }

        return string.IsNullOrWhiteSpace(outputStream) ? "unknown error" : outputStream.Trim();
    }

    // True for template rendering artefacts such as "<no value>" or an unrendered "{...}".
    static bool IsTemplateArtifact(string value) => value.StartsWith('<') || value.StartsWith('{');

    // Converts one tab-separated inspect line into a NodeInfo; missing fields become empty strings.
    static NodeInfo ParseInspectLine(string line)
    {
        var parts = line.Split('\t', 8);
        string Field(int index) => index < parts.Length ? parts[index].Trim() : "";

        // ManagerStatus.Addr includes port (e.g. "10.0.0.1:2377"); Status.Addr is just the IP.
        // Prefer Status.Addr; fall back to ManagerStatus.Addr (port stripped) if Status.Addr is empty or an artefact.
        var managerAddr = Field(4);
        var ip = Field(5);
        if (string.IsNullOrEmpty(ip) || IsTemplateArtifact(ip))
        {
            ip = managerAddr.Contains(':') ? managerAddr[..managerAddr.LastIndexOf(':')] : managerAddr;
        }

        if (IsTemplateArtifact(ip))
        {
            ip = "";
        }

        // A manager whose ManagerStatus.Addr rendered to a real value is reported as "Reachable".
        // The inspect format carries no leader or reachability field, so this is an approximation.
        var isManager = string.Equals(Field(7), "manager", StringComparison.OrdinalIgnoreCase);
        var managerStatus = isManager && !string.IsNullOrEmpty(managerAddr) && !managerAddr.StartsWith('<')
            ? "Reachable"
            : "";

        return new NodeInfo
        {
            Id = Field(0),
            Hostname = Field(1),
            Status = Field(2),
            Availability = Field(3),
            ManagerStatus = managerStatus,
            IpAddress = ip,
            EngineVersion = Field(6)
        };
    }
}
private void EnsureHost()

View File

@@ -43,7 +43,12 @@ public class SshDockerSecretsService : IDockerSecretsService
if (existing != null && rotate)
{
_logger.LogInformation("Rotating secret via SSH: {SecretName} (old id={SecretId})", name, existing.Value.id);
await _ssh.RunCommandAsync(_currentHost!, $"docker secret rm {name}");
var (rmExit, _, rmErr) = await _ssh.RunCommandAsync(_currentHost!, $"docker secret rm {name}");
if (rmExit != 0)
{
_logger.LogError("Failed to remove old secret for rotation: {SecretName} | error={Error}", name, rmErr);
return (false, string.Empty);
}
}
// Create secret via stdin