feat: Implement provisioning pipelines for subscription management

- Add ReactivatePipeline to handle subscription reactivation, including scaling Docker services, health verification, status updates, audit logging, and broadcasting status changes.
- Introduce RotateCredentialsPipeline for OAuth2 credential rotation, managing the deletion of old apps, creation of new ones, credential storage, access verification, and audit logging.
- Create StepRunner to manage job step execution, including lifecycle management and progress broadcasting via SignalR.
- Implement SuspendPipeline for subscription suspension, scaling down services, updating statuses, logging audits, and broadcasting changes.
- Add UpdateScreenLimitPipeline to update Xibo CMS screen limits and record snapshots.
- Introduce XiboFeatureManifests for hardcoded feature ACLs per role.
- Add docker-compose.dev.yml for local development with PostgreSQL setup.
This commit is contained in:
Matt Batchelder
2026-03-18 10:27:26 -04:00
parent c2e03de8bb
commit c6d46098dd
77 changed files with 9412 additions and 29 deletions

View File

@@ -0,0 +1,430 @@
using Microsoft.AspNetCore.SignalR;
using Microsoft.EntityFrameworkCore;
using Renci.SshNet;
using OTSSignsOrchestrator.Core.Services;
using OTSSignsOrchestrator.Server.Clients;
using OTSSignsOrchestrator.Server.Data;
using OTSSignsOrchestrator.Server.Data.Entities;
using OTSSignsOrchestrator.Server.Hubs;
namespace OTSSignsOrchestrator.Server.Workers;
/// <summary>
/// Full decommission pipeline — removes all infrastructure for a cancelled subscription.
/// Handles <c>JobType = "decommission"</c>.
///
/// Steps:
/// 1. stack-remove — docker stack rm xibo-{abbrev}
/// 2. authentik-cleanup — Delete SAML provider, application, 4 tenant groups (+BYOI source)
/// 3. oauth2-cleanup — Delete Xibo OAuth2 application via API
/// 4. mysql-cleanup — DROP DATABASE + DROP USER via SSH
/// 5. nfs-archive — mv /nfs/{abbrev} → /nfs/archived/{abbrev}-{timestamp} (retain 30d min)
/// 6. registry-update — Customer.Status = Decommissioned, Instance health = Critical,
/// final AuditLog, broadcast InstanceStatusChanged
/// </summary>
public sealed class DecommissionPipeline : IProvisioningPipeline
{
/// <summary>Job type string this pipeline handles.</summary>
public string HandlesJobType
{
    get { return "decommission"; }
}

/// <summary>Step count handed to the <see cref="StepRunner"/> created in <c>ExecuteAsync</c>.</summary>
private const int TotalSteps = 6;

// Root provider; ExecuteAsync creates an async scope from it and resolves its services there.
private readonly IServiceProvider _services;
private readonly ILogger<DecommissionPipeline> _logger;

/// <summary>
/// Creates the pipeline.
/// </summary>
/// <param name="services">Provider used to create the scope that <c>ExecuteAsync</c> resolves services from.</param>
/// <param name="logger">Logger passed to the step runner and used for the completion message.</param>
public DecommissionPipeline(IServiceProvider services, ILogger<DecommissionPipeline> logger)
    => (_services, _logger) = (services, logger);
/// <summary>
/// Runs the six decommission steps for <paramref name="job"/> in order: stack-remove,
/// authentik-cleanup, oauth2-cleanup, mysql-cleanup, nfs-archive and registry-update.
/// Each step is executed through a <see cref="StepRunner"/>; a step that performs work adds an
/// <see cref="AuditLog"/> row and saves it before returning its result text.
/// </summary>
/// <remarks>
/// Every value interpolated into a remote shell command (stack name, MySQL connection settings,
/// NFS paths) is single-quoted for the POSIX shell, and SQL names are escaped, so a quote or a
/// space inside a setting cannot break or extend the command.
/// NOTE(review): the SSH calls (<c>Connect</c>, <c>RunCommand</c>) are synchronous and block the
/// calling thread while the remote command runs.
/// </remarks>
/// <param name="job">The decommission job; its customer id selects the customer and instance.</param>
/// <param name="ct">Cancellation token passed to the database calls and to the step runner.</param>
/// <exception cref="InvalidOperationException">
/// Thrown while building the context when the customer or its instance cannot be found.
/// Step bodies throw the same type for a missing SSH host, a missing instance/customer, or a
/// remote command with a non-zero exit code; how the step runner surfaces those is not visible here.
/// </exception>
public async Task ExecuteAsync(Job job, CancellationToken ct)
{
    await using var scope = _services.CreateAsyncScope();
    var db = scope.ServiceProvider.GetRequiredService<OrchestratorDbContext>();
    var hub = scope.ServiceProvider.GetRequiredService<IHubContext<FleetHub, IFleetClient>>();
    var authentikClient = scope.ServiceProvider.GetRequiredService<IAuthentikClient>();
    var xiboFactory = scope.ServiceProvider.GetRequiredService<XiboClientFactory>();
    var settings = scope.ServiceProvider.GetRequiredService<SettingsService>();

    var ctx = await BuildContextAsync(job, db, ct);
    var runner = new StepRunner(db, hub, _logger, job.Id, TotalSteps);
    var abbrev = ctx.Abbreviation;
    var stackName = ctx.DockerStackName;

    // ── Step 1: stack-remove ────────────────────────────────────────────
    await runner.RunAsync("stack-remove", async () =>
    {
        var sshHost = await GetSwarmSshHostAsync(settings);
        using var sshClient = CreateSshClient(sshHost);
        sshClient.Connect();
        try
        {
            var result = RunSshCommand(sshClient, $"docker stack rm {ShellQuote(stackName)}");

            db.AuditLogs.Add(new AuditLog
            {
                Id = Guid.NewGuid(),
                InstanceId = ctx.InstanceId,
                Actor = "system/decommission",
                Action = "stack-remove",
                Target = stackName,
                Outcome = "success",
                Detail = $"Docker stack '{stackName}' removed. Job {job.Id}.",
                OccurredAt = DateTime.UtcNow,
            });
            await db.SaveChangesAsync(ct);

            return $"Docker stack '{stackName}' removed. Output: {result}";
        }
        finally
        {
            sshClient.Disconnect();
        }
    }, ct);

    // ── Step 2: authentik-cleanup ───────────────────────────────────────
    await runner.RunAsync("authentik-cleanup", async () =>
    {
        var instance = await db.Instances
            .Include(i => i.ByoiConfigs)
            .FirstOrDefaultAsync(i => i.Id == ctx.InstanceId, ct)
            ?? throw new InvalidOperationException($"Instance {ctx.InstanceId} not found.");

        var cleaned = new List<string>();

        // Delete SAML provider (stored on Instance.AuthentikProviderId)
        if (!string.IsNullOrEmpty(instance.AuthentikProviderId)
            && int.TryParse(instance.AuthentikProviderId, out var providerId))
        {
            await authentikClient.DeleteSamlProviderAsync(providerId);
            cleaned.Add($"SAML provider {providerId}");
        }

        // Delete Authentik application
        await authentikClient.DeleteApplicationAsync($"xibo-{abbrev}");
        cleaned.Add($"application xibo-{abbrev}");

        // Delete 4 tenant groups — search by name prefix, match by exact name
        var groupNames = new[]
        {
            $"customer-{abbrev}",
            $"customer-{abbrev}-viewer",
            $"customer-{abbrev}-editor",
            $"customer-{abbrev}-admin",
        };
        var searchResp = await authentikClient.ListGroupsAsync(search: $"customer-{abbrev}");
        if (searchResp.IsSuccessStatusCode && searchResp.Content?.Results is { } groups)
        {
            foreach (var groupName in groupNames)
            {
                var match = groups.FirstOrDefault(g =>
                    g.TryGetValue("name", out var n) && n?.ToString() == groupName);

                // A group entry without a usable primary key cannot be deleted; skip it
                // instead of dereferencing a null value.
                if (match is not null
                    && match.TryGetValue("pk", out var pk)
                    && pk?.ToString() is { Length: > 0 } groupPk)
                {
                    await authentikClient.DeleteGroupAsync(groupPk);
                    cleaned.Add($"group {groupName}");
                }
            }
        }
        else
        {
            // The step still continues (as before), but leave a trace so that
            // orphaned groups can be found and removed manually.
            _logger.LogWarning(
                "Authentik group search for prefix {GroupPrefix} failed or returned no result list; tenant groups were not deleted (job {JobId})",
                $"customer-{abbrev}", job.Id);
        }

        // If BYOI was enabled, delete the SAML source
        var byoiConfig = instance.ByoiConfigs.FirstOrDefault(b => b.Enabled);
        if (byoiConfig is not null)
        {
            await authentikClient.DeleteSamlSourceAsync($"byoi-{abbrev}");
            cleaned.Add($"BYOI SAML source byoi-{abbrev}");
        }

        db.AuditLogs.Add(new AuditLog
        {
            Id = Guid.NewGuid(),
            InstanceId = ctx.InstanceId,
            Actor = "system/decommission",
            Action = "authentik-cleanup",
            Target = $"xibo-{abbrev}",
            Outcome = "success",
            Detail = $"Cleaned up: {string.Join(", ", cleaned)}. Job {job.Id}.",
            OccurredAt = DateTime.UtcNow,
        });
        await db.SaveChangesAsync(ct);

        return $"Authentik cleanup completed: {string.Join(", ", cleaned)}.";
    }, ct);

    // ── Step 3: oauth2-cleanup ──────────────────────────────────────────
    await runner.RunAsync("oauth2-cleanup", async () =>
    {
        // Most recently created registration for this instance.
        var oauthReg = await db.OauthAppRegistries
            .Where(r => r.InstanceId == ctx.InstanceId)
            .OrderByDescending(r => r.CreatedAt)
            .FirstOrDefaultAsync(ct);
        if (oauthReg is null)
            return "No OauthAppRegistry found — skipping OAuth2 cleanup.";

        // Get Xibo client to delete the application
        var oauthSecret = await settings.GetAsync(SettingsService.InstanceOAuthSecretId(abbrev));
        if (string.IsNullOrEmpty(oauthSecret))
            return $"OAuth secret not found for '{abbrev}' — cannot authenticate to delete app. Manual cleanup required.";

        var xiboClient = await xiboFactory.CreateAsync(ctx.InstanceUrl, oauthReg.ClientId, oauthSecret);
        await xiboClient.DeleteApplicationAsync(oauthReg.ClientId);

        db.AuditLogs.Add(new AuditLog
        {
            Id = Guid.NewGuid(),
            InstanceId = ctx.InstanceId,
            Actor = "system/decommission",
            Action = "oauth2-cleanup",
            Target = oauthReg.ClientId,
            Outcome = "success",
            Detail = $"OAuth2 application '{oauthReg.ClientId}' deleted from Xibo. Job {job.Id}.",
            OccurredAt = DateTime.UtcNow,
        });
        await db.SaveChangesAsync(ct);

        return $"OAuth2 application '{oauthReg.ClientId}' deleted.";
    }, ct);

    // ── Step 4: mysql-cleanup ───────────────────────────────────────────
    await runner.RunAsync("mysql-cleanup", async () =>
    {
        var mysqlHost = await settings.GetAsync(SettingsService.MySqlHost, "localhost");
        var mysqlPort = await settings.GetAsync(SettingsService.MySqlPort, "3306");
        var mysqlAdminUser = await settings.GetAsync(SettingsService.MySqlAdminUser, "root");
        var mysqlAdminPassword = await settings.GetAsync(SettingsService.MySqlAdminPassword, string.Empty);
        if (!int.TryParse(mysqlPort, out var port)) port = 3306;

        var dbName = $"xibo_{abbrev}";
        var userName = $"xibo_{abbrev}";

        // A bare "-p" (which is what -p'' collapses to in the shell) makes the client prompt
        // for a password; with no password configured, tell it explicitly not to ask.
        // NOTE(review): a password on the command line can be visible in the remote process list.
        var passwordOption = string.IsNullOrEmpty(mysqlAdminPassword)
            ? "--skip-password"
            : $"-p{ShellQuote(mysqlAdminPassword)}";
        var mysqlCli =
            $"mysql -h {ShellQuote(mysqlHost)} -P {port} -u {ShellQuote(mysqlAdminUser)} {passwordOption} -e ";

        var sshHost = await GetSwarmSshHostAsync(settings);
        using var sshClient = CreateSshClient(sshHost);
        sshClient.Connect();
        try
        {
            // DROP DATABASE
            var dropDbSql = $"DROP DATABASE IF EXISTS {QuoteSqlIdentifier(dbName)}";
            RunSshCommand(sshClient, mysqlCli + ShellQuote(dropDbSql));

            // DROP USER
            var dropUserSql = $"DROP USER IF EXISTS {QuoteSqlString(userName)}@'%'";
            RunSshCommand(sshClient, mysqlCli + ShellQuote(dropUserSql));

            db.AuditLogs.Add(new AuditLog
            {
                Id = Guid.NewGuid(),
                InstanceId = ctx.InstanceId,
                Actor = "system/decommission",
                Action = "mysql-cleanup",
                Target = dbName,
                Outcome = "success",
                Detail = $"Database '{dbName}' and user '{userName}' dropped. Job {job.Id}.",
                OccurredAt = DateTime.UtcNow,
            });
            await db.SaveChangesAsync(ct);

            return $"Database '{dbName}' and user '{userName}' dropped on {mysqlHost}:{port}.";
        }
        finally
        {
            sshClient.Disconnect();
        }
    }, ct);

    // ── Step 5: nfs-archive ─────────────────────────────────────────────
    await runner.RunAsync("nfs-archive", async () =>
    {
        var nfsServer = await settings.GetAsync(SettingsService.NfsServer);
        var nfsExport = await settings.GetAsync(SettingsService.NfsExport);
        var nfsExportFolder = await settings.GetAsync(SettingsService.NfsExportFolder);
        if (string.IsNullOrWhiteSpace(nfsServer))
            return "NFS server not configured — skipping archive.";

        var export = (nfsExport ?? string.Empty).TrimEnd('/');
        var folder = (nfsExportFolder ?? string.Empty).Trim('/');
        var basePath = string.IsNullOrEmpty(folder) ? export : $"{export}/{folder}";

        // Invariant culture keeps the folder suffix in Gregorian digits whatever the host culture is.
        var timestamp = DateTime.UtcNow.ToString(
            "yyyyMMddHHmmss", System.Globalization.CultureInfo.InvariantCulture);
        var sourcePath = $"{basePath}/{abbrev}";
        var archivePath = $"{basePath}/archived/{abbrev}-{timestamp}";

        var sshHost = await GetSwarmSshHostAsync(settings);
        using var sshClient = CreateSshClient(sshHost);
        sshClient.Connect();
        try
        {
            // Temporarily mount NFS to move directories
            var mountPoint = $"/tmp/nfs-decommission-{abbrev}";
            var quotedMountPoint = ShellQuote(mountPoint);
            var quotedNfsSource = ShellQuote($"{nfsServer}:{basePath}");
            var quotedArchiveRoot = ShellQuote($"{mountPoint}/archived");
            var quotedMoveFrom = ShellQuote($"{mountPoint}/{abbrev}");
            var quotedMoveTo = ShellQuote($"{mountPoint}/archived/{abbrev}-{timestamp}");

            RunSshCommand(sshClient, $"sudo mkdir -p {quotedMountPoint}");
            try
            {
                // Mount inside the try so that a failed mount still removes the
                // temporary mount directory in the finally block.
                RunSshCommand(sshClient, $"sudo mount -t nfs4 {quotedNfsSource} {quotedMountPoint}");

                // Ensure archive directory exists
                RunSshCommand(sshClient, $"sudo mkdir -p {quotedArchiveRoot}");

                // Move — DO NOT delete (retain for 30 days minimum)
                RunSshCommand(sshClient, $"sudo mv {quotedMoveFrom} {quotedMoveTo}");
            }
            finally
            {
                RunSshCommandAllowFailure(sshClient, $"sudo umount {quotedMountPoint}");
                RunSshCommandAllowFailure(sshClient, $"sudo rmdir {quotedMountPoint}");
            }

            db.AuditLogs.Add(new AuditLog
            {
                Id = Guid.NewGuid(),
                InstanceId = ctx.InstanceId,
                Actor = "system/decommission",
                Action = "nfs-archive",
                Target = sourcePath,
                Outcome = "success",
                Detail = $"NFS data archived to {archivePath}. Retained for minimum 30 days. Job {job.Id}.",
                OccurredAt = DateTime.UtcNow,
            });
            await db.SaveChangesAsync(ct);

            return $"NFS data moved from {sourcePath} to {archivePath}. Retained for 30+ days.";
        }
        finally
        {
            sshClient.Disconnect();
        }
    }, ct);

    // ── Step 6: registry-update ─────────────────────────────────────────
    await runner.RunAsync("registry-update", async () =>
    {
        var customer = await db.Customers.FirstOrDefaultAsync(c => c.Id == ctx.CustomerId, ct)
            ?? throw new InvalidOperationException($"Customer {ctx.CustomerId} not found.");
        var instance = await db.Instances.FirstOrDefaultAsync(i => i.Id == ctx.InstanceId, ct)
            ?? throw new InvalidOperationException($"Instance {ctx.InstanceId} not found.");

        customer.Status = CustomerStatus.Decommissioned;
        instance.HealthStatus = HealthStatus.Critical;

        db.AuditLogs.Add(new AuditLog
        {
            Id = Guid.NewGuid(),
            InstanceId = ctx.InstanceId,
            Actor = "system/decommission",
            Action = "decommission-complete",
            Target = $"xibo-{abbrev}",
            Outcome = "success",
            Detail = $"Instance fully decommissioned. Customer status → Decommissioned. Job {job.Id}.",
            OccurredAt = DateTime.UtcNow,
        });
        await db.SaveChangesAsync(ct);

        // Broadcast only after the status change has been persisted.
        await hub.Clients.All.SendInstanceStatusChanged(
            ctx.CustomerId.ToString(), CustomerStatus.Decommissioned.ToString());

        return $"Customer '{abbrev}' → Decommissioned. Instance health → Critical. Broadcast sent.";
    }, ct);

    _logger.LogInformation("DecommissionPipeline completed for job {JobId} (abbrev={Abbrev})", job.Id, abbrev);
}

/// <summary>
/// Wraps <paramref name="value"/> in single quotes for a POSIX shell, rewriting every embedded
/// single quote as <c>'\''</c> so the whole value is passed as one literal argument.
/// </summary>
private static string ShellQuote(string? value)
    => "'" + (value ?? string.Empty).Replace("'", "'\\''") + "'";

/// <summary>Backtick-quotes a MySQL identifier, doubling any embedded backtick.</summary>
private static string QuoteSqlIdentifier(string name)
    => "`" + name.Replace("`", "``") + "`";

/// <summary>Single-quotes a MySQL string literal, escaping backslashes and embedded single quotes.</summary>
private static string QuoteSqlString(string value)
    => "'" + value.Replace("\\", "\\\\").Replace("'", "''") + "'";
// ─────────────────────────────────────────────────────────────────────────
// Helpers (shared pattern from Phase1Pipeline)
// ─────────────────────────────────────────────────────────────────────────
/// <summary>
/// Loads the customer referenced by <paramref name="job"/> (with its instances) and builds the
/// <see cref="PipelineContext"/> used by the pipeline steps. The first instance in the customer's
/// collection is used, and the abbreviation is lower-cased.
/// </summary>
/// <param name="job">Job whose <c>CustomerId</c>, <c>Id</c> and <c>Parameters</c> are read.</param>
/// <param name="db">Database context used for the lookup.</param>
/// <param name="ct">Cancellation token for the query.</param>
/// <returns>The populated context.</returns>
/// <exception cref="InvalidOperationException">The customer does not exist or has no instance.</exception>
private static async Task<PipelineContext> BuildContextAsync(Job job, OrchestratorDbContext db, CancellationToken ct)
{
    var owner = await db.Customers
        .Include(c => c.Instances)
        .FirstOrDefaultAsync(c => c.Id == job.CustomerId, ct);
    if (owner is null)
    {
        throw new InvalidOperationException($"Customer {job.CustomerId} not found for job {job.Id}.");
    }

    var target = owner.Instances.FirstOrDefault();
    if (target is null)
    {
        throw new InvalidOperationException($"No instance found for customer {job.CustomerId}.");
    }

    // Lower-cased once here; the steps derive stack, database and group names from it.
    var normalizedAbbreviation = owner.Abbreviation.ToLowerInvariant();

    var context = new PipelineContext
    {
        JobId = job.Id,
        CustomerId = owner.Id,
        InstanceId = target.Id,
        Abbreviation = normalizedAbbreviation,
        CompanyName = owner.CompanyName,
        AdminEmail = owner.AdminEmail,
        AdminFirstName = owner.AdminFirstName,
        InstanceUrl = target.XiboUrl,
        DockerStackName = target.DockerStackName,
        ParametersJson = job.Parameters,
    };
    return context;
}
/// <summary>
/// Reads the <c>Ssh.Swarm*</c> settings and packs them into an <see cref="SshConnectionInfo"/>.
/// The port falls back to 22 when the stored value is not an integer; the user defaults to
/// <c>root</c>.
/// </summary>
/// <param name="settings">Settings service the values are read from.</param>
/// <returns>Connection details for the Swarm SSH host.</returns>
/// <exception cref="InvalidOperationException">The <c>Ssh.SwarmHost</c> setting is missing.</exception>
private static async Task<SshConnectionInfo> GetSwarmSshHostAsync(SettingsService settings)
{
    var swarmHost = await settings.GetAsync("Ssh.SwarmHost");
    if (swarmHost is null)
    {
        throw new InvalidOperationException("SSH Swarm host not configured (Ssh.SwarmHost).");
    }

    var rawPort = await settings.GetAsync("Ssh.SwarmPort", "22");
    var login = await settings.GetAsync("Ssh.SwarmUser", "root");
    var privateKeyPath = await settings.GetAsync("Ssh.SwarmKeyPath");
    var loginPassword = await settings.GetAsync("Ssh.SwarmPassword");

    // An unparsable port silently falls back to the SSH default.
    var sshPort = int.TryParse(rawPort, out var parsedPort) ? parsedPort : 22;

    return new SshConnectionInfo(swarmHost, sshPort, login, privateKeyPath, loginPassword);
}
/// <summary>
/// Builds an unconnected <see cref="SshClient"/> for <paramref name="info"/>. Authentication
/// methods are offered in this order: configured private key, then configured password. When
/// neither is configured, <c>~/.ssh/id_rsa</c> of the current user profile is used if it exists.
/// </summary>
/// <param name="info">Host, port, user name and optional key path / password.</param>
/// <returns>A client the caller must connect and dispose.</returns>
/// <exception cref="InvalidOperationException">
/// No key or password is configured and the default key file does not exist.
/// </exception>
private static SshClient CreateSshClient(SshConnectionInfo info)
{
    var methods = new List<AuthenticationMethod>();

    if (info.KeyPath is { Length: > 0 } configuredKey)
    {
        methods.Add(new PrivateKeyAuthenticationMethod(info.Username, new PrivateKeyFile(configuredKey)));
    }

    if (info.Password is { Length: > 0 } configuredPassword)
    {
        methods.Add(new PasswordAuthenticationMethod(info.Username, configuredPassword));
    }

    if (methods.Count == 0)
    {
        // Nothing configured: fall back to the default RSA key of the user running the process.
        var profileDir = Environment.GetFolderPath(Environment.SpecialFolder.UserProfile);
        var fallbackKey = Path.Combine(profileDir, ".ssh", "id_rsa");
        if (!File.Exists(fallbackKey))
        {
            throw new InvalidOperationException(
                $"No SSH authentication method available for {info.Host}:{info.Port}.");
        }

        methods.Add(new PrivateKeyAuthenticationMethod(info.Username, new PrivateKeyFile(fallbackKey)));
    }

    return new SshClient(
        new Renci.SshNet.ConnectionInfo(info.Host, info.Port, info.Username, methods.ToArray()));
}
/// <summary>
/// Runs <paramref name="command"/> on the connected <paramref name="client"/> and returns its
/// standard output.
/// </summary>
/// <param name="client">Connected SSH client.</param>
/// <param name="command">Shell command line to execute remotely.</param>
/// <returns>The command's result text.</returns>
/// <exception cref="InvalidOperationException">
/// The exit status is not zero; the message carries the exit status and the error text, but not
/// the command line itself.
/// </exception>
private static string RunSshCommand(SshClient client, string command)
{
    using (var executed = client.RunCommand(command))
    {
        if (executed.ExitStatus == 0)
        {
            return executed.Result;
        }

        throw new InvalidOperationException(
            $"SSH command failed (exit {executed.ExitStatus}): {executed.Error}");
    }
}
/// <summary>
/// Runs <paramref name="command"/> on <paramref name="client"/> and discards the outcome.
/// </summary>
/// <param name="client">Connected SSH client.</param>
/// <param name="command">Shell command line to execute remotely.</param>
private static void RunSshCommandAllowFailure(SshClient client, string command)
{
    // The exit code is deliberately not inspected: this is for idempotent cleanup operations.
    // The command object is still disposed.
    client.RunCommand(command).Dispose();
}
/// <summary>
/// Connection details for the Swarm SSH host, as assembled by <c>GetSwarmSshHostAsync</c> and
/// consumed by <c>CreateSshClient</c>.
/// </summary>
/// <param name="Host">SSH host name or address (from the <c>Ssh.SwarmHost</c> setting).</param>
/// <param name="Port">SSH port (from <c>Ssh.SwarmPort</c>; 22 when unset or unparsable).</param>
/// <param name="Username">Login user (from <c>Ssh.SwarmUser</c>; defaults to <c>root</c>).</param>
/// <param name="KeyPath">Optional private key file path; null or empty means no configured key.</param>
/// <param name="Password">Optional login password; null or empty means no password authentication.</param>
internal sealed record SshConnectionInfo(
string Host, int Port, string Username, string? KeyPath, string? Password);
}