feat: Implement provisioning pipelines for subscription management

- Add ReactivatePipeline to handle subscription reactivation, including scaling Docker services, health verification, status updates, audit logging, and broadcasting status changes.
- Introduce RotateCredentialsPipeline for OAuth2 credential rotation, managing the deletion of old apps, creation of new ones, credential storage, access verification, and audit logging.
- Create StepRunner to manage job step execution, including lifecycle management and progress broadcasting via SignalR.
- Implement SuspendPipeline for subscription suspension, scaling down services, updating statuses, logging audits, and broadcasting changes.
- Add UpdateScreenLimitPipeline to update Xibo CMS screen limits and record snapshots.
- Introduce XiboFeatureManifests for hardcoded feature ACLs per role.
- Add docker-compose.dev.yml for local development with PostgreSQL setup.
This commit is contained in:
Matt Batchelder
2026-03-18 10:27:26 -04:00
parent c2e03de8bb
commit c6d46098dd
77 changed files with 9412 additions and 29 deletions

View File

@@ -0,0 +1,229 @@
using Microsoft.AspNetCore.SignalR;
using Microsoft.EntityFrameworkCore;
using Renci.SshNet;
using OTSSignsOrchestrator.Core.Services;
using OTSSignsOrchestrator.Server.Data;
using OTSSignsOrchestrator.Server.Data.Entities;
using OTSSignsOrchestrator.Server.Hubs;
namespace OTSSignsOrchestrator.Server.Workers;
/// <summary>
/// Subscription reactivation pipeline — scales up Docker services, verifies health, resets
/// payment failure counters. Handles <c>JobType = "reactivate"</c>.
///
/// Steps:
///   1. scale-up       — SSH <c>docker service scale</c> for the web and xmr services (1 replica each)
///   2. health-verify  — Poll GET /api/about every 10s for up to 3 minutes
///   3. update-status  — Customer.Status = Active, reset FailedPaymentCount + FirstPaymentFailedAt
///   4. audit-log      — Append-only AuditLog entry
///   5. broadcast      — InstanceStatusChanged via FleetHub
/// </summary>
public sealed class ReactivatePipeline : IProvisioningPipeline
{
    /// <summary>Job type string this pipeline is registered for.</summary>
    public string HandlesJobType => "reactivate";

    private const int TotalSteps = 5;

    /// <summary>Path (relative to the instance URL) that is polled during health verification.</summary>
    private const string HealthEndpoint = "api/about";

    /// <summary>Overall budget for the instance to start answering after scale-up.</summary>
    private static readonly TimeSpan HealthTimeout = TimeSpan.FromMinutes(3);

    /// <summary>Pause between two health probes.</summary>
    private static readonly TimeSpan HealthPollInterval = TimeSpan.FromSeconds(10);

    /// <summary>
    /// Upper bound for a single health probe, so one hung request cannot eat the whole
    /// <see cref="HealthTimeout"/> budget.
    /// </summary>
    private static readonly TimeSpan HealthRequestTimeout = TimeSpan.FromSeconds(30);

    private readonly IServiceProvider _services;
    private readonly ILogger<ReactivatePipeline> _logger;

    /// <summary>Creates the pipeline.</summary>
    /// <param name="services">Root service provider; a fresh scope is created per job execution.</param>
    /// <param name="logger">Logger used for pipeline and step output.</param>
    public ReactivatePipeline(
        IServiceProvider services,
        ILogger<ReactivatePipeline> logger)
    {
        _services = services;
        _logger = logger;
    }

    /// <summary>
    /// Runs the five reactivation steps for <paramref name="job"/> in order. Each step is executed
    /// through a <c>StepRunner</c>; an exception thrown by a step propagates to the caller.
    /// </summary>
    /// <param name="job">The job to execute; its customer must exist and own at least one instance.</param>
    /// <param name="ct">Cancellation token flowed into database, HTTP and delay calls.</param>
    /// <exception cref="ArgumentNullException"><paramref name="job"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidOperationException">
    /// The customer or instance cannot be found, SSH is not configured, the abbreviation is not
    /// safe to use in a shell command, or the remote command exits non-zero.
    /// </exception>
    /// <exception cref="TimeoutException">The instance did not become healthy in time.</exception>
    public async Task ExecuteAsync(Job job, CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(job);

        await using var scope = _services.CreateAsyncScope();
        var db = scope.ServiceProvider.GetRequiredService<OrchestratorDbContext>();
        var hub = scope.ServiceProvider.GetRequiredService<IHubContext<FleetHub, IFleetClient>>();
        var settings = scope.ServiceProvider.GetRequiredService<SettingsService>();

        var ctx = await BuildContextAsync(job, db, ct);
        var runner = new StepRunner(db, hub, _logger, job.Id, TotalSteps);
        var abbrev = ctx.Abbreviation;

        // ── Step 1: scale-up ────────────────────────────────────────────────
        await runner.RunAsync("scale-up", () => ScaleUpServicesAsync(settings, abbrev), ct);

        // ── Step 2: health-verify ───────────────────────────────────────────
        await runner.RunAsync("health-verify", () => WaitForHealthyAsync(ctx.InstanceUrl, ct), ct);

        // ── Step 3: update-status ───────────────────────────────────────────
        await runner.RunAsync("update-status", async () =>
        {
            var customer = await db.Customers.FirstOrDefaultAsync(c => c.Id == ctx.CustomerId, ct)
                ?? throw new InvalidOperationException($"Customer {ctx.CustomerId} not found.");
            var instance = await db.Instances.FirstOrDefaultAsync(i => i.Id == ctx.InstanceId, ct)
                ?? throw new InvalidOperationException($"Instance {ctx.InstanceId} not found.");

            customer.Status = CustomerStatus.Active;
            customer.FailedPaymentCount = 0;
            customer.FirstPaymentFailedAt = null;
            instance.HealthStatus = HealthStatus.Healthy;
            instance.LastHealthCheck = DateTime.UtcNow;

            await db.SaveChangesAsync(ct);
            return $"Customer '{abbrev}' status → Active, FailedPaymentCount → 0, instance health → Healthy.";
        }, ct);

        // ── Step 4: audit-log ───────────────────────────────────────────────
        await runner.RunAsync("audit-log", async () =>
        {
            db.AuditLogs.Add(new AuditLog
            {
                Id = Guid.NewGuid(),
                InstanceId = ctx.InstanceId,
                Actor = "system/stripe-webhook",
                Action = "reactivate",
                Target = $"xibo-{abbrev}",
                Outcome = "success",
                Detail = $"Subscription reactivated. Services scaled to 1. Health verified. Job {job.Id}.",
                OccurredAt = DateTime.UtcNow,
            });

            await db.SaveChangesAsync(ct);
            return "AuditLog entry written for reactivation.";
        }, ct);

        // ── Step 5: broadcast ───────────────────────────────────────────────
        await runner.RunAsync("broadcast", async () =>
        {
            await hub.Clients.All.SendInstanceStatusChanged(
                ctx.CustomerId.ToString(), CustomerStatus.Active.ToString());
            return "Broadcast InstanceStatusChanged → Active.";
        }, ct);

        _logger.LogInformation("ReactivatePipeline completed for job {JobId} (abbrev={Abbrev})", job.Id, abbrev);
    }

    // ─────────────────────────────────────────────────────────────────────────
    // Step implementations
    // ─────────────────────────────────────────────────────────────────────────

    /// <summary>
    /// Connects to the Swarm host over SSH and scales the customer's web and xmr services to one
    /// replica each.
    /// </summary>
    /// <returns>A human-readable step result including the command output.</returns>
    private static async Task<string> ScaleUpServicesAsync(SettingsService settings, string abbrev)
    {
        // The abbreviation ends up inside a remote shell command line, so reject anything that
        // could be interpreted by the shell before opening the connection.
        EnsureSafeForShell(abbrev);

        var sshHost = await GetSwarmSshHostAsync(settings);
        using var sshClient = CreateSshClient(sshHost);
        sshClient.Connect();
        try
        {
            var cmd = $"docker service scale xibo-{abbrev}_web=1 xibo-{abbrev}_xmr=1";
            var result = RunSshCommand(sshClient, cmd);
            return $"Scaled up services for xibo-{abbrev}. Output: {result}";
        }
        finally
        {
            sshClient.Disconnect();
        }
    }

    /// <summary>
    /// Polls the instance's <c>/api/about</c> endpoint until it answers with a success status or
    /// <see cref="HealthTimeout"/> elapses.
    /// </summary>
    /// <returns>A human-readable step result including the observed status code.</returns>
    /// <exception cref="TimeoutException">No success response was received within the time budget.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    private static async Task<string> WaitForHealthyAsync(string instanceUrl, CancellationToken ct)
    {
        var deadline = DateTimeOffset.UtcNow.Add(HealthTimeout);

        // A trailing slash on the base plus a request path WITHOUT a leading slash makes the
        // endpoint resolve underneath the instance URL, keeping any path prefix it carries.
        var baseAddress = new Uri(instanceUrl.TrimEnd('/') + "/");
        using var httpClient = new HttpClient
        {
            BaseAddress = baseAddress,
            Timeout = HealthRequestTimeout,
        };

        var lastFailure = "no response received";

        while (DateTimeOffset.UtcNow < deadline)
        {
            ct.ThrowIfCancellationRequested();

            try
            {
                using var response = await httpClient.GetAsync(HealthEndpoint, ct);
                if (response.IsSuccessStatusCode)
                    return $"Xibo CMS at {instanceUrl} is healthy (status {(int)response.StatusCode}).";

                lastFailure = $"HTTP {(int)response.StatusCode}";
            }
            catch (HttpRequestException ex)
            {
                // Not ready yet — keep polling
                lastFailure = ex.Message;
            }
            catch (OperationCanceledException) when (!ct.IsCancellationRequested)
            {
                // The per-request timeout fired (the pipeline token is still live): a slow or
                // hung instance counts as "not ready yet", not as a step failure.
                lastFailure = $"request timed out after {HealthRequestTimeout.TotalSeconds} seconds";
            }

            // Never sleep past the deadline; a non-positive remainder means the budget is spent.
            var remaining = deadline - DateTimeOffset.UtcNow;
            if (remaining <= TimeSpan.Zero)
                break;

            await Task.Delay(remaining < HealthPollInterval ? remaining : HealthPollInterval, ct);
        }

        throw new TimeoutException(
            $"Xibo CMS at {instanceUrl} did not return a success status from /{HealthEndpoint} " +
            $"within {HealthTimeout.TotalMinutes} minutes (last result: {lastFailure}).");
    }

    // ─────────────────────────────────────────────────────────────────────────
    // Helpers (shared pattern from Phase1Pipeline)
    // ─────────────────────────────────────────────────────────────────────────

    /// <summary>
    /// Loads the job's customer (with instances) and assembles the <c>PipelineContext</c> used by
    /// the steps. The abbreviation is lower-cased with the invariant culture.
    /// </summary>
    /// <exception cref="InvalidOperationException">The customer does not exist or has no instance.</exception>
    private static async Task<PipelineContext> BuildContextAsync(Job job, OrchestratorDbContext db, CancellationToken ct)
    {
        var customer = await db.Customers
            .Include(c => c.Instances)
            .FirstOrDefaultAsync(c => c.Id == job.CustomerId, ct)
            ?? throw new InvalidOperationException($"Customer {job.CustomerId} not found for job {job.Id}.");

        // NOTE(review): takes whichever instance comes first; if a customer can own several
        // instances the choice is not explicitly ordered — confirm this is intended.
        var instance = customer.Instances.FirstOrDefault()
            ?? throw new InvalidOperationException($"No instance found for customer {job.CustomerId}.");

        var abbrev = customer.Abbreviation.ToLowerInvariant();

        return new PipelineContext
        {
            JobId = job.Id,
            CustomerId = customer.Id,
            InstanceId = instance.Id,
            Abbreviation = abbrev,
            CompanyName = customer.CompanyName,
            AdminEmail = customer.AdminEmail,
            AdminFirstName = customer.AdminFirstName,
            InstanceUrl = instance.XiboUrl,
            DockerStackName = instance.DockerStackName,
            ParametersJson = job.Parameters,
        };
    }

    /// <summary>
    /// Reads the Swarm SSH connection settings (<c>Ssh.Swarm*</c> keys). Port defaults to 22 and
    /// user to <c>root</c>; an unparsable port also falls back to 22.
    /// </summary>
    /// <exception cref="InvalidOperationException"><c>Ssh.SwarmHost</c> is missing or blank.</exception>
    private static async Task<SshConnectionInfo> GetSwarmSshHostAsync(SettingsService settings)
    {
        var host = await settings.GetAsync("Ssh.SwarmHost");
        if (string.IsNullOrWhiteSpace(host))
            throw new InvalidOperationException("SSH Swarm host not configured (Ssh.SwarmHost).");

        var portStr = await settings.GetAsync("Ssh.SwarmPort", "22");
        var user = await settings.GetAsync("Ssh.SwarmUser", "root");
        var keyPath = await settings.GetAsync("Ssh.SwarmKeyPath");
        var password = await settings.GetAsync("Ssh.SwarmPassword");

        // Settings are machine-readable values, so parse independently of the server's culture.
        var parsed = int.TryParse(
            portStr,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out var port);
        if (!parsed)
            port = 22;

        return new SshConnectionInfo(host, port, user, keyPath, password);
    }

    /// <summary>
    /// Builds an (unconnected) <c>SshClient</c> for <paramref name="info"/>. A configured key path
    /// and/or password are offered as authentication methods; when neither is configured the
    /// current user's <c>~/.ssh/id_rsa</c> is used if it exists.
    /// </summary>
    /// <exception cref="InvalidOperationException">No authentication method could be assembled.</exception>
    private static SshClient CreateSshClient(SshConnectionInfo info)
    {
        // NOTE(review): the PrivateKeyFile / authentication method objects created here are not
        // disposed by this class; whether disposing the returned client releases them depends on
        // SSH.NET's ownership rules for a caller-supplied ConnectionInfo — confirm.
        var authMethods = new List<AuthenticationMethod>();

        if (!string.IsNullOrEmpty(info.KeyPath))
            authMethods.Add(new PrivateKeyAuthenticationMethod(info.Username, new PrivateKeyFile(info.KeyPath)));

        if (!string.IsNullOrEmpty(info.Password))
            authMethods.Add(new PasswordAuthenticationMethod(info.Username, info.Password));

        if (authMethods.Count == 0)
        {
            var defaultKeyPath = Path.Combine(
                Environment.GetFolderPath(Environment.SpecialFolder.UserProfile), ".ssh", "id_rsa");

            if (!File.Exists(defaultKeyPath))
                throw new InvalidOperationException(
                    $"No SSH authentication method available for {info.Host}:{info.Port}.");

            authMethods.Add(new PrivateKeyAuthenticationMethod(info.Username, new PrivateKeyFile(defaultKeyPath)));
        }

        var connInfo = new Renci.SshNet.ConnectionInfo(info.Host, info.Port, info.Username, authMethods.ToArray());
        return new SshClient(connInfo);
    }

    /// <summary>Runs <paramref name="command"/> on the connected client and returns its standard output.</summary>
    /// <exception cref="InvalidOperationException">The command finished with a non-zero exit status.</exception>
    private static string RunSshCommand(SshClient client, string command)
    {
        using var cmd = client.RunCommand(command);
        if (cmd.ExitStatus != 0)
            throw new InvalidOperationException(
                $"SSH command failed (exit {cmd.ExitStatus}): {cmd.Error}");
        return cmd.Result;
    }

    /// <summary>
    /// Verifies that <paramref name="abbrev"/> only contains ASCII letters, digits, '-', '_' or
    /// '.', so interpolating it into a shell command cannot inject extra commands or arguments.
    /// </summary>
    /// <exception cref="InvalidOperationException">The value is empty or contains another character.</exception>
    private static void EnsureSafeForShell(string abbrev)
    {
        var safe = !string.IsNullOrEmpty(abbrev);
        if (safe)
        {
            foreach (var c in abbrev)
            {
                var allowed = c is (>= 'a' and <= 'z') or (>= 'A' and <= 'Z') or (>= '0' and <= '9')
                    or '-' or '_' or '.';
                if (!allowed)
                {
                    safe = false;
                    break;
                }
            }
        }

        if (!safe)
            throw new InvalidOperationException(
                $"Customer abbreviation '{abbrev}' contains characters that are not allowed in a Docker service name.");
    }

    /// <summary>Connection parameters for the Swarm SSH host, as read from settings.</summary>
    internal sealed record SshConnectionInfo(
        string Host, int Port, string Username, string? KeyPath, string? Password);
}