feat: add database circuit breaker and health-readiness endpoint

Introduce a process-wide DatabaseCircuitBreaker that fails fast when Postgres is unavailable (e.g. disk full) instead of letting every request burn doomed EF Core retries. CircuitAwareExecutionStrategy derives from NpgsqlRetryingExecutionStrategy and records success/failure around the public Execute/ExecuteAsync seams; background workers skip work and back off while the circuit is open; the exception middleware maps an open circuit (and other DB outages) to 503. Adds /health (liveness) and /health/ready (readiness, reporting circuit state), plus unit tests for the open/half-open transitions and non-transient SQLSTATE detection.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
西街长安 2026-06-11 17:52:18 +08:00
parent 6da0690fd3
commit 5196ffa0f0
11 changed files with 632 additions and 14 deletions

View File

@ -0,0 +1,15 @@
namespace LiveRecorder.Application.Models.Reports;
public sealed class HealthReadyResponse
{
public string Status { get; set; } = "ready";
public DateTimeOffset Timestamp { get; set; }
public DatabaseHealthStatus Database { get; set; } = new();
}
public sealed class DatabaseHealthStatus
{
public bool Reachable { get; set; }
public string? Reason { get; set; }
public int ConsecutiveFailures { get; set; }
}

View File

@ -0,0 +1,103 @@
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Storage;
using Npgsql.EntityFrameworkCore.PostgreSQL;
namespace LiveRecorder.Infrastructure.Persistence;
/// <summary>
/// Custom EF Core execution strategy that integrates with <see cref="DatabaseCircuitBreaker"/>.
///
/// When the circuit is open, operations fail immediately without retrying.
/// When a non-transient error occurs (e.g. disk_full), the operation does not retry.
/// After each failure, the circuit is recorded, and after each success, the circuit is reset.
/// </summary>
public sealed class CircuitAwareExecutionStrategy : NpgsqlRetryingExecutionStrategy
{
private static readonly TimeSpan DefaultMaxRetryDelay = TimeSpan.FromSeconds(15);
public CircuitAwareExecutionStrategy(
ExecutionStrategyDependencies dependencies,
int maxRetryCount,
TimeSpan maxRetryDelay)
: base(dependencies, maxRetryCount, maxRetryDelay, errorCodesToAdd: null)
{
}
public CircuitAwareExecutionStrategy(
ExecutionStrategyDependencies dependencies,
int maxRetryCount,
TimeSpan maxRetryDelay,
ICollection<string>? errorCodesToAdd)
: base(dependencies, maxRetryCount, maxRetryDelay, errorCodesToAdd)
{
}
protected override bool ShouldRetryOn(Exception? exception)
{
// Fast-fail: circuit is open
if (DatabaseCircuitBreaker.IsOpen)
{
return false;
}
// Fast-fail: non-transient errors (disk_full, out_of_memory, etc.)
if (DatabaseCircuitBreaker.IsNonTransient(exception))
{
return false;
}
// Delegate to default Npgsql retry logic for transient errors
return base.ShouldRetryOn(exception);
}
protected override void OnFirstExecution()
{
// If circuit is open, throw immediately before even attempting
if (DatabaseCircuitBreaker.IsOpen)
{
throw new DatabaseCircuitOpenException(
$"Database circuit breaker is open. Consecutive failures: {DatabaseCircuitBreaker.ConsecutiveFailures}. " +
$"Circuit opened at: {DatabaseCircuitBreaker.OpenedAt:O}.");
}
base.OnFirstExecution();
}
public override TResult Execute<TState, TResult>(
TState state,
Func<DbContext, TState, TResult> operation,
Func<DbContext, TState, ExecutionResult<TResult>>? verifySucceeded)
{
try
{
var result = base.Execute(state, operation, verifySucceeded);
DatabaseCircuitBreaker.RecordSuccess();
return result;
}
catch
{
DatabaseCircuitBreaker.RecordFailure();
throw;
}
}
public override async Task<TResult> ExecuteAsync<TState, TResult>(
TState state,
Func<DbContext, TState, CancellationToken, Task<TResult>> operation,
Func<DbContext, TState, CancellationToken, Task<ExecutionResult<TResult>>>? verifySucceeded,
CancellationToken cancellationToken)
{
try
{
var result = await base.ExecuteAsync(state, operation, verifySucceeded, cancellationToken)
.ConfigureAwait(false);
DatabaseCircuitBreaker.RecordSuccess();
return result;
}
catch
{
DatabaseCircuitBreaker.RecordFailure();
throw;
}
}
}

View File

@ -0,0 +1,121 @@
using Microsoft.Extensions.Logging;
namespace LiveRecorder.Infrastructure.Persistence;
/// <summary>
/// Thread-safe static circuit breaker for database operations.
/// When the database becomes unavailable (e.g. disk full), this prevents
/// every API request from wasting 45 seconds on doomed retries.
/// </summary>
public static class DatabaseCircuitBreaker
{
private static readonly object Lock = new();
/// <summary>Consecutive failures before the circuit opens.</summary>
private const int FailureThreshold = 5;
/// <summary>How long the circuit stays open before allowing a probe.</summary>
private static readonly TimeSpan BreakDuration = TimeSpan.FromSeconds(30);
/// <summary>Npgsql error codes that are NOT transient — retrying is futile.</summary>
private static readonly HashSet<string> NonTransientCodes = new(StringComparer.Ordinal)
{
"53100", // disk_full
"53200", // out_of_memory
"53300", // too_many_connections
"08006", // connection_failure (persistent)
"57P03", // cannot_connect_now
"42601", // syntax_error (bug, not transient)
"42501", // insufficient_privilege
"3D000", // invalid_catalog_name
"28P01", // invalid_password
};
private static int _consecutiveFailures;
private static DateTimeOffset _openedAt = DateTimeOffset.MinValue;
/// <summary>Whether the circuit is currently open (failing fast).</summary>
public static bool IsOpen
{
get
{
if (_consecutiveFailures < FailureThreshold)
{
return false;
}
if (DateTimeOffset.UtcNow - _openedAt > BreakDuration)
{
// Transition to half-open: allow one probe
lock (Lock)
{
if (_consecutiveFailures >= FailureThreshold &&
DateTimeOffset.UtcNow - _openedAt > BreakDuration)
{
// Reset to just below threshold so the next call probes
_consecutiveFailures = FailureThreshold - 1;
}
}
return false;
}
return true;
}
}
public static int ConsecutiveFailures => Volatile.Read(ref _consecutiveFailures);
public static DateTimeOffset OpenedAt => _openedAt;
/// <summary>Record a successful database operation.</summary>
public static void RecordSuccess()
{
lock (Lock)
{
_consecutiveFailures = 0;
}
}
/// <summary>Record a failed database operation.</summary>
public static void RecordFailure()
{
lock (Lock)
{
_consecutiveFailures++;
if (_consecutiveFailures >= FailureThreshold)
{
_openedAt = DateTimeOffset.UtcNow;
}
}
}
/// <summary>
/// Check whether a given exception is a non-transient database error
/// that should NOT be retried. Returns true if retrying would be futile.
/// </summary>
public static bool IsNonTransient(Exception? ex)
{
while (ex is not null)
{
if (ex is Npgsql.NpgsqlException npgEx && npgEx.SqlState is { Length: 5 } state)
{
return NonTransientCodes.Contains(state);
}
ex = ex.InnerException;
}
return false;
}
/// <summary>Log the current circuit state.</summary>
public static void LogState(ILogger logger)
{
logger.LogInformation(
"DatabaseCircuitBreaker state: Open={IsOpen}, ConsecutiveFailures={Failures}, OpenedAt={OpenedAt}",
IsOpen,
ConsecutiveFailures,
OpenedAt == DateTimeOffset.MinValue ? "never" : OpenedAt.ToString("O"));
}
}

View File

@ -0,0 +1,16 @@
namespace LiveRecorder.Infrastructure.Persistence;
/// <summary>
/// Thrown when a database operation is rejected because the circuit breaker is open.
/// This is a fast-fail — the request will not be retried.
/// </summary>
public sealed class DatabaseCircuitOpenException : InvalidOperationException
{
public DatabaseCircuitOpenException(string message) : base(message)
{
}
public DatabaseCircuitOpenException(string message, Exception innerException) : base(message, innerException)
{
}
}

View File

@ -1,3 +1,4 @@
using LiveRecorder.Infrastructure.Persistence;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
@ -7,6 +8,8 @@ namespace LiveRecorder.Infrastructure.Services;
public sealed class CleanupOperationBackgroundService : BackgroundService
{
private static readonly TimeSpan IdleDelay = TimeSpan.FromSeconds(2);
private static readonly TimeSpan ErrorBaseDelay = TimeSpan.FromSeconds(5);
private static readonly TimeSpan ErrorMaxDelay = TimeSpan.FromMinutes(5);
private readonly IServiceScopeFactory _serviceScopeFactory;
private readonly ILogger<CleanupOperationBackgroundService> _logger;
@ -20,6 +23,7 @@ public sealed class CleanupOperationBackgroundService : BackgroundService
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
// Startup: requeue interrupted operations
try
{
using var startupScope = _serviceScopeFactory.CreateScope();
@ -31,17 +35,36 @@ public sealed class CleanupOperationBackgroundService : BackgroundService
_logger.LogWarning(ex, "Failed to requeue interrupted cleanup operations at startup");
}
var consecutiveErrors = 0;
while (!stoppingToken.IsCancellationRequested)
{
try
{
// Skip processing if the circuit is open — don't waste resources
if (DatabaseCircuitBreaker.IsOpen)
{
consecutiveErrors = await DelayWithBackoff(ErrorBaseDelay, ErrorMaxDelay, consecutiveErrors, stoppingToken);
continue;
}
using var scope = _serviceScopeFactory.CreateScope();
var coordinator = scope.ServiceProvider.GetRequiredService<CleanupOperationCoordinator>();
var processed = await coordinator.ProcessNextQueuedOperationAsync(stoppingToken);
if (processed)
{
consecutiveErrors = 0;
continue;
}
// No queued operations — idle delay
consecutiveErrors = 0;
await Task.Delay(IdleDelay, stoppingToken);
}
catch (DatabaseCircuitOpenException)
{
consecutiveErrors = await DelayWithBackoff(ErrorBaseDelay, ErrorMaxDelay, consecutiveErrors, stoppingToken);
}
catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
{
@ -49,17 +72,47 @@ public sealed class CleanupOperationBackgroundService : BackgroundService
}
catch (Exception ex)
{
_logger.LogError(ex, "Cleanup operation background worker failed");
}
var isDatabaseError = DatabaseCircuitBreaker.IsNonTransient(ex) ||
ex is Npgsql.NpgsqlException ||
ex is Microsoft.EntityFrameworkCore.DbUpdateException;
try
{
await Task.Delay(IdleDelay, stoppingToken);
}
catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
{
break;
if (isDatabaseError)
{
DatabaseCircuitBreaker.RecordFailure();
_logger.LogWarning(ex,
"Cleanup background worker: database error (#{ErrorCount}). Circuit state: Open={IsOpen}, Failures={Failures}",
consecutiveErrors + 1,
DatabaseCircuitBreaker.IsOpen,
DatabaseCircuitBreaker.ConsecutiveFailures);
}
else
{
_logger.LogError(ex, "Cleanup operation background worker failed");
}
consecutiveErrors = await DelayWithBackoff(ErrorBaseDelay, ErrorMaxDelay, consecutiveErrors, stoppingToken);
}
}
}
private static async Task<int> DelayWithBackoff(
TimeSpan baseDelay, TimeSpan maxDelay, int errorCount, CancellationToken cancellationToken)
{
errorCount++;
// Exponential backoff: 5s, 10s, 20s, 40s, 80s, 160s, capping at 5min
var factor = Math.Pow(2, Math.Min(errorCount - 1, 6));
var delay = TimeSpan.FromMilliseconds(
Math.Min(baseDelay.TotalMilliseconds * factor, maxDelay.TotalMilliseconds));
try
{
await Task.Delay(delay, cancellationToken);
}
catch (OperationCanceledException)
{
// Swallow — loop will exit on next iteration
}
return errorCount;
}
}

View File

@ -146,8 +146,20 @@ public sealed class LiveRoomPollingBackgroundService : BackgroundService, ILiveR
{
break;
}
catch (DatabaseCircuitOpenException)
{
// Circuit is open — skip this iteration and wait
_logger.LogDebug("Polling loop skipped: database circuit breaker is open");
delay = TimeSpan.FromSeconds(30);
}
catch (Exception ex)
{
// Record database failures to the circuit breaker
if (IsTransientDatabaseException(ex) || DatabaseCircuitBreaker.IsNonTransient(ex))
{
DatabaseCircuitBreaker.RecordFailure();
}
_logger.LogError(ex, "Background live room polling failed");
try

View File

@ -1,3 +1,4 @@
using LiveRecorder.Infrastructure.Persistence;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
@ -7,7 +8,8 @@ namespace LiveRecorder.Infrastructure.Services;
public sealed class RetentionCleanupBackgroundService : BackgroundService
{
private static readonly TimeSpan CleanupInterval = TimeSpan.FromHours(24);
private static readonly TimeSpan ErrorBaseDelay = TimeSpan.FromSeconds(10);
private static readonly TimeSpan ErrorMaxDelay = TimeSpan.FromMinutes(5);
private readonly IServiceScopeFactory _serviceScopeFactory;
private readonly ILogger<RetentionCleanupBackgroundService> _logger;
@ -21,13 +23,31 @@ public sealed class RetentionCleanupBackgroundService : BackgroundService
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
var consecutiveErrors = 0;
while (!stoppingToken.IsCancellationRequested)
{
try
{
// Skip if circuit is already open
if (DatabaseCircuitBreaker.IsOpen)
{
_logger.LogDebug("Retention cleanup skipped: database circuit breaker is open");
await Task.Delay(TimeSpan.FromMinutes(1), stoppingToken);
continue;
}
using var scope = _serviceScopeFactory.CreateScope();
var cleanupService = scope.ServiceProvider.GetRequiredService<RetentionCleanupService>();
await cleanupService.TryEnqueueAsync(ignoreEnabledSetting: false, cancellationToken: stoppingToken);
consecutiveErrors = 0;
}
catch (DatabaseCircuitOpenException)
{
// Silent — circuit is already logged
await Task.Delay(TimeSpan.FromMinutes(1), stoppingToken);
continue;
}
catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
{
@ -35,7 +55,38 @@ public sealed class RetentionCleanupBackgroundService : BackgroundService
}
catch (Exception ex)
{
_logger.LogError(ex, "Retention cleanup background task failed");
var isDatabaseError = DatabaseCircuitBreaker.IsNonTransient(ex) ||
ex is Npgsql.NpgsqlException ||
ex is Microsoft.EntityFrameworkCore.DbUpdateException;
consecutiveErrors++;
var delay = TimeSpan.FromMilliseconds(
Math.Min(ErrorBaseDelay.TotalMilliseconds * Math.Pow(2, Math.Min(consecutiveErrors - 1, 6)),
ErrorMaxDelay.TotalMilliseconds));
if (isDatabaseError)
{
DatabaseCircuitBreaker.RecordFailure();
_logger.LogWarning(ex,
"Retention cleanup: database error (#{ErrorCount}). Circuit: Open={IsOpen}, Failures={Failures}",
consecutiveErrors,
DatabaseCircuitBreaker.IsOpen,
DatabaseCircuitBreaker.ConsecutiveFailures);
}
else
{
_logger.LogError(ex, "Retention cleanup background task failed");
}
try
{
await Task.Delay(delay, stoppingToken);
}
catch (OperationCanceledException)
{
break;
}
continue;
}
try

View File

@ -1,3 +1,7 @@
using LiveRecorder.Infrastructure.Persistence;
using Microsoft.EntityFrameworkCore;
using Npgsql;
namespace LiveRecorder.WebApi.Middleware;
public sealed class ExceptionHandlingMiddleware
@ -17,22 +21,80 @@ public sealed class ExceptionHandlingMiddleware
{
await _next(context);
}
catch (DatabaseCircuitOpenException ex)
{
// Circuit is open — fast-fail with 503
_logger.LogWarning(ex, "Database circuit breaker is open, returning 503");
context.Response.StatusCode = StatusCodes.Status503ServiceUnavailable;
await context.Response.WriteAsJsonAsync(new
{
message = "Database temporarily unavailable",
detail = "The database is currently unreachable. Requests will be accepted again once connectivity is restored.",
retryAfterSeconds = 30
});
}
catch (Exception ex)
{
_logger.LogError(ex, "Unhandled exception");
context.Response.StatusCode = ex switch
// Detect database-related exceptions and return 503 instead of 500
var statusCode = ex switch
{
DatabaseCircuitOpenException => StatusCodes.Status503ServiceUnavailable,
KeyNotFoundException => StatusCodes.Status404NotFound,
InvalidOperationException => StatusCodes.Status400BadRequest,
NotSupportedException => StatusCodes.Status400BadRequest,
_ when IsDatabaseException(ex) => StatusCodes.Status503ServiceUnavailable,
_ => StatusCodes.Status500InternalServerError
};
context.Response.StatusCode = statusCode;
await context.Response.WriteAsJsonAsync(new
{
message = ex.Message,
detail = context.Response.StatusCode == StatusCodes.Status500InternalServerError ? "Internal Server Error" : null
message = statusCode == StatusCodes.Status503ServiceUnavailable
? "Database temporarily unavailable"
: ex.Message,
detail = statusCode switch
{
StatusCodes.Status503ServiceUnavailable => "The database is currently unreachable. Please retry later.",
StatusCodes.Status500InternalServerError => "Internal Server Error",
_ => null
}
});
}
}
/// <summary>
/// Check if an exception (or any of its inner exceptions) is a database-related error
/// that should be surfaced as 503 Service Unavailable.
/// </summary>
private static bool IsDatabaseException(Exception ex)
{
var current = ex;
while (current is not null)
{
if (current is NpgsqlException or DbUpdateException)
{
return true;
}
if (current is InvalidOperationException ioEx &&
(ioEx.Message.Contains("database", StringComparison.OrdinalIgnoreCase) ||
ioEx.Message.Contains("connection", StringComparison.OrdinalIgnoreCase) ||
ioEx.Message.Contains("Npgsql", StringComparison.OrdinalIgnoreCase)))
{
return true;
}
if (current is TimeoutException && current.Message.Contains("database", StringComparison.OrdinalIgnoreCase))
{
return true;
}
current = current.InnerException;
}
return false;
}
}

View File

@ -10,6 +10,7 @@ using LiveRecorder.Application.Abstractions.Recording;
using LiveRecorder.Application.Abstractions.Scripting;
using LiveRecorder.Application.Abstractions.Settings;
using LiveRecorder.Application.Abstractions.Storage;
using LiveRecorder.Application.Models.Reports;
using LiveRecorder.Application.Services;
using LiveRecorder.Infrastructure.Persistence;
using LiveRecorder.Infrastructure.Persistence.Repositories;
@ -31,6 +32,7 @@ using LiveRecorder.Infrastructure.Platforms.YouTube;
using LiveRecorder.Infrastructure.Services;
using LiveRecorder.WebApi.Middleware;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Storage;
using Microsoft.OpenApi.Models;
using Npgsql;
@ -149,6 +151,11 @@ builder.Services.AddDbContext<LiveRecorderDbContext>(options =>
maxRetryCount: 3,
maxRetryDelay: TimeSpan.FromSeconds(15),
errorCodesToAdd: null);
npgsql.ExecutionStrategy(dependencies =>
new CircuitAwareExecutionStrategy(
dependencies,
maxRetryCount: 3,
maxRetryDelay: TimeSpan.FromSeconds(15)));
}));
builder.Services.AddScoped<IUnitOfWork>(provider => provider.GetRequiredService<LiveRecorderDbContext>());
@ -231,6 +238,61 @@ app.UseMiddleware<ApiTokenAuthenticationMiddleware>();
app.MapGet("/", () => Results.Redirect("/swagger"));
app.MapControllers();
// ── Health check endpoints ────────────────────────────────────────────
// /health — liveness: is the process alive and responding?
app.MapGet("/health", () => Results.Ok(new { status = "healthy", timestamp = DateTimeOffset.UtcNow }));
// /health/ready — readiness: can we reach the database?
app.MapGet("/health/ready", async (CancellationToken cancellationToken) =>
{
var readyResult = new HealthReadyResponse
{
Status = "ready",
Timestamp = DateTimeOffset.UtcNow,
Database = new DatabaseHealthStatus()
};
if (DatabaseCircuitBreaker.IsOpen)
{
readyResult.Status = "degraded";
readyResult.Database = new DatabaseHealthStatus
{
Reachable = false,
Reason = "Circuit breaker is open",
ConsecutiveFailures = DatabaseCircuitBreaker.ConsecutiveFailures
};
return Results.Json(readyResult, statusCode: StatusCodes.Status503ServiceUnavailable);
}
try
{
// Lightweight probe: just test connectivity with a 2-second timeout
await using var connection = new NpgsqlConnection(connectionStringBuilder.ConnectionString);
var probeCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
probeCts.CancelAfter(TimeSpan.FromSeconds(2));
await connection.OpenAsync(probeCts.Token);
await using var cmd = connection.CreateCommand();
cmd.CommandText = "SELECT 1";
await cmd.ExecuteScalarAsync(probeCts.Token);
readyResult.Database = new DatabaseHealthStatus { Reachable = true };
return Results.Ok(readyResult);
}
catch (Exception ex)
{
DatabaseCircuitBreaker.RecordFailure();
readyResult.Status = "unhealthy";
readyResult.Database = new DatabaseHealthStatus
{
Reachable = false,
Reason = ex.Message,
ConsecutiveFailures = DatabaseCircuitBreaker.ConsecutiveFailures
};
return Results.Json(readyResult, statusCode: StatusCodes.Status503ServiceUnavailable);
}
});
using (var scope = app.Services.CreateScope())
{
var initializer = scope.ServiceProvider.GetRequiredService<DatabaseInitializer>();

View File

@ -0,0 +1,122 @@
using System.Reflection;
using LiveRecorder.Infrastructure.Persistence;
using Npgsql;
namespace LiveRecorder.Tests;
public sealed class DatabaseCircuitBreakerTests
{
// Mirrors the private FailureThreshold in DatabaseCircuitBreaker so the assertions
// read intentionally. Keep in sync if the production threshold changes.
private const int FailureThreshold = 5;
public DatabaseCircuitBreakerTests()
{
// The breaker is a process-wide static, so reset it to a known closed state
// before each test to keep cases independent.
DatabaseCircuitBreaker.RecordSuccess();
SetOpenedAt(DateTimeOffset.MinValue);
}
[Fact]
public void RecordFailure_below_threshold_keeps_circuit_closed()
{
for (var i = 0; i < FailureThreshold - 1; i++)
{
DatabaseCircuitBreaker.RecordFailure();
}
Assert.False(DatabaseCircuitBreaker.IsOpen);
Assert.Equal(FailureThreshold - 1, DatabaseCircuitBreaker.ConsecutiveFailures);
}
[Fact]
public void RecordFailure_at_threshold_opens_circuit()
{
for (var i = 0; i < FailureThreshold; i++)
{
DatabaseCircuitBreaker.RecordFailure();
}
Assert.True(DatabaseCircuitBreaker.IsOpen);
Assert.Equal(FailureThreshold, DatabaseCircuitBreaker.ConsecutiveFailures);
Assert.NotEqual(DateTimeOffset.MinValue, DatabaseCircuitBreaker.OpenedAt);
}
[Fact]
public void RecordSuccess_closes_circuit_and_resets_failures()
{
for (var i = 0; i < FailureThreshold; i++)
{
DatabaseCircuitBreaker.RecordFailure();
}
Assert.True(DatabaseCircuitBreaker.IsOpen);
DatabaseCircuitBreaker.RecordSuccess();
Assert.False(DatabaseCircuitBreaker.IsOpen);
Assert.Equal(0, DatabaseCircuitBreaker.ConsecutiveFailures);
}
[Fact]
public void IsOpen_after_break_duration_transitions_to_half_open()
{
for (var i = 0; i < FailureThreshold; i++)
{
DatabaseCircuitBreaker.RecordFailure();
}
Assert.True(DatabaseCircuitBreaker.IsOpen);
// The breaker reads DateTimeOffset.UtcNow directly (no injectable clock), so move
// the recorded open time past the 30s break window to simulate it elapsing.
SetOpenedAt(DateTimeOffset.UtcNow - TimeSpan.FromSeconds(31));
// The first read past the window half-opens: it permits one probe and drops the
// failure count to one below the threshold.
Assert.False(DatabaseCircuitBreaker.IsOpen);
Assert.Equal(FailureThreshold - 1, DatabaseCircuitBreaker.ConsecutiveFailures);
}
[Theory]
[InlineData("53100")] // disk_full
[InlineData("53300")] // too_many_connections
[InlineData("28P01")] // invalid_password
public void IsNonTransient_returns_true_for_known_fatal_sql_states(string sqlState)
{
var exception = new PostgresException("fatal", "FATAL", "FATAL", sqlState);
Assert.True(DatabaseCircuitBreaker.IsNonTransient(exception));
}
[Fact]
public void IsNonTransient_returns_false_for_transient_sql_state()
{
// 40001 = serialization_failure, which is retryable and not in the fatal set.
var exception = new PostgresException("retry me", "ERROR", "ERROR", "40001");
Assert.False(DatabaseCircuitBreaker.IsNonTransient(exception));
}
[Fact]
public void IsNonTransient_unwraps_inner_exceptions()
{
var inner = new PostgresException("disk full", "FATAL", "FATAL", "53100");
var wrapper = new InvalidOperationException("save failed", inner);
Assert.True(DatabaseCircuitBreaker.IsNonTransient(wrapper));
}
[Fact]
public void IsNonTransient_returns_false_for_null_and_non_postgres_exceptions()
{
Assert.False(DatabaseCircuitBreaker.IsNonTransient(null));
Assert.False(DatabaseCircuitBreaker.IsNonTransient(new InvalidOperationException("boom")));
}
private static void SetOpenedAt(DateTimeOffset value) =>
typeof(DatabaseCircuitBreaker)
.GetField("_openedAt", BindingFlags.NonPublic | BindingFlags.Static)!
.SetValue(null, value);
}

View File

@ -20,6 +20,7 @@
<ItemGroup>
<ProjectReference Include="..\..\src\LiveRecorder.Application\LiveRecorder.Application.csproj" />
<ProjectReference Include="..\..\src\LiveRecorder.Domain\LiveRecorder.Domain.csproj" />
<ProjectReference Include="..\..\src\LiveRecorder.Infrastructure\LiveRecorder.Infrastructure.csproj" />
</ItemGroup>
</Project>