fengling-risk-control/Fengling.RiskControl.Client/Failover/FailoverStrategy.cs
Sam 293209b1dc feat: add Fengling.RiskControl.Client SDK
- Implement RedisCounterService for rate limiting
- Implement RuleLoader with timer refresh
- Implement RiskEvaluator for local rule evaluation
- Implement SamplingService for CAP events
- Implement CapEventPublisher for async event publishing
- Implement FailoverStrategy for Redis failure handling
- Add configuration classes and DI extensions
- Add unit tests (9 tests)
- Add NuGet publishing script
2026-02-06 00:16:53 +08:00

177 lines
5.3 KiB
C#

using Fengling.RiskControl.Configuration;
using Fengling.RiskControl.Counter;
using Fengling.RiskControl.Evaluation;
using Fengling.RiskControl.Rules;
using Microsoft.Extensions.Logging;
using StackExchange.Redis;
namespace Fengling.RiskControl.Failover;
/// <summary>
/// Contract for a component that tracks Redis availability and gates
/// Redis-dependent work according to the current <see cref="RiskControlMode"/>.
/// </summary>
public interface IFailoverStrategy
{
    /// <summary>
    /// Probes the backing store once.
    /// </summary>
    /// <returns>
    /// A task resolving to <c>true</c> when the probe succeeded,
    /// otherwise <c>false</c>.
    /// </returns>
    Task<bool> IsHealthyAsync();

    /// <summary>
    /// Gets the mode the strategy is currently operating in.
    /// </summary>
    /// <returns>The current <see cref="RiskControlMode"/>.</returns>
    RiskControlMode GetCurrentMode();

    /// <summary>
    /// Runs <paramref name="action"/> subject to the current failover mode.
    /// </summary>
    /// <param name="action">The asynchronous work to run.</param>
    /// <returns>A task that completes when the gated work has finished.</returns>
    Task ExecuteWithFailoverAsync(Func<Task> action);

    /// <summary>
    /// Runs <paramref name="action"/> subject to the current failover mode
    /// and returns its result.
    /// </summary>
    /// <typeparam name="T">Result type produced by <paramref name="action"/>.</typeparam>
    /// <param name="action">The asynchronous work to run.</param>
    /// <returns>A task resolving to the value produced by <paramref name="action"/>.</returns>
    Task<T> ExecuteWithFailoverAsync<T>(Func<Task<T>> action);
}
/// <summary>
/// Operating modes of the risk-control client, ordered by how long Redis
/// has been unavailable (see <c>FailoverStrategy.DetermineMode</c>).
/// </summary>
public enum RiskControlMode
{
    /// <summary>Redis is considered available; gated actions are executed.</summary>
    Normal = 0,

    /// <summary>
    /// Selected while the outage is shorter than the configured
    /// quick-fail threshold; gated actions fail immediately.
    /// </summary>
    QuickFail = 1,

    /// <summary>
    /// Selected once the outage exceeds the quick-fail threshold but is still
    /// shorter than the deny-new-users threshold.
    /// </summary>
    DenyNewUsers = 2,

    /// <summary>Selected when the outage has outlasted both thresholds.</summary>
    Maintenance = 3,
}
/// <summary>
/// Monitors Redis with a periodic ping and derives a <see cref="RiskControlMode"/>
/// from how long Redis has been unreachable. Work submitted through
/// <c>ExecuteWithFailoverAsync</c> runs only while the mode is
/// <see cref="RiskControlMode.Normal"/>; in every other mode a
/// <see cref="RedisConnectionException"/> is thrown without invoking the action.
/// </summary>
/// <remarks>
/// The periodic check is only started when <c>RedisFailover.Enabled</c> is set
/// on the supplied options. Without it the mode stays <see cref="RiskControlMode.Normal"/>.
/// </remarks>
public class FailoverStrategy : IFailoverStrategy, IDisposable
{
    /// <summary>Delay before the first health check and period between subsequent checks.</summary>
    private static readonly TimeSpan HealthCheckInterval = TimeSpan.FromSeconds(5);

    private readonly IConnectionMultiplexer _redis;

    // NOTE(review): _counterService and _ruleLoader are stored but not used anywhere
    // in this class; they are kept so the constructor signature stays unchanged.
    private readonly IRiskCounterService _counterService;
    private readonly IRuleLoader _ruleLoader;

    private readonly RiskControlClientOptions _options;
    private readonly ILogger<FailoverStrategy> _logger;
    private readonly Timer? _healthCheckTimer;

    // Written by the timer callback and read by request threads, hence volatile.
    private volatile RiskControlMode _currentMode = RiskControlMode.Normal;

    // Start of the current outage; DateTime.MinValue means "no outage in progress".
    // Only touched inside CheckHealthAsync, which the re-entrancy guard serializes.
    private DateTime _lastFailureTime = DateTime.MinValue;

    // 1 while a scheduled health check is running, 0 otherwise.
    private int _healthCheckInProgress;

    private volatile bool _disposed;

    /// <summary>
    /// Creates the strategy and, when failover is enabled in
    /// <paramref name="options"/>, starts the periodic Redis health check.
    /// </summary>
    /// <param name="redis">Multiplexer used to ping Redis.</param>
    /// <param name="counterService">Counter service (currently only stored).</param>
    /// <param name="ruleLoader">Rule loader (currently only stored).</param>
    /// <param name="options">Client options; <c>RedisFailover</c> controls enablement and thresholds.</param>
    /// <param name="logger">Logger for mode transitions and alerts.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="redis"/>, <paramref name="options"/> or <paramref name="logger"/> is <c>null</c>.
    /// </exception>
    public FailoverStrategy(
        IConnectionMultiplexer redis,
        IRiskCounterService counterService,
        IRuleLoader ruleLoader,
        RiskControlClientOptions options,
        ILogger<FailoverStrategy> logger)
    {
        _redis = redis ?? throw new ArgumentNullException(nameof(redis));
        _counterService = counterService;
        _ruleLoader = ruleLoader;
        _options = options ?? throw new ArgumentNullException(nameof(options));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));

        if (_options.RedisFailover.Enabled)
        {
            _healthCheckTimer = new Timer(
                state => { _ = RunScheduledHealthCheckAsync(); },
                null,
                HealthCheckInterval,
                HealthCheckInterval);
        }
    }

    /// <inheritdoc />
    public RiskControlMode GetCurrentMode() => _currentMode;

    /// <summary>
    /// Pings Redis once through the default database.
    /// </summary>
    /// <returns><c>true</c> if the ping completed; <c>false</c> if it threw.</returns>
    public async Task<bool> IsHealthyAsync()
    {
        try
        {
            await _redis.GetDatabase().PingAsync().ConfigureAwait(false);
            return true;
        }
        catch (Exception ex)
        {
            // A failed ping is an expected outcome during an outage, so it is
            // reported as "unhealthy" rather than propagated; keep the cause for diagnosis.
            _logger.LogDebug(ex, "Redis health check ping failed");
            return false;
        }
    }

    /// <summary>
    /// Timer entry point: runs one health check unless the instance is disposed
    /// or a previous check is still in flight, and never lets an exception escape
    /// (the returned task is not observed by the timer).
    /// </summary>
    private async Task RunScheduledHealthCheckAsync()
    {
        if (_disposed)
        {
            return;
        }

        // A ping against a dead server can outlast the timer period; skip this
        // tick instead of running two checks concurrently.
        if (Interlocked.CompareExchange(ref _healthCheckInProgress, 1, 0) != 0)
        {
            return;
        }

        try
        {
            await CheckHealthAsync().ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Unexpected error during Redis health check");
        }
        finally
        {
            Volatile.Write(ref _healthCheckInProgress, 0);
        }
    }

    /// <summary>
    /// Performs one health probe and updates the current mode: back to
    /// <see cref="RiskControlMode.Normal"/> on success, otherwise to the mode
    /// matching the elapsed outage duration. An alert is raised on each
    /// transition into a degraded mode.
    /// </summary>
    private async Task CheckHealthAsync()
    {
        var isHealthy = await IsHealthyAsync().ConfigureAwait(false);

        if (isHealthy)
        {
            if (_currentMode != RiskControlMode.Normal)
            {
                _logger.LogInformation("Redis restored, switching back to Normal mode");
                _currentMode = RiskControlMode.Normal;
            }

            _lastFailureTime = DateTime.MinValue;
            return;
        }

        var now = DateTime.UtcNow;
        if (_lastFailureTime == DateTime.MinValue)
        {
            // First failed probe of this outage: start the clock.
            _lastFailureTime = now;
        }

        var failureDuration = (now - _lastFailureTime).TotalSeconds;
        var newMode = DetermineMode(failureDuration);
        if (newMode == _currentMode)
        {
            return;
        }

        _currentMode = newMode;
        _logger.LogWarning("Redis failure detected, duration={Duration}s, switching to mode={Mode}",
            failureDuration, newMode);
        await PublishFailoverAlertAsync(newMode).ConfigureAwait(false);
    }

    /// <summary>
    /// Maps an outage duration to a degraded mode using the configured thresholds.
    /// </summary>
    /// <param name="failureDuration">Seconds since the first failed probe.</param>
    /// <returns>
    /// <see cref="RiskControlMode.QuickFail"/> below the quick-fail threshold,
    /// <see cref="RiskControlMode.DenyNewUsers"/> below the deny-new-users threshold,
    /// otherwise <see cref="RiskControlMode.Maintenance"/>.
    /// </returns>
    private RiskControlMode DetermineMode(double failureDuration)
    {
        if (failureDuration < _options.RedisFailover.QuickFailThresholdSeconds)
        {
            return RiskControlMode.QuickFail;
        }

        if (failureDuration < _options.RedisFailover.DenyNewUsersThresholdSeconds)
        {
            return RiskControlMode.DenyNewUsers;
        }

        return RiskControlMode.Maintenance;
    }

    /// <summary>
    /// Runs <paramref name="action"/> if the current mode allows it.
    /// </summary>
    /// <param name="action">The asynchronous work to run.</param>
    /// <returns>A task that completes when <paramref name="action"/> has completed.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="action"/> is <c>null</c>.</exception>
    /// <exception cref="RedisConnectionException">The current mode is not <see cref="RiskControlMode.Normal"/>.</exception>
    public Task ExecuteWithFailoverAsync(Func<Task> action)
    {
        if (action is null)
        {
            throw new ArgumentNullException(nameof(action));
        }

        // Adapt to the generic overload; the bool result is a placeholder.
        return ExecuteWithFailoverAsync(async () =>
        {
            await action().ConfigureAwait(false);
            return true;
        });
    }

    /// <summary>
    /// Runs <paramref name="action"/> and returns its result if the current mode allows it.
    /// </summary>
    /// <typeparam name="T">Result type produced by <paramref name="action"/>.</typeparam>
    /// <param name="action">The asynchronous work to run.</param>
    /// <returns>The value produced by <paramref name="action"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="action"/> is <c>null</c>.</exception>
    /// <exception cref="RedisConnectionException">The current mode is not <see cref="RiskControlMode.Normal"/>.</exception>
    public async Task<T> ExecuteWithFailoverAsync<T>(Func<Task<T>> action)
    {
        if (action is null)
        {
            throw new ArgumentNullException(nameof(action));
        }

        switch (_currentMode)
        {
            case RiskControlMode.QuickFail:
                _logger.LogWarning("QuickFail mode: failing fast");
                throw new RedisConnectionException(ConnectionFailureType.UnableToConnect,
                    "Redis is temporarily unavailable");

            case RiskControlMode.DenyNewUsers:
                // NOTE(review): the log text mentions a session check, but no such
                // check is performed here; every caller is rejected in this mode.
                _logger.LogWarning("DenyNewUsers mode: checking if user has existing session");
                throw new RedisConnectionException(ConnectionFailureType.UnableToConnect,
                    "Redis is temporarily unavailable, new users denied");

            case RiskControlMode.Maintenance:
                _logger.LogError("Maintenance mode: Redis unavailable for extended period");
                throw new RedisConnectionException(ConnectionFailureType.UnableToConnect,
                    "Redis maintenance in progress");

            case RiskControlMode.Normal:
            default:
                return await action().ConfigureAwait(false);
        }
    }

    /// <summary>
    /// Raises an alert for a transition into a degraded mode. Currently this only
    /// writes an error-level log entry and completes synchronously.
    /// </summary>
    /// <param name="mode">The mode being entered.</param>
    /// <returns>A completed task.</returns>
    private Task PublishFailoverAlertAsync(RiskControlMode mode)
    {
        try
        {
            // CAP alert publishing would be handled by the main application
            _logger.LogError("ALERT: RiskControl entering {Mode} mode due to Redis failure", mode);
        }
        catch (Exception ex)
        {
            // Alerting is best-effort; it must not break the health check.
            _logger.LogError(ex, "Failed to publish failover alert");
        }

        return Task.CompletedTask;
    }

    /// <summary>
    /// Stops the periodic health check. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        // Set the flag first so a callback racing with disposal exits early.
        _disposed = true;
        _healthCheckTimer?.Dispose();
    }
}