using Fengling.RiskControl.Configuration;
using Fengling.RiskControl.Counter;
using Fengling.RiskControl.Evaluation;
using Fengling.RiskControl.Rules;
using Microsoft.Extensions.Logging;
using StackExchange.Redis;

namespace Fengling.RiskControl.Failover;

/// <summary>
/// Gates Redis-dependent work behind a health-driven operating mode.
/// </summary>
public interface IFailoverStrategy
{
    /// <summary>Pings Redis once.</summary>
    /// <returns><c>true</c> when the ping completes; <c>false</c> when it throws.</returns>
    Task<bool> IsHealthyAsync();

    /// <summary>Returns the mode most recently selected by the health check.</summary>
    RiskControlMode GetCurrentMode();

    /// <summary>
    /// Runs <paramref name="action"/> when the current mode allows it; otherwise throws.
    /// </summary>
    /// <param name="action">The work to run.</param>
    Task ExecuteWithFailoverAsync(Func<Task> action);

    /// <summary>
    /// Runs <paramref name="action"/> when the current mode allows it and returns its result;
    /// otherwise throws.
    /// </summary>
    /// <typeparam name="T">Result type produced by <paramref name="action"/>.</typeparam>
    /// <param name="action">The work to run.</param>
    Task<T> ExecuteWithFailoverAsync<T>(Func<Task<T>> action);
}

/// <summary>
/// Operating modes selected from how long Redis has been failing its health check.
/// </summary>
public enum RiskControlMode
{
    /// <summary>Redis is healthy; actions are executed.</summary>
    Normal = 0,

    /// <summary>Failure has lasted less than the configured quick-fail threshold.</summary>
    QuickFail = 1,

    /// <summary>Failure has lasted less than the configured deny-new-users threshold.</summary>
    DenyNewUsers = 2,

    /// <summary>Failure has lasted at least the deny-new-users threshold.</summary>
    Maintenance = 3
}

/// <summary>
/// Default <see cref="IFailoverStrategy"/>: a periodic timer pings Redis and escalates
/// <see cref="RiskControlMode"/> the longer the outage lasts. While the mode is anything
/// other than <see cref="RiskControlMode.Normal"/>, guarded actions are rejected with a
/// <see cref="RedisConnectionException"/>.
/// </summary>
public class FailoverStrategy : IFailoverStrategy, IDisposable
{
    /// <summary>Delay before the first health check and period between checks.</summary>
    private static readonly TimeSpan HealthCheckInterval = TimeSpan.FromSeconds(5);

    private readonly IConnectionMultiplexer _redis;

    // Injected but not referenced anywhere else in this class.
    private readonly IRiskCounterService _counterService;
    private readonly IRuleLoader _ruleLoader;

    private readonly RiskControlClientOptions _options;
    private readonly ILogger<FailoverStrategy> _logger;
    private readonly Timer? _healthCheckTimer;

    // Written by the timer callback, read by request threads.
    private volatile RiskControlMode _currentMode = RiskControlMode.Normal;

    // Only touched inside the (serialized) health check.
    private DateTime _lastFailureTime = DateTime.MinValue;

    // 0 = idle, 1 = a health check is running; prevents overlapping timer callbacks.
    private int _healthCheckInProgress;

    private volatile bool _disposed;

    /// <summary>
    /// Creates the strategy and, when <c>options.RedisFailover.Enabled</c> is set,
    /// starts the periodic health check.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="redis"/>, <paramref name="options"/> or <paramref name="logger"/> is null.
    /// </exception>
    public FailoverStrategy(
        IConnectionMultiplexer redis,
        IRiskCounterService counterService,
        IRuleLoader ruleLoader,
        RiskControlClientOptions options,
        ILogger<FailoverStrategy> logger)
    {
        ArgumentNullException.ThrowIfNull(redis);
        ArgumentNullException.ThrowIfNull(options);
        ArgumentNullException.ThrowIfNull(logger);

        _redis = redis;
        _counterService = counterService;
        _ruleLoader = ruleLoader;
        _options = options;
        _logger = logger;

        if (_options.RedisFailover.Enabled)
        {
            _healthCheckTimer = new Timer(
                _ => _ = RunHealthCheckAsync(),
                null,
                HealthCheckInterval,
                HealthCheckInterval);
        }
    }

    /// <inheritdoc />
    public RiskControlMode GetCurrentMode() => _currentMode;

    /// <inheritdoc />
    public async Task<bool> IsHealthyAsync()
    {
        try
        {
            await _redis.GetDatabase().PingAsync().ConfigureAwait(false);
            return true;
        }
        catch (Exception ex)
        {
            // Any failure counts as "unhealthy"; keep the cause available for diagnosis.
            _logger.LogDebug(ex, "Redis health probe failed");
            return false;
        }
    }

    /// <summary>
    /// Timer entry point. Skips the tick when a previous check is still running or the
    /// instance is disposed, and logs anything the check throws so the fire-and-forget
    /// task never faults unobserved.
    /// </summary>
    private async Task RunHealthCheckAsync()
    {
        if (_disposed)
        {
            return;
        }

        if (Interlocked.CompareExchange(ref _healthCheckInProgress, 1, 0) != 0)
        {
            return;
        }

        try
        {
            await CheckHealthAsync().ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Redis health check failed unexpectedly");
        }
        finally
        {
            Interlocked.Exchange(ref _healthCheckInProgress, 0);
        }
    }

    /// <summary>
    /// Probes Redis and updates the current mode: back to Normal when healthy, otherwise
    /// the mode that matches the elapsed failure duration.
    /// </summary>
    private async Task CheckHealthAsync()
    {
        if (await IsHealthyAsync().ConfigureAwait(false))
        {
            if (_currentMode != RiskControlMode.Normal)
            {
                _logger.LogInformation("Redis restored, switching back to Normal mode");
                _currentMode = RiskControlMode.Normal;
            }

            // Always clear the marker so the next outage is timed from its own start.
            _lastFailureTime = DateTime.MinValue;
            return;
        }

        var now = DateTime.UtcNow;
        if (_lastFailureTime == DateTime.MinValue)
        {
            _lastFailureTime = now;
        }

        var failureDuration = (now - _lastFailureTime).TotalSeconds;
        var newMode = DetermineMode(failureDuration);
        if (newMode == _currentMode)
        {
            return;
        }

        _currentMode = newMode;
        _logger.LogWarning(
            "Redis failure detected, duration={Duration}s, switching to mode={Mode}",
            failureDuration,
            newMode);
        await PublishFailoverAlertAsync(newMode).ConfigureAwait(false);
    }

    /// <summary>Maps an outage duration in seconds to the mode for that duration.</summary>
    /// <param name="failureDuration">Seconds since the first failed probe of this outage.</param>
    private RiskControlMode DetermineMode(double failureDuration)
    {
        var thresholds = _options.RedisFailover;

        if (failureDuration < thresholds.QuickFailThresholdSeconds)
        {
            return RiskControlMode.QuickFail;
        }

        if (failureDuration < thresholds.DenyNewUsersThresholdSeconds)
        {
            return RiskControlMode.DenyNewUsers;
        }

        return RiskControlMode.Maintenance;
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="action"/> is null.</exception>
    /// <exception cref="RedisConnectionException">The current mode is not Normal.</exception>
    public Task ExecuteWithFailoverAsync(Func<Task> action)
    {
        ArgumentNullException.ThrowIfNull(action);

        // Reuse the generic overload; the bool result is a placeholder and is discarded.
        return ExecuteWithFailoverAsync(async () =>
        {
            await action().ConfigureAwait(false);
            return true;
        });
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="action"/> is null.</exception>
    /// <exception cref="RedisConnectionException">The current mode is not Normal.</exception>
    public async Task<T> ExecuteWithFailoverAsync<T>(Func<Task<T>> action)
    {
        ArgumentNullException.ThrowIfNull(action);

        switch (_currentMode)
        {
            case RiskControlMode.QuickFail:
                _logger.LogWarning("QuickFail mode: failing fast");
                throw new RedisConnectionException(
                    ConnectionFailureType.UnableToConnect,
                    "Redis is temporarily unavailable");

            case RiskControlMode.DenyNewUsers:
                // NOTE: no session lookup is performed here; the call is rejected regardless.
                _logger.LogWarning("DenyNewUsers mode: checking if user has existing session");
                throw new RedisConnectionException(
                    ConnectionFailureType.UnableToConnect,
                    "Redis is temporarily unavailable, new users denied");

            case RiskControlMode.Maintenance:
                _logger.LogError("Maintenance mode: Redis unavailable for extended period");
                throw new RedisConnectionException(
                    ConnectionFailureType.UnableToConnect,
                    "Redis maintenance in progress");

            default:
                // Normal (and any unrecognized value) runs the action.
                return await action().ConfigureAwait(false);
        }
    }

    /// <summary>
    /// Records a failover alert in the log. Completes synchronously.
    /// </summary>
    /// <param name="mode">The mode being entered.</param>
    private Task PublishFailoverAlertAsync(RiskControlMode mode)
    {
        // CAP alert publishing would be handled by the main application
        _logger.LogError("ALERT: RiskControl entering {Mode} mode due to Redis failure", mode);
        return Task.CompletedTask;
    }

    /// <summary>Stops the health-check timer. Safe to call more than once.</summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        // Set the flag first so a callback that is already queued exits early.
        _disposed = true;
        _healthCheckTimer?.Dispose();
    }
}