// Commit summary:
//   - Implement RedisCounterService for rate limiting
//   - Implement RuleLoader with timer refresh
//   - Implement RiskEvaluator for local rule evaluation
//   - Implement SamplingService for CAP events
//   - Implement CapEventPublisher for async event publishing
//   - Implement FailoverStrategy for Redis failure handling
//   - Add configuration classes and DI extensions
//   - Add unit tests (9 tests)
//   - Add NuGet publishing script
//
// File: 177 lines, 5.3 KiB, C#
using Fengling.RiskControl.Configuration;
|
|
using Fengling.RiskControl.Counter;
|
|
using Fengling.RiskControl.Evaluation;
|
|
using Fengling.RiskControl.Rules;
|
|
using Microsoft.Extensions.Logging;
|
|
using StackExchange.Redis;
|
|
|
|
namespace Fengling.RiskControl.Failover;
|
|
|
|
/// <summary>
/// Contract for a component that tracks the health of a backing store and
/// gates operations according to the current <see cref="RiskControlMode"/>.
/// </summary>
public interface IFailoverStrategy
{
    /// <summary>
    /// Returns the mode the strategy is currently operating in.
    /// </summary>
    RiskControlMode GetCurrentMode();

    /// <summary>
    /// Probes the backing store once.
    /// </summary>
    /// <returns>
    /// A task that resolves to <c>true</c> when the probe succeeds and
    /// <c>false</c> otherwise.
    /// </returns>
    Task<bool> IsHealthyAsync();

    /// <summary>
    /// Runs <paramref name="action"/> subject to the current mode.
    /// </summary>
    /// <param name="action">The operation to run.</param>
    Task ExecuteWithFailoverAsync(Func<Task> action);

    /// <summary>
    /// Runs <paramref name="action"/> subject to the current mode and returns its result.
    /// </summary>
    /// <typeparam name="T">Result type produced by <paramref name="action"/>.</typeparam>
    /// <param name="action">The operation to run.</param>
    Task<T> ExecuteWithFailoverAsync<T>(Func<Task<T>> action);
}
|
|
|
|
/// <summary>
/// Operating modes reported by <see cref="IFailoverStrategy.GetCurrentMode"/>.
/// </summary>
public enum RiskControlMode
{
    /// <summary>Default mode; operations are executed as requested.</summary>
    Normal = 0,

    /// <summary>
    /// Selected by <c>FailoverStrategy</c> while the outage is shorter than
    /// the configured quick-fail threshold.
    /// </summary>
    QuickFail = 1,

    /// <summary>
    /// Selected by <c>FailoverStrategy</c> once the outage has reached the
    /// quick-fail threshold but is still shorter than the deny-new-users threshold.
    /// </summary>
    DenyNewUsers = 2,

    /// <summary>
    /// Selected by <c>FailoverStrategy</c> once the outage has reached the
    /// deny-new-users threshold.
    /// </summary>
    Maintenance = 3,
}
|
|
|
|
/// <summary>
/// <see cref="IFailoverStrategy"/> implementation that periodically pings Redis
/// and escalates through <see cref="RiskControlMode"/> values the longer the
/// outage lasts. While the mode is anything other than
/// <see cref="RiskControlMode.Normal"/>, guarded operations are rejected with a
/// <see cref="RedisConnectionException"/> instead of being executed.
/// </summary>
public class FailoverStrategy : IFailoverStrategy, IDisposable
{
    // Delay before the first probe and the period between subsequent probes.
    private static readonly TimeSpan HealthCheckInterval = TimeSpan.FromSeconds(5);

    private readonly IConnectionMultiplexer _redis;
    private readonly IRiskCounterService _counterService;
    private readonly IRuleLoader _ruleLoader;
    private readonly RiskControlClientOptions _options;
    private readonly ILogger<FailoverStrategy> _logger;
    private readonly Timer? _healthCheckTimer;

    // Written from the timer callback and read from request threads, hence volatile.
    private volatile RiskControlMode _currentMode = RiskControlMode.Normal;

    // Start of the current outage; DateTime.MinValue means "no outage in progress".
    // Only touched inside CheckHealthAsync, which the re-entrancy guard serializes
    // for timer-driven probes.
    private DateTime _lastFailureTime = DateTime.MinValue;

    // 1 while a timer-driven health check is in flight, 0 otherwise.
    private int _healthCheckRunning;

    private volatile bool _disposed;

    /// <summary>
    /// Creates the strategy and, when <c>options.RedisFailover.Enabled</c> is set,
    /// starts the periodic health check.
    /// </summary>
    /// <param name="redis">Multiplexer used to ping Redis.</param>
    /// <param name="counterService">Counter service (stored; not used by this class yet).</param>
    /// <param name="ruleLoader">Rule loader (stored; not used by this class yet).</param>
    /// <param name="options">Client options supplying the failover switch and thresholds.</param>
    /// <param name="logger">Logger for mode transitions and alerts.</param>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is <c>null</c>.</exception>
    public FailoverStrategy(
        IConnectionMultiplexer redis,
        IRiskCounterService counterService,
        IRuleLoader ruleLoader,
        RiskControlClientOptions options,
        ILogger<FailoverStrategy> logger)
    {
        _redis = redis;
        _counterService = counterService;
        _ruleLoader = ruleLoader;
        _options = options ?? throw new ArgumentNullException(nameof(options));
        _logger = logger;

        if (_options.RedisFailover.Enabled)
        {
            _healthCheckTimer = new Timer(
                _ => _ = RunHealthCheckAsync(),
                null,
                HealthCheckInterval,
                HealthCheckInterval);
        }
    }

    /// <inheritdoc />
    public RiskControlMode GetCurrentMode() => _currentMode;

    /// <summary>
    /// Pings Redis once.
    /// </summary>
    /// <returns><c>true</c> if the ping completed; <c>false</c> if it threw.</returns>
    public async Task<bool> IsHealthyAsync()
    {
        try
        {
            var db = _redis.GetDatabase();
            await db.PingAsync().ConfigureAwait(false);
            return true;
        }
        catch (Exception ex)
        {
            // Any failure counts as "unhealthy"; keep the cause visible at Debug level.
            _logger.LogDebug(ex, "Redis health check ping failed");
            return false;
        }
    }

    /// <summary>
    /// Timer entry point. Skips the tick when the instance is disposed or a
    /// previous check is still running, and makes sure no exception escapes the
    /// fire-and-forget task unobserved.
    /// </summary>
    private async Task RunHealthCheckAsync()
    {
        if (_disposed)
        {
            return;
        }

        // A slow ping can outlast the timer period; never run two checks at once.
        if (Interlocked.CompareExchange(ref _healthCheckRunning, 1, 0) != 0)
        {
            return;
        }

        try
        {
            await CheckHealthAsync().ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Redis health check failed unexpectedly");
        }
        finally
        {
            Interlocked.Exchange(ref _healthCheckRunning, 0);
        }
    }

    /// <summary>
    /// Probes Redis and updates the current mode: escalates according to how
    /// long the outage has lasted, or returns to Normal once the probe succeeds.
    /// </summary>
    private async Task CheckHealthAsync()
    {
        var isHealthy = await IsHealthyAsync().ConfigureAwait(false);

        if (isHealthy)
        {
            if (_currentMode != RiskControlMode.Normal)
            {
                _logger.LogInformation("Redis restored, switching back to Normal mode");
                _currentMode = RiskControlMode.Normal;
            }

            // Always clear the outage start so the next outage is timed from scratch.
            _lastFailureTime = DateTime.MinValue;
            return;
        }

        if (_lastFailureTime == DateTime.MinValue)
        {
            _lastFailureTime = DateTime.UtcNow;
        }

        var failureDuration = (DateTime.UtcNow - _lastFailureTime).TotalSeconds;
        var newMode = DetermineMode(failureDuration);

        if (newMode != _currentMode)
        {
            _currentMode = newMode;
            _logger.LogWarning("Redis failure detected, duration={Duration}s, switching to mode={Mode}",
                failureDuration, newMode);

            await PublishFailoverAlertAsync(newMode).ConfigureAwait(false);
        }
    }

    /// <summary>
    /// Maps an outage duration onto a mode using the configured thresholds.
    /// </summary>
    /// <param name="failureDuration">Seconds since the outage was first observed.</param>
    private RiskControlMode DetermineMode(double failureDuration)
    {
        if (failureDuration < _options.RedisFailover.QuickFailThresholdSeconds)
        {
            return RiskControlMode.QuickFail;
        }

        if (failureDuration < _options.RedisFailover.DenyNewUsersThresholdSeconds)
        {
            return RiskControlMode.DenyNewUsers;
        }

        return RiskControlMode.Maintenance;
    }

    /// <summary>
    /// Runs <paramref name="action"/> when the mode is Normal; otherwise fails
    /// with a <see cref="RedisConnectionException"/>.
    /// </summary>
    /// <param name="action">The operation to run.</param>
    /// <exception cref="ArgumentNullException"><paramref name="action"/> is <c>null</c>.</exception>
    public Task ExecuteWithFailoverAsync(Func<Task> action)
    {
        if (action is null)
        {
            throw new ArgumentNullException(nameof(action));
        }

        // Reuse the generic overload; the boolean result is a throwaway.
        return ExecuteWithFailoverAsync(async () =>
        {
            await action().ConfigureAwait(false);
            return true;
        });
    }

    /// <summary>
    /// Runs <paramref name="action"/> and returns its result when the mode is
    /// Normal; otherwise fails with a <see cref="RedisConnectionException"/>.
    /// </summary>
    /// <typeparam name="T">Result type produced by <paramref name="action"/>.</typeparam>
    /// <param name="action">The operation to run.</param>
    /// <exception cref="ArgumentNullException"><paramref name="action"/> is <c>null</c>.</exception>
    /// <exception cref="RedisConnectionException">
    /// The current mode is QuickFail, DenyNewUsers or Maintenance.
    /// </exception>
    public async Task<T> ExecuteWithFailoverAsync<T>(Func<Task<T>> action)
    {
        if (action is null)
        {
            throw new ArgumentNullException(nameof(action));
        }

        // Read the volatile field once so the whole decision uses one consistent value.
        var mode = _currentMode;

        switch (mode)
        {
            case RiskControlMode.QuickFail:
                _logger.LogWarning("QuickFail mode: failing fast");
                throw new RedisConnectionException(ConnectionFailureType.UnableToConnect,
                    "Redis is temporarily unavailable");

            case RiskControlMode.DenyNewUsers:
                _logger.LogWarning("DenyNewUsers mode: checking if user has existing session");
                throw new RedisConnectionException(ConnectionFailureType.UnableToConnect,
                    "Redis is temporarily unavailable, new users denied");

            case RiskControlMode.Maintenance:
                _logger.LogError("Maintenance mode: Redis unavailable for extended period");
                throw new RedisConnectionException(ConnectionFailureType.UnableToConnect,
                    "Redis maintenance in progress");

            case RiskControlMode.Normal:
            default:
                return await action().ConfigureAwait(false);
        }
    }

    /// <summary>
    /// Emits the failover alert. Currently this only writes an error-level log entry.
    /// </summary>
    /// <param name="mode">The mode that was just entered.</param>
    private Task PublishFailoverAlertAsync(RiskControlMode mode)
    {
        try
        {
            // CAP alert publishing would be handled by the main application
            _logger.LogError("ALERT: RiskControl entering {Mode} mode due to Redis failure", mode);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to publish failover alert");
        }

        return Task.CompletedTask;
    }

    /// <summary>
    /// Stops the periodic health check. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        // Flag first so a tick that is already queued becomes a no-op.
        _disposed = true;
        _healthCheckTimer?.Dispose();
        GC.SuppressFinalize(this);
    }
}
|