Files
gpt-load/internal/keypool/cron_checker.go
2025-07-13 23:21:50 +08:00

192 lines
4.5 KiB
Go

package keypool
import (
"context"
"gpt-load/internal/config"
"gpt-load/internal/models"
"gpt-load/internal/store"
"sync"
"sync/atomic"
"time"
"github.com/sirupsen/logrus"
"gorm.io/gorm"
)
// NewCronChecker is responsible for periodically validating invalid keys.
type CronChecker struct {
DB *gorm.DB
SettingsManager *config.SystemSettingsManager
Validator *KeyValidator
LeaderLock *store.LeaderLock
stopChan chan struct{}
wg sync.WaitGroup
}
// NewCronChecker creates a new CronChecker.
func NewCronChecker(
db *gorm.DB,
settingsManager *config.SystemSettingsManager,
validator *KeyValidator,
leaderLock *store.LeaderLock,
) *CronChecker {
return &CronChecker{
DB: db,
SettingsManager: settingsManager,
Validator: validator,
LeaderLock: leaderLock,
stopChan: make(chan struct{}),
}
}
// Start begins the cron job execution.
func (s *CronChecker) Start() {
logrus.Debug("Starting CronChecker...")
s.wg.Add(1)
go s.runLoop()
}
// Stop stops the cron job, respecting the context for shutdown timeout.
func (s *CronChecker) Stop(ctx context.Context) {
close(s.stopChan)
// Wait for the goroutine to finish, or for the shutdown to time out.
done := make(chan struct{})
go func() {
s.wg.Wait()
close(done)
}()
select {
case <-done:
logrus.Info("CronChecker stopped gracefully.")
case <-ctx.Done():
logrus.Warn("CronChecker stop timed out.")
}
}
func (s *CronChecker) runLoop() {
defer s.wg.Done()
if s.LeaderLock.IsLeader() {
s.submitValidationJobs()
}
ticker := time.NewTicker(5 * time.Minute)
defer ticker.Stop()
for {
select {
case <-ticker.C:
if s.LeaderLock.IsLeader() {
logrus.Debug("CronChecker: Running as leader, submitting validation jobs.")
s.submitValidationJobs()
} else {
logrus.Debug("CronChecker: Not the leader. Standing by.")
}
case <-s.stopChan:
return
}
}
}
// submitValidationJobs finds groups whose keys need validation and validates them concurrently.
func (s *CronChecker) submitValidationJobs() {
var groups []models.Group
if err := s.DB.Find(&groups).Error; err != nil {
logrus.Errorf("CronChecker: Failed to get groups: %v", err)
return
}
validationStartTime := time.Now()
var wg sync.WaitGroup
for i := range groups {
group := &groups[i]
group.EffectiveConfig = s.SettingsManager.GetEffectiveConfig(group.Config)
interval := time.Duration(group.EffectiveConfig.KeyValidationIntervalMinutes) * time.Minute
if group.LastValidatedAt == nil || validationStartTime.Sub(*group.LastValidatedAt) > interval {
wg.Add(1)
g := group
go func() {
defer wg.Done()
s.validateGroupKeys(g)
}()
}
}
wg.Wait()
}
// validateGroupKeys validates all invalid keys for a single group concurrently.
func (s *CronChecker) validateGroupKeys(group *models.Group) {
groupProcessStart := time.Now()
var invalidKeys []models.APIKey
err := s.DB.Where("group_id = ? AND status = ?", group.ID, models.KeyStatusInvalid).Find(&invalidKeys).Error
if err != nil {
logrus.Errorf("CronChecker: Failed to get invalid keys for group %s: %v", group.Name, err)
return
}
if len(invalidKeys) == 0 {
if err := s.DB.Model(group).Update("last_validated_at", time.Now()).Error; err != nil {
logrus.Errorf("CronChecker: Failed to update last_validated_at for group %s: %v", group.Name, err)
}
logrus.Infof("CronChecker: Group '%s' has no invalid keys to check.", group.Name)
return
}
var becameValidCount int32
var keyWg sync.WaitGroup
jobs := make(chan *models.APIKey, len(invalidKeys))
concurrency := group.EffectiveConfig.KeyValidationConcurrency
for range concurrency {
keyWg.Add(1)
go func() {
defer keyWg.Done()
for {
select {
case key, ok := <-jobs:
if !ok {
return
}
isValid, _ := s.Validator.ValidateSingleKey(key, group)
if isValid {
atomic.AddInt32(&becameValidCount, 1)
}
case <-s.stopChan:
return
}
}
}()
}
DistributeLoop:
for i := range invalidKeys {
select {
case jobs <- &invalidKeys[i]:
case <-s.stopChan:
break DistributeLoop
}
}
close(jobs)
keyWg.Wait()
if err := s.DB.Model(group).Update("last_validated_at", time.Now()).Error; err != nil {
logrus.Errorf("CronChecker: Failed to update last_validated_at for group %s: %v", group.Name, err)
}
duration := time.Since(groupProcessStart)
logrus.Infof(
"CronChecker: Group '%s' validation finished. Total checked: %d, became valid: %d. Duration: %s.",
group.Name,
len(invalidKeys),
becameValidCount,
duration.String(),
)
}