feat: 并发池功能

2025-07-06 02:00:21 +08:00
parent e6fe973ea4
commit 4dc17c12d5
9 changed files with 298 additions and 137 deletions
--- a/internal/config/manager.go
+++ b/internal/config/manager.go
@@ -47,6 +47,7 @@ type Config struct {
 	CORS        types.CORSConfig        `json:"cors"`
 	Performance types.PerformanceConfig `json:"performance"`
 	Log         types.LogConfig         `json:"log"`
+	Database    types.DatabaseConfig    `json:"database"`
 	RedisDSN    string                  `json:"redis_dsn"`
 }

@@ -102,6 +103,7 @@ func (m *Manager) ReloadConfig() error {
 		Performance: types.PerformanceConfig{
 			MaxConcurrentRequests: parseInteger(os.Getenv("MAX_CONCURRENT_REQUESTS"), 100),
 			EnableGzip:            parseBoolean(os.Getenv("ENABLE_GZIP"), true),
+			KeyValidationPoolSize: parseInteger(os.Getenv("KEY_VALIDATION_POOL_SIZE"), 10),
 		},
 		Log: types.LogConfig{
 			Level:         getEnvOrDefault("LOG_LEVEL", "info"),
@@ -110,6 +112,10 @@ func (m *Manager) ReloadConfig() error {
 			FilePath:      getEnvOrDefault("LOG_FILE_PATH", "logs/app.log"),
 			EnableRequest: parseBoolean(os.Getenv("LOG_ENABLE_REQUEST"), true),
 		},
+		Database: types.DatabaseConfig{
+			DSN:         os.Getenv("DATABASE_DSN"),
+			AutoMigrate: parseBoolean(os.Getenv("DB_AUTO_MIGRATE"), true),
+		},
 		RedisDSN: os.Getenv("REDIS_DSN"),
 	}
 	m.config = config
@@ -154,6 +160,11 @@ func (m *Manager) GetRedisDSN() string {
 	return m.config.RedisDSN
 }

+// GetDatabaseConfig returns the database configuration (DSN and auto-migrate flag) held by the manager.
+func (m *Manager) GetDatabaseConfig() types.DatabaseConfig {
+	return m.config.Database
+}
+
 // GetEffectiveServerConfig returns server configuration merged with system settings
 func (m *Manager) GetEffectiveServerConfig() types.ServerConfig {
 	config := m.config.Server
@@ -217,6 +228,7 @@ func (m *Manager) DisplayConfig() {
 	}
 	logrus.Infof("   CORS: %s", corsStatus)
 	logrus.Infof("   Max concurrent requests: %d", perfConfig.MaxConcurrentRequests)
+	logrus.Infof("   Concurrency pool size: %d", perfConfig.KeyValidationPoolSize)

 	gzipStatus := "disabled"
 	if perfConfig.EnableGzip {
--- a/internal/config/system_settings.go
+++ b/internal/config/system_settings.go
@@ -37,7 +37,6 @@ type SystemSettings struct {

 	// 密钥验证
 	KeyValidationIntervalMinutes    int `json:"key_validation_interval_minutes" default:"60" name:"定时验证周期" category:"密钥验证" desc:"后台定时验证密钥的默认周期（分钟）" validate:"min=5"`
-	KeyValidationConcurrency        int `json:"key_validation_concurrency" default:"10" name:"验证并发数" category:"密钥验证" desc:"执行密钥验证时的并发 goroutine 数量" validate:"min=10,max=200"`
 	KeyValidationTaskTimeoutMinutes int `json:"key_validation_task_timeout_minutes" default:"60" name:"手动验证超时" category:"密钥验证" desc:"手动触发的全量验证任务的超时时间（分钟）" validate:"min=10"`
 }

@@ -375,8 +374,8 @@ func (sm *SystemSettingsManager) DisplayCurrentSettings() {
 	logrus.Infof("   Request timeouts: request=%ds, response=%ds, idle_conn=%ds",
 		sm.settings.RequestTimeout, sm.settings.ResponseTimeout, sm.settings.IdleConnTimeout)
 	logrus.Infof("   Request log retention: %d days", sm.settings.RequestLogRetentionDays)
-	logrus.Infof("   Key validation: interval=%dmin, concurrency=%d, task_timeout=%dmin",
-		sm.settings.KeyValidationIntervalMinutes, sm.settings.KeyValidationConcurrency, sm.settings.KeyValidationTaskTimeoutMinutes)
+	logrus.Infof("   Key validation: interval=%dmin, task_timeout=%dmin",
+		sm.settings.KeyValidationIntervalMinutes, sm.settings.KeyValidationTaskTimeoutMinutes)
 }

 // 辅助方法
--- a/internal/db/database.go
+++ b/internal/db/database.go
@@ -3,6 +3,7 @@ package db
 import (
 	"fmt"
 	"gpt-load/internal/models"
+	"gpt-load/internal/types"
 	"log"
 	"os"
 	"time"
@@ -14,10 +15,10 @@ import (

 var DB *gorm.DB

-func InitDB() (*gorm.DB, error) {
-	dsn := os.Getenv("DATABASE_DSN")
-	if dsn == "" {
-		return nil, fmt.Errorf("DATABASE_DSN environment variable not set")
+func InitDB(configManager types.ConfigManager) (*gorm.DB, error) {
+	dbConfig := configManager.GetDatabaseConfig()
+	if dbConfig.DSN == "" {
+		return nil, fmt.Errorf("DATABASE_DSN is not configured")
 	}

 	newLogger := logger.New(
@@ -31,7 +32,7 @@ func InitDB() (*gorm.DB, error) {
 	)

 	var err error
-	DB, err = gorm.Open(mysql.Open(dsn), &gorm.Config{
+	DB, err = gorm.Open(mysql.Open(dbConfig.DSN), &gorm.Config{
 		Logger: newLogger,
 	})
 	if err != nil {
@@ -48,7 +49,7 @@ func InitDB() (*gorm.DB, error) {
 	sqlDB.SetMaxOpenConns(100)
 	sqlDB.SetConnMaxLifetime(time.Hour)

-	if os.Getenv("DB_AUTO_MIGRATE") != "false" {
+	if dbConfig.AutoMigrate {
 		err = DB.AutoMigrate(
 			&models.SystemSetting{},
 			&models.Group{},
--- a/internal/services/key_cron_service.go
+++ b/internal/services/key_cron_service.go
@@ -11,30 +11,31 @@ import (
 	"gorm.io/gorm"
 )

-// KeyCronService is responsible for periodically validating all API keys.
+// KeyCronService is responsible for periodically submitting keys for validation.
 type KeyCronService struct {
 	DB              *gorm.DB
-	Validator       *KeyValidatorService
 	SettingsManager *config.SystemSettingsManager
+	Pool            *KeyValidationPool
 	stopChan        chan struct{}
 	wg              sync.WaitGroup
 }

 // NewKeyCronService creates a new KeyCronService.
-func NewKeyCronService(db *gorm.DB, validator *KeyValidatorService, settingsManager *config.SystemSettingsManager) *KeyCronService {
+func NewKeyCronService(db *gorm.DB, settingsManager *config.SystemSettingsManager, pool *KeyValidationPool) *KeyCronService {
 	return &KeyCronService{
 		DB:              db,
-		Validator:       validator,
 		SettingsManager: settingsManager,
+		Pool:            pool,
 		stopChan:        make(chan struct{}),
 	}
 }

-// Start begins the cron job.
+// Start begins the cron job and the results processor.
 func (s *KeyCronService) Start() {
 	logrus.Info("Starting KeyCronService...")
-	s.wg.Add(1)
+	s.wg.Add(2)
 	go s.run()
+	go s.processResults()
 }

 // Stop stops the cron job.
@@ -45,9 +46,11 @@ func (s *KeyCronService) Stop() {
 	logrus.Info("KeyCronService stopped.")
 }

+// run is the main ticker loop that triggers validation cycles.
 func (s *KeyCronService) run() {
 	defer s.wg.Done()
-	ctx := context.Background()
+	// Run once on start, then start the ticker.
+	s.submitValidationJobs()

 	ticker := time.NewTicker(5 * time.Minute)
 	defer ticker.Stop()
@@ -55,15 +58,73 @@ func (s *KeyCronService) run() {
 	for {
 		select {
 		case <-ticker.C:
-			s.validateAllGroups(ctx)
+			s.submitValidationJobs()
 		case <-s.stopChan:
 			return
 		}
 	}
 }

-func (s *KeyCronService) validateAllGroups(ctx context.Context) {
-	logrus.Info("KeyCronService: Starting validation cycle for all groups.")
+// processResults drains the pool's results channel, collects keys whose status or error reason changed, and writes them to the database in batches.
+func (s *KeyCronService) processResults() {
+	defer s.wg.Done()
+	keysToUpdate := make(map[uint]models.APIKey)
+
+	// Changed keys are accumulated and flushed in batches to avoid constant DB writes.
+	// This ticker defines the maximum delay before a pending batch is written.
+	ticker := time.NewTicker(30 * time.Second)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case result, ok := <-s.Pool.ResultsChannel():
+			if !ok {
+				s.batchUpdateKeyStatus(keysToUpdate)
+				return
+			}
+
+			key := result.Job.Key
+			var newStatus string
+			var newErrorReason string
+
+			if result.Error != nil {
+				newStatus = "inactive"
+				newErrorReason = result.Error.Error()
+			} else {
+				if result.IsValid {
+					newStatus = "active"
+					newErrorReason = ""
+				} else {
+					newStatus = "inactive"
+					newErrorReason = "Validation returned false without a specific error."
+				}
+			}
+
+			if key.Status != newStatus || key.ErrorReason != newErrorReason {
+				key.Status = newStatus
+				key.ErrorReason = newErrorReason
+				keysToUpdate[key.ID] = key
+			}
+
+		case <-ticker.C:
+			// Flush the pending batch on each tick, then empty the map in place so it can be reused.
+			if len(keysToUpdate) > 0 {
+				s.batchUpdateKeyStatus(keysToUpdate)
+				clear(keysToUpdate)
+			}
+		case <-s.stopChan:
+			// Process any remaining keys before stopping
+			if len(keysToUpdate) > 0 {
+				s.batchUpdateKeyStatus(keysToUpdate)
+			}
+			return
+		}
+	}
+}
+
+// submitValidationJobs finds groups and keys that need validation and submits them to the pool.
+func (s *KeyCronService) submitValidationJobs() {
+	logrus.Info("KeyCronService: Starting validation submission cycle.")
 	var groups []models.Group
 	if err := s.DB.Find(&groups).Error; err != nil {
 		logrus.Errorf("KeyCronService: Failed to get groups: %v", err)
@@ -71,124 +132,71 @@ func (s *KeyCronService) validateAllGroups(ctx context.Context) {
 	}

 	validationStartTime := time.Now()
-	var wg sync.WaitGroup
-	for _, group := range groups {
-		groupCopy := group
-		wg.Add(1)
-		go func(g models.Group) {
-			defer wg.Done()
-			defer func() {
-				if r := recover(); r != nil {
-					logrus.Errorf("KeyCronService: Panic recovered in group validation for %s: %v", g.Name, r)
+	groupsToUpdateTimestamp := make(map[uint]*models.Group)
+
+	for i := range groups {
+		group := &groups[i]
+		effectiveSettings := s.SettingsManager.GetEffectiveConfig(group.Config)
+		interval := time.Duration(effectiveSettings.KeyValidationIntervalMinutes) * time.Minute
+
+		if group.LastValidatedAt == nil || validationStartTime.Sub(*group.LastValidatedAt) > interval {
+			groupsToUpdateTimestamp[group.ID] = group
+			var keys []models.APIKey
+			if err := s.DB.Where("group_id = ?", group.ID).Find(&keys).Error; err != nil {
+				logrus.Errorf("KeyCronService: Failed to get keys for group %s: %v", group.Name, err)
+				continue
+			}
+
+			if len(keys) == 0 {
+				continue
+			}
+
+			logrus.Infof("KeyCronService: Submitting %d keys for group %s for validation.", len(keys), group.Name)
+
+			for _, key := range keys {
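+				// Create a per-job context with a 10s timeout. NOTE(review): the deadline starts at submission, so time a job waits in the pool queue counts against it — confirm this is intended.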
+				ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+
+				job := ValidationJob{
+					Key:        key,
+					Group:      group,
+					Ctx:        ctx,
+					CancelFunc: cancel,
 				}
-			}()

-			effectiveSettings := s.SettingsManager.GetEffectiveConfig(g.Config)
-			interval := time.Duration(effectiveSettings.KeyValidationIntervalMinutes) * time.Minute
-
-			if g.LastValidatedAt == nil || validationStartTime.Sub(*g.LastValidatedAt) > interval {
-				s.validateGroup(ctx, &g, validationStartTime)
+				s.Pool.SubmitJob(job)
 			}
-		}(groupCopy)
+		}
 	}
-	wg.Wait()
-	logrus.Info("KeyCronService: Validation cycle finished.")
+
+	// Update timestamps for all groups that were due for validation
+	if len(groupsToUpdateTimestamp) > 0 {
+		s.updateGroupTimestamps(groupsToUpdateTimestamp, validationStartTime)
+	}
+	logrus.Info("KeyCronService: Validation submission cycle finished.")
 }

-func (s *KeyCronService) validateGroup(ctx context.Context, group *models.Group, validationStartTime time.Time) {
+func (s *KeyCronService) updateGroupTimestamps(groups map[uint]*models.Group, validationStartTime time.Time) {
+	ids := make([]uint, 0, len(groups))
+	for groupID := range groups {
+		ids = append(ids, groupID)
+	}
+	if err := s.DB.Model(&models.Group{}).Where("id IN ?", ids).Update("last_validated_at", validationStartTime).Error; err != nil {
+		logrus.Errorf("KeyCronService: Failed to batch update last_validated_at for groups: %v", err)
+	}
+}
+
+func (s *KeyCronService) batchUpdateKeyStatus(keysToUpdate map[uint]models.APIKey) {
+	if len(keysToUpdate) == 0 {
+		return
+	}
+	logrus.Infof("KeyCronService: Batch updating status for %d keys.", len(keysToUpdate))
+
 	var keys []models.APIKey
-	if err := s.DB.Where("group_id = ?", group.ID).Find(&keys).Error; err != nil {
-		logrus.Errorf("KeyCronService: Failed to get keys for group %s: %v", group.Name, err)
-		return
+	for _, key := range keysToUpdate {
+		keys = append(keys, key)
 	}

-	if len(keys) == 0 {
-		if err := s.DB.Model(group).Update("last_validated_at", validationStartTime).Error; err != nil {
-			logrus.Errorf("KeyCronService: Failed to update last_validated_at for empty group %s: %v", group.Name, err)
-		}
-		return
-	}
-
-	logrus.Infof("KeyCronService: Validating %d keys for group %s", len(keys), group.Name)
-
-	jobs := make(chan models.APIKey, len(keys))
-	results := make(chan models.APIKey, len(keys))
-
-	concurrency := s.SettingsManager.GetInt("key_validation_concurrency", 10)
-	if concurrency <= 0 {
-		concurrency = 10
-	}
-
-	var workerWg sync.WaitGroup
-	for range concurrency {
-		workerWg.Add(1)
-		go s.worker(ctx, &workerWg, group, jobs, results)
-	}
-
-	for _, key := range keys {
-		jobs <- key
-	}
-	close(jobs)
-
-	workerWg.Wait()
-	close(results)
-
-	var keysToUpdate []models.APIKey
-	for key := range results {
-		keysToUpdate = append(keysToUpdate, key)
-	}
-
-	if len(keysToUpdate) > 0 {
-		s.batchUpdateKeyStatus(keysToUpdate)
-	}
-
-	if err := s.DB.Model(group).Update("last_validated_at", validationStartTime).Error; err != nil {
-		logrus.Errorf("KeyCronService: Failed to update last_validated_at for group %s: %v", group.Name, err)
-	}
-
-	logrus.Infof("KeyCronService: Finished validating group %s. %d keys had their status changed.", group.Name, len(keysToUpdate))
-}
-
-func (s *KeyCronService) worker(ctx context.Context, wg *sync.WaitGroup, group *models.Group, jobs <-chan models.APIKey, results chan<- models.APIKey) {
-	defer wg.Done()
-	for key := range jobs {
-		isValid, validationErr := s.Validator.ValidateSingleKey(ctx, &key, group)
-
-		var newStatus string
-		var newErrorReason string
-		statusChanged := false
-
-		if validationErr != nil {
-			newStatus = "inactive"
-			newErrorReason = validationErr.Error()
-		} else {
-			if isValid {
-				newStatus = "active"
-				newErrorReason = ""
-			} else {
-				newStatus = "inactive"
-				newErrorReason = "Validation returned false without a specific error."
-			}
-		}
-
-		if key.Status != newStatus || key.ErrorReason != newErrorReason {
-			statusChanged = true
-		}
-
-		if statusChanged {
-			key.Status = newStatus
-			key.ErrorReason = newErrorReason
-			results <- key
-		}
-	}
-}
-
-func (s *KeyCronService) batchUpdateKeyStatus(keys []models.APIKey) {
-	if len(keys) == 0 {
-		return
-	}
-	logrus.Infof("KeyCronService: Batch updating status/reason for %d keys.", len(keys))
-
 	err := s.DB.Transaction(func(tx *gorm.DB) error {
 		for _, key := range keys {
 			updates := map[string]any{
@@ -196,15 +204,13 @@ func (s *KeyCronService) batchUpdateKeyStatus(keys []models.APIKey) {
 				"error_reason": key.ErrorReason,
 			}
 			if err := tx.Model(&models.APIKey{}).Where("id = ?", key.ID).Updates(updates).Error; err != nil {
-				// Log the error for this specific key but continue the transaction
 				logrus.Errorf("KeyCronService: Failed to update key ID %d: %v", key.ID, err)
 			}
 		}
-		return nil // Commit the transaction even if some updates failed
+		return nil
 	})

 	if err != nil {
-		// This error is for the transaction itself, not individual updates
 		logrus.Errorf("KeyCronService: Transaction failed during batch update of key statuses: %v", err)
 	}
 }
--- a/internal/services/key_manual_validation_service.go
+++ b/internal/services/key_manual_validation_service.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"gpt-load/internal/config"
 	"gpt-load/internal/models"
+	"gpt-load/internal/types"
 	"sync"
 	"time"

@@ -25,15 +26,17 @@ type KeyManualValidationService struct {
 	Validator       *KeyValidatorService
 	TaskService     *TaskService
 	SettingsManager *config.SystemSettingsManager
+	ConfigManager   types.ConfigManager
 }

 // NewKeyManualValidationService creates a new KeyManualValidationService.
-func NewKeyManualValidationService(db *gorm.DB, validator *KeyValidatorService, taskService *TaskService, settingsManager *config.SystemSettingsManager) *KeyManualValidationService {
+func NewKeyManualValidationService(db *gorm.DB, validator *KeyValidatorService, taskService *TaskService, settingsManager *config.SystemSettingsManager, configManager types.ConfigManager) *KeyManualValidationService {
 	return &KeyManualValidationService{
 		DB:              db,
 		Validator:       validator,
 		TaskService:     taskService,
 		SettingsManager: settingsManager,
+		ConfigManager:   configManager,
 	}
 }

@@ -68,7 +71,9 @@ func (s *KeyManualValidationService) runValidation(group *models.Group, keys []m
 	jobs := make(chan models.APIKey, len(keys))
 	results := make(chan bool, len(keys))

-	concurrency := s.SettingsManager.GetInt("key_validation_concurrency", 10)
+	performanceConfig := s.ConfigManager.GetPerformanceConfig()
+	concurrency := performanceConfig.KeyValidationPoolSize
+
 	if concurrency <= 0 {
 		concurrency = 10
 	}
--- a/internal/services/key_validation_concurrency.go
+++ b/internal/services/key_validation_concurrency.go
@@ -0,0 +1,123 @@
+package services
+
+import (
+	"context"
+	"gpt-load/internal/models"
+	"gpt-load/internal/types"
+	"sync"
+
+	"github.com/sirupsen/logrus"
+)
+
+// ValidationJob represents a single key validation task for the worker pool.
+type ValidationJob struct {
+	TaskID     string             // not read by the pool's workers
+	Key        models.APIKey      // key to validate; passed by pointer to ValidateSingleKey
+	Group      *models.Group      // group passed to ValidateSingleKey alongside the key
+	Ctx        context.Context    // validation context; nil falls back to context.Background()
+	CancelFunc context.CancelFunc // if non-nil, called by the worker once validation returns
+}
+
+// ValidationResult holds the outcome of a validation job.
+type ValidationResult struct {
+	Job     ValidationJob // job that produced this result
+	IsValid bool          // first value returned by ValidateSingleKey
+	Error   error         // error returned by ValidateSingleKey
+}
+
+// KeyValidationPool manages a global worker pool for key validation.
+type KeyValidationPool struct {
+	validator     *KeyValidatorService  // performs the actual key validation
+	configManager types.ConfigManager   // source of KeyValidationPoolSize
+	jobs          chan ValidationJob    // pending jobs, consumed by workers
+	results       chan ValidationResult // results for the scheduled validation task
+	stopChan      chan struct{}         // closed by Stop to signal workers to exit
+	wg            sync.WaitGroup        // tracks running workers
+}
+
+// NewKeyValidationPool creates a pool with 1024-slot buffered jobs and results channels; workers are launched by Start.
+func NewKeyValidationPool(validator *KeyValidatorService, configManager types.ConfigManager) *KeyValidationPool {
+	return &KeyValidationPool{
+		validator:     validator,
+		configManager: configManager,
+		jobs:          make(chan ValidationJob, 1024),
+		results:       make(chan ValidationResult, 1024),
+		stopChan:      make(chan struct{}),
+	}
+}
+
+// Start launches KeyValidationPoolSize worker goroutines, falling back to 10 when the configured size is not positive.
+func (p *KeyValidationPool) Start() {
+	performanceConfig := p.configManager.GetPerformanceConfig()
+	concurrency := performanceConfig.KeyValidationPoolSize
+	if concurrency <= 0 {
+		concurrency = 10
+	}
+
+	logrus.Infof("Starting KeyValidationPool with %d workers...", concurrency)
+
+	p.wg.Add(concurrency)
+	for range concurrency {
+		go p.worker()
+	}
+}
+
+// Stop closes stopChan and the jobs channel, waits for all workers to exit, then closes the results channel. NOTE(review): a concurrent or later SubmitJob would send on the closed jobs channel and panic — confirm producers are stopped first.
+func (p *KeyValidationPool) Stop() {
+	logrus.Info("Stopping KeyValidationPool...")
+	close(p.stopChan)
+	close(p.jobs)
+	p.wg.Wait()
+
+	// Close the results channel.
+	close(p.results)
+
+	logrus.Info("KeyValidationPool stopped.")
+}
+
+// worker processes jobs until the jobs channel or stopChan is closed, sending one ValidationResult per completed job.
+func (p *KeyValidationPool) worker() {
+	defer p.wg.Done()
+	for {
+		select {
+		case job, ok := <-p.jobs:
+			if !ok {
+				return
+			}
+			ctx := job.Ctx
+			if ctx == nil {
+				ctx = context.Background()
+			}
+			isValid, err := p.validator.ValidateSingleKey(ctx, &job.Key, job.Group)
+			if job.CancelFunc != nil {
+				job.CancelFunc()
+			}
+			result := ValidationResult{
+				Job:     job,
+				IsValid: isValid,
+				Error:   err,
+			}
+
+			// Block until the result can be sent or the pool is stopped.
+			// This provides back-pressure; a result is discarded only when the pool is stopping.
+			select {
+			case p.results <- result:
+			case <-p.stopChan:
+				logrus.Infof("Worker stopping, discarding result for key %d", job.Key.ID)
+				return
+			}
+		case <-p.stopChan:
+			return
+		}
+	}
+}
+
+// SubmitJob enqueues a validation job, blocking while the jobs buffer is full. NOTE(review): it panics if called after Stop has closed the jobs channel.
+func (p *KeyValidationPool) SubmitJob(job ValidationJob) {
+	p.jobs <- job
+}
+
+// ResultsChannel returns the receive-only results channel; Stop closes it after all workers have exited.
+func (p *KeyValidationPool) ResultsChannel() <-chan ValidationResult {
+	return p.results
+}
--- a/internal/types/types.go
+++ b/internal/types/types.go
@@ -11,6 +11,7 @@ type ConfigManager interface {
 	GetCORSConfig() CORSConfig
 	GetPerformanceConfig() PerformanceConfig
 	GetLogConfig() LogConfig
+	GetDatabaseConfig() DatabaseConfig
 	GetEffectiveServerConfig() ServerConfig
 	GetRedisDSN() string
 	Validate() error
@@ -61,6 +62,7 @@ type CORSConfig struct {
 // PerformanceConfig represents performance configuration
 type PerformanceConfig struct {
 	MaxConcurrentRequests int  `json:"maxConcurrentRequests"`
+	KeyValidationPoolSize int  `json:"keyValidationPoolSize"`
 	EnableGzip            bool `json:"enableGzip"`
 }

@@ -72,3 +74,9 @@ type LogConfig struct {
 	FilePath      string `json:"filePath"`
 	EnableRequest bool   `json:"enableRequest"`
 }
+
+// DatabaseConfig represents database configuration
+type DatabaseConfig struct {
+	DSN         string `json:"dsn"`         // connection string; InitDB returns an error when it is empty
+	AutoMigrate bool   `json:"autoMigrate"` // when true, InitDB runs gorm AutoMigrate
+}