feat: Optimize timeout configuration
.env.example

@@ -32,9 +32,6 @@ MAX_RETRIES=3
 # 上游 API 地址
 OPENAI_BASE_URL=https://api.openai.com
 
-# 请求超时时间(毫秒)
-REQUEST_TIMEOUT=30000
-
 # ===========================================
 # 性能优化配置
 # ===========================================
@@ -76,3 +73,30 @@ ENABLE_CORS=true
 
 # 允许的来源(逗号分隔,* 表示允许所有)
 ALLOWED_ORIGINS=*
+ALLOWED_METHODS=GET,POST,PUT,DELETE,OPTIONS
+ALLOWED_HEADERS=*
+ALLOW_CREDENTIALS=false
+
+# ===========================================
+# 超时配置
+# ===========================================
+# 服务器读取超时时间(秒)
+SERVER_READ_TIMEOUT=120
+
+# 服务器写入超时时间(秒)
+SERVER_WRITE_TIMEOUT=1800
+
+# 服务器空闲超时时间(秒)
+SERVER_IDLE_TIMEOUT=120
+
+# 服务器优雅关闭超时时间(秒)
+SERVER_GRACEFUL_SHUTDOWN_TIMEOUT=60
+
+# 请求超时时间(秒)
+REQUEST_TIMEOUT=30
+
+# 响应超时时间(秒)- 控制TLS握手和响应头接收超时
+RESPONSE_TIMEOUT=30
+
+# 空闲连接超时时间(秒)- 控制连接池中空闲连接的生存时间
+IDLE_CONN_TIMEOUT=120
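
Note that `REQUEST_TIMEOUT` moves from the removed millisecond-based setting (30000) to a second-based one (30), so the effective default budget is unchanged. A quick sketch of that equivalence in Go's `time.Duration` terms:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// Old scheme: REQUEST_TIMEOUT=30000 was interpreted as milliseconds.
	oldTimeout := time.Duration(30000) * time.Millisecond
	// New scheme: REQUEST_TIMEOUT=30 is interpreted as seconds.
	newTimeout := time.Duration(30) * time.Second

	fmt.Println(oldTimeout == newTimeout) // true: same 30-second budget
	fmt.Println(newTimeout)               // 30s
}
```
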
README.md

@@ -92,18 +92,23 @@ cp .env.example .env
 
 ### Key Configuration Options
 
 | Setting | Environment Variable | Default | Description |
-| ------------------- | --------------------- | ------------------------ | ------------------------------- |
+| ----------------------- | ---------------------------------- | ------------------------ | --------------------------------------------------------------------------------------------- |
 | Server Port | `PORT` | 3000 | Server listening port |
 | Server Host | `HOST` | 0.0.0.0 | Server binding address |
 | Keys File | `KEYS_FILE` | keys.txt | API keys file path |
 | Start Index | `START_INDEX` | 0 | Starting key index for rotation |
 | Blacklist Threshold | `BLACKLIST_THRESHOLD` | 1 | Error count before blacklisting |
 | Upstream URL | `OPENAI_BASE_URL` | `https://api.openai.com` | OpenAI-compatible API base URL. Supports multiple, comma-separated URLs for load balancing. |
-| Request Timeout | `REQUEST_TIMEOUT` | 30000 | Request timeout in milliseconds |
 | Auth Key | `AUTH_KEY` | - | Optional authentication key |
 | CORS | `ENABLE_CORS` | true | Enable CORS support |
-| Max Connections | `MAX_SOCKETS` | 50 | Maximum HTTP connections |
+| Server Read Timeout | `SERVER_READ_TIMEOUT` | 120 | HTTP server read timeout in seconds |
+| Server Write Timeout | `SERVER_WRITE_TIMEOUT` | 1800 | HTTP server write timeout in seconds |
+| Server Idle Timeout | `SERVER_IDLE_TIMEOUT` | 120 | HTTP server idle timeout in seconds |
+| Graceful Shutdown | `SERVER_GRACEFUL_SHUTDOWN_TIMEOUT` | 60 | Graceful shutdown timeout in seconds |
+| Request Timeout | `REQUEST_TIMEOUT` | 30 | Request timeout in seconds |
+| Response Timeout | `RESPONSE_TIMEOUT` | 30 | Response timeout in seconds (TLS handshake & response header) |
+| Idle Connection Timeout | `IDLE_CONN_TIMEOUT` | 120 | Idle connection timeout in seconds |
 
 ### Configuration Examples
 
@@ -126,16 +131,16 @@ OPENAI_BASE_URL=https://your-resource.openai.azure.com
 ```bash
 OPENAI_BASE_URL=https://api.your-provider.com
 # Use provider-specific API keys
 ```
 
 #### Multi-Target Load Balancing
 
 ```bash
 # Use a comma-separated list of target URLs
 OPENAI_BASE_URL=https://gateway.ai.cloudflare.com/v1/.../openai,https://api.openai.com/v1,https://api.another-provider.com/v1
 ```
 
 ## API Key Validation
 
 The project includes a high-performance API key validation tool:
 
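
`OPENAI_BASE_URL` accepts a comma-separated list of upstream targets. The `parseArray` helper used later in this commit is not shown in the diff, so the sketch below only illustrates the general idea; the round-robin pick is an assumed balancing strategy, not necessarily the one the proxy uses:

```go
package main

import (
	"fmt"
	"strings"
	"sync/atomic"
)

// splitBaseURLs splits a comma-separated OPENAI_BASE_URL value,
// trimming whitespace and dropping empty entries.
func splitBaseURLs(raw string, fallback []string) []string {
	var urls []string
	for _, part := range strings.Split(raw, ",") {
		if p := strings.TrimSpace(part); p != "" {
			urls = append(urls, p)
		}
	}
	if len(urls) == 0 {
		return fallback
	}
	return urls
}

var counter atomic.Uint64

// nextURL is an illustrative round-robin selector; the actual
// load-balancing strategy is outside the scope of this diff.
func nextURL(urls []string) string {
	return urls[counter.Add(1)%uint64(len(urls))]
}

func main() {
	urls := splitBaseURLs("https://api.openai.com/v1, https://api.another-provider.com/v1",
		[]string{"https://api.openai.com"})
	fmt.Println(nextURL(urls), nextURL(urls))
}
```
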
README_CN.md

@@ -92,18 +92,23 @@ cp .env.example .env
 
 ### 主要配置选项
 
 | 配置项 | 环境变量 | 默认值 | 说明 |
-| ---------- | --------------------- | ------------------------ | ------------------------ |
+| -------------- | ---------------------------------- | ------------------------ | -------------------------------------------------- |
 | 服务器端口 | `PORT` | 3000 | 服务器监听端口 |
 | 服务器主机 | `HOST` | 0.0.0.0 | 服务器绑定地址 |
 | 密钥文件 | `KEYS_FILE` | keys.txt | API 密钥文件路径 |
 | 起始索引 | `START_INDEX` | 0 | 密钥轮换起始索引 |
 | 拉黑阈值 | `BLACKLIST_THRESHOLD` | 1 | 拉黑前的错误次数 |
 | 上游地址 | `OPENAI_BASE_URL` | `https://api.openai.com` | OpenAI 兼容 API 基础地址。支持多个地址,用逗号分隔 |
-| 请求超时 | `REQUEST_TIMEOUT` | 30000 | 请求超时时间(毫秒) |
 | 认证密钥 | `AUTH_KEY` | - | 可选的认证密钥 |
 | CORS | `ENABLE_CORS` | true | 启用 CORS 支持 |
-| 最大连接数 | `MAX_SOCKETS` | 50 | 最大 HTTP 连接数 |
+| 服务器读取超时 | `SERVER_READ_TIMEOUT` | 120 | HTTP 服务器读取超时时间(秒) |
+| 服务器写入超时 | `SERVER_WRITE_TIMEOUT` | 1800 | HTTP 服务器写入超时时间(秒) |
+| 服务器空闲超时 | `SERVER_IDLE_TIMEOUT` | 120 | HTTP 服务器空闲超时时间(秒) |
+| 优雅关闭超时 | `SERVER_GRACEFUL_SHUTDOWN_TIMEOUT` | 60 | 服务器优雅关闭超时时间(秒) |
+| 请求超时 | `REQUEST_TIMEOUT` | 30 | 请求超时时间(秒) |
+| 响应超时 | `RESPONSE_TIMEOUT` | 30 | 响应超时时间(秒)- 控制 TLS 握手和响应头接收 |
+| 空闲连接超时 | `IDLE_CONN_TIMEOUT` | 120 | 空闲连接超时时间(秒) |
 
 ### 配置示例
 
@@ -126,16 +131,16 @@ OPENAI_BASE_URL=https://your-resource.openai.azure.com
 ```bash
 OPENAI_BASE_URL=https://api.your-provider.com
 # 使用提供商特定的 API 密钥
 ```
 
 #### 多目标负载均衡
 
 ```bash
 # 使用逗号分隔多个目标地址
 OPENAI_BASE_URL=https://gateway.ai.cloudflare.com/v1/.../openai,https://api.openai.com/v1,https://api.another-provider.com/v1
 ```
 
 ## API 密钥验证
 
 项目包含高性能的 API 密钥验证工具:
 
@@ -61,10 +61,10 @@ func main() {
 	server := &http.Server{
 		Addr: fmt.Sprintf("%s:%d", serverConfig.Host, serverConfig.Port),
 		Handler: router,
-		ReadTimeout: 120 * time.Second, // Increased read timeout for large file uploads
-		WriteTimeout: 1800 * time.Second, // Increased write timeout for streaming responses
-		IdleTimeout: 120 * time.Second, // Increased idle timeout for connection reuse
+		ReadTimeout: time.Duration(serverConfig.ReadTimeout) * time.Second,
+		WriteTimeout: time.Duration(serverConfig.WriteTimeout) * time.Second,
+		IdleTimeout: time.Duration(serverConfig.IdleTimeout) * time.Second,
 		MaxHeaderBytes: 1 << 20, // 1MB header limit
 	}
 
 	// Start server
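
For reference, the three `net/http` server timeouts now driven by configuration bound different phases of a connection: `ReadTimeout` covers reading the request (including the body), `WriteTimeout` covers writing the response, and `IdleTimeout` limits how long a keep-alive connection may sit idle. A minimal sketch with the commit's default values hard-coded purely for illustration:

```go
package main

import (
	"log"
	"net/http"
	"time"
)

func main() {
	mux := http.NewServeMux()
	mux.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte("ok"))
	})

	srv := &http.Server{
		Addr:    ":3000",
		Handler: mux,
		// Maximum time to read the full request, including the body.
		ReadTimeout: 120 * time.Second,
		// Maximum time to write the response; generous to allow long
		// streaming responses (30 minutes, matching SERVER_WRITE_TIMEOUT=1800).
		WriteTimeout: 1800 * time.Second,
		// How long an idle keep-alive connection is kept open.
		IdleTimeout: 120 * time.Second,
	}
	log.Fatal(srv.ListenAndServe())
}
```
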
@@ -89,7 +89,7 @@ func main() {
 	logrus.Info("Shutting down server...")
 
 	// Give outstanding requests a deadline for completion
-	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	ctx, cancel := context.WithTimeout(context.Background(), time.Duration(serverConfig.GracefulShutdownTimeout)*time.Second)
 	defer cancel()
 
 	// Attempt graceful shutdown
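
The shutdown deadline is now configurable as well. A self-contained sketch of the usual pattern around `http.Server.Shutdown`, assuming a 60-second budget as in the new `SERVER_GRACEFUL_SHUTDOWN_TIMEOUT` default:

```go
package main

import (
	"context"
	"log"
	"net/http"
	"os"
	"os/signal"
	"syscall"
	"time"
)

func main() {
	srv := &http.Server{Addr: ":3000", Handler: http.NotFoundHandler()}

	go func() {
		if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
			log.Fatalf("listen: %v", err)
		}
	}()

	// Wait for an interrupt or termination signal.
	quit := make(chan os.Signal, 1)
	signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
	<-quit

	// Give in-flight requests up to the configured deadline to finish.
	gracefulShutdownTimeout := 60 // seconds, e.g. SERVER_GRACEFUL_SHUTDOWN_TIMEOUT
	ctx, cancel := context.WithTimeout(context.Background(),
		time.Duration(gracefulShutdownTimeout)*time.Second)
	defer cancel()

	if err := srv.Shutdown(ctx); err != nil {
		log.Printf("forced shutdown: %v", err)
	}
}
```
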
@@ -203,7 +203,9 @@ func displayStartupInfo(configManager types.ConfigManager) {
 	logrus.Infof(" Blacklist threshold: %d errors", keysConfig.BlacklistThreshold)
 	logrus.Infof(" Max retries: %d", keysConfig.MaxRetries)
 	logrus.Infof(" Upstream URL: %s", openaiConfig.BaseURL)
-	logrus.Infof(" Request timeout: %dms", openaiConfig.Timeout)
+	logrus.Infof(" Request timeout: %ds", openaiConfig.RequestTimeout)
+	logrus.Infof(" Response timeout: %ds", openaiConfig.ResponseTimeout)
+	logrus.Infof(" Idle connection timeout: %ds", openaiConfig.IdleConnTimeout)
 
 	authStatus := "disabled"
 	if authConfig.Enabled {
@@ -30,8 +30,8 @@ type Constants struct {
 var DefaultConstants = Constants{
 	MinPort: 1,
 	MaxPort: 65535,
-	MinTimeout: 1000,
-	DefaultTimeout: 30000,
+	MinTimeout: 1,
+	DefaultTimeout: 30,
 	DefaultMaxSockets: 50,
 	DefaultMaxFreeSockets: 10,
 }
@@ -62,8 +62,12 @@ func NewManager() (types.ConfigManager, error) {
 
 	config := &Config{
 		Server: types.ServerConfig{
 			Port: parseInteger(os.Getenv("PORT"), 3000),
 			Host: getEnvOrDefault("HOST", "0.0.0.0"),
+			ReadTimeout: parseInteger(os.Getenv("SERVER_READ_TIMEOUT"), 120),
+			WriteTimeout: parseInteger(os.Getenv("SERVER_WRITE_TIMEOUT"), 1800),
+			IdleTimeout: parseInteger(os.Getenv("SERVER_IDLE_TIMEOUT"), 120),
+			GracefulShutdownTimeout: parseInteger(os.Getenv("SERVER_GRACEFUL_SHUTDOWN_TIMEOUT"), 60),
 		},
 		Keys: types.KeysConfig{
 			FilePath: getEnvOrDefault("KEYS_FILE", "keys.txt"),
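
`parseInteger` itself is not part of this diff; a plausible sketch of such a helper, which is what the defaults above assume:

```go
package main

import (
	"fmt"
	"os"
	"strconv"
	"strings"
)

// parseInteger is a sketch of the helper the loader relies on: it returns
// the parsed value when the input is a valid integer and the supplied
// default otherwise (including when the variable is unset or empty).
func parseInteger(value string, defaultValue int) int {
	n, err := strconv.Atoi(strings.TrimSpace(value))
	if err != nil {
		return defaultValue
	}
	return n
}

func main() {
	fmt.Println(parseInteger(os.Getenv("SERVER_WRITE_TIMEOUT"), 1800))
}
```
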
@@ -72,8 +76,10 @@ func NewManager() (types.ConfigManager, error) {
 			MaxRetries: parseInteger(os.Getenv("MAX_RETRIES"), 3),
 		},
 		OpenAI: types.OpenAIConfig{
 			BaseURLs: parseArray(os.Getenv("OPENAI_BASE_URL"), []string{"https://api.openai.com"}),
-			Timeout: parseInteger(os.Getenv("REQUEST_TIMEOUT"), DefaultConstants.DefaultTimeout),
+			RequestTimeout: parseInteger(os.Getenv("REQUEST_TIMEOUT"), DefaultConstants.DefaultTimeout),
+			ResponseTimeout: parseInteger(os.Getenv("RESPONSE_TIMEOUT"), 30),
+			IdleConnTimeout: parseInteger(os.Getenv("IDLE_CONN_TIMEOUT"), 120),
 		},
 		Auth: types.AuthConfig{
 			Key: os.Getenv("AUTH_KEY"),
@@ -88,7 +94,6 @@ func NewManager() (types.ConfigManager, error) {
 		},
 		Performance: types.PerformanceConfig{
 			MaxConcurrentRequests: parseInteger(os.Getenv("MAX_CONCURRENT_REQUESTS"), 100),
-			RequestTimeout: parseInteger(os.Getenv("REQUEST_TIMEOUT"), DefaultConstants.DefaultTimeout),
 			EnableGzip: parseBoolean(os.Getenv("ENABLE_GZIP"), true),
 		},
 		Log: types.LogConfig{
@@ -173,8 +178,8 @@ func (m *Manager) Validate() error {
 	}
 
 	// Validate timeout
-	if m.config.OpenAI.Timeout < DefaultConstants.MinTimeout {
-		validationErrors = append(validationErrors, fmt.Sprintf("request timeout cannot be less than %dms", DefaultConstants.MinTimeout))
+	if m.config.OpenAI.RequestTimeout < DefaultConstants.MinTimeout {
+		validationErrors = append(validationErrors, fmt.Sprintf("request timeout cannot be less than %ds", DefaultConstants.MinTimeout))
 	}
 
 	// Validate upstream URL format
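
With `MinTimeout` now 1 (second) instead of 1000 (milliseconds), the check above rejects sub-second request timeouts. A standalone sketch of the same guard, with the manager's error aggregation approximated by a plain string slice:

```go
package main

import (
	"fmt"
	"strings"
)

// validateRequestTimeout mirrors the updated check: both arguments are whole
// seconds, and minTimeout corresponds to DefaultConstants.MinTimeout (1).
func validateRequestTimeout(requestTimeout, minTimeout int) []string {
	var validationErrors []string
	if requestTimeout < minTimeout {
		validationErrors = append(validationErrors,
			fmt.Sprintf("request timeout cannot be less than %ds", minTimeout))
	}
	return validationErrors
}

func main() {
	if errs := validateRequestTimeout(0, 1); len(errs) > 0 {
		fmt.Println(strings.Join(errs, "; "))
	}
}
```
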
@@ -212,7 +217,9 @@ func (m *Manager) DisplayConfig() {
 	logrus.Infof(" Blacklist threshold: %d errors", m.config.Keys.BlacklistThreshold)
 	logrus.Infof(" Max retries: %d", m.config.Keys.MaxRetries)
 	logrus.Infof(" Upstream URLs: %s", strings.Join(m.config.OpenAI.BaseURLs, ", "))
-	logrus.Infof(" Request timeout: %dms", m.config.OpenAI.Timeout)
+	logrus.Infof(" Request timeout: %ds", m.config.OpenAI.RequestTimeout)
+	logrus.Infof(" Response timeout: %ds", m.config.OpenAI.ResponseTimeout)
+	logrus.Infof(" Idle connection timeout: %ds", m.config.OpenAI.IdleConnTimeout)
 
 	authStatus := "disabled"
 	if m.config.Auth.Enabled {
@@ -176,8 +176,10 @@ func (h *Handler) GetConfig(c *gin.Context) {
 			"max_retries": keysConfig.MaxRetries,
 		},
 		"openai": gin.H{
 			"base_url": openaiConfig.BaseURL,
-			"timeout": openaiConfig.Timeout,
+			"request_timeout": openaiConfig.RequestTimeout,
+			"response_timeout": openaiConfig.ResponseTimeout,
+			"idle_conn_timeout": openaiConfig.IdleConnTimeout,
 		},
 		"auth": gin.H{
 			"enabled": authConfig.Enabled,
@@ -192,9 +194,17 @@ func (h *Handler) GetConfig(c *gin.Context) {
 		},
 		"performance": gin.H{
 			"max_concurrent_requests": perfConfig.MaxConcurrentRequests,
-			"request_timeout": perfConfig.RequestTimeout,
 			"enable_gzip": perfConfig.EnableGzip,
 		},
+		"timeout_config": gin.H{
+			"request_timeout_s": openaiConfig.RequestTimeout,
+			"response_timeout_s": openaiConfig.ResponseTimeout,
+			"idle_conn_timeout_s": openaiConfig.IdleConnTimeout,
+			"server_read_timeout_s": serverConfig.ReadTimeout,
+			"server_write_timeout_s": serverConfig.WriteTimeout,
+			"server_idle_timeout_s": serverConfig.IdleTimeout,
+			"graceful_shutdown_timeout_s": serverConfig.GracefulShutdownTimeout,
+		},
 		"log": gin.H{
 			"level": logConfig.Level,
 			"format": logConfig.Format,
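
Since `gin.H` is just a string-keyed map, the new `timeout_config` block marshals to a plain JSON object. A gin-free sketch of the resulting shape, with illustrative values standing in for the loaded config:

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Same keys as the handler's gin.H literal; the values here are examples only.
	timeoutConfig := map[string]any{
		"request_timeout_s":           30,
		"response_timeout_s":          30,
		"idle_conn_timeout_s":         120,
		"server_read_timeout_s":       120,
		"server_write_timeout_s":      1800,
		"server_idle_timeout_s":       120,
		"graceful_shutdown_timeout_s": 60,
	}
	out, err := json.MarshalIndent(map[string]any{"timeout_config": timeoutConfig}, "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out))
}
```
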
@@ -53,14 +53,13 @@ func NewProxyServer(keyManager types.KeyManager, configManager types.ConfigManag
 	openaiConfig := configManager.GetOpenAIConfig()
 	perfConfig := configManager.GetPerformanceConfig()
 
-
 	// Create high-performance HTTP client
 	transport := &http.Transport{
 		MaxIdleConns: 100,
 		MaxIdleConnsPerHost: 20,
 		MaxConnsPerHost: 0,
-		IdleConnTimeout: 120 * time.Second,
-		TLSHandshakeTimeout: 30 * time.Second,
+		IdleConnTimeout: time.Duration(openaiConfig.IdleConnTimeout) * time.Second,
+		TLSHandshakeTimeout: time.Duration(openaiConfig.ResponseTimeout) * time.Second,
 		ExpectContinueTimeout: 1 * time.Second,
 		DisableCompression: !perfConfig.EnableGzip,
 		ForceAttemptHTTP2: true,
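
These transport-level knobs bound individual phases rather than the whole request: `IdleConnTimeout` caps how long an idle pooled connection is kept, `TLSHandshakeTimeout` caps the TLS handshake, and `ResponseHeaderTimeout` caps the wait for the upstream's response headers, while `http.Client.Timeout` (set from `RequestTimeout` for the non-streaming client below) bounds the entire exchange. A sketch, not the proxy's actual constructor, wiring these from plain second counts:

```go
package main

import (
	"fmt"
	"net/http"
	"time"
)

// newUpstreamClient is a sketch: responseTimeout and idleConnTimeout are
// seconds, as in RESPONSE_TIMEOUT and IDLE_CONN_TIMEOUT; requestTimeout
// bounds the whole request and would be 0 for a streaming client.
func newUpstreamClient(requestTimeout, responseTimeout, idleConnTimeout int) *http.Client {
	transport := &http.Transport{
		MaxIdleConns:        100,
		MaxIdleConnsPerHost: 20,
		// How long an idle connection stays in the pool before being closed.
		IdleConnTimeout: time.Duration(idleConnTimeout) * time.Second,
		// Upper bound on the TLS handshake with the upstream.
		TLSHandshakeTimeout: time.Duration(responseTimeout) * time.Second,
		// Upper bound on waiting for the upstream's response headers.
		ResponseHeaderTimeout: time.Duration(responseTimeout) * time.Second,
		ForceAttemptHTTP2:     true,
	}
	return &http.Client{
		Transport: transport,
		// 0 means no overall deadline (streaming); otherwise an end-to-end limit.
		Timeout: time.Duration(requestTimeout) * time.Second,
	}
}

func main() {
	c := newUpstreamClient(30, 30, 120)
	fmt.Println(c.Timeout)
}
```
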
@@ -73,19 +72,19 @@ func NewProxyServer(keyManager types.KeyManager, configManager types.ConfigManag
 		MaxIdleConns: 200,
 		MaxIdleConnsPerHost: 40,
 		MaxConnsPerHost: 0,
-		IdleConnTimeout: 300 * time.Second, // Keep streaming connections longer
-		TLSHandshakeTimeout: 30 * time.Second,
+		IdleConnTimeout: time.Duration(openaiConfig.IdleConnTimeout) * time.Second,
+		TLSHandshakeTimeout: time.Duration(openaiConfig.ResponseTimeout) * time.Second,
 		ExpectContinueTimeout: 1 * time.Second,
 		DisableCompression: true, // Always disable compression for streaming
 		ForceAttemptHTTP2: true,
 		WriteBufferSize: 64 * 1024,
 		ReadBufferSize: 64 * 1024,
-		ResponseHeaderTimeout: 30 * time.Second,
+		ResponseHeaderTimeout: time.Duration(openaiConfig.ResponseTimeout) * time.Second,
 	}
 
 	httpClient := &http.Client{
 		Transport: transport,
-		Timeout: time.Duration(openaiConfig.Timeout) * time.Millisecond,
+		Timeout: time.Duration(openaiConfig.RequestTimeout) * time.Second,
 	}
 
 	// Streaming client without overall timeout
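
The streaming transport keeps `ResponseHeaderTimeout`, but the streaming client itself carries no overall `Timeout`, since a streaming response may legitimately outlive any fixed deadline; cancellation is instead left to the per-request context shown in the next hunk. A self-contained sketch of that context-based cutoff for a non-streaming call, with `requestTimeout` standing in for the configured `REQUEST_TIMEOUT`:

```go
package main

import (
	"context"
	"fmt"
	"net/http"
	"time"
)

func main() {
	requestTimeout := 30 // seconds, REQUEST_TIMEOUT
	streaming := false   // streaming requests get a cancel-only context instead

	var ctx context.Context
	var cancel context.CancelFunc
	if streaming {
		// No deadline: the response may stay open indefinitely.
		ctx, cancel = context.WithCancel(context.Background())
	} else {
		ctx, cancel = context.WithTimeout(context.Background(),
			time.Duration(requestTimeout)*time.Second)
	}
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.openai.com/v1/models", nil)
	if err != nil {
		panic(err)
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("request ended:", err) // includes context deadline exceeded
		return
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}
```
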
@@ -229,7 +228,7 @@ func (ps *ProxyServer) executeRequestWithRetry(c *gin.Context, startTime time.Ti
 		ctx, cancel = context.WithCancel(c.Request.Context())
 	} else {
 		// Non-streaming requests use configured timeout from the already fetched config
-		timeout := time.Duration(openaiConfig.Timeout) * time.Millisecond
+		timeout := time.Duration(openaiConfig.RequestTimeout) * time.Second
 		ctx, cancel = context.WithTimeout(c.Request.Context(), timeout)
 	}
 	defer cancel()
@@ -40,8 +40,12 @@ type ProxyServer interface {
 
 // ServerConfig represents server configuration
 type ServerConfig struct {
 	Port int `json:"port"`
 	Host string `json:"host"`
+	ReadTimeout int `json:"readTimeout"`
+	WriteTimeout int `json:"writeTimeout"`
+	IdleTimeout int `json:"idleTimeout"`
+	GracefulShutdownTimeout int `json:"gracefulShutdownTimeout"`
 }
 
 // KeysConfig represents keys configuration
@@ -54,9 +58,11 @@ type KeysConfig struct {
 
 // OpenAIConfig represents OpenAI API configuration
 type OpenAIConfig struct {
 	BaseURL string `json:"baseUrl"`
 	BaseURLs []string `json:"baseUrls"`
-	Timeout int `json:"timeout"`
+	RequestTimeout int `json:"requestTimeout"`
+	ResponseTimeout int `json:"responseTimeout"`
+	IdleConnTimeout int `json:"idleConnTimeout"`
 }
 
 // AuthConfig represents authentication configuration
@@ -77,7 +83,6 @@ type CORSConfig struct {
 // PerformanceConfig represents performance configuration
 type PerformanceConfig struct {
 	MaxConcurrentRequests int `json:"maxConcurrentRequests"`
-	RequestTimeout int `json:"requestTimeout"`
 	EnableGzip bool `json:"enableGzip"`
 }
 