diff --git a/cmd/gpt-load/main.go b/cmd/gpt-load/main.go index cfbc76b..8943b23 100644 --- a/cmd/gpt-load/main.go +++ b/cmd/gpt-load/main.go @@ -61,8 +61,8 @@ func main() { server := &http.Server{ Addr: fmt.Sprintf("%s:%d", serverConfig.Host, serverConfig.Port), Handler: router, - ReadTimeout: 60 * time.Second, // Increased read timeout for large file uploads - WriteTimeout: 300 * time.Second, // Increased write timeout for streaming responses + ReadTimeout: 120 * time.Second, // Increased read timeout for large file uploads + WriteTimeout: 1800 * time.Second, // Increased write timeout for streaming responses IdleTimeout: 120 * time.Second, // Increased idle timeout for connection reuse MaxHeaderBytes: 1 << 20, // 1MB header limit } diff --git a/internal/middleware/middleware.go b/internal/middleware/middleware.go index 0c19e85..cda0e7e 100644 --- a/internal/middleware/middleware.go +++ b/internal/middleware/middleware.go @@ -2,7 +2,6 @@ package middleware import ( - "context" "fmt" "strings" "time" @@ -215,32 +214,6 @@ func RateLimiter(config types.PerformanceConfig) gin.HandlerFunc { } } -// Timeout creates a timeout middleware -func Timeout(timeout time.Duration) gin.HandlerFunc { - return func(c *gin.Context) { - acceptHeader := c.Request.Header.Get("Accept") - if strings.Contains(acceptHeader, "text/event-stream") || - strings.Contains(acceptHeader, "application/x-ndjson") || - c.Request.Header.Get("X-Accel-Buffering") == "no" { - c.Next() - return - } - ctx, cancel := context.WithTimeout(c.Request.Context(), timeout) - defer cancel() - - c.Request = c.Request.WithContext(ctx) - c.Next() - - if ctx.Err() == context.DeadlineExceeded { - c.JSON(408, gin.H{ - "error": "Request timeout", - "code": errors.ErrProxyTimeout, - }) - c.Abort() - } - } -} - // ErrorHandler creates an error handling middleware func ErrorHandler() gin.HandlerFunc { return func(c *gin.Context) { diff --git a/internal/proxy/server.go b/internal/proxy/server.go index b2d65fe..8b99a60 100644 --- a/internal/proxy/server.go +++ b/internal/proxy/server.go @@ -56,11 +56,11 @@ func NewProxyServer(keyManager types.KeyManager, configManager types.ConfigManag // Create high-performance HTTP client transport := &http.Transport{ - MaxIdleConns: 50, - MaxIdleConnsPerHost: 10, - MaxConnsPerHost: 0, // No limit to avoid connection pool bottleneck - IdleConnTimeout: 90 * time.Second, - TLSHandshakeTimeout: 10 * time.Second, + MaxIdleConns: 100, + MaxIdleConnsPerHost: 20, + MaxConnsPerHost: 0, + IdleConnTimeout: 120 * time.Second, + TLSHandshakeTimeout: 30 * time.Second, ExpectContinueTimeout: 1 * time.Second, DisableCompression: !perfConfig.EnableGzip, ForceAttemptHTTP2: true, @@ -70,17 +70,17 @@ func NewProxyServer(keyManager types.KeyManager, configManager types.ConfigManag // Create dedicated transport for streaming, optimize TCP parameters streamTransport := &http.Transport{ - MaxIdleConns: 100, - MaxIdleConnsPerHost: 20, + MaxIdleConns: 200, + MaxIdleConnsPerHost: 40, MaxConnsPerHost: 0, IdleConnTimeout: 300 * time.Second, // Keep streaming connections longer - TLSHandshakeTimeout: 10 * time.Second, + TLSHandshakeTimeout: 30 * time.Second, ExpectContinueTimeout: 1 * time.Second, DisableCompression: true, // Always disable compression for streaming ForceAttemptHTTP2: true, WriteBufferSize: 64 * 1024, ReadBufferSize: 64 * 1024, - ResponseHeaderTimeout: 10 * time.Second, + ResponseHeaderTimeout: 30 * time.Second, } httpClient := &http.Client{