diff --git a/README.md b/README.md index 411a5c6..12511de 100644 --- a/README.md +++ b/README.md @@ -28,42 +28,56 @@ ```json { - "bind": "127.0.0.1:8181", - "proxy_url": "", - "timeout": 600, - "codex_api_base": "https://api-proxy.oaipro.com/v1", - "codex_api_key": "sk-xxx", - "codex_api_organization": "", - "codex_api_project": "", - "code_instruct_model": "gpt-3.5-turbo-instruct", - "chat_api_base": "https://api-proxy.oaipro.com/v1", - "chat_api_key": "sk-xxx", - "chat_api_organization": "", - "chat_api_project": "", - "chat_max_tokens": 4096, - "chat_model_default": "gpt-4o", - "chat_model_map": {}, - "auth_token": "" + "bind": "127.0.0.1:8181", + "proxy_url": "", + "timeout": 600, + "codex_api_base": "https://api-proxy.oaipro.com/v1", + "codex_api_key": "sk-xxx", + "codex_api_organization": "", + "codex_api_project": "", + "codex_max_tokens": 500, + "code_instruct_model": "gpt-3.5-turbo-instruct", + "chat_api_base": "https://api-proxy.oaipro.com/v1", + "chat_api_key": "sk-xxx", + "chat_api_organization": "", + "chat_api_project": "", + "chat_max_tokens": 4096, + "chat_model_default": "gpt-4o", + "chat_model_map": {}, + "chat_locale": "zh_CN", + "auth_token": "" } + ``` `organization` 和 `project` 除非你有,且知道怎么回事再填。 `chat_model_map` 是个模型映射的字典。会将请求的模型映射到你想要的,如果不存在映射,则使用 `chat_model_default` 。 +`codex_max_tokens` 可以设置为你希望的最大Token数,你设置的时候最好知道自己在做什么。代码生成通常使用 `500` 即可。 + `chat_max_tokens` 可以设置为你希望的最大Token数,你设置的时候最好知道自己在做什么。`gpt-4o` 输出最大为 `4096` 可以通过 `OVERRIDE_` + 大写配置项作为环境变量,可以覆盖 `config.json` 中的值。例如:`OVERRIDE_CODEX_API_KEY=sk-xxxx` +### DeepSeek Code 设置 +如果你希望使用 DeepSeek FIM 来进行代码补全,着重修改以下配置: + +```json + "codex_api_base": "https://api.deepseek.com/beta/v1", + "codex_api_key": "sk-xxx", + "code_instruct_model": "deepseek-coder", +``` + ### 本地大模型设置 1. 安装ollama 2. ollama run stable-code:code (这个模型较小,大部分显卡都能跑) 或者你的显卡比较高安装这个:ollama run stable-code:3b-code-fp16 3. 修改config.json里面的codex_api_base为http://localhost:11434/v1/chat 4. 修改code_instruct_model为你的模型名称,stable-code:code或者stable-code:3b-code-fp16 -4. 剩下的就按照正常流程走即可。 -5. 如果调不通,请确认http://localhost:11434/v1/chat可用。 - +5. 剩下的就按照正常流程走即可。 +6. 如果调不通,请确认http://localhost:11434/v1/chat可用。 + ### 重要说明 `codex_max_tokens` 工作并不完美,已经移除。**JetBrains IDE 完美工作**,`VSCode` 需要执行以下脚本Patch之: diff --git a/config.json.example b/config.json.example index d3047f1..35c6499 100644 --- a/config.json.example +++ b/config.json.example @@ -6,6 +6,7 @@ "codex_api_key": "sk-xxx", "codex_api_organization": "", "codex_api_project": "", + "codex_max_tokens": 500, "code_instruct_model": "gpt-3.5-turbo-instruct", "chat_api_base": "https://api-proxy.oaipro.com/v1", "chat_api_key": "sk-xxx", diff --git a/localModel.go b/localModel.go deleted file mode 100644 index 06ab7d0..0000000 --- a/localModel.go +++ /dev/null @@ -1 +0,0 @@ -package main diff --git a/main.go b/main.go index 96996c9..6937120 100644 --- a/main.go +++ b/main.go @@ -25,6 +25,8 @@ const DefaultInstructModel = "gpt-3.5-turbo-instruct" const StableCodeModelPrefix = "stable-code" +const DeepSeekCoderModel = "deepseek-coder" + type config struct { Bind string `json:"bind"` ProxyUrl string `json:"proxy_url"` @@ -33,6 +35,7 @@ type config struct { CodexApiKey string `json:"codex_api_key"` CodexApiOrganization string `json:"codex_api_organization"` CodexApiProject string `json:"codex_api_project"` + CodexMaxTokens int `json:"codex_max_tokens"` CodeInstructModel string `json:"code_instruct_model"` ChatApiBase string `json:"chat_api_base"` ChatApiKey string `json:"chat_api_key"` @@ -97,6 +100,14 @@ func readConfig() *config { _cfg.CodeInstructModel = DefaultInstructModel } + if _cfg.CodexMaxTokens == 0 { + _cfg.CodexMaxTokens = 500 + } + + if _cfg.ChatMaxTokens == 0 { + _cfg.ChatMaxTokens = 4096 + } + return _cfg } @@ -173,6 +184,7 @@ func AuthMiddleware(authToken string) gin.HandlerFunc { func (s *ProxyService) InitRoutes(e *gin.Engine) { e.GET("/_ping", s.pong) e.GET("/models", s.models) + e.GET("/v1/models", s.models) authToken := s.cfg.AuthToken // replace with your dynamic value as needed if authToken != "" { // 鉴权 @@ -180,10 +192,16 @@ func (s *ProxyService) InitRoutes(e *gin.Engine) { { v1.POST("/chat/completions", s.completions) v1.POST("/engines/copilot-codex/completions", s.codeCompletions) + + v1.POST("/v1/chat/completions", s.completions) + v1.POST("/v1/engines/copilot-codex/completions", s.codeCompletions) } } else { e.POST("/v1/chat/completions", s.completions) e.POST("/v1/engines/copilot-codex/completions", s.codeCompletions) + + e.POST("/v1/v1/chat/completions", s.completions) + e.POST("/v1/v1/engines/copilot-codex/completions", s.codeCompletions) } } @@ -394,7 +412,7 @@ func (s *ProxyService) completions(c *gin.Context) { func (s *ProxyService) codeCompletions(c *gin.Context) { ctx := c.Request.Context() - time.Sleep(100 * time.Millisecond) + time.Sleep(200 * time.Millisecond) if ctx.Err() != nil { abortCodex(c, http.StatusRequestTimeout) return @@ -411,7 +429,6 @@ func (s *ProxyService) codeCompletions(c *gin.Context) { proxyUrl := s.cfg.CodexApiBase + "/completions" req, err := http.NewRequestWithContext(ctx, http.MethodPost, proxyUrl, io.NopCloser(bytes.NewBuffer(body))) if nil != err { - // abortCodex(c, http.StatusInternalServerError) return } @@ -460,13 +477,24 @@ func ConstructRequestBody(body []byte, cfg *config) []byte { body, _ = sjson.DeleteBytes(body, "extra") body, _ = sjson.DeleteBytes(body, "nwo") body, _ = sjson.SetBytes(body, "model", cfg.CodeInstructModel) + + if int(gjson.GetBytes(body, "max_tokens").Int()) > cfg.CodexMaxTokens { + body, _ = sjson.SetBytes(body, "max_tokens", cfg.CodexMaxTokens) + } + if strings.Contains(cfg.CodeInstructModel, StableCodeModelPrefix) { return constructWithStableCodeModel(body) + } else if strings.HasPrefix(cfg.CodeInstructModel, DeepSeekCoderModel) { + if gjson.GetBytes(body, "n").Int() > 1 { + body, _ = sjson.SetBytes(body, "n", 1) + } } + if strings.HasSuffix(cfg.ChatApiBase, "chat") { // @Todo constructWithChatModel // 如果code base以chat结尾则构建chatModel,暂时没有好的prompt } + return body }