add deepseek-coder fim support

Signed-off-by: wozulong <>
2024-07-26 15:20:18 +08:00 · 2024-07-26 15:20:18 +08:00 · 6325a5e2f5
parent e251e9e50b
commit 6325a5e2f5
4 changed files with 64 additions and 22 deletions
--- a/README.md
+++ b/README.md
@ -28,41 +28,55 @@

 ```json
 {
-  "bind": "127.0.0.1:8181",
-  "proxy_url": "",
-  "timeout": 600,
-  "codex_api_base": "https://api-proxy.oaipro.com/v1",
-  "codex_api_key": "sk-xxx",
-  "codex_api_organization": "",
-  "codex_api_project": "",
-  "code_instruct_model": "gpt-3.5-turbo-instruct",
-  "chat_api_base": "https://api-proxy.oaipro.com/v1",
-  "chat_api_key": "sk-xxx",
-  "chat_api_organization": "",
-  "chat_api_project": "",
-  "chat_max_tokens": 4096,
-  "chat_model_default": "gpt-4o",
-  "chat_model_map": {},
-  "auth_token": ""
+ "bind": "127.0.0.1:8181",
+ "proxy_url": "",
+ "timeout": 600,
+ "codex_api_base": "https://api-proxy.oaipro.com/v1",
+ "codex_api_key": "sk-xxx",
+ "codex_api_organization": "",
+ "codex_api_project": "",
+ "codex_max_tokens": 500,
+ "code_instruct_model": "gpt-3.5-turbo-instruct",
+ "chat_api_base": "https://api-proxy.oaipro.com/v1",
+ "chat_api_key": "sk-xxx",
+ "chat_api_organization": "",
+ "chat_api_project": "",
+ "chat_max_tokens": 4096,
+ "chat_model_default": "gpt-4o",
+ "chat_model_map": {},
+ "chat_locale": "zh_CN",
+ "auth_token": ""
 }
+
 ```

 `organization` 和 `project` 除非你有，且知道怎么回事再填。

 `chat_model_map` 是个模型映射的字典。会将请求的模型映射到你想要的，如果不存在映射，则使用 `chat_model_default` 。

+`codex_max_tokens` 可以设置为你希望的最大Token数，你设置的时候最好知道自己在做什么。代码生成通常使用 `500` 即可。
+
 `chat_max_tokens` 可以设置为你希望的最大Token数，你设置的时候最好知道自己在做什么。`gpt-4o` 输出最大为 `4096`

 可以通过 `OVERRIDE_` + 大写配置项作为环境变量，可以覆盖 `config.json` 中的值。例如：`OVERRIDE_CODEX_API_KEY=sk-xxxx`

+### DeepSeek Coder 设置
+如果你希望使用 DeepSeek 的 FIM（Fill-In-the-Middle）接口来进行代码补全，请重点修改以下配置：
+
+```json
+  "codex_api_base": "https://api.deepseek.com/beta/v1",
+  "codex_api_key": "sk-xxx",
+  "code_instruct_model": "deepseek-coder",
+```
+
 ### 本地大模型设置
 1. 安装ollama 
 2. ollama run stable-code:code  (这个模型较小，大部分显卡都能跑)  
 或者你的显卡比较高安装这个：ollama run stable-code:3b-code-fp16
 3. 修改config.json里面的codex_api_base为http://localhost:11434/v1/chat
 4. 修改code_instruct_model为你的模型名称，stable-code:code或者stable-code:3b-code-fp16
-4. 剩下的就按照正常流程走即可。
-5. 如果调不通，请确认http://localhost:11434/v1/chat可用。
+5. 剩下的就按照正常流程走即可。
+6. 如果调不通，请确认http://localhost:11434/v1/chat可用。
        
 ### 重要说明
 `codex_max_tokens` 工作并不完美，已经移除。**JetBrains IDE 完美工作**，`VSCode` 需要执行以下脚本Patch之：
--- a/config.json.example
+++ b/config.json.example
@ -6,6 +6,7 @@
  "codex_api_key": "sk-xxx",
  "codex_api_organization": "",
  "codex_api_project": "",
+  "codex_max_tokens": 500,
  "code_instruct_model": "gpt-3.5-turbo-instruct",
  "chat_api_base": "https://api-proxy.oaipro.com/v1",
  "chat_api_key": "sk-xxx",
--- a/localModel.go
+++ b/localModel.go
@ -1 +0,0 @@
-package main
--- a/main.go
+++ b/main.go
@ -25,6 +25,8 @@ const DefaultInstructModel = "gpt-3.5-turbo-instruct"

 const StableCodeModelPrefix = "stable-code"

+const DeepSeekCoderModel = "deepseek-coder"
+
 type config struct {
 	Bind                 string            `json:"bind"`
 	ProxyUrl             string            `json:"proxy_url"`
@ -33,6 +35,7 @@ type config struct {
 	CodexApiKey          string            `json:"codex_api_key"`
 	CodexApiOrganization string            `json:"codex_api_organization"`
 	CodexApiProject      string            `json:"codex_api_project"`
+	CodexMaxTokens       int               `json:"codex_max_tokens"`
 	CodeInstructModel    string            `json:"code_instruct_model"`
 	ChatApiBase          string            `json:"chat_api_base"`
 	ChatApiKey           string            `json:"chat_api_key"`
@ -97,6 +100,14 @@ func readConfig() *config {
 		_cfg.CodeInstructModel = DefaultInstructModel
 	}

+	if _cfg.CodexMaxTokens == 0 {
+		_cfg.CodexMaxTokens = 500
+	}
+
+	if _cfg.ChatMaxTokens == 0 {
+		_cfg.ChatMaxTokens = 4096
+	}
+
 	return _cfg
 }

@ -173,6 +184,7 @@ func AuthMiddleware(authToken string) gin.HandlerFunc {
 func (s *ProxyService) InitRoutes(e *gin.Engine) {
 	e.GET("/_ping", s.pong)
 	e.GET("/models", s.models)
+	e.GET("/v1/models", s.models)
 	authToken := s.cfg.AuthToken // replace with your dynamic value as needed
 	if authToken != "" {
 		// 鉴权
@ -180,10 +192,16 @@ func (s *ProxyService) InitRoutes(e *gin.Engine) {
 		{
 			v1.POST("/chat/completions", s.completions)
 			v1.POST("/engines/copilot-codex/completions", s.codeCompletions)
+
+			v1.POST("/v1/chat/completions", s.completions)
+			v1.POST("/v1/engines/copilot-codex/completions", s.codeCompletions)
 		}
 	} else {
 		e.POST("/v1/chat/completions", s.completions)
 		e.POST("/v1/engines/copilot-codex/completions", s.codeCompletions)
+
+		e.POST("/v1/v1/chat/completions", s.completions)
+		e.POST("/v1/v1/engines/copilot-codex/completions", s.codeCompletions)
 	}
 }

@ -394,7 +412,7 @@ func (s *ProxyService) completions(c *gin.Context) {
 func (s *ProxyService) codeCompletions(c *gin.Context) {
 	ctx := c.Request.Context()

-	time.Sleep(100 * time.Millisecond)
+	time.Sleep(200 * time.Millisecond)
 	if ctx.Err() != nil {
 		abortCodex(c, http.StatusRequestTimeout)
 		return
@ -411,7 +429,6 @@ func (s *ProxyService) codeCompletions(c *gin.Context) {
 	proxyUrl := s.cfg.CodexApiBase + "/completions"
 	req, err := http.NewRequestWithContext(ctx, http.MethodPost, proxyUrl, io.NopCloser(bytes.NewBuffer(body)))
 	if nil != err {
-		//
 		abortCodex(c, http.StatusInternalServerError)
 		return
 	}
@ -460,13 +477,24 @@ func ConstructRequestBody(body []byte, cfg *config) []byte {
 	body, _ = sjson.DeleteBytes(body, "extra")
 	body, _ = sjson.DeleteBytes(body, "nwo")
 	body, _ = sjson.SetBytes(body, "model", cfg.CodeInstructModel)
+
+	if int(gjson.GetBytes(body, "max_tokens").Int()) > cfg.CodexMaxTokens {
+		body, _ = sjson.SetBytes(body, "max_tokens", cfg.CodexMaxTokens)
+	}
+
 	if strings.Contains(cfg.CodeInstructModel, StableCodeModelPrefix) {
 		return constructWithStableCodeModel(body)
+	} else if strings.HasPrefix(cfg.CodeInstructModel, DeepSeekCoderModel) {
+		if gjson.GetBytes(body, "n").Int() > 1 {
+			body, _ = sjson.SetBytes(body, "n", 1)
+		}
 	}
+
 	if strings.HasSuffix(cfg.ChatApiBase, "chat") {
 		// @Todo  constructWithChatModel
 		// 如果code base以chat结尾则构建chatModel，暂时没有好的prompt
 	}
+
 	return body
 }