add deepseek-coder fim support
Signed-off-by: wozulong <>
This commit is contained in:
parent
e251e9e50b
commit
6325a5e2f5
52
README.md
52
README.md
|
|
@ -28,42 +28,56 @@
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"bind": "127.0.0.1:8181",
|
"bind": "127.0.0.1:8181",
|
||||||
"proxy_url": "",
|
"proxy_url": "",
|
||||||
"timeout": 600,
|
"timeout": 600,
|
||||||
"codex_api_base": "https://api-proxy.oaipro.com/v1",
|
"codex_api_base": "https://api-proxy.oaipro.com/v1",
|
||||||
"codex_api_key": "sk-xxx",
|
"codex_api_key": "sk-xxx",
|
||||||
"codex_api_organization": "",
|
"codex_api_organization": "",
|
||||||
"codex_api_project": "",
|
"codex_api_project": "",
|
||||||
"code_instruct_model": "gpt-3.5-turbo-instruct",
|
"codex_max_tokens": 500,
|
||||||
"chat_api_base": "https://api-proxy.oaipro.com/v1",
|
"code_instruct_model": "gpt-3.5-turbo-instruct",
|
||||||
"chat_api_key": "sk-xxx",
|
"chat_api_base": "https://api-proxy.oaipro.com/v1",
|
||||||
"chat_api_organization": "",
|
"chat_api_key": "sk-xxx",
|
||||||
"chat_api_project": "",
|
"chat_api_organization": "",
|
||||||
"chat_max_tokens": 4096,
|
"chat_api_project": "",
|
||||||
"chat_model_default": "gpt-4o",
|
"chat_max_tokens": 4096,
|
||||||
"chat_model_map": {},
|
"chat_model_default": "gpt-4o",
|
||||||
"auth_token": ""
|
"chat_model_map": {},
|
||||||
|
"chat_locale": "zh_CN",
|
||||||
|
"auth_token": ""
|
||||||
}
|
}
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
`organization` 和 `project` 仅在你确实拥有对应的值,并且清楚其用途时才需要填写,否则请留空。
|
`organization` 和 `project` 仅在你确实拥有对应的值,并且清楚其用途时才需要填写,否则请留空。
|
||||||
|
|
||||||
`chat_model_map` 是个模型映射的字典。会将请求的模型映射到你想要的,如果不存在映射,则使用 `chat_model_default` 。
|
`chat_model_map` 是个模型映射的字典。会将请求的模型映射到你想要的,如果不存在映射,则使用 `chat_model_default` 。
|
||||||
|
|
||||||
|
`codex_max_tokens` 可以设置为你希望的最大Token数,你设置的时候最好知道自己在做什么。代码生成通常使用 `500` 即可。
|
||||||
|
|
||||||
`chat_max_tokens` 可以设置为你希望的最大Token数,你设置的时候最好知道自己在做什么。`gpt-4o` 输出最大为 `4096`
|
`chat_max_tokens` 可以设置为你希望的最大Token数,你设置的时候最好知道自己在做什么。`gpt-4o` 输出最大为 `4096`
|
||||||
|
|
||||||
可以使用 `OVERRIDE_` + 大写配置项名作为环境变量,来覆盖 `config.json` 中的值。例如:`OVERRIDE_CODEX_API_KEY=sk-xxxx`
|
可以使用 `OVERRIDE_` + 大写配置项名作为环境变量,来覆盖 `config.json` 中的值。例如:`OVERRIDE_CODEX_API_KEY=sk-xxxx`
|
||||||
|
|
||||||
|
### DeepSeek Coder 设置
|
||||||
|
如果你希望使用 DeepSeek FIM 来进行代码补全,请重点修改以下配置:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"codex_api_base": "https://api.deepseek.com/beta/v1",
|
||||||
|
"codex_api_key": "sk-xxx",
|
||||||
|
"code_instruct_model": "deepseek-coder",
|
||||||
|
```
|
||||||
|
|
||||||
### 本地大模型设置
|
### 本地大模型设置
|
||||||
1. 安装ollama
|
1. 安装ollama
|
||||||
2. ollama run stable-code:code (这个模型较小,大部分显卡都能跑)
|
2. ollama run stable-code:code (这个模型较小,大部分显卡都能跑)
|
||||||
或者,如果你的显卡性能较强,可以运行这个:ollama run stable-code:3b-code-fp16
|
或者,如果你的显卡性能较强,可以运行这个:ollama run stable-code:3b-code-fp16
|
||||||
3. 修改config.json里面的codex_api_base为http://localhost:11434/v1/chat
|
3. 修改config.json里面的codex_api_base为http://localhost:11434/v1/chat
|
||||||
4. 修改code_instruct_model为你的模型名称,stable-code:code或者stable-code:3b-code-fp16
|
4. 修改code_instruct_model为你的模型名称,stable-code:code或者stable-code:3b-code-fp16
|
||||||
4. 剩下的就按照正常流程走即可。
|
5. 剩下的就按照正常流程走即可。
|
||||||
5. 如果调不通,请确认http://localhost:11434/v1/chat可用。
|
6. 如果调不通,请确认http://localhost:11434/v1/chat可用。
|
||||||
|
|
||||||
### 重要说明
|
### 重要说明
|
||||||
`codex_max_tokens` 工作并不完美,已经移除。**JetBrains IDE 完美工作**,`VSCode` 需要执行以下脚本Patch之:
|
`codex_max_tokens` 曾因工作并不完美而被移除,现已重新加入(见上文配置说明)。**JetBrains IDE 完美工作**,`VSCode` 需要执行以下脚本Patch之:
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@
|
||||||
"codex_api_key": "sk-xxx",
|
"codex_api_key": "sk-xxx",
|
||||||
"codex_api_organization": "",
|
"codex_api_organization": "",
|
||||||
"codex_api_project": "",
|
"codex_api_project": "",
|
||||||
|
"codex_max_tokens": 500,
|
||||||
"code_instruct_model": "gpt-3.5-turbo-instruct",
|
"code_instruct_model": "gpt-3.5-turbo-instruct",
|
||||||
"chat_api_base": "https://api-proxy.oaipro.com/v1",
|
"chat_api_base": "https://api-proxy.oaipro.com/v1",
|
||||||
"chat_api_key": "sk-xxx",
|
"chat_api_key": "sk-xxx",
|
||||||
|
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
package main
|
|
||||||
32
main.go
32
main.go
|
|
@ -25,6 +25,8 @@ const DefaultInstructModel = "gpt-3.5-turbo-instruct"
|
||||||
|
|
||||||
const StableCodeModelPrefix = "stable-code"
|
const StableCodeModelPrefix = "stable-code"
|
||||||
|
|
||||||
|
const DeepSeekCoderModel = "deepseek-coder"
|
||||||
|
|
||||||
type config struct {
|
type config struct {
|
||||||
Bind string `json:"bind"`
|
Bind string `json:"bind"`
|
||||||
ProxyUrl string `json:"proxy_url"`
|
ProxyUrl string `json:"proxy_url"`
|
||||||
|
|
@ -33,6 +35,7 @@ type config struct {
|
||||||
CodexApiKey string `json:"codex_api_key"`
|
CodexApiKey string `json:"codex_api_key"`
|
||||||
CodexApiOrganization string `json:"codex_api_organization"`
|
CodexApiOrganization string `json:"codex_api_organization"`
|
||||||
CodexApiProject string `json:"codex_api_project"`
|
CodexApiProject string `json:"codex_api_project"`
|
||||||
|
CodexMaxTokens int `json:"codex_max_tokens"`
|
||||||
CodeInstructModel string `json:"code_instruct_model"`
|
CodeInstructModel string `json:"code_instruct_model"`
|
||||||
ChatApiBase string `json:"chat_api_base"`
|
ChatApiBase string `json:"chat_api_base"`
|
||||||
ChatApiKey string `json:"chat_api_key"`
|
ChatApiKey string `json:"chat_api_key"`
|
||||||
|
|
@ -97,6 +100,14 @@ func readConfig() *config {
|
||||||
_cfg.CodeInstructModel = DefaultInstructModel
|
_cfg.CodeInstructModel = DefaultInstructModel
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if _cfg.CodexMaxTokens == 0 {
|
||||||
|
_cfg.CodexMaxTokens = 500
|
||||||
|
}
|
||||||
|
|
||||||
|
if _cfg.ChatMaxTokens == 0 {
|
||||||
|
_cfg.ChatMaxTokens = 4096
|
||||||
|
}
|
||||||
|
|
||||||
return _cfg
|
return _cfg
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -173,6 +184,7 @@ func AuthMiddleware(authToken string) gin.HandlerFunc {
|
||||||
func (s *ProxyService) InitRoutes(e *gin.Engine) {
|
func (s *ProxyService) InitRoutes(e *gin.Engine) {
|
||||||
e.GET("/_ping", s.pong)
|
e.GET("/_ping", s.pong)
|
||||||
e.GET("/models", s.models)
|
e.GET("/models", s.models)
|
||||||
|
e.GET("/v1/models", s.models)
|
||||||
authToken := s.cfg.AuthToken // replace with your dynamic value as needed
|
authToken := s.cfg.AuthToken // replace with your dynamic value as needed
|
||||||
if authToken != "" {
|
if authToken != "" {
|
||||||
// 鉴权
|
// 鉴权
|
||||||
|
|
@ -180,10 +192,16 @@ func (s *ProxyService) InitRoutes(e *gin.Engine) {
|
||||||
{
|
{
|
||||||
v1.POST("/chat/completions", s.completions)
|
v1.POST("/chat/completions", s.completions)
|
||||||
v1.POST("/engines/copilot-codex/completions", s.codeCompletions)
|
v1.POST("/engines/copilot-codex/completions", s.codeCompletions)
|
||||||
|
|
||||||
|
v1.POST("/v1/chat/completions", s.completions)
|
||||||
|
v1.POST("/v1/engines/copilot-codex/completions", s.codeCompletions)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
e.POST("/v1/chat/completions", s.completions)
|
e.POST("/v1/chat/completions", s.completions)
|
||||||
e.POST("/v1/engines/copilot-codex/completions", s.codeCompletions)
|
e.POST("/v1/engines/copilot-codex/completions", s.codeCompletions)
|
||||||
|
|
||||||
|
e.POST("/v1/v1/chat/completions", s.completions)
|
||||||
|
e.POST("/v1/v1/engines/copilot-codex/completions", s.codeCompletions)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -394,7 +412,7 @@ func (s *ProxyService) completions(c *gin.Context) {
|
||||||
func (s *ProxyService) codeCompletions(c *gin.Context) {
|
func (s *ProxyService) codeCompletions(c *gin.Context) {
|
||||||
ctx := c.Request.Context()
|
ctx := c.Request.Context()
|
||||||
|
|
||||||
time.Sleep(100 * time.Millisecond)
|
time.Sleep(200 * time.Millisecond)
|
||||||
if ctx.Err() != nil {
|
if ctx.Err() != nil {
|
||||||
abortCodex(c, http.StatusRequestTimeout)
|
abortCodex(c, http.StatusRequestTimeout)
|
||||||
return
|
return
|
||||||
|
|
@ -411,7 +429,6 @@ func (s *ProxyService) codeCompletions(c *gin.Context) {
|
||||||
proxyUrl := s.cfg.CodexApiBase + "/completions"
|
proxyUrl := s.cfg.CodexApiBase + "/completions"
|
||||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, proxyUrl, io.NopCloser(bytes.NewBuffer(body)))
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, proxyUrl, io.NopCloser(bytes.NewBuffer(body)))
|
||||||
if nil != err {
|
if nil != err {
|
||||||
//
|
|
||||||
abortCodex(c, http.StatusInternalServerError)
|
abortCodex(c, http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
@ -460,13 +477,24 @@ func ConstructRequestBody(body []byte, cfg *config) []byte {
|
||||||
body, _ = sjson.DeleteBytes(body, "extra")
|
body, _ = sjson.DeleteBytes(body, "extra")
|
||||||
body, _ = sjson.DeleteBytes(body, "nwo")
|
body, _ = sjson.DeleteBytes(body, "nwo")
|
||||||
body, _ = sjson.SetBytes(body, "model", cfg.CodeInstructModel)
|
body, _ = sjson.SetBytes(body, "model", cfg.CodeInstructModel)
|
||||||
|
|
||||||
|
if int(gjson.GetBytes(body, "max_tokens").Int()) > cfg.CodexMaxTokens {
|
||||||
|
body, _ = sjson.SetBytes(body, "max_tokens", cfg.CodexMaxTokens)
|
||||||
|
}
|
||||||
|
|
||||||
if strings.Contains(cfg.CodeInstructModel, StableCodeModelPrefix) {
|
if strings.Contains(cfg.CodeInstructModel, StableCodeModelPrefix) {
|
||||||
return constructWithStableCodeModel(body)
|
return constructWithStableCodeModel(body)
|
||||||
|
} else if strings.HasPrefix(cfg.CodeInstructModel, DeepSeekCoderModel) {
|
||||||
|
if gjson.GetBytes(body, "n").Int() > 1 {
|
||||||
|
body, _ = sjson.SetBytes(body, "n", 1)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if strings.HasSuffix(cfg.ChatApiBase, "chat") {
|
if strings.HasSuffix(cfg.ChatApiBase, "chat") {
|
||||||
// @Todo constructWithChatModel
|
// @Todo constructWithChatModel
|
||||||
// 如果code base以chat结尾则构建chatModel,暂时没有好的prompt
|
// 如果code base以chat结尾则构建chatModel,暂时没有好的prompt
|
||||||
}
|
}
|
||||||
|
|
||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue