From 7be0c5ecd282ec583ff7cfc91579ce4d46552e2b Mon Sep 17 00:00:00 2001 From: wozulong <> Date: Fri, 17 May 2024 18:36:40 +0800 Subject: [PATCH 01/10] fix chat Signed-off-by: wozulong <> --- main.go | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/main.go b/main.go index c89d734..e387ac2 100644 --- a/main.go +++ b/main.go @@ -21,7 +21,7 @@ import ( "time" ) -const INSTRUCT_MODEL = "gpt-3.5-turbo-instruct" +const InstructModel = "gpt-3.5-turbo-instruct" type config struct { Bind string `json:"bind"` @@ -146,7 +146,7 @@ func NewProxyService(cfg *config) (*ProxyService, error) { return nil, err } - tokenizer, err := tiktoken.EncodingForModel(INSTRUCT_MODEL) + tokenizer, err := tiktoken.EncodingForModel(InstructModel) if nil != err { return nil, err } @@ -180,6 +180,8 @@ func (s *ProxyService) completions(c *gin.Context) { } body, _ = sjson.SetBytes(body, "model", model) body, _ = sjson.DeleteBytes(body, "intent") + body, _ = sjson.DeleteBytes(body, "intent_threshold") + body, _ = sjson.DeleteBytes(body, "intent_content") proxyUrl := s.cfg.ChatApiBase + "/chat/completions" req, err := http.NewRequestWithContext(ctx, http.MethodPost, proxyUrl, io.NopCloser(bytes.NewBuffer(body))) @@ -227,6 +229,14 @@ func (s *ProxyService) completions(c *gin.Context) { _, _ = io.Copy(c.Writer, resp.Body) } +func (s *ProxyService) countToken(token string) int { + if "" == token { + return 0 + } + + return len(s.tokenizer.Encode(token, nil, nil)) +} + func (s *ProxyService) codeCompletions(c *gin.Context) { ctx := c.Request.Context() @@ -244,8 +254,8 @@ func (s *ProxyService) codeCompletions(c *gin.Context) { prompt := gjson.GetBytes(body, "prompt").String() suffix := gjson.GetBytes(body, "suffix").String() - inputTokens := len(s.tokenizer.Encode(prompt, nil, nil)) - suffixTokens := len(s.tokenizer.Encode(suffix, nil, nil)) + inputTokens := s.countToken(prompt) + suffixTokens := s.countToken(suffix) outputTokens := int(gjson.GetBytes(body, "max_tokens").Int()) totalTokens := inputTokens + suffixTokens + outputTokens @@ -254,7 +264,7 @@ func (s *ProxyService) codeCompletions(c *gin.Context) { for left < right { mid := (left + right) / 2 subPrompt := prompt[mid:] - subInputTokens := len(s.tokenizer.Encode(subPrompt, nil, nil)) + subInputTokens := s.countToken(subPrompt) totalTokens = subInputTokens + suffixTokens + outputTokens if totalTokens > s.cfg.CodexMaxTokens { left = mid + 1 @@ -268,7 +278,7 @@ func (s *ProxyService) codeCompletions(c *gin.Context) { body, _ = sjson.DeleteBytes(body, "extra") body, _ = sjson.DeleteBytes(body, "nwo") - body, _ = sjson.SetBytes(body, "model", INSTRUCT_MODEL) + body, _ = sjson.SetBytes(body, "model", InstructModel) proxyUrl := s.cfg.CodexApiBase + "/completions" req, err := http.NewRequestWithContext(ctx, http.MethodPost, proxyUrl, io.NopCloser(bytes.NewBuffer(body))) From 075342606f4e42a7a51d013efe7e2fcdcc6fe7a8 Mon Sep 17 00:00:00 2001 From: wozulong <> Date: Sat, 18 May 2024 19:55:51 +0800 Subject: [PATCH 02/10] reduce output tokens Signed-off-by: wozulong <> --- config.json.example | 1 + main.go | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/config.json.example b/config.json.example index 0d315b1..6c2a5d2 100644 --- a/config.json.example +++ b/config.json.example @@ -11,6 +11,7 @@ "chat_api_key": "sk-xxx", "chat_api_organization": "", "chat_api_project": "", + "chat_max_tokens": 4096, "chat_model_default": "gpt-4o", "chat_model_map": {} } \ No newline at end of file diff --git a/main.go b/main.go index e387ac2..4f55082 100644 --- a/main.go +++ b/main.go @@ -36,6 +36,7 @@ type config struct { ChatApiKey string `json:"chat_api_key"` ChatApiOrganization string `json:"chat_api_organization"` ChatApiProject string `json:"chat_api_project"` + ChatMaxTokens int `json:"chat_max_tokens"` ChatModelDefault string `json:"chat_model_default"` ChatModelMap map[string]string `json:"chat_model_map"` } @@ -183,6 +184,10 @@ func (s *ProxyService) completions(c *gin.Context) { body, _ = sjson.DeleteBytes(body, "intent_threshold") body, _ = sjson.DeleteBytes(body, "intent_content") + if int(gjson.GetBytes(body, "max_tokens").Int()) > s.cfg.ChatMaxTokens { + body, _ = sjson.SetBytes(body, "max_tokens", s.cfg.ChatMaxTokens) + } + proxyUrl := s.cfg.ChatApiBase + "/chat/completions" req, err := http.NewRequestWithContext(ctx, http.MethodPost, proxyUrl, io.NopCloser(bytes.NewBuffer(body))) if nil != err { From 5341ccb23a0fa85dac8e05277c78a1e3ed491863 Mon Sep 17 00:00:00 2001 From: wozulong <> Date: Sat, 18 May 2024 19:59:11 +0800 Subject: [PATCH 03/10] update README Signed-off-by: wozulong <> --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 3da5661..2202233 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ "chat_api_key": "sk-xxx", "chat_api_organization": "", "chat_api_project": "", + "chat_max_tokens": 4096, "chat_model_default": "gpt-4o", "chat_model_map": {} } @@ -48,7 +49,9 @@ `chat_model_map` 是个模型映射的字典。会将请求的模型映射到你想要的,如果不存在映射,则使用 `chat_model_default` 。 -`code_max_tokens` 可以设置为你希望的最大Token数,你设置的时候最好知道自己在做什么。 +`codex_max_tokens` 可以设置为你希望的最大Token数,你设置的时候最好知道自己在做什么。 + +`chat_max_tokens` 可以设置为你希望的最大Token数,你设置的时候最好知道自己在做什么。`gpt-4o` 输出最大为 `4096` 可以通过 `OVERRIDE_` + 大写配置项作为环境变量,可以覆盖 `config.json` 中的值。例如:`OVERRIDE_CODEX_API_KEY=sk-xxxx` From f8e28c371902516892ecf3d151ea3e9cda2f122e Mon Sep 17 00:00:00 2001 From: wozulong <> Date: Sat, 18 May 2024 22:46:48 +0800 Subject: [PATCH 04/10] update max_tokens Signed-off-by: wozulong <> --- README.md | 12 ++++++++--- config.json.example | 1 - go.mod | 3 --- go.sum | 6 ------ main.go | 49 ++++----------------------------------------- 5 files changed, 13 insertions(+), 58 deletions(-) diff --git a/README.md b/README.md index 2202233..c9e6267 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,6 @@ "codex_api_key": "sk-xxx", "codex_api_organization": "", "codex_api_project": "", - "codex_max_tokens": 4093, "chat_api_base": "https://api-proxy.oaipro.com/v1", "chat_api_key": "sk-xxx", "chat_api_organization": "", @@ -49,12 +48,19 @@ `chat_model_map` 是个模型映射的字典。会将请求的模型映射到你想要的,如果不存在映射,则使用 `chat_model_default` 。 -`codex_max_tokens` 可以设置为你希望的最大Token数,你设置的时候最好知道自己在做什么。 - `chat_max_tokens` 可以设置为你希望的最大Token数,你设置的时候最好知道自己在做什么。`gpt-4o` 输出最大为 `4096` 可以通过 `OVERRIDE_` + 大写配置项作为环境变量,可以覆盖 `config.json` 中的值。例如:`OVERRIDE_CODEX_API_KEY=sk-xxxx` +### 重要说明 +`codex_max_tokens` 工作并不完美,已经移除。**JetBrains IDE 完美工作**,`VSCode` 需要执行以下脚本Patch之: + +* macOS `sed -i '' -E 's/\.maxPromptCompletionTokens\(([a-zA-Z0-9_]+),([0-9]+)\)/.maxPromptCompletionTokens(\1,2048)/' ~/.vscode/extensions/github.copilot-*/dist/extension.js` +* Linux `sed -E 's/\.maxPromptCompletionTokens\(([a-zA-Z0-9_]+),([0-9]+)\)/.maxPromptCompletionTokens(\1,2048)/' ~/.vscode/extensions/github.copilot-*/dist/extension.js` +* Windows 不知道怎么写,期待大佬PR。 +* 因为是Patch,所以:**Copilot每次升级都要执行一次**。 +* 具体原因是客户端需要根据 `max_tokens` 精密计算prompt,后台删减会有问题。 + ### 其他说明 1. 理论上,Chat 部分可以使用 `chat2api` ,而 Codex 代码生成部分则不太适合使用 `chat2api` 。 2. 代码生成部分做过延时生成和客户端 Cancel 处理,很有效节省你的Token。 diff --git a/config.json.example b/config.json.example index 6c2a5d2..1712968 100644 --- a/config.json.example +++ b/config.json.example @@ -6,7 +6,6 @@ "codex_api_key": "sk-xxx", "codex_api_organization": "", "codex_api_project": "", - "codex_max_tokens": 2048, "chat_api_base": "https://api-proxy.oaipro.com/v1", "chat_api_key": "sk-xxx", "chat_api_organization": "", diff --git a/go.mod b/go.mod index 34c8f1d..5ef356a 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,6 @@ toolchain go1.21.4 require ( github.com/gin-gonic/gin v1.10.0 - github.com/linux-do/tiktoken-go v0.7.0 github.com/tidwall/gjson v1.17.1 github.com/tidwall/sjson v1.2.5 golang.org/x/net v0.25.0 @@ -17,7 +16,6 @@ require ( github.com/bytedance/sonic/loader v0.1.1 // indirect github.com/cloudwego/base64x v0.1.4 // indirect github.com/cloudwego/iasm v0.2.0 // indirect - github.com/dlclark/regexp2 v1.11.0 // indirect github.com/gabriel-vasile/mimetype v1.4.3 // indirect github.com/gin-contrib/sse v0.1.0 // indirect github.com/go-playground/locales v0.14.1 // indirect @@ -25,7 +23,6 @@ require ( github.com/go-playground/validator/v10 v10.20.0 // indirect github.com/goccy/go-json v0.10.2 // indirect github.com/google/go-cmp v0.5.9 // indirect - github.com/google/uuid v1.6.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/cpuid/v2 v2.2.7 // indirect github.com/kr/pretty v0.3.0 // indirect diff --git a/go.sum b/go.sum index e05fea7..ebce207 100644 --- a/go.sum +++ b/go.sum @@ -10,8 +10,6 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dlclark/regexp2 v1.11.0 h1:G/nrcoOa7ZXlpoa/91N3X7mM3r8eIlMBBJZvsz/mxKI= -github.com/dlclark/regexp2 v1.11.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0= github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk= github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= @@ -31,8 +29,6 @@ github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MG github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= -github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= @@ -49,8 +45,6 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= -github.com/linux-do/tiktoken-go v0.7.0 h1:Kcm/miJ5gp77srtF8GQWnfq7W9kTaXEuHZg/g9IVEu8= -github.com/linux-do/tiktoken-go v0.7.0/go.mod h1:9Vkdtp0ngi4USmrdSx984iuIQ5IMr0hnUdz4jZZTJb8= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= diff --git a/main.go b/main.go index 4f55082..f77175f 100644 --- a/main.go +++ b/main.go @@ -6,7 +6,6 @@ import ( "encoding/json" "errors" "github.com/gin-gonic/gin" - "github.com/linux-do/tiktoken-go" "github.com/tidwall/gjson" "github.com/tidwall/sjson" "golang.org/x/net/http2" @@ -31,7 +30,6 @@ type config struct { CodexApiKey string `json:"codex_api_key"` CodexApiOrganization string `json:"codex_api_organization"` CodexApiProject string `json:"codex_api_project"` - CodexMaxTokens int `json:"codex_max_tokens"` ChatApiBase string `json:"chat_api_base"` ChatApiKey string `json:"chat_api_key"` ChatApiOrganization string `json:"chat_api_organization"` @@ -136,9 +134,8 @@ func closeIO(c io.Closer) { } type ProxyService struct { - cfg *config - client *http.Client - tokenizer *tiktoken.Tiktoken + cfg *config + client *http.Client } func NewProxyService(cfg *config) (*ProxyService, error) { @@ -147,15 +144,9 @@ func NewProxyService(cfg *config) (*ProxyService, error) { return nil, err } - tokenizer, err := tiktoken.EncodingForModel(InstructModel) - if nil != err { - return nil, err - } - return &ProxyService{ - cfg: cfg, - client: client, - tokenizer: tokenizer, + cfg: cfg, + client: client, }, nil } @@ -234,14 +225,6 @@ func (s *ProxyService) completions(c *gin.Context) { _, _ = io.Copy(c.Writer, resp.Body) } -func (s *ProxyService) countToken(token string) int { - if "" == token { - return 0 - } - - return len(s.tokenizer.Encode(token, nil, nil)) -} - func (s *ProxyService) codeCompletions(c *gin.Context) { ctx := c.Request.Context() @@ -257,30 +240,6 @@ func (s *ProxyService) codeCompletions(c *gin.Context) { return } - prompt := gjson.GetBytes(body, "prompt").String() - suffix := gjson.GetBytes(body, "suffix").String() - inputTokens := s.countToken(prompt) - suffixTokens := s.countToken(suffix) - outputTokens := int(gjson.GetBytes(body, "max_tokens").Int()) - - totalTokens := inputTokens + suffixTokens + outputTokens - if totalTokens > s.cfg.CodexMaxTokens { // reduce - left, right := 0, len(prompt) - for left < right { - mid := (left + right) / 2 - subPrompt := prompt[mid:] - subInputTokens := s.countToken(subPrompt) - totalTokens = subInputTokens + suffixTokens + outputTokens - if totalTokens > s.cfg.CodexMaxTokens { - left = mid + 1 - } else { - right = mid - } - } - - body, _ = sjson.SetBytes(body, "prompt", prompt[left:]) - } - body, _ = sjson.DeleteBytes(body, "extra") body, _ = sjson.DeleteBytes(body, "nwo") body, _ = sjson.SetBytes(body, "model", InstructModel) From 247a8748dc63c0146f6f5f8bda3da803b7f6099b Mon Sep 17 00:00:00 2001 From: wozulong <> Date: Sat, 18 May 2024 22:52:20 +0800 Subject: [PATCH 05/10] update README Signed-off-by: wozulong <> --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index c9e6267..ca01719 100644 --- a/README.md +++ b/README.md @@ -64,9 +64,8 @@ ### 其他说明 1. 理论上,Chat 部分可以使用 `chat2api` ,而 Codex 代码生成部分则不太适合使用 `chat2api` 。 2. 代码生成部分做过延时生成和客户端 Cancel 处理,很有效节省你的Token。 -3. 我目前就试了下 `VSCode` ,至于 `JetBrains` 等IDE尚未适配,如果你有相关经验,请告诉我。 -4. 项目基于 `MIT` 协议发布,你可以修改,请保留原作者信息。 -5. 有什么问题,请在论坛 https://linux.do 讨论,欢迎PR。 +3. 项目基于 `MIT` 协议发布,你可以修改,请保留原作者信息。 +4. 有什么问题,请在论坛 https://linux.do 讨论,欢迎PR。 ### Star History From ed40f68e99fadb7704d395fd8faac710b986c5de Mon Sep 17 00:00:00 2001 From: wzdnzd Date: Sun, 19 May 2024 16:00:21 +0800 Subject: [PATCH 06/10] add some scripts for replacing and restoring max tokens (#21) --- scripts/replace_max_tokens.vbs | 49 ++++++++++++++++++++++++++++++++++ scripts/restore_max_tokens.vbs | 30 +++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 scripts/replace_max_tokens.vbs create mode 100644 scripts/restore_max_tokens.vbs diff --git a/scripts/replace_max_tokens.vbs b/scripts/replace_max_tokens.vbs new file mode 100644 index 0000000..06e61ba --- /dev/null +++ b/scripts/replace_max_tokens.vbs @@ -0,0 +1,49 @@ +' VBScript to change max tokens to 2048 + +MsgBox "It may take a few seconds to execute this script." & vbCrLf & vbCrLf & "Click 'OK' button and wait for the prompt of 'Done.' to pop up!" + +Const ForReading = 1 +Const ForWriting = 2 + +' Subpath of the file to be replaced +subpath = "dist\extension.js" + +pattern = "\.maxPromptCompletionTokens\(([a-zA-Z0-9_]+),([0-9]+)\)" +replacement = ".maxPromptCompletionTokens($1,2048)" + +' Iterate over all github copilot directories +Set objFSO = CreateObject("Scripting.FileSystemObject") +Set objShell = CreateObject("WScript.Shell") +Set colExtensions = objFSO.GetFolder(objShell.ExpandEnvironmentStrings("%USERPROFILE%") & "\.vscode\extensions").SubFolders + +For Each objExtension In colExtensions + extension_path = objExtension.Path & "\" & subpath + If objFSO.FileExists(extension_path) Then + backupfile = extension_path & ".bak" + + ' Delete if backup file exists + If objFSO.FileExists(backupfile) Then + objFSO.DeleteFile backupfile, True + End If + + ' Backup + objFSO.CopyFile extension_path, backupfile + + ' Do search and replace with pattern + Set objFile = objFSO.OpenTextFile(extension_path, ForReading) + strContent = objFile.ReadAll + objFile.Close + + Set objRegEx = New RegExp + objRegEx.Global = True + objRegEx.IgnoreCase = True + objRegEx.Pattern = pattern + strContent = objRegEx.Replace(strContent, replacement) + + Set objFile = objFSO.OpenTextFile(extension_path, ForWriting) + objFile.Write strContent + objFile.Close + End If +Next + +MsgBox "Max tokens modification completed" diff --git a/scripts/restore_max_tokens.vbs b/scripts/restore_max_tokens.vbs new file mode 100644 index 0000000..3a10b8e --- /dev/null +++ b/scripts/restore_max_tokens.vbs @@ -0,0 +1,30 @@ +' VBScript to recovery max tokens +MsgBox "It may take a few seconds to execute this script." & vbCrLf & vbCrLf & "Click 'OK' button and wait for the prompt of 'Done.' to pop up!" + +Const ForReading = 1 +Const ForWriting = 2 + +' Subpath of the file to be recovery +subpath = "dist\extension.js" + +' Iterate over all github copilot directories +Set objFSO = CreateObject("Scripting.FileSystemObject") +Set objShell = CreateObject("WScript.Shell") +Set colExtensions = objFSO.GetFolder(objShell.ExpandEnvironmentStrings("%USERPROFILE%") & "\.vscode\extensions").SubFolders + +For Each objExtension In colExtensions + extension_path = objExtension.Path & "\" & subpath + backupfile = extension_path & ".bak" + + If objFSO.FileExists(backupfile) Then + ' Delete if exist extension file + If objFSO.FileExists(extension_path) Then + objFSO.DeleteFile extension_path, True + End If + + ' Replace + objFSO.MoveFile backupfile, extension_path + End If +Next + +MsgBox "Restore max tokens to default successed" From 7992cbe8f25030b1163a4e695bb3f56ae0e74ccf Mon Sep 17 00:00:00 2001 From: mibody2 <163755168+mibody2@users.noreply.github.com> Date: Sun, 19 May 2024 22:36:44 +0800 Subject: [PATCH 07/10] feat: jb can set the locale for chat (#22) Co-authored-by: mibody2 --- config.json.example | 3 ++- main.go | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/config.json.example b/config.json.example index 1712968..26c77b0 100644 --- a/config.json.example +++ b/config.json.example @@ -12,5 +12,6 @@ "chat_api_project": "", "chat_max_tokens": 4096, "chat_model_default": "gpt-4o", - "chat_model_map": {} + "chat_model_map": {}, + "chat_locale": "zh_CN" } \ No newline at end of file diff --git a/main.go b/main.go index f77175f..1a36a42 100644 --- a/main.go +++ b/main.go @@ -37,6 +37,7 @@ type config struct { ChatMaxTokens int `json:"chat_max_tokens"` ChatModelDefault string `json:"chat_model_default"` ChatModelMap map[string]string `json:"chat_model_map"` + ChatLocale string `json:"chat_locale"` } func readConfig() *config { @@ -171,6 +172,19 @@ func (s *ProxyService) completions(c *gin.Context) { model = s.cfg.ChatModelDefault } body, _ = sjson.SetBytes(body, "model", model) + + if !gjson.GetBytes(body, "function_call").Exists() { + messages := gjson.GetBytes(body, "messages").Array() + lastIndex := len(messages) - 1 + if !strings.Contains(messages[lastIndex].Get("content").String(), "Respond in the following locale") { + locale := s.cfg.ChatLocale + if locale == "" { + locale = "zh_CN" + } + body, _ = sjson.SetBytes(body, "messages."+strconv.Itoa(lastIndex)+".content", messages[lastIndex].Get("content").String()+"Respond in the following locale: "+locale+".") + } + } + body, _ = sjson.DeleteBytes(body, "intent") body, _ = sjson.DeleteBytes(body, "intent_threshold") body, _ = sjson.DeleteBytes(body, "intent_content") From 55d6961c3b2058f650933e10eae5aba215a07d4d Mon Sep 17 00:00:00 2001 From: xpxz <97490782+heheda123123@users.noreply.github.com> Date: Thu, 23 May 2024 11:03:23 +0800 Subject: [PATCH 08/10] =?UTF-8?q?=E8=A1=A5=E5=85=85windows=E4=B8=8A?= =?UTF-8?q?=E4=BF=AE=E6=94=B9maxPromptCompletionTokens=E7=9A=84=E6=96=B9?= =?UTF-8?q?=E6=B3=95=20(#23)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ca01719..ee7f073 100644 --- a/README.md +++ b/README.md @@ -57,10 +57,41 @@ * macOS `sed -i '' -E 's/\.maxPromptCompletionTokens\(([a-zA-Z0-9_]+),([0-9]+)\)/.maxPromptCompletionTokens(\1,2048)/' ~/.vscode/extensions/github.copilot-*/dist/extension.js` * Linux `sed -E 's/\.maxPromptCompletionTokens\(([a-zA-Z0-9_]+),([0-9]+)\)/.maxPromptCompletionTokens(\1,2048)/' ~/.vscode/extensions/github.copilot-*/dist/extension.js` -* Windows 不知道怎么写,期待大佬PR。 +* Windows 可以用如下的python脚本进行替换 * 因为是Patch,所以:**Copilot每次升级都要执行一次**。 * 具体原因是客户端需要根据 `max_tokens` 精密计算prompt,后台删减会有问题。 +``` +# github copilot extention replace script +import re +import glob +import os + +file_paths = glob.glob(os.getenv("USERPROFILE") + r'\.vscode\extensions\github.copilot-*\dist\extension.js') +if file_paths == list(): + print("no copilot extension found") + exit() + +pattern = re.compile(r'\.maxPromptCompletionTokens\(([a-zA-Z0-9_]+),([0-9]+)\)') +replacement = r'.maxPromptCompletionTokens(\1,2048)' + +for file_path in file_paths: + with open(file_path, 'r', encoding="utf-8") as file: + content = file.read() + + new_content = pattern.sub(replacement, content) + if new_content == content: + print("no match found in " + file_path) + continue + else: + print("replaced " + file_path) + + with open(file_path, 'w', encoding='utf-8') as file: + file.write(new_content) + +print("replace finish") +``` + ### 其他说明 1. 理论上,Chat 部分可以使用 `chat2api` ,而 Codex 代码生成部分则不太适合使用 `chat2api` 。 2. 代码生成部分做过延时生成和客户端 Cancel 处理,很有效节省你的Token。 From c9e7d75fec4da5529301e083f4ccdc327f4cfe73 Mon Sep 17 00:00:00 2001 From: xixingya <2679431923@qq.com> Date: Thu, 23 May 2024 18:54:40 +0800 Subject: [PATCH 10/10] add stable-code-3b local model support (#30) * add stable-code-3b local model support * add stable-code-3b local model support * add stable-code-3b local model support * add stable-code-3b local model support * fix code struct add chat model todo --- README.md | 13 +++++++- localModel.go | 1 + main.go | 82 +++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 89 insertions(+), 7 deletions(-) create mode 100644 localModel.go diff --git a/README.md b/README.md index ee7f073..b5e42d7 100644 --- a/README.md +++ b/README.md @@ -34,13 +34,15 @@ "codex_api_key": "sk-xxx", "codex_api_organization": "", "codex_api_project": "", + "code_instruct_model": "gpt-3.5-turbo-instruct", "chat_api_base": "https://api-proxy.oaipro.com/v1", "chat_api_key": "sk-xxx", "chat_api_organization": "", "chat_api_project": "", "chat_max_tokens": 4096, "chat_model_default": "gpt-4o", - "chat_model_map": {} + "chat_model_map": {}, + "auth_token": "" } ``` @@ -52,6 +54,15 @@ 可以通过 `OVERRIDE_` + 大写配置项作为环境变量,可以覆盖 `config.json` 中的值。例如:`OVERRIDE_CODEX_API_KEY=sk-xxxx` +### 本地大模型设置 +1. 安装ollama +2. ollama run stable-code:code (这个模型较小,大部分显卡都能跑) + 或者你的显卡比较高安装这个:ollama run stable-code:3b-code-fp16 +3. 修改config.json里面的codex_api_base为http://localhost:11434/v1/chat +4. 修改code_instruct_model为你的模型名称,stable-code:code或者stable-code:3b-code-fp16 +4. 剩下的就按照正常流程走即可。 +5. 如果调不通,请确认http://localhost:11434/v1/chat可用。 + ### 重要说明 `codex_max_tokens` 工作并不完美,已经移除。**JetBrains IDE 完美工作**,`VSCode` 需要执行以下脚本Patch之: diff --git a/localModel.go b/localModel.go new file mode 100644 index 0000000..06ab7d0 --- /dev/null +++ b/localModel.go @@ -0,0 +1 @@ +package main diff --git a/main.go b/main.go index 1a36a42..3bae8fe 100644 --- a/main.go +++ b/main.go @@ -5,6 +5,7 @@ import ( "context" "encoding/json" "errors" + "fmt" "github.com/gin-gonic/gin" "github.com/tidwall/gjson" "github.com/tidwall/sjson" @@ -20,7 +21,9 @@ import ( "time" ) -const InstructModel = "gpt-3.5-turbo-instruct" +const DefaultInstructModel = "gpt-3.5-turbo-instruct" + +const StableCodeModelPrefix = "stable-code" type config struct { Bind string `json:"bind"` @@ -30,6 +33,7 @@ type config struct { CodexApiKey string `json:"codex_api_key"` CodexApiOrganization string `json:"codex_api_organization"` CodexApiProject string `json:"codex_api_project"` + CodeInstructModel string `json:"code_instruct_model"` ChatApiBase string `json:"chat_api_base"` ChatApiKey string `json:"chat_api_key"` ChatApiOrganization string `json:"chat_api_organization"` @@ -38,6 +42,7 @@ type config struct { ChatModelDefault string `json:"chat_model_default"` ChatModelMap map[string]string `json:"chat_model_map"` ChatLocale string `json:"chat_locale"` + AuthToken string `json:"auth_token"` } func readConfig() *config { @@ -88,6 +93,9 @@ func readConfig() *config { } } } + if _cfg.CodeInstructModel == "" { + _cfg.CodeInstructModel = DefaultInstructModel + } return _cfg } @@ -150,10 +158,31 @@ func NewProxyService(cfg *config) (*ProxyService, error) { client: client, }, nil } +func AuthMiddleware(authToken string) gin.HandlerFunc { + return func(c *gin.Context) { + token := c.Param("token") + if token != authToken { + c.JSON(http.StatusUnauthorized, gin.H{"error": "Unauthorized"}) + c.Abort() + return + } + c.Next() + } +} func (s *ProxyService) InitRoutes(e *gin.Engine) { - e.POST("/v1/chat/completions", s.completions) - e.POST("/v1/engines/copilot-codex/completions", s.codeCompletions) + authToken := s.cfg.AuthToken // replace with your dynamic value as needed + if authToken != "" { + // 鉴权 + v1 := e.Group("/:token/v1/", AuthMiddleware(authToken)) + { + v1.POST("/chat/completions", s.completions) + v1.POST("/engines/copilot-codex/completions", s.codeCompletions) + } + } else { + e.POST("/v1/chat/completions", s.completions) + e.POST("/v1/engines/copilot-codex/completions", s.codeCompletions) + } } func (s *ProxyService) completions(c *gin.Context) { @@ -254,13 +283,12 @@ func (s *ProxyService) codeCompletions(c *gin.Context) { return } - body, _ = sjson.DeleteBytes(body, "extra") - body, _ = sjson.DeleteBytes(body, "nwo") - body, _ = sjson.SetBytes(body, "model", InstructModel) + body = ConstructRequestBody(body, s.cfg) proxyUrl := s.cfg.CodexApiBase + "/completions" req, err := http.NewRequestWithContext(ctx, http.MethodPost, proxyUrl, io.NopCloser(bytes.NewBuffer(body))) if nil != err { + // abortCodex(c, http.StatusInternalServerError) return } @@ -305,6 +333,47 @@ func (s *ProxyService) codeCompletions(c *gin.Context) { _, _ = io.Copy(c.Writer, resp.Body) } +func ConstructRequestBody(body []byte, cfg *config) []byte { + body, _ = sjson.DeleteBytes(body, "extra") + body, _ = sjson.DeleteBytes(body, "nwo") + body, _ = sjson.SetBytes(body, "model", cfg.CodeInstructModel) + if strings.Contains(cfg.CodeInstructModel, StableCodeModelPrefix) { + return constructWithStableCodeModel(body) + } + if strings.HasSuffix(cfg.ChatApiBase, "chat") { + // @Todo constructWithChatModel + // 如果code base以chat结尾则构建chatModel,暂时没有好的prompt + } + return body +} + +func constructWithStableCodeModel(body []byte) []byte { + suffix := gjson.GetBytes(body, "suffix") + prompt := gjson.GetBytes(body, "prompt") + content := fmt.Sprintf("%s%s", prompt, suffix) + + // 创建新的 JSON 对象并添加到 body 中 + messages := []map[string]string{ + { + "role": "user", + "content": content, + }, + } + return constructWithChatModel(body, messages) +} + +func constructWithChatModel(body []byte, messages interface{}) []byte { + + body, _ = sjson.SetBytes(body, "messages", messages) + + // fmt.Printf("Request Body: %s\n", body) + // 2. 将转义的字符替换回原来的字符 + jsonStr := string(body) + jsonStr = strings.ReplaceAll(jsonStr, "\\u003c", "<") + jsonStr = strings.ReplaceAll(jsonStr, "\\u003e", ">") + return []byte(jsonStr) +} + func main() { cfg := readConfig() @@ -324,4 +393,5 @@ func main() { log.Fatal(err) return } + }