feat: 增加 siliconflow fim 的支持 (#63 )

siliconflow 已支持标准格式的 FIM 补全,特此 PR
Fix: Repair the logic for obtaining the configuration file path. (#60 )
2024-10-11 18:49:08 -07:00 · 2024-09-25 00:11:30 -07:00 · 2024-09-25 00:10:50 -07:00 · 2024-09-08 18:24:12 -07:00 · 2024-07-29 00:33:11 -07:00 · 2024-07-26 18:41:27 +08:00
3 changed files with 140 additions and 43 deletions
--- a/2
+++ b/2
@ -18,5 +18,5 @@ COPY config.json.example /app/config.json
 WORKDIR /app
 VOLUME /app

-EXPOSE 8080
+EXPOSE 8181
 CMD ["override"]
--- a/README.md
+++ b/README.md
@ -6,7 +6,7 @@

 ```json
    "github.copilot.advanced": {
-        "debug.overrideCAPIUrl": "http://127.0.0.1:8181",
+        "debug.overrideCAPIUrl": "http://127.0.0.1:8181/v1",
        "debug.overrideProxyUrl": "http://127.0.0.1:8181",
        "debug.chatOverrideProxyUrl": "http://127.0.0.1:8181/v1/chat/completions",
        "authProvider": "github-enterprise"
@ -69,6 +69,21 @@
  "code_instruct_model": "deepseek-coder",
 ```

+### Siliconflow 设置
+如果你希望使用 Siliconflow 的 FIM 模型进行代码补全，请重点修改以下配置：
+
+```json
+  "codex_api_base": "https://api.siliconflow.cn/v1",
+  "codex_api_key": "sk-xxx,sk-xxx2,sk-xxx3...",
+  "code_instruct_model": "Qwen/Qwen2.5-Coder-7B-Instruct",
+```
+
+截至目前，Siliconflow 共有三个模型支持 FIM，分别是 `Qwen/Qwen2.5-Coder-7B-Instruct`、`deepseek-ai/DeepSeek-Coder-V2-Instruct`、`deepseek-ai/DeepSeek-V2.5`。其中 `Qwen/Qwen2.5-Coder-7B-Instruct` 是免费模型，另外两个是收费模型。
+
+如果你有多个 Siliconflow API Key，可以用英文逗号分隔后填入 `codex_api_key` 字段，每次代码补全请求会随机选用其中一个 Key，这样可以很好地避免 Siliconflow 官方的 TPM RateLimit 对你编码速度的影响（尤其是使用收费模型时，用户级别较低，TPM 最低只有 10k）。
+
+
+
 ### 本地大模型设置
 1. 安装ollama 
 2. ollama run stable-code:code  (这个模型较小，大部分显卡都能跑)  
--- a/main.go
+++ b/main.go
@ -19,6 +19,7 @@ import (
 	"strconv"
 	"strings"
 	"time"
+    "math/rand"
 )

 const DefaultInstructModel = "gpt-3.5-turbo-instruct"
@ -27,6 +28,8 @@ const StableCodeModelPrefix = "stable-code"

 const DeepSeekCoderModel = "deepseek-coder"

+var SiliconflowModels = []string{"deepseek-ai/DeepSeek-V2.5", "deepseek-ai/DeepSeek-Coder-V2-Instruct", "Qwen/Qwen2.5-Coder-7B-Instruct"}
+
 type config struct {
 	Bind                 string            `json:"bind"`
 	ProxyUrl             string            `json:"proxy_url"`
@ -49,7 +52,13 @@ type config struct {
 }

 func readConfig() *config {
-	content, err := os.ReadFile("config.json")
+	var configPath string
+	if len(os.Args) > 1 {
+		configPath = os.Args[1]
+	} else {
+		configPath = "config.json"
+	}
+	content, err := os.ReadFile(configPath)
 	if nil != err {
 		log.Fatal(err)
 	}
@ -224,9 +233,12 @@ func (s *ProxyService) models(c *gin.Context) {
 		"data": []gin.H{
 			{
 				"capabilities": gin.H{
-					"family": "gpt-3.5-turbo",
-					"object": "model_capabilities",
-					"type":   "chat",
+					"family":    "gpt-3.5-turbo",
+					"limits":    gin.H{"max_prompt_tokens": 12288},
+					"object":    "model_capabilities",
+					"supports":  gin.H{"tool_calls": true},
+					"tokenizer": "cl100k_base",
+					"type":      "chat",
 				},
 				"id":      "gpt-3.5-turbo",
 				"name":    "GPT 3.5 Turbo",
@ -235,20 +247,26 @@ func (s *ProxyService) models(c *gin.Context) {
 			},
 			{
 				"capabilities": gin.H{
-					"family": "gpt-3.5-turbo",
-					"object": "model_capabilities",
-					"type":   "chat",
+					"family":    "gpt-3.5-turbo",
+					"limits":    gin.H{"max_prompt_tokens": 12288},
+					"object":    "model_capabilities",
+					"supports":  gin.H{"tool_calls": true},
+					"tokenizer": "cl100k_base",
+					"type":      "chat",
 				},
 				"id":      "gpt-3.5-turbo-0613",
-				"name":    "GPT 3.5 Turbo (2023-06-13)",
+				"name":    "GPT 3.5 Turbo",
 				"object":  "model",
 				"version": "gpt-3.5-turbo-0613",
 			},
 			{
 				"capabilities": gin.H{
-					"family": "gpt-4",
-					"object": "model_capabilities",
-					"type":   "chat",
+					"family":    "gpt-4",
+					"limits":    gin.H{"max_prompt_tokens": 20000},
+					"object":    "model_capabilities",
+					"supports":  gin.H{"tool_calls": true},
+					"tokenizer": "cl100k_base",
+					"type":      "chat",
 				},
 				"id":      "gpt-4",
 				"name":    "GPT 4",
@ -257,31 +275,81 @@ func (s *ProxyService) models(c *gin.Context) {
 			},
 			{
 				"capabilities": gin.H{
-					"family": "gpt-4",
-					"object": "model_capabilities",
-					"type":   "chat",
+					"family":    "gpt-4",
+					"limits":    gin.H{"max_prompt_tokens": 20000},
+					"object":    "model_capabilities",
+					"supports":  gin.H{"tool_calls": true},
+					"tokenizer": "cl100k_base",
+					"type":      "chat",
 				},
 				"id":      "gpt-4-0613",
-				"name":    "GPT 4 (2023-06-13)",
+				"name":    "GPT 4",
 				"object":  "model",
 				"version": "gpt-4-0613",
 			},
 			{
 				"capabilities": gin.H{
-					"family": "gpt-4-turbo",
-					"object": "model_capabilities",
-					"type":   "chat",
+					"family":    "gpt-4-turbo",
+					"limits":    gin.H{"max_prompt_tokens": 20000},
+					"object":    "model_capabilities",
+					"supports":  gin.H{"parallel_tool_calls": true, "tool_calls": true},
+					"tokenizer": "cl100k_base",
+					"type":      "chat",
 				},
 				"id":      "gpt-4-0125-preview",
-				"name":    "GPT 4 Turbo (2024-01-25 Preview)",
+				"name":    "GPT 4 Turbo",
 				"object":  "model",
 				"version": "gpt-4-0125-preview",
 			},
 			{
 				"capabilities": gin.H{
-					"family": "text-embedding-ada-002",
-					"object": "model_capabilities",
-					"type":   "embeddings",
+					"family":    "gpt-4o",
+					"limits":    gin.H{"max_prompt_tokens": 20000},
+					"object":    "model_capabilities",
+					"supports":  gin.H{"parallel_tool_calls": true, "tool_calls": true},
+					"tokenizer": "o200k_base",
+					"type":      "chat",
+				},
+				"id":      "gpt-4o",
+				"name":    "GPT 4o",
+				"object":  "model",
+				"version": "gpt-4o-2024-05-13",
+			},
+			{
+				"capabilities": gin.H{
+					"family":    "gpt-4o",
+					"limits":    gin.H{"max_prompt_tokens": 20000},
+					"object":    "model_capabilities",
+					"supports":  gin.H{"parallel_tool_calls": true, "tool_calls": true},
+					"tokenizer": "o200k_base",
+					"type":      "chat",
+				},
+				"id":      "gpt-4o-2024-05-13",
+				"name":    "GPT 4o",
+				"object":  "model",
+				"version": "gpt-4o-2024-05-13",
+			},
+			{
+				"capabilities": gin.H{
+					"family":    "gpt-4o",
+					"limits":    gin.H{"max_prompt_tokens": 20000},
+					"object":    "model_capabilities",
+					"supports":  gin.H{"parallel_tool_calls": true, "tool_calls": true},
+					"tokenizer": "o200k_base",
+					"type":      "chat",
+				},
+				"id":     "gpt-4-o-preview",
+				"name":   "GPT 4o",
+				"object": "model",
+			},
+			{
+				"capabilities": gin.H{
+					"family":    "text-embedding-ada-002",
+					"limits":    gin.H{"max_inputs": 256},
+					"object":    "model_capabilities",
+					"supports":  gin.H{},
+					"tokenizer": "cl100k_base",
+					"type":      "embeddings",
 				},
 				"id":      "text-embedding-ada-002",
 				"name":    "Embedding V2 Ada",
@ -290,20 +358,12 @@ func (s *ProxyService) models(c *gin.Context) {
 			},
 			{
 				"capabilities": gin.H{
-					"family": "text-embedding-ada-002",
-					"object": "model_capabilities",
-					"type":   "embeddings",
-				},
-				"id":      "text-embedding-ada-002-index",
-				"name":    "Embedding V2 Ada (Index)",
-				"object":  "model",
-				"version": "text-embedding-ada-002",
-			},
-			{
-				"capabilities": gin.H{
-					"family": "text-embedding-3-small",
-					"object": "model_capabilities",
-					"type":   "embeddings",
+					"family":    "text-embedding-3-small",
+					"limits":    gin.H{"max_inputs": 256},
+					"object":    "model_capabilities",
+					"supports":  gin.H{"dimensions": true},
+					"tokenizer": "cl100k_base",
+					"type":      "embeddings",
 				},
 				"id":      "text-embedding-3-small",
 				"name":    "Embedding V3 small",
@ -312,9 +372,11 @@ func (s *ProxyService) models(c *gin.Context) {
 			},
 			{
 				"capabilities": gin.H{
-					"family": "text-embedding-3-small",
-					"object": "model_capabilities",
-					"type":   "embeddings",
+					"family":    "text-embedding-3-small",
+					"object":    "model_capabilities",
+					"supports":  gin.H{"dimensions": true},
+					"tokenizer": "cl100k_base",
+					"type":      "embeddings",
 				},
 				"id":      "text-embedding-3-small-inference",
 				"name":    "Embedding V3 small (Inference)",
@ -345,6 +407,12 @@ func (s *ProxyService) completions(c *gin.Context) {

 	if !gjson.GetBytes(body, "function_call").Exists() {
 		messages := gjson.GetBytes(body, "messages").Array()
+		for i, msg := range messages {
+			toolCalls := msg.Get("tool_calls").Array()
+			if len(toolCalls) == 0 {
+				body, _ = sjson.DeleteBytes(body, fmt.Sprintf("messages.%d.tool_calls", i))
+			}
+		}
 		lastIndex := len(messages) - 1
 		if !strings.Contains(messages[lastIndex].Get("content").String(), "Respond in the following locale") {
 			locale := s.cfg.ChatLocale
@ -409,6 +477,10 @@ func (s *ProxyService) completions(c *gin.Context) {
 	_, _ = io.Copy(c.Writer, resp.Body)
 }

// contains reports whether str is exactly equal to one of the elements of arr.
//
// The comparison is a whole-element match. Joining arr with commas and doing a
// substring search would also accept the empty string, partial model names
// (for example "deepseek-ai"), and strings that span two adjacent elements,
// which would route unrelated models down the FIM code path.
func contains(arr []string, str string) bool {
	for _, item := range arr {
		if item == str {
			return true
		}
	}
	return false
}
+
 func (s *ProxyService) codeCompletions(c *gin.Context) {
 	ctx := c.Request.Context()

@ -434,7 +506,7 @@ func (s *ProxyService) codeCompletions(c *gin.Context) {
 	}

 	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("Authorization", "Bearer "+s.cfg.CodexApiKey)
+	req.Header.Set("Authorization", "Bearer " + getRandomApiKey(s.cfg.CodexApiKey))
 	if "" != s.cfg.CodexApiOrganization {
 		req.Header.Set("OpenAI-Organization", s.cfg.CodexApiOrganization)
 	}
@ -473,6 +545,16 @@ func (s *ProxyService) codeCompletions(c *gin.Context) {
 	_, _ = io.Copy(c.Writer, resp.Body)
 }

// getRandomApiKey picks one API key at random from paramStr, which holds one
// or more keys separated by commas.
//
// Whitespace around each key is trimmed and blank entries (for example from a
// trailing comma or ",,") are ignored, so an empty key is never selected while
// a usable one exists. If paramStr contains no usable key the empty string is
// returned.
//
// The selected key is deliberately not logged: it is a credential and must
// not end up in stdout or log files. The global math/rand source is not
// reseeded per call; it is seeded automatically as of Go 1.20, and on older
// toolchains the default sequence still rotates across the keys.
func getRandomApiKey(paramStr string) string {
	parts := strings.Split(paramStr, ",")
	keys := make([]string, 0, len(parts))
	for _, part := range parts {
		if key := strings.TrimSpace(part); key != "" {
			keys = append(keys, key)
		}
	}

	switch len(keys) {
	case 0:
		return ""
	case 1:
		// Nothing to choose between; skip the RNG entirely.
		return keys[0]
	}
	return keys[rand.Intn(len(keys))]
}
+
 func ConstructRequestBody(body []byte, cfg *config) []byte {
 	body, _ = sjson.DeleteBytes(body, "extra")
 	body, _ = sjson.DeleteBytes(body, "nwo")
@ -484,7 +566,7 @@ func ConstructRequestBody(body []byte, cfg *config) []byte {

 	if strings.Contains(cfg.CodeInstructModel, StableCodeModelPrefix) {
 		return constructWithStableCodeModel(body)
-	} else if strings.HasPrefix(cfg.CodeInstructModel, DeepSeekCoderModel) {
+	} else if strings.HasPrefix(cfg.CodeInstructModel, DeepSeekCoderModel) || contains(SiliconflowModels, cfg.CodeInstructModel) {
 		if gjson.GetBytes(body, "n").Int() > 1 {
 			body, _ = sjson.SetBytes(body, "n", 1)
 		}
Author	SHA1	Message	Date
zhuiyue132	8603e7429e	feat: 增加 siliconflow fim 的支持 (#63 ) siliconflow 已支持标准格式的 FIM 补全,特此 PR	2024-10-11 18:49:08 -07:00
aliensb	6d9ba954dd	Fix: Repair the logic for obtaining the configuration file path. (#60 ) Fixed the logic for obtaining the configuration file path to ensure that config.json is used as the default when no command line arguments are provided.	2024-09-25 00:11:30 -07:00
今夕是何年	9ef70da47b	当message中的tool_calls字段为空数组时移除掉这个属性，防止deepseek报错。 (#57 ) Co-authored-by: liyuzhe <banyebushui@>	2024-09-25 00:10:50 -07:00
Huanzhang Hu	aef14559a1	change models api (#54 ) Co-authored-by: huhuanzhang <huhuanzhang@parkingwang.com>	2024-09-08 18:24:12 -07:00
Liu Bingyan	0685e8c153	Modify expose port (#46 ) Modify expose port to match the port in the docker-compose file.	2024-07-29 00:33:11 -07:00
wozulong	8fdd840460	update README Signed-off-by: wozulong <>	2024-07-26 18:41:27 +08:00