3 changed files with 43 additions and 140 deletions
--- a/2
+++ b/2
@ -18,5 +18,5 @@ COPY config.json.example /app/config.json
 WORKDIR /app
 VOLUME /app
-EXPOSE 8181
+EXPOSE 8080
 CMD ["override"]
--- a/README.md
+++ b/README.md
@ -6,7 +6,7 @@
 ```json
    "github.copilot.advanced": {
-        "debug.overrideCAPIUrl": "http://127.0.0.1:8181/v1",
+        "debug.overrideCAPIUrl": "http://127.0.0.1:8181",
        "debug.overrideProxyUrl": "http://127.0.0.1:8181",
        "debug.chatOverrideProxyUrl": "http://127.0.0.1:8181/v1/chat/completions",
        "authProvider": "github-enterprise"
@ -69,21 +69,6 @@
  "code_instruct_model": "deepseek-coder",
 ```
 ### Siliconflow 设置
 如果你希望使用 Siliconflow FIM 模型来进行代码补全，着重修改以下配置：
 ```json
  "codex_api_base": "https://api.siliconflow.cn/v1",
  "codex_api_key": "sk-xxx,sk-xxx2,sk-xxx3...",
  "code_instruct_model": "Qwen/Qwen2.5-Coder-7B-Instruct",
 ```
 截至目前，Siliconflow 共有三个模型支持 FIM，分别是 `Qwen/Qwen2.5-Coder-7B-Instruct`、`deepseek-ai/DeepSeek-Coder-V2-Instruct`、`deepseek-ai/DeepSeek-V2.5`。其中 `Qwen/Qwen2.5-Coder-7B-Instruct` 是免费模型，另外两个是收费模型。
 如果你有多个 Siliconflow API Key，可以用英文逗号分隔后填入 `codex_api_key` 字段，这样可以很好地避免 Siliconflow 官方的 TPM RateLimit 对你编码速度的影响（尤其是使用收费模型时，用户级别较低，TPM 最低只有 10k）。
 ### 本地大模型设置
 1. 安装ollama 
 2. ollama run stable-code:code  (这个模型较小，大部分显卡都能跑)  
--- a/main.go
+++ b/main.go
@ -19,7 +19,6 @@ import (
 	"strconv"
 	"strings"
 	"time"
    "math/rand"
 )
 const DefaultInstructModel = "gpt-3.5-turbo-instruct"
@ -28,8 +27,6 @@ const StableCodeModelPrefix = "stable-code"
 const DeepSeekCoderModel = "deepseek-coder"
 var SiliconflowModels = []string{"deepseek-ai/DeepSeek-V2.5", "deepseek-ai/DeepSeek-Coder-V2-Instruct", "Qwen/Qwen2.5-Coder-7B-Instruct"}
 type config struct {
 	Bind                 string            `json:"bind"`
 	ProxyUrl             string            `json:"proxy_url"`
@ -52,13 +49,7 @@ type config struct {
 }
 func readConfig() *config {
-	var configPath string
+	content, err := os.ReadFile("config.json")
 	if len(os.Args) > 1 {
 		configPath = os.Args[1]
 	} else {
 		configPath = "config.json"
 	}
 	content, err := os.ReadFile(configPath)
 	if nil != err {
 		log.Fatal(err)
 	}
@ -233,12 +224,9 @@ func (s *ProxyService) models(c *gin.Context) {
 		"data": []gin.H{
 			{
 				"capabilities": gin.H{
-					"family":    "gpt-3.5-turbo",
+					"family": "gpt-3.5-turbo",
-					"limits":    gin.H{"max_prompt_tokens": 12288},
+					"object": "model_capabilities",
-					"object":    "model_capabilities",
+					"type":   "chat",
 					"supports":  gin.H{"tool_calls": true},
 					"tokenizer": "cl100k_base",
 					"type":      "chat",
 				},
 				"id":      "gpt-3.5-turbo",
 				"name":    "GPT 3.5 Turbo",
@ -247,26 +235,20 @@ func (s *ProxyService) models(c *gin.Context) {
 			},
 			{
 				"capabilities": gin.H{
-					"family":    "gpt-3.5-turbo",
+					"family": "gpt-3.5-turbo",
-					"limits":    gin.H{"max_prompt_tokens": 12288},
+					"object": "model_capabilities",
-					"object":    "model_capabilities",
+					"type":   "chat",
 					"supports":  gin.H{"tool_calls": true},
 					"tokenizer": "cl100k_base",
 					"type":      "chat",
 				},
 				"id":      "gpt-3.5-turbo-0613",
-				"name":    "GPT 3.5 Turbo",
+				"name":    "GPT 3.5 Turbo (2023-06-13)",
 				"object":  "model",
 				"version": "gpt-3.5-turbo-0613",
 			},
 			{
 				"capabilities": gin.H{
-					"family":    "gpt-4",
+					"family": "gpt-4",
-					"limits":    gin.H{"max_prompt_tokens": 20000},
+					"object": "model_capabilities",
-					"object":    "model_capabilities",
+					"type":   "chat",
 					"supports":  gin.H{"tool_calls": true},
 					"tokenizer": "cl100k_base",
 					"type":      "chat",
 				},
 				"id":      "gpt-4",
 				"name":    "GPT 4",
@ -275,81 +257,31 @@ func (s *ProxyService) models(c *gin.Context) {
 			},
 			{
 				"capabilities": gin.H{
-					"family":    "gpt-4",
+					"family": "gpt-4",
-					"limits":    gin.H{"max_prompt_tokens": 20000},
+					"object": "model_capabilities",
-					"object":    "model_capabilities",
+					"type":   "chat",
 					"supports":  gin.H{"tool_calls": true},
 					"tokenizer": "cl100k_base",
 					"type":      "chat",
 				},
 				"id":      "gpt-4-0613",
-				"name":    "GPT 4",
+				"name":    "GPT 4 (2023-06-13)",
 				"object":  "model",
 				"version": "gpt-4-0613",
 			},
 			{
 				"capabilities": gin.H{
-					"family":    "gpt-4-turbo",
+					"family": "gpt-4-turbo",
-					"limits":    gin.H{"max_prompt_tokens": 20000},
+					"object": "model_capabilities",
-					"object":    "model_capabilities",
+					"type":   "chat",
 					"supports":  gin.H{"parallel_tool_calls": true, "tool_calls": true},
 					"tokenizer": "cl100k_base",
 					"type":      "chat",
 				},
 				"id":      "gpt-4-0125-preview",
-				"name":    "GPT 4 Turbo",
+				"name":    "GPT 4 Turbo (2024-01-25 Preview)",
 				"object":  "model",
 				"version": "gpt-4-0125-preview",
 			},
 			{
 				"capabilities": gin.H{
-					"family":    "gpt-4o",
+					"family": "text-embedding-ada-002",
-					"limits":    gin.H{"max_prompt_tokens": 20000},
+					"object": "model_capabilities",
-					"object":    "model_capabilities",
+					"type":   "embeddings",
 					"supports":  gin.H{"parallel_tool_calls": true, "tool_calls": true},
 					"tokenizer": "o200k_base",
 					"type":      "chat",
 				},
 				"id":      "gpt-4o",
 				"name":    "GPT 4o",
 				"object":  "model",
 				"version": "gpt-4o-2024-05-13",
 			},
 			{
 				"capabilities": gin.H{
 					"family":    "gpt-4o",
 					"limits":    gin.H{"max_prompt_tokens": 20000},
 					"object":    "model_capabilities",
 					"supports":  gin.H{"parallel_tool_calls": true, "tool_calls": true},
 					"tokenizer": "o200k_base",
 					"type":      "chat",
 				},
 				"id":      "gpt-4o-2024-05-13",
 				"name":    "GPT 4o",
 				"object":  "model",
 				"version": "gpt-4o-2024-05-13",
 			},
 			{
 				"capabilities": gin.H{
 					"family":    "gpt-4o",
 					"limits":    gin.H{"max_prompt_tokens": 20000},
 					"object":    "model_capabilities",
 					"supports":  gin.H{"parallel_tool_calls": true, "tool_calls": true},
 					"tokenizer": "o200k_base",
 					"type":      "chat",
 				},
 				"id":     "gpt-4-o-preview",
 				"name":   "GPT 4o",
 				"object": "model",
 			},
 			{
 				"capabilities": gin.H{
 					"family":    "text-embedding-ada-002",
 					"limits":    gin.H{"max_inputs": 256},
 					"object":    "model_capabilities",
 					"supports":  gin.H{},
 					"tokenizer": "cl100k_base",
 					"type":      "embeddings",
 				},
 				"id":      "text-embedding-ada-002",
 				"name":    "Embedding V2 Ada",
@ -358,12 +290,20 @@ func (s *ProxyService) models(c *gin.Context) {
 			},
 			{
 				"capabilities": gin.H{
-					"family":    "text-embedding-3-small",
+					"family": "text-embedding-ada-002",
-					"limits":    gin.H{"max_inputs": 256},
+					"object": "model_capabilities",
-					"object":    "model_capabilities",
+					"type":   "embeddings",
-					"supports":  gin.H{"dimensions": true},
+				},
-					"tokenizer": "cl100k_base",
+				"id":      "text-embedding-ada-002-index",
-					"type":      "embeddings",
+				"name":    "Embedding V2 Ada (Index)",
 				"object":  "model",
 				"version": "text-embedding-ada-002",
 			},
 			{
 				"capabilities": gin.H{
 					"family": "text-embedding-3-small",
 					"object": "model_capabilities",
 					"type":   "embeddings",
 				},
 				"id":      "text-embedding-3-small",
 				"name":    "Embedding V3 small",
@ -372,11 +312,9 @@ func (s *ProxyService) models(c *gin.Context) {
 			},
 			{
 				"capabilities": gin.H{
-					"family":    "text-embedding-3-small",
+					"family": "text-embedding-3-small",
-					"object":    "model_capabilities",
+					"object": "model_capabilities",
-					"supports":  gin.H{"dimensions": true},
+					"type":   "embeddings",
 					"tokenizer": "cl100k_base",
 					"type":      "embeddings",
 				},
 				"id":      "text-embedding-3-small-inference",
 				"name":    "Embedding V3 small (Inference)",
@ -407,12 +345,6 @@ func (s *ProxyService) completions(c *gin.Context) {
 	if !gjson.GetBytes(body, "function_call").Exists() {
 		messages := gjson.GetBytes(body, "messages").Array()
 		for i, msg := range messages {
 			toolCalls := msg.Get("tool_calls").Array()
 			if len(toolCalls) == 0 {
 				body, _ = sjson.DeleteBytes(body, fmt.Sprintf("messages.%d.tool_calls", i))
 			}
 		}
 		lastIndex := len(messages) - 1
 		if !strings.Contains(messages[lastIndex].Get("content").String(), "Respond in the following locale") {
 			locale := s.cfg.ChatLocale
@ -477,10 +409,6 @@ func (s *ProxyService) completions(c *gin.Context) {
 	_, _ = io.Copy(c.Writer, resp.Body)
 }
 // contains reports whether str is exactly equal to one of the elements of arr.
 //
 // Elements are compared as whole strings, so a partial name, a string that
 // happens to span two neighbouring elements, or the empty string does not
 // count as a match unless it is itself an element of arr.
 func contains(arr []string, str string) bool {
 	for _, item := range arr {
 		if item == str {
 			return true
 		}
 	}
 	return false
 }
 func (s *ProxyService) codeCompletions(c *gin.Context) {
 	ctx := c.Request.Context()
@ -506,7 +434,7 @@ func (s *ProxyService) codeCompletions(c *gin.Context) {
 	}
 	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("Authorization", "Bearer " + getRandomApiKey(s.cfg.CodexApiKey))
+	req.Header.Set("Authorization", "Bearer "+s.cfg.CodexApiKey)
 	if "" != s.cfg.CodexApiOrganization {
 		req.Header.Set("OpenAI-Organization", s.cfg.CodexApiOrganization)
 	}
@ -545,16 +473,6 @@ func (s *ProxyService) codeCompletions(c *gin.Context) {
 	_, _ = io.Copy(c.Writer, resp.Body)
 }
 // getRandomApiKey picks one API key at random from a comma-separated list.
 //
 // Surrounding whitespace is trimmed from every entry, and entries that are
 // empty after trimming (for example those produced by a trailing comma) are
 // ignored. It returns "" when paramStr contains no usable key.
 func getRandomApiKey(paramStr string) string {
 	parts := strings.Split(paramStr, ",")
 	keys := make([]string, 0, len(parts))
 	for _, part := range parts {
 		if key := strings.TrimSpace(part); key != "" {
 			keys = append(keys, key)
 		}
 	}
 	if len(keys) == 0 {
 		return ""
 	}
 	// The global generator is deliberately not reseeded on every call:
 	// rand.Seed is deprecated, and reseeding per request adds nothing to
 	// spreading load across the keys.
 	randomIndex := rand.Intn(len(keys))
 	// Log only the index. The key itself is a secret and must not be
 	// written to stdout.
 	fmt.Println("Code completion API Key index:", randomIndex)
 	return keys[randomIndex]
 }
 func ConstructRequestBody(body []byte, cfg *config) []byte {
 	body, _ = sjson.DeleteBytes(body, "extra")
 	body, _ = sjson.DeleteBytes(body, "nwo")
@ -566,7 +484,7 @@ func ConstructRequestBody(body []byte, cfg *config) []byte {
 	if strings.Contains(cfg.CodeInstructModel, StableCodeModelPrefix) {
 		return constructWithStableCodeModel(body)
-	} else if strings.HasPrefix(cfg.CodeInstructModel, DeepSeekCoderModel) || contains(SiliconflowModels, cfg.CodeInstructModel) {
+	} else if strings.HasPrefix(cfg.CodeInstructModel, DeepSeekCoderModel) {
 		if gjson.GetBytes(body, "n").Int() > 1 {
 			body, _ = sjson.SetBytes(body, "n", 1)
 		}