Compare commits
6 Commits
| Author | SHA1 | Date |
|---|---|---|
|
|
8603e7429e | |
|
|
6d9ba954dd | |
|
|
9ef70da47b | |
|
|
aef14559a1 | |
|
|
0685e8c153 | |
|
|
8fdd840460 |
|
|
@ -18,5 +18,5 @@ COPY config.json.example /app/config.json
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
VOLUME /app
|
VOLUME /app
|
||||||
|
|
||||||
EXPOSE 8080
|
EXPOSE 8181
|
||||||
CMD ["override"]
|
CMD ["override"]
|
||||||
|
|
|
||||||
17
README.md
17
README.md
|
|
@ -6,7 +6,7 @@
|
||||||
|
|
||||||
```json
|
```json
|
||||||
"github.copilot.advanced": {
|
"github.copilot.advanced": {
|
||||||
"debug.overrideCAPIUrl": "http://127.0.0.1:8181",
|
"debug.overrideCAPIUrl": "http://127.0.0.1:8181/v1",
|
||||||
"debug.overrideProxyUrl": "http://127.0.0.1:8181",
|
"debug.overrideProxyUrl": "http://127.0.0.1:8181",
|
||||||
"debug.chatOverrideProxyUrl": "http://127.0.0.1:8181/v1/chat/completions",
|
"debug.chatOverrideProxyUrl": "http://127.0.0.1:8181/v1/chat/completions",
|
||||||
"authProvider": "github-enterprise"
|
"authProvider": "github-enterprise"
|
||||||
|
|
@ -69,6 +69,21 @@
|
||||||
"code_instruct_model": "deepseek-coder",
|
"code_instruct_model": "deepseek-coder",
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Siliconflow 设置
|
||||||
|
如果你希望使用 Siliconflow 的 FIM 模型进行代码补全,请重点修改以下配置:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"codex_api_base": "https://api.siliconflow.cn/v1",
|
||||||
|
"codex_api_key": "sk-xxx,sk-xxx2,sk-xxx3...",
|
||||||
|
"code_instruct_model": "Qwen/Qwen2.5-Coder-7B-Instruct",
|
||||||
|
```
|
||||||
|
|
||||||
|
截至目前,Siliconflow 共有三个模型支持 FIM,分别是 `Qwen/Qwen2.5-Coder-7B-Instruct`、`deepseek-ai/DeepSeek-Coder-V2-Instruct`、`deepseek-ai/DeepSeek-V2.5`。其中 `Qwen/Qwen2.5-Coder-7B-Instruct` 是免费模型,另外两个是收费模型。
|
||||||
|
|
||||||
|
如果你有多个 Siliconflow API Key,可以用英文逗号分隔后填入 `codex_api_key` 字段,每次代码补全请求会随机选用其中一个 Key。这样可以很好地避免 Siliconflow 官方的 TPM RateLimit 对你编码速度的影响(尤其是使用收费模型时,用户级别较低,TPM 最低只有 10k)。
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### 本地大模型设置
|
### 本地大模型设置
|
||||||
1. 安装ollama
|
1. 安装ollama
|
||||||
2. ollama run stable-code:code (这个模型较小,大部分显卡都能跑)
|
2. ollama run stable-code:code (这个模型较小,大部分显卡都能跑)
|
||||||
|
|
|
||||||
164
main.go
164
main.go
|
|
@ -19,6 +19,7 @@ import (
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
"math/rand"
|
||||||
)
|
)
|
||||||
|
|
||||||
const DefaultInstructModel = "gpt-3.5-turbo-instruct"
|
const DefaultInstructModel = "gpt-3.5-turbo-instruct"
|
||||||
|
|
@ -27,6 +28,8 @@ const StableCodeModelPrefix = "stable-code"
|
||||||
|
|
||||||
const DeepSeekCoderModel = "deepseek-coder"
|
const DeepSeekCoderModel = "deepseek-coder"
|
||||||
|
|
||||||
|
// SiliconflowModels lists the Siliconflow model identifiers that
// ConstructRequestBody routes through the same request-construction branch
// as models whose name starts with DeepSeekCoderModel.
var SiliconflowModels = []string{"deepseek-ai/DeepSeek-V2.5", "deepseek-ai/DeepSeek-Coder-V2-Instruct", "Qwen/Qwen2.5-Coder-7B-Instruct"}
|
||||||
|
|
||||||
type config struct {
|
type config struct {
|
||||||
Bind string `json:"bind"`
|
Bind string `json:"bind"`
|
||||||
ProxyUrl string `json:"proxy_url"`
|
ProxyUrl string `json:"proxy_url"`
|
||||||
|
|
@ -49,7 +52,13 @@ type config struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func readConfig() *config {
|
func readConfig() *config {
|
||||||
content, err := os.ReadFile("config.json")
|
var configPath string
|
||||||
|
if len(os.Args) > 1 {
|
||||||
|
configPath = os.Args[1]
|
||||||
|
} else {
|
||||||
|
configPath = "config.json"
|
||||||
|
}
|
||||||
|
content, err := os.ReadFile(configPath)
|
||||||
if nil != err {
|
if nil != err {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
@ -224,9 +233,12 @@ func (s *ProxyService) models(c *gin.Context) {
|
||||||
"data": []gin.H{
|
"data": []gin.H{
|
||||||
{
|
{
|
||||||
"capabilities": gin.H{
|
"capabilities": gin.H{
|
||||||
"family": "gpt-3.5-turbo",
|
"family": "gpt-3.5-turbo",
|
||||||
"object": "model_capabilities",
|
"limits": gin.H{"max_prompt_tokens": 12288},
|
||||||
"type": "chat",
|
"object": "model_capabilities",
|
||||||
|
"supports": gin.H{"tool_calls": true},
|
||||||
|
"tokenizer": "cl100k_base",
|
||||||
|
"type": "chat",
|
||||||
},
|
},
|
||||||
"id": "gpt-3.5-turbo",
|
"id": "gpt-3.5-turbo",
|
||||||
"name": "GPT 3.5 Turbo",
|
"name": "GPT 3.5 Turbo",
|
||||||
|
|
@ -235,20 +247,26 @@ func (s *ProxyService) models(c *gin.Context) {
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"capabilities": gin.H{
|
"capabilities": gin.H{
|
||||||
"family": "gpt-3.5-turbo",
|
"family": "gpt-3.5-turbo",
|
||||||
"object": "model_capabilities",
|
"limits": gin.H{"max_prompt_tokens": 12288},
|
||||||
"type": "chat",
|
"object": "model_capabilities",
|
||||||
|
"supports": gin.H{"tool_calls": true},
|
||||||
|
"tokenizer": "cl100k_base",
|
||||||
|
"type": "chat",
|
||||||
},
|
},
|
||||||
"id": "gpt-3.5-turbo-0613",
|
"id": "gpt-3.5-turbo-0613",
|
||||||
"name": "GPT 3.5 Turbo (2023-06-13)",
|
"name": "GPT 3.5 Turbo",
|
||||||
"object": "model",
|
"object": "model",
|
||||||
"version": "gpt-3.5-turbo-0613",
|
"version": "gpt-3.5-turbo-0613",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"capabilities": gin.H{
|
"capabilities": gin.H{
|
||||||
"family": "gpt-4",
|
"family": "gpt-4",
|
||||||
"object": "model_capabilities",
|
"limits": gin.H{"max_prompt_tokens": 20000},
|
||||||
"type": "chat",
|
"object": "model_capabilities",
|
||||||
|
"supports": gin.H{"tool_calls": true},
|
||||||
|
"tokenizer": "cl100k_base",
|
||||||
|
"type": "chat",
|
||||||
},
|
},
|
||||||
"id": "gpt-4",
|
"id": "gpt-4",
|
||||||
"name": "GPT 4",
|
"name": "GPT 4",
|
||||||
|
|
@ -257,31 +275,81 @@ func (s *ProxyService) models(c *gin.Context) {
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"capabilities": gin.H{
|
"capabilities": gin.H{
|
||||||
"family": "gpt-4",
|
"family": "gpt-4",
|
||||||
"object": "model_capabilities",
|
"limits": gin.H{"max_prompt_tokens": 20000},
|
||||||
"type": "chat",
|
"object": "model_capabilities",
|
||||||
|
"supports": gin.H{"tool_calls": true},
|
||||||
|
"tokenizer": "cl100k_base",
|
||||||
|
"type": "chat",
|
||||||
},
|
},
|
||||||
"id": "gpt-4-0613",
|
"id": "gpt-4-0613",
|
||||||
"name": "GPT 4 (2023-06-13)",
|
"name": "GPT 4",
|
||||||
"object": "model",
|
"object": "model",
|
||||||
"version": "gpt-4-0613",
|
"version": "gpt-4-0613",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"capabilities": gin.H{
|
"capabilities": gin.H{
|
||||||
"family": "gpt-4-turbo",
|
"family": "gpt-4-turbo",
|
||||||
"object": "model_capabilities",
|
"limits": gin.H{"max_prompt_tokens": 20000},
|
||||||
"type": "chat",
|
"object": "model_capabilities",
|
||||||
|
"supports": gin.H{"parallel_tool_calls": true, "tool_calls": true},
|
||||||
|
"tokenizer": "cl100k_base",
|
||||||
|
"type": "chat",
|
||||||
},
|
},
|
||||||
"id": "gpt-4-0125-preview",
|
"id": "gpt-4-0125-preview",
|
||||||
"name": "GPT 4 Turbo (2024-01-25 Preview)",
|
"name": "GPT 4 Turbo",
|
||||||
"object": "model",
|
"object": "model",
|
||||||
"version": "gpt-4-0125-preview",
|
"version": "gpt-4-0125-preview",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"capabilities": gin.H{
|
"capabilities": gin.H{
|
||||||
"family": "text-embedding-ada-002",
|
"family": "gpt-4o",
|
||||||
"object": "model_capabilities",
|
"limits": gin.H{"max_prompt_tokens": 20000},
|
||||||
"type": "embeddings",
|
"object": "model_capabilities",
|
||||||
|
"supports": gin.H{"parallel_tool_calls": true, "tool_calls": true},
|
||||||
|
"tokenizer": "o200k_base",
|
||||||
|
"type": "chat",
|
||||||
|
},
|
||||||
|
"id": "gpt-4o",
|
||||||
|
"name": "GPT 4o",
|
||||||
|
"object": "model",
|
||||||
|
"version": "gpt-4o-2024-05-13",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"capabilities": gin.H{
|
||||||
|
"family": "gpt-4o",
|
||||||
|
"limits": gin.H{"max_prompt_tokens": 20000},
|
||||||
|
"object": "model_capabilities",
|
||||||
|
"supports": gin.H{"parallel_tool_calls": true, "tool_calls": true},
|
||||||
|
"tokenizer": "o200k_base",
|
||||||
|
"type": "chat",
|
||||||
|
},
|
||||||
|
"id": "gpt-4o-2024-05-13",
|
||||||
|
"name": "GPT 4o",
|
||||||
|
"object": "model",
|
||||||
|
"version": "gpt-4o-2024-05-13",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"capabilities": gin.H{
|
||||||
|
"family": "gpt-4o",
|
||||||
|
"limits": gin.H{"max_prompt_tokens": 20000},
|
||||||
|
"object": "model_capabilities",
|
||||||
|
"supports": gin.H{"parallel_tool_calls": true, "tool_calls": true},
|
||||||
|
"tokenizer": "o200k_base",
|
||||||
|
"type": "chat",
|
||||||
|
},
|
||||||
|
"id": "gpt-4-o-preview",
|
||||||
|
"name": "GPT 4o",
|
||||||
|
"object": "model",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"capabilities": gin.H{
|
||||||
|
"family": "text-embedding-ada-002",
|
||||||
|
"limits": gin.H{"max_inputs": 256},
|
||||||
|
"object": "model_capabilities",
|
||||||
|
"supports": gin.H{},
|
||||||
|
"tokenizer": "cl100k_base",
|
||||||
|
"type": "embeddings",
|
||||||
},
|
},
|
||||||
"id": "text-embedding-ada-002",
|
"id": "text-embedding-ada-002",
|
||||||
"name": "Embedding V2 Ada",
|
"name": "Embedding V2 Ada",
|
||||||
|
|
@ -290,20 +358,12 @@ func (s *ProxyService) models(c *gin.Context) {
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"capabilities": gin.H{
|
"capabilities": gin.H{
|
||||||
"family": "text-embedding-ada-002",
|
"family": "text-embedding-3-small",
|
||||||
"object": "model_capabilities",
|
"limits": gin.H{"max_inputs": 256},
|
||||||
"type": "embeddings",
|
"object": "model_capabilities",
|
||||||
},
|
"supports": gin.H{"dimensions": true},
|
||||||
"id": "text-embedding-ada-002-index",
|
"tokenizer": "cl100k_base",
|
||||||
"name": "Embedding V2 Ada (Index)",
|
"type": "embeddings",
|
||||||
"object": "model",
|
|
||||||
"version": "text-embedding-ada-002",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"capabilities": gin.H{
|
|
||||||
"family": "text-embedding-3-small",
|
|
||||||
"object": "model_capabilities",
|
|
||||||
"type": "embeddings",
|
|
||||||
},
|
},
|
||||||
"id": "text-embedding-3-small",
|
"id": "text-embedding-3-small",
|
||||||
"name": "Embedding V3 small",
|
"name": "Embedding V3 small",
|
||||||
|
|
@ -312,9 +372,11 @@ func (s *ProxyService) models(c *gin.Context) {
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"capabilities": gin.H{
|
"capabilities": gin.H{
|
||||||
"family": "text-embedding-3-small",
|
"family": "text-embedding-3-small",
|
||||||
"object": "model_capabilities",
|
"object": "model_capabilities",
|
||||||
"type": "embeddings",
|
"supports": gin.H{"dimensions": true},
|
||||||
|
"tokenizer": "cl100k_base",
|
||||||
|
"type": "embeddings",
|
||||||
},
|
},
|
||||||
"id": "text-embedding-3-small-inference",
|
"id": "text-embedding-3-small-inference",
|
||||||
"name": "Embedding V3 small (Inference)",
|
"name": "Embedding V3 small (Inference)",
|
||||||
|
|
@ -345,6 +407,12 @@ func (s *ProxyService) completions(c *gin.Context) {
|
||||||
|
|
||||||
if !gjson.GetBytes(body, "function_call").Exists() {
|
if !gjson.GetBytes(body, "function_call").Exists() {
|
||||||
messages := gjson.GetBytes(body, "messages").Array()
|
messages := gjson.GetBytes(body, "messages").Array()
|
||||||
|
for i, msg := range messages {
|
||||||
|
toolCalls := msg.Get("tool_calls").Array()
|
||||||
|
if len(toolCalls) == 0 {
|
||||||
|
body, _ = sjson.DeleteBytes(body, fmt.Sprintf("messages.%d.tool_calls", i))
|
||||||
|
}
|
||||||
|
}
|
||||||
lastIndex := len(messages) - 1
|
lastIndex := len(messages) - 1
|
||||||
if !strings.Contains(messages[lastIndex].Get("content").String(), "Respond in the following locale") {
|
if !strings.Contains(messages[lastIndex].Get("content").String(), "Respond in the following locale") {
|
||||||
locale := s.cfg.ChatLocale
|
locale := s.cfg.ChatLocale
|
||||||
|
|
@ -409,6 +477,10 @@ func (s *ProxyService) completions(c *gin.Context) {
|
||||||
_, _ = io.Copy(c.Writer, resp.Body)
|
_, _ = io.Copy(c.Writer, resp.Body)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// contains reports whether str is exactly equal to one of the elements of arr.
//
// Matching is done per element rather than against a joined string, so a
// partial model name, text that would span two adjacent elements, or an empty
// str (unless arr itself holds an empty element) is not treated as a member.
// A nil or empty arr never contains anything.
func contains(arr []string, str string) bool {
	for _, item := range arr {
		if item == str {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
func (s *ProxyService) codeCompletions(c *gin.Context) {
|
func (s *ProxyService) codeCompletions(c *gin.Context) {
|
||||||
ctx := c.Request.Context()
|
ctx := c.Request.Context()
|
||||||
|
|
||||||
|
|
@ -434,7 +506,7 @@ func (s *ProxyService) codeCompletions(c *gin.Context) {
|
||||||
}
|
}
|
||||||
|
|
||||||
req.Header.Set("Content-Type", "application/json")
|
req.Header.Set("Content-Type", "application/json")
|
||||||
req.Header.Set("Authorization", "Bearer "+s.cfg.CodexApiKey)
|
req.Header.Set("Authorization", "Bearer " + getRandomApiKey(s.cfg.CodexApiKey))
|
||||||
if "" != s.cfg.CodexApiOrganization {
|
if "" != s.cfg.CodexApiOrganization {
|
||||||
req.Header.Set("OpenAI-Organization", s.cfg.CodexApiOrganization)
|
req.Header.Set("OpenAI-Organization", s.cfg.CodexApiOrganization)
|
||||||
}
|
}
|
||||||
|
|
@ -473,6 +545,16 @@ func (s *ProxyService) codeCompletions(c *gin.Context) {
|
||||||
_, _ = io.Copy(c.Writer, resp.Body)
|
_, _ = io.Copy(c.Writer, resp.Body)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getRandomApiKey picks one API key at random from paramStr.
//
// paramStr is a comma-separated list of keys. Surrounding whitespace is
// trimmed from every entry, and blank entries (for example those produced by a
// trailing comma) are ignored so that an empty key is never selected while a
// real one is available. If paramStr holds no usable key, the empty string is
// returned.
//
// Only the chosen index is logged; the key itself is a secret and is
// deliberately never written to the output.
func getRandomApiKey(paramStr string) string {
	parts := strings.Split(paramStr, ",")
	keys := make([]string, 0, len(parts))
	for _, part := range parts {
		if key := strings.TrimSpace(part); key != "" {
			keys = append(keys, key)
		}
	}
	if len(keys) == 0 {
		return ""
	}

	// The shared math/rand source is used as-is: reseeding it on every request
	// is unnecessary, and rand.Seed is deprecated since Go 1.20.
	randomIndex := rand.Intn(len(keys))
	fmt.Println("Code completion API Key index:", randomIndex)
	return keys[randomIndex]
}
|
||||||
|
|
||||||
func ConstructRequestBody(body []byte, cfg *config) []byte {
|
func ConstructRequestBody(body []byte, cfg *config) []byte {
|
||||||
body, _ = sjson.DeleteBytes(body, "extra")
|
body, _ = sjson.DeleteBytes(body, "extra")
|
||||||
body, _ = sjson.DeleteBytes(body, "nwo")
|
body, _ = sjson.DeleteBytes(body, "nwo")
|
||||||
|
|
@ -484,7 +566,7 @@ func ConstructRequestBody(body []byte, cfg *config) []byte {
|
||||||
|
|
||||||
if strings.Contains(cfg.CodeInstructModel, StableCodeModelPrefix) {
|
if strings.Contains(cfg.CodeInstructModel, StableCodeModelPrefix) {
|
||||||
return constructWithStableCodeModel(body)
|
return constructWithStableCodeModel(body)
|
||||||
} else if strings.HasPrefix(cfg.CodeInstructModel, DeepSeekCoderModel) {
|
} else if strings.HasPrefix(cfg.CodeInstructModel, DeepSeekCoderModel) || contains(SiliconflowModels, cfg.CodeInstructModel) {
|
||||||
if gjson.GetBytes(body, "n").Int() > 1 {
|
if gjson.GetBytes(body, "n").Int() > 1 {
|
||||||
body, _ = sjson.SetBytes(body, "n", 1)
|
body, _ = sjson.SetBytes(body, "n", 1)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue