Merge 9504f917e2 into 98e5cb7b77
This commit is contained in:
commit
459ebbc41f
|
|
@ -36,7 +36,8 @@ AVAIL_LLM_MODELS = ["gpt-4-1106-preview", "gpt-4-turbo-preview", "gpt-4-vision-p
|
|||
"gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "gpt-4-turbo-2024-04-09",
|
||||
"gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5",
|
||||
"gpt-4", "gpt-4-32k", "azure-gpt-4", "glm-4", "glm-4v", "glm-3-turbo",
|
||||
"gemini-1.5-pro", "chatglm3"
|
||||
"gemini-1.5-pro", "chatglm3", "HF:Qwen/Qwen2.5-72B-Instruct",
|
||||
"HF:meta-llama/Meta-Llama-3-70B-Instruct"
|
||||
]
|
||||
|
||||
EMBEDDING_MODEL = "text-embedding-3-small"
|
||||
|
|
@ -55,6 +56,7 @@ EMBEDDING_MODEL = "text-embedding-3-small"
|
|||
# "deepseek-chat" ,"deepseek-coder",
|
||||
# "gemini-1.5-flash",
|
||||
# "yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview",
|
||||
# Hugging Face Playground中的模型,可自行添加,模型名字前需加"HF:"
|
||||
# ]
|
||||
# --- --- --- ---
|
||||
# 此外,您还可以在接入one-api/vllm/ollama/Openroute时,
|
||||
|
|
@ -259,8 +261,7 @@ CUSTOM_API_KEY_PATTERN = ""
|
|||
# Google Gemini API-Key
|
||||
GEMINI_API_KEY = ''
|
||||
|
||||
|
||||
# HUGGINGFACE的TOKEN,下载LLAMA时起作用 https://huggingface.co/docs/hub/security-tokens
|
||||
# HUGGINGFACE的TOKEN,使用Playground及下载LLAMA时起作用 https://huggingface.co/docs/hub/security-tokens
|
||||
# SECURITY: never commit a real access token to version control. Keep this empty in the
# shipped config and provide your own token privately; an empty value is what the
# Hugging Face bridge checks for to report "token not configured".
HUGGINGFACE_ACCESS_TOKEN = ""
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -35,6 +35,9 @@ from .bridge_google_gemini import predict_no_ui_long_connection as genai_noui
|
|||
from .bridge_zhipu import predict_no_ui_long_connection as zhipu_noui
|
||||
from .bridge_zhipu import predict as zhipu_ui
|
||||
|
||||
from .bridge_hf import predict_no_ui_long_connection as hf_noui
|
||||
from .bridge_hf import predict as hf_ui
|
||||
|
||||
from .bridge_taichu import predict_no_ui_long_connection as taichu_noui
|
||||
from .bridge_taichu import predict as taichu_ui
|
||||
|
||||
|
|
@ -987,6 +990,37 @@ if "zhipuai" in AVAIL_LLM_MODELS: # zhipuai 是glm-4的别名,向后兼容
|
|||
})
|
||||
except:
|
||||
logger.error(trimmed_format_exc())
|
||||
# -=-=-=-=-=-=- Hugging Face Playground -=-=-=-=-=-=-
|
||||
if any("HF:" in x for x in AVAIL_LLM_MODELS):
    # Register every model whose name carries the "HF:" marker with the Hugging Face
    # Playground bridge. All such models share one configuration (max_token defaults
    # to 8192, GPT-3.5 tokenizer used for token counting), so a single branch suffices;
    # the previously special-cased model names received exactly the same entry.
    try:
        for x in AVAIL_LLM_MODELS:
            if "HF:" not in x:
                continue
            model_info.update({
                x: {
                    "fn_with_ui": hf_ui,
                    "fn_without_ui": hf_noui,
                    "endpoint": None,
                    "max_token": 8192,
                    'can_multi_thread': True,
                    "tokenizer": tokenizer_gpt35,
                    "token_cnt": get_token_num_gpt35,
                },
            })
    except Exception:
        # Registration is best-effort: log the failure and keep loading other models,
        # but do not swallow KeyboardInterrupt/SystemExit.
        logger.error(trimmed_format_exc())
|
||||
|
||||
# -=-=-=-=-=-=- 幻方-深度求索大模型 -=-=-=-=-=-=-
|
||||
if "deepseekcoder" in AVAIL_LLM_MODELS: # deepseekcoder
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,79 @@
|
|||
import time
|
||||
import os
|
||||
from toolbox import update_ui, get_conf, update_ui_lastest_msg, log_chat
|
||||
from toolbox import check_packages, report_exception, have_any_recent_upload_image_files
|
||||
from toolbox import ChatBotWithCookies
|
||||
|
||||
model_name = 'Hugging Face Playground'
|
||||
|
||||
|
||||
def validate_key():
    """Tell whether a Hugging Face access token has been configured.

    Returns:
        bool: False when the ``HUGGINGFACE_ACCESS_TOKEN`` config value is the
        empty string, True otherwise.
    """
    configured_token = get_conf("HUGGINGFACE_ACCESS_TOKEN")
    return configured_token != ''
|
||||
|
||||
|
||||
def predict_no_ui_long_connection(inputs: str, llm_kwargs: dict, history: list = [], sys_prompt: str = "",
                                  observe_window: list = [], console_slience: bool = False):
    """
    Multi-threaded entry point (no UI updates).
    See request_llms/bridge_all.py for the description of this function family.

    Streams the reply from the Hugging Face Playground and returns the accumulated text.
    While streaming, ``observe_window[0]`` (if present) is overwritten with the text
    received so far, and ``observe_window[1]`` (if present) is compared against the
    current time: once it is more than 5 seconds old a RuntimeError is raised.
    NOTE(review): observe_window[1] is presumably a heartbeat timestamp refreshed by
    the caller -- confirm against bridge_all.py.

    Raises:
        RuntimeError: when no access token is configured, or when the watchdog expires.
    """
    patience_seconds = 5
    response = ""

    if not validate_key():
        raise RuntimeError('请配置HUGGINGFACE_ACCESS_TOKEN')

    # Start receiving the reply
    from .com_hfplayground import HFPlaygroundInit
    playground = HFPlaygroundInit()
    stream = playground.generate_chat(inputs, llm_kwargs, history, sys_prompt)
    for _delta, response in stream:
        window_size = len(observe_window)
        if window_size >= 1:
            observe_window[0] = response
        if window_size >= 2 and (time.time() - observe_window[1]) > patience_seconds:
            raise RuntimeError("程序终止。")
    return response
|
||||
|
||||
|
||||
def predict(inputs: str, llm_kwargs: dict, plugin_kwargs: dict, chatbot: ChatBotWithCookies,
            history: list = None, system_prompt: str = '', stream: bool = True, additional_fn: str = None):
    """
    Single-threaded entry point (drives the chat UI).
    See request_llms/bridge_all.py for the description of this function family.

    Generator: appends the user input to ``chatbot``, streams the reply from the
    Hugging Face Playground into the last chatbot row, refreshes the UI after every
    chunk, and finally extends ``history`` in place with ``[inputs, response]`` and
    logs the exchange.

    Args:
        inputs: the user's message.
        llm_kwargs: model settings; ``llm_kwargs["llm_model"]`` is read for logging
            and the whole dict is forwarded to the playground client.
        plugin_kwargs: unused here.
        chatbot: chat display container; mutated in place.
        history: flat conversation history; mutated in place. A fresh list is
            created when omitted, so calls never share state through the default.
        system_prompt: system message forwarded to the model.
        stream: unused here; the reply is always streamed.
        additional_fn: optional core-function name applied to inputs/history first.
    """
    if history is None:
        history = []

    chatbot.append([inputs, ""])
    yield from update_ui(chatbot=chatbot, history=history)

    # Try to import the dependency; if it is missing, show installation advice.
    # The playground client is built on huggingface_hub, so that is the package to check.
    try:
        check_packages(["huggingface_hub"])
    except Exception:
        yield from update_ui_lastest_msg(
            "导入软件依赖失败。使用该模型需要额外依赖,安装方法```pip install --upgrade huggingface_hub```。",
            chatbot=chatbot, history=history, delay=0)
        return

    if not validate_key():
        yield from update_ui_lastest_msg(lastmsg="[Local Message] 请配置HUGGINGFACE_ACCESS_TOKEN", chatbot=chatbot,
                                         history=history, delay=0)
        return

    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
        chatbot[-1] = [inputs, ""]
        yield from update_ui(chatbot=chatbot, history=history)

    # Start receiving the reply
    from .com_hfplayground import HFPlaygroundInit
    hfp_init = HFPlaygroundInit()
    response = ""  # stays defined even when the stream yields nothing
    for _chunk, response in hfp_init.generate_chat(inputs, llm_kwargs, history, system_prompt):
        chatbot[-1] = [inputs, response]
        yield from update_ui(chatbot=chatbot, history=history)
    history.extend([inputs, response])
    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=response)
    yield from update_ui(chatbot=chatbot, history=history)
|
||||
|
|
@ -0,0 +1,117 @@
|
|||
import httpx
|
||||
from huggingface_hub import InferenceClient
|
||||
from toolbox import get_conf, encode_image, get_pictures_list
|
||||
from loguru import logger
|
||||
import os
|
||||
|
||||
proxies = get_conf("proxies")
|
||||
|
||||
def input_encode_handler(inputs: str, llm_kwargs: dict):
    """Collect the most recently uploaded images as encoded payload entries.

    Args:
        inputs: the user's message; returned unchanged.
        llm_kwargs: model settings. ``llm_kwargs["most_recent_uploaded"]["path"]``,
            when present and non-empty, is passed to ``get_pictures_list``.

    Returns:
        tuple: ``(inputs, md_encode)`` where ``md_encode`` is a list of
        ``{"data": <encode_image result>, "type": <file extension>}`` dicts
        (``"jpg"`` is reported as ``"jpeg"``). The list is empty when there is no
        recent upload, instead of failing on an unassigned local.
    """
    md_encode = []
    # Tolerate a missing or None "most_recent_uploaded" entry.
    recent_upload = llm_kwargs.get("most_recent_uploaded") or {}
    upload_path = recent_upload.get("path")
    if upload_path:
        for md_path in get_pictures_list(upload_path):
            type_ = os.path.splitext(md_path)[1].replace(".", "")
            type_ = "jpeg" if type_ == "jpg" else type_
            md_encode.append({"data": encode_image(md_path), "type": type_})
    return inputs, md_encode
|
||||
|
||||
|
||||
class HFPlaygroundInit:
    """Streaming chat helper built on the Hugging Face ``InferenceClient``.

    The client is created from the ``HUGGINGFACE_ACCESS_TOKEN`` config value and the
    module-level ``proxies`` setting. ``self.model`` holds the model name of the most
    recent request, with the "HF:" marker removed.
    """

    def __init__(self):
        HUGGINGFACE_ACCESS_TOKEN = get_conf("HUGGINGFACE_ACCESS_TOKEN")
        self.client = InferenceClient(api_key=HUGGINGFACE_ACCESS_TOKEN, proxies=proxies)
        self.model = ''

    def __conversation_user(self, user_input: str, llm_kwargs: dict):
        """Wrap one user utterance as a chat message dict."""
        return {"role": "user", "content": user_input}

    def __conversation_history(self, history: list, llm_kwargs: dict):
        """Convert the flat ``[question, answer, ...]`` history into chat messages.

        Only complete question/answer pairs are used; a trailing unpaired item is ignored.
        """
        messages = []
        paired_length = 2 * (len(history) // 2)
        for index in range(0, paired_length, 2):
            messages.append(self.__conversation_user(history[index], llm_kwargs))
            messages.append({"role": "assistant", "content": history[index + 1]})
        return messages

    @staticmethod
    def preprocess_param(param, default=0.95, min_val=0.01, max_val=0.99):
        """Clamp a sampling parameter into ``[min_val, max_val]`` and round it.

        Args:
            param: value to normalise; anything ``float()`` accepts.
            default: returned when ``param`` cannot be converted (including ``None``)
                or is NaN.
            min_val: lower bound, returned for values at or below it.
            max_val: upper bound, returned for values at or above it.

        Returns:
            The bound that was hit, the default, or ``param`` rounded to two decimals.
        """
        try:
            param = float(param)
        except (TypeError, ValueError):
            # TypeError covers None and other non-numeric objects.
            return default

        if param != param:
            # NaN compares false against both bounds and would otherwise leak through.
            return default
        if param <= min_val:
            return min_val
        if param >= max_val:
            return max_val
        return round(param, 2)  # precision is adjustable; currently two decimals

    def __conversation_message_payload(self, inputs: str, llm_kwargs: dict, history: list, system_prompt: str):
        """Build the message list and open a streaming chat completion.

        Returns the streaming response object produced by the client.
        """
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        self.model = llm_kwargs['llm_model'].replace("HF:", "").strip()
        messages.extend(self.__conversation_history(history, llm_kwargs))  # conversation history
        # Empty input (spaces, newlines, empty string) makes the request fail, see
        # https://github.com/binary-husky/gpt_academic/issues/1640 -- substitute the
        # least meaningful placeholder, a single dot.
        if inputs.strip() == "":
            inputs = "."
        messages.append(self.__conversation_user(inputs, llm_kwargs))  # current user turn

        # Sampling temperature: controls randomness, must be positive. Range (0.0, 1.0),
        # default 0.95. Higher is more random/creative, lower is more stable.
        # Adjust either top_p or temperature for a use case, not both.
        temperature = self.preprocess_param(
            param=llm_kwargs.get('temperature', 0.95),
            default=0.95,
            min_val=0.01,
            max_val=0.99
        )
        # Nucleus sampling: open range (0.0, 1.0), default 0.7. The model only considers
        # tokens within the top_p probability mass (0.1 = top 10% of candidates).
        top_p = self.preprocess_param(
            param=llm_kwargs.get('top_p', 0.70),
            default=0.70,
            min_val=0.01,
            max_val=0.99
        )
        response = self.client.chat.completions.create(
            model=self.model, messages=messages, stream=True,
            temperature=temperature,
            top_p=top_p,
            max_tokens=llm_kwargs.get('max_tokens', 1024 * 4),
        )
        return response

    def generate_chat(self, inputs: str, llm_kwargs: dict, history: list, system_prompt: str):
        """Stream a chat reply.

        Yields:
            tuple: ``(delta_text, accumulated_text)`` for every streamed chunk.
            Chunks without text contribute an empty ``delta_text``; chunks without
            any choices are skipped.
        """
        response = self.__conversation_message_payload(inputs, llm_kwargs, history, system_prompt)
        bro_results = ''
        for chunk in response:
            if not chunk.choices:
                continue
            # delta.content may be None (e.g. on a role-only or final chunk).
            delta_text = chunk.choices[0].delta.content or ''
            bro_results += delta_text
            yield delta_text, bro_results
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: request one short chat and print every item the stream yields.
    demo_client = HFPlaygroundInit()
    demo_stream = demo_client.generate_chat('你好', {'llm_model': 'Qwen/Qwen2.5-72B-Instruct'}, [], '你是WPSAi')
    for streamed_item in demo_stream:
        print(streamed_item)
|
||||
|
|
@ -14,13 +14,15 @@ validate_path() # validate path so you can run from base directory
|
|||
|
||||
if "在线模型":
|
||||
if __name__ == "__main__":
|
||||
from request_llms.bridge_taichu import predict_no_ui_long_connection
|
||||
from request_llms.bridge_hf import predict_no_ui_long_connection
|
||||
|
||||
# from request_llms.bridge_taichu import predict_no_ui_long_connection
|
||||
# from request_llms.bridge_cohere import predict_no_ui_long_connection
|
||||
# from request_llms.bridge_spark import predict_no_ui_long_connection
|
||||
# from request_llms.bridge_zhipu import predict_no_ui_long_connection
|
||||
# from request_llms.bridge_chatglm3 import predict_no_ui_long_connection
|
||||
llm_kwargs = {
|
||||
"llm_model": "taichu",
|
||||
"llm_model": "Qwen/Qwen2.5-72B-Instruct",
|
||||
"max_length": 4096,
|
||||
"top_p": 1,
|
||||
"temperature": 1,
|
||||
|
|
|
|||
Loading…
Reference in New Issue