Files changed:
- README.md +5 -3
- app.py +1 -1
- config.py +73 -2
- crazy_functional.py +37 -5
- crazy_functions/latex_fns/latex_toolbox.py +4 -1
- crazy_functions/pdf_fns/parse_pdf.py +25 -0
- crazy_functions/批量翻译PDF文档_多线程.py +126 -24
- crazy_functions/辅助功能.py +43 -0
- docker-compose.yml +8 -3
- docs/GithubAction+ChatGLM+Moss +1 -0
- request_llm/bridge_all.py +31 -2
- request_llm/bridge_chatgpt.py +3 -5
- request_llm/bridge_chatgpt_website.py +0 -15
- request_llm/bridge_llama2.py +91 -0
- request_llm/bridge_qianfan.py +164 -0
- request_llm/local_llm_class.py +2 -2
- requirements.txt +2 -1
- tests/test_plugins.py +3 -3
- tests/test_utils.py +3 -0
- version +2 -2
README.md
CHANGED
@@ -39,7 +39,7 @@ To translate this project to arbitary language with GPT, read and run [`multi_la

 功能(⭐= 近期新增功能) | 描述
 --- | ---
-⭐[接入新模型](https://github.com/binary-husky/gpt_academic/wiki/%E5%A6%82%E4%BD%95%E5%88%87%E6%8D%A2%E6%A8%A1%E5%9E%8B)! |
+⭐[接入新模型](https://github.com/binary-husky/gpt_academic/wiki/%E5%A6%82%E4%BD%95%E5%88%87%E6%8D%A2%E6%A8%A1%E5%9E%8B)! | 百度[千帆](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu)与文心一言, [通义千问](https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary),上海AI-Lab[书生](https://github.com/InternLM/InternLM),讯飞[星火](https://xinghuo.xfyun.cn/),[LLaMa2](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf)
 一键润色 | 支持一键润色、一键查找论文语法错误
 一键中英互译 | 一键中英互译
 一键代码解释 | 显示代码、解释代码、生成代码、给代码加注释
@@ -178,7 +178,7 @@ docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic
 ```
 P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以直接使用docker-compose获取Latex功能(修改docker-compose.yml,保留方案4并删除其他方案)。

-2. ChatGPT + ChatGLM2 + MOSS
+2. ChatGPT + ChatGLM2 + MOSS + LLAMA2 + 通义千问(需要熟悉[Nvidia Docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-on-ubuntu-and-debian)运行时)
 [![chatglm](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-chatglm.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-chatglm.yml)

 ``` sh
@@ -186,7 +186,7 @@ P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以
 docker-compose up
 ```

-3. ChatGPT + LLAMA + 盘古 + RWKV(需要熟悉Docker
+3. ChatGPT + LLAMA + 盘古 + RWKV(需要熟悉[Nvidia Docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-on-ubuntu-and-debian)运行时)
 [![jittorllms](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-jittorllms.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-jittorllms.yml)

 ``` sh
@@ -313,6 +313,8 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h

 ### II:版本:
 - version 3.5(Todo): 使用自然语言调用本项目的所有函数插件(高优先级)
+- version 3.49: 支持百度千帆平台和文心一言
+- version 3.48: 支持阿里达摩院通义千问,上海AI-Lab书生,讯飞星火
 - version 3.46: 支持完全脱手操作的实时语音对话
 - version 3.45: 支持自定义ChatGLM2微调模型
 - version 3.44: 正式支持Azure,优化界面易用性
app.py
CHANGED
@@ -4,7 +4,7 @@ def main():
     import subprocess, sys
     subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'gradio-stable-fork'])
     import gradio as gr
-    if gr.__version__ not in ['3.28.3','3.32.
+    if gr.__version__ not in ['3.28.3','3.32.2']: assert False, "需要特殊依赖,请务必用 pip install -r requirements.txt 指令安装依赖,详情信息见requirements.txt"
     from request_llm.bridge_all import predict
     from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, load_chat_cookies, DummyWith
     # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
config.py
CHANGED
@@ -11,7 +11,11 @@
 API_KEY = "此处填API密钥"    # 可同时填写多个API-KEY,用英文逗号分割,例如API_KEY = "sk-openaikey1,sk-openaikey2,fkxxxx-api2dkey3,azure-apikey4"


-# [step
+# [step 1]>> API_KEY = "sk-123456789xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx123456789"。极少数情况下,还需要填写组织(格式如org-123456789abcdefghijklmno的),请向下翻,找 API_ORG 设置项
+API_KEY = "此处填API密钥"    # 可同时填写多个API-KEY,用英文逗号分割,例如API_KEY = "sk-openaikey1,sk-openaikey2,fkxxxx-api2dkey3,azure-apikey4"
+
+
+# [step 2]>> 改为True应用代理,如果直接在海外服务器部署,此处不修改;如果使用本地或无地域限制的大模型时,此处也不需要修改
 USE_PROXY = False
 if USE_PROXY:
     """
@@ -69,7 +73,7 @@ MAX_RETRY = 2

 # OpenAI模型选择是(gpt4现在只对申请成功的人开放)
 LLM_MODEL = "gpt-3.5-turbo" # 可选 "chatglm"
-AVAIL_LLM_MODELS = ["
+AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "api2d-gpt-3.5-turbo", "spark", "azure-gpt-3.5"]

 # ChatGLM(2) Finetune Model Path (如果使用ChatGLM2微调模型,需要把"chatglmft"加入AVAIL_LLM_MODELS中)
 ChatGLM_PTUNING_CHECKPOINT = "" # 例如"/home/hmp/ChatGLM2-6B/ptuning/output/6b-pt-128-1e-2/checkpoint-100"
@@ -147,3 +151,70 @@ ANTHROPIC_API_KEY = ""

 # 自定义API KEY格式
 CUSTOM_API_KEY_PATTERN = ""
+
+
+# HUGGINGFACE的TOKEN,下载LLAMA时起作用 https://huggingface.co/docs/hub/security-tokens
+HUGGINGFACE_ACCESS_TOKEN = "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"
+
+
+# GROBID服务器地址(填写多个可以均衡负载),用于高质量地读取PDF文档
+# 获取方法:复制以下空间https://huggingface.co/spaces/qingxu98/grobid,设为public,然后GROBID_URL = "https://(你的hf用户名如qingxu98)-(你的填写的空间名如grobid).hf.space"
+GROBID_URLS = [
+    "https://qingxu98-grobid.hf.space","https://qingxu98-grobid2.hf.space","https://qingxu98-grobid3.hf.space",
+    "https://shaocongma-grobid.hf.space","https://FBR123-grobid.hf.space",
+]
+
+
+
+"""
+在线大模型配置关联关系示意图
+│
+├── "gpt-3.5-turbo" 等openai模型
+│   ├── API_KEY
+│   ├── CUSTOM_API_KEY_PATTERN(不常用)
+│   ├── API_ORG(不常用)
+│   └── API_URL_REDIRECT(不常用)
+│
+├── "azure-gpt-3.5" 等azure模型
+│   ├── API_KEY
+│   ├── AZURE_ENDPOINT
+│   ├── AZURE_API_KEY
+│   ├── AZURE_ENGINE
+│   └── API_URL_REDIRECT
+│
+├── "spark" 星火认知大模型
+│   ├── XFYUN_APPID
+│   ├── XFYUN_API_SECRET
+│   └── XFYUN_API_KEY
+│
+├── "claude-1-100k" 等claude模型
+│   └── ANTHROPIC_API_KEY
+│
+├── "stack-claude"
+│   ├── SLACK_CLAUDE_BOT_ID
+│   └── SLACK_CLAUDE_USER_TOKEN
+│
+├── "qianfan" 百度千帆大模型库
+│   ├── BAIDU_CLOUD_QIANFAN_MODEL
+│   ├── BAIDU_CLOUD_API_KEY
+│   └── BAIDU_CLOUD_SECRET_KEY
+│
+├── "newbing" Newbing接口不再稳定,不推荐使用
+    ├── NEWBING_STYLE
+    └── NEWBING_COOKIES
+
+
+
+插件在线服务配置依赖关系示意图
+│
+├── 语音功能
+│   ├── ENABLE_AUDIO
+│   ├── ALIYUN_TOKEN
+│   ├── ALIYUN_APPKEY
+│   ├── ALIYUN_ACCESSKEY
+│   └── ALIYUN_SECRET
+│
+├── PDF文档精准解析
+│   └── GROBID_URLS
+
+"""
crazy_functional.py
CHANGED
@@ -24,6 +24,7 @@ def get_crazy_functions():
     from crazy_functions.对话历史存档 import 对话历史存档
     from crazy_functions.对话历史存档 import 载入对话历史存档
     from crazy_functions.对话历史存档 import 删除所有本地对话历史记录
+    from crazy_functions.辅助功能 import 清除缓存

     from crazy_functions.批量Markdown翻译 import Markdown英译中
     function_plugins = {
@@ -40,7 +41,12 @@ def get_crazy_functions():
             "AsButton":False,
             "Function": HotReload(删除所有本地对话历史记录)
         },
-        "
+        "清除所有缓存文件(请谨慎操作)": {
+            "Color": "stop",
+            "AsButton": False,  # 加入下拉菜单中
+            "Function": HotReload(清除缓存)
+        },
+        "解析Jupyter Notebook文件": {
             "Color": "stop",
             "AsButton":False,
             "Function": HotReload(解析ipynb文件),
@@ -328,7 +334,7 @@ def get_crazy_functions():
     try:
         from crazy_functions.Langchain知识库 import 知识库问答
         function_plugins.update({
-            "
+            "构建知识库(请先上传文件素材)": {
                 "Color": "stop",
                 "AsButton": False,
                 "AdvancedArgs": True,
@@ -342,7 +348,7 @@ def get_crazy_functions():
     try:
         from crazy_functions.Langchain知识库 import 读取知识库作答
         function_plugins.update({
-            "
+            "知识库问答": {
                 "Color": "stop",
                 "AsButton": False,
                 "AdvancedArgs": True,
@@ -352,6 +358,32 @@ def get_crazy_functions():
         })
     except:
         print('Load function plugin failed')
+
+    try:
+        from crazy_functions.交互功能函数模板 import 交互功能模板函数
+        function_plugins.update({
+            "交互功能模板函数": {
+                "Color": "stop",
+                "AsButton": False,
+                "Function": HotReload(交互功能模板函数)
+            }
+        })
+    except:
+        print('Load function plugin failed')
+
+    # try:
+    #     from crazy_functions.chatglm微调工具 import 微调数据集生成
+    #     function_plugins.update({
+    #         "黑盒模型学习: 微调数据集生成 (先上传数据集)": {
+    #             "Color": "stop",
+    #             "AsButton": False,
+    #             "AdvancedArgs": True,
+    #             "ArgsReminder": "针对数据集输入(如 绿帽子*深蓝色衬衫*黑色运动裤)给出指令,例如您可以将以下命令复制到下方: --llm_to_learn=azure-gpt-3.5 --prompt_prefix='根据下面的服装类型提示,想象一个穿着者,对这个人外貌、身处的环境、内心世界、过去经历进行描写。要求:100字以内,用第二人称。' --system_prompt=''",
+    #             "Function": HotReload(微调数据集生成)
+    #         }
+    #         })
+    #     except:
+    #         print('Load function plugin failed')

     try:
         from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比
@@ -366,7 +398,7 @@ def get_crazy_functions():
         })
         from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
         function_plugins.update({
-            "Arixv
+            "Arixv论文精细翻译(输入arxivID)[需Latex]": {
                 "Color": "stop",
                 "AsButton": False,
                 "AdvancedArgs": True,
@@ -377,7 +409,7 @@ def get_crazy_functions():
             }
         })
         function_plugins.update({
-            "
+            "本地Latex论文精细翻译(上传Latex项目)[需Latex]": {
                 "Color": "stop",
                 "AsButton": False,
                 "AdvancedArgs": True,
crazy_functions/latex_fns/latex_toolbox.py
CHANGED
@@ -281,9 +281,12 @@ def rm_comments(main_file):
 def find_tex_file_ignore_case(fp):
     dir_name = os.path.dirname(fp)
     base_name = os.path.basename(fp)
+    # 如果输入的文件路径是正确的
+    if os.path.exists(pj(dir_name, base_name)): return pj(dir_name, base_name)
+    # 如果不正确,试着加上.tex后缀试试
     if not base_name.endswith('.tex'): base_name+='.tex'
     if os.path.exists(pj(dir_name, base_name)): return pj(dir_name, base_name)
-    #
+    # 如果还找不到,解除大小写限制,再试一次
     import glob
     for f in glob.glob(dir_name+'/*.tex'):
         base_name_s = os.path.basename(fp)
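For reference, the lookup order this change establishes is: exact path first, then the same name with a `.tex` suffix, then a case-insensitive scan of the directory. A self-contained sketch of that strategy (the diff above truncates before the final comparison, so the last step is an assumption about the intended behaviour):

```python
import os, glob

def find_tex_file_ignore_case_sketch(fp):
    """Locate a .tex file, tolerating a missing suffix and case differences (illustrative only)."""
    dir_name, base_name = os.path.dirname(fp), os.path.basename(fp)
    # 1) the path is already correct
    if os.path.exists(os.path.join(dir_name, base_name)):
        return os.path.join(dir_name, base_name)
    # 2) retry with a .tex suffix
    if not base_name.endswith('.tex'):
        base_name += '.tex'
    if os.path.exists(os.path.join(dir_name, base_name)):
        return os.path.join(dir_name, base_name)
    # 3) relax the case restriction and scan the directory (assumed intent of the original)
    for f in glob.glob(os.path.join(dir_name, '*.tex')):
        if os.path.basename(f).lower() == base_name.lower():
            return f
    return None
```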
crazy_functions/pdf_fns/parse_pdf.py
ADDED
@@ -0,0 +1,25 @@
+import requests
+import random
+from functools import lru_cache
+class GROBID_OFFLINE_EXCEPTION(Exception): pass
+
+def get_avail_grobid_url():
+    from toolbox import get_conf
+    GROBID_URLS, = get_conf('GROBID_URLS')
+    if len(GROBID_URLS) == 0: return None
+    try:
+        _grobid_url = random.choice(GROBID_URLS) # 随机负载均衡
+        if _grobid_url.endswith('/'): _grobid_url = _grobid_url.rstrip('/')
+        res = requests.get(_grobid_url+'/api/isalive')
+        if res.text=='true': return _grobid_url
+        else: return None
+    except:
+        return None
+
+@lru_cache(maxsize=32)
+def parse_pdf(pdf_path, grobid_url):
+    import scipdf # pip install scipdf_parser
+    if grobid_url.endswith('/'): grobid_url = grobid_url.rstrip('/')
+    article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url)
+    return article_dict
+
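A minimal sketch of how these two helpers are meant to be combined by callers such as the PDF-translation plugin below; the PDF path here is a placeholder, and the `article_dict` keys (`title`, `sections`) follow the scipdf output consumed later in this commit:

```python
from crazy_functions.pdf_fns.parse_pdf import parse_pdf, get_avail_grobid_url

grobid_url = get_avail_grobid_url()          # picks a live server from GROBID_URLS, or None
if grobid_url is not None:
    article_dict = parse_pdf("some_paper.pdf", grobid_url)   # placeholder path
    print(article_dict['title'], len(article_dict['sections']))
else:
    print("No GROBID server reachable; fall back to the legacy parser.")
```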
crazy_functions/批量翻译PDF文档_多线程.py
CHANGED
@@ -1,15 +1,19 @@
 from toolbox import CatchException, report_execption, write_results_to_file
-from toolbox import update_ui, promote_file_to_downloadzone
+from toolbox import update_ui, promote_file_to_downloadzone, update_ui_lastest_msg, disable_auto_promotion
+from toolbox import write_history_to_file, get_log_folder
 from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
 from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
 from .crazy_utils import read_and_clean_pdf_text
+from .pdf_fns.parse_pdf import parse_pdf, get_avail_grobid_url
 from colorful import *
+import glob
+import os
+import math

 @CatchException
-def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history,
+def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
-    import glob
-    import os

+    disable_auto_promotion(chatbot)
     # 基本信息:功能、贡献者
     chatbot.append([
         "函数插件功能?",
@@ -30,20 +34,11 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_
     # 清空历史,以免输入溢出
     history = []

+    from .crazy_utils import get_files_from_everything
+    success, file_manifest, project_folder = get_files_from_everything(txt, type='.pdf')
     # 检测输入参数,如没有给定输入参数,直接退出
-    if
-
-    else:
-        if txt == "":
-            txt = '空空如也的输入栏'
-        report_execption(chatbot, history,
-            a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        return
-
-    # 搜索需要处理的文件清单
-    file_manifest = [f for f in glob.glob(
-        f'{project_folder}/**/*.pdf', recursive=True)]
+    if not success:
+        if txt == "": txt = '空空如也的输入栏'

     # 如果没找到任何文件
     if len(file_manifest) == 0:
@@ -53,22 +48,130 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_
         return

     # 开始正式执行任务
-
+    grobid_url = get_avail_grobid_url()
+    if grobid_url is not None:
+        yield from 解析PDF_基于GROBID(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, grobid_url)
+    else:
+        yield from update_ui_lastest_msg("GROBID服务不可用,请检查config中的GROBID_URL。作为替代,现在将执行效果稍差的旧版代码。", chatbot, history, delay=3)
+        yield from 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)


-def 解析
-    import os
+def 解析PDF_基于GROBID(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, grobid_url):
     import copy
     import tiktoken
     TOKEN_LIMIT_PER_FRAGMENT = 1280
     generated_conclusion_files = []
     generated_html_files = []
+    DST_LANG = "中文"
     for index, fp in enumerate(file_manifest):
+        chatbot.append(["当前进度:", f"正在连接GROBID服务,请稍候: {grobid_url}\n如果等待时间过长,请修改config中的GROBID_URL,可修改成本地GROBID服务。"]); yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+        article_dict = parse_pdf(fp, grobid_url)
+        print(article_dict)
+        prompt = "以下是一篇学术论文的基本信息:\n"
+        # title
+        title = article_dict.get('title', '无法获取 title'); prompt += f'title:{title}\n\n'
+        # authors
+        authors = article_dict.get('authors', '无法获取 authors'); prompt += f'authors:{authors}\n\n'
+        # abstract
+        abstract = article_dict.get('abstract', '无法获取 abstract'); prompt += f'abstract:{abstract}\n\n'
+        # command
+        prompt += f"请将题目和摘要翻译为{DST_LANG}。"
+        meta = [f'# Title:\n\n', title, f'# Abstract:\n\n', abstract ]
+
+        # 单线,获取文章meta信息
+        paper_meta_info = yield from request_gpt_model_in_new_thread_with_ui_alive(
+            inputs=prompt,
+            inputs_show_user=prompt,
+            llm_kwargs=llm_kwargs,
+            chatbot=chatbot, history=[],
+            sys_prompt="You are an academic paper reader。",
+        )
+
+        # 多线,翻译
+        inputs_array = []
+        inputs_show_user_array = []
+
+        # get_token_num
+        from request_llm.bridge_all import model_info
+        enc = model_info[llm_kwargs['llm_model']]['tokenizer']
+        def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
+        from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
+
+        def break_down(txt):
+            raw_token_num = get_token_num(txt)
+            if raw_token_num <= TOKEN_LIMIT_PER_FRAGMENT:
+                return [txt]
+            else:
+                # raw_token_num > TOKEN_LIMIT_PER_FRAGMENT
+                # find a smooth token limit to achieve even seperation
+                count = int(math.ceil(raw_token_num / TOKEN_LIMIT_PER_FRAGMENT))
+                token_limit_smooth = raw_token_num // count + count
+                return breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn=get_token_num, limit=token_limit_smooth)
+
+        for section in article_dict.get('sections'):
+            if len(section['text']) == 0: continue
+            section_frags = break_down(section['text'])
+            for i, fragment in enumerate(section_frags):
+                heading = section['heading']
+                if len(section_frags) > 1: heading += f'Part-{i+1}'
+                inputs_array.append(
+                    f"你需要翻译{heading}章节,内容如下: \n\n{fragment}"
+                )
+                inputs_show_user_array.append(
+                    f"# {heading}\n\n{fragment}"
+                )
+
+        gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
+            inputs_array=inputs_array,
+            inputs_show_user_array=inputs_show_user_array,
+            llm_kwargs=llm_kwargs,
+            chatbot=chatbot,
+            history_array=[meta for _ in inputs_array],
+            sys_prompt_array=[
+                "请你作为一个学术翻译,负责把学术论文准确翻译成中文。注意文章中的每一句话都要翻译。" for _ in inputs_array],
+        )
+        res_path = write_history_to_file(meta + ["# Meta Translation" , paper_meta_info] + gpt_response_collection, file_basename=None, file_fullname=None)
+        promote_file_to_downloadzone(res_path, rename_file=os.path.basename(fp)+'.md', chatbot=chatbot)
+        generated_conclusion_files.append(res_path)
+
+        ch = construct_html()
+        orig = ""
+        trans = ""
+        gpt_response_collection_html = copy.deepcopy(gpt_response_collection)
+        for i,k in enumerate(gpt_response_collection_html):
+            if i%2==0:
+                gpt_response_collection_html[i] = inputs_show_user_array[i//2]
+            else:
+                gpt_response_collection_html[i] = gpt_response_collection_html[i]
+
+        final = ["", "", "一、论文概况", "", "Abstract", paper_meta_info, "二、论文翻译", ""]
+        final.extend(gpt_response_collection_html)
+        for i, k in enumerate(final):
+            if i%2==0:
+                orig = k
+            if i%2==1:
+                trans = k
+                ch.add_row(a=orig, b=trans)
+        create_report_file_name = f"{os.path.basename(fp)}.trans.html"
+        html_file = ch.save_file(create_report_file_name)
+        generated_html_files.append(html_file)
+        promote_file_to_downloadzone(html_file, rename_file=os.path.basename(html_file), chatbot=chatbot)
+
+    chatbot.append(("给出输出文件清单", str(generated_conclusion_files + generated_html_files)))
+    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+
+
+def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
+    import copy
+    TOKEN_LIMIT_PER_FRAGMENT = 1280
+    generated_conclusion_files = []
+    generated_html_files = []
+    for index, fp in enumerate(file_manifest):
         # 读取PDF文件
         file_content, page_one = read_and_clean_pdf_text(fp)
         file_content = file_content.encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars
         page_one = str(page_one).encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars
+
         # 递归地切割PDF文件
         from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
         from request_llm.bridge_all import model_info
@@ -140,8 +243,7 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
                 trans = k
                 ch.add_row(a=orig, b=trans)
             create_report_file_name = f"{os.path.basename(fp)}.trans.html"
-            ch.save_file(create_report_file_name)
-            generated_html_files.append(f'./gpt_log/{create_report_file_name}')
+            generated_html_files.append(ch.save_file(create_report_file_name))
         except:
             from toolbox import trimmed_format_exc
             print('writing html result failed:', trimmed_format_exc())
@@ -202,6 +304,6 @@ class construct_html():


     def save_file(self, file_name):
-        with open(
+        with open(os.path.join(get_log_folder(), file_name), 'w', encoding='utf8') as f:
             f.write(self.html_string.encode('utf-8', 'ignore').decode())
-
+        return os.path.join(get_log_folder(), file_name)
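The break_down helper introduced above smooths the per-fragment token limit so a long section splits into nearly equal pieces instead of several full chunks plus a tiny tail. A standalone illustration of that arithmetic, with a toy word-count tokenizer standing in for the model's real tokenizer:

```python
import math

TOKEN_LIMIT_PER_FRAGMENT = 1280

def get_token_num(txt):
    # toy tokenizer: one token per whitespace-separated word (the plugin uses the model's tokenizer)
    return len(txt.split())

def smooth_limit(txt):
    raw_token_num = get_token_num(txt)
    if raw_token_num <= TOKEN_LIMIT_PER_FRAGMENT:
        return raw_token_num
    count = int(math.ceil(raw_token_num / TOKEN_LIMIT_PER_FRAGMENT))
    # e.g. 3000 tokens -> 3 fragments of ~1003 tokens instead of 1280 + 1280 + 440
    return raw_token_num // count + count

print(smooth_limit("word " * 3000))   # -> 1003
```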
crazy_functions/辅助功能.py
ADDED
@@ -0,0 +1,43 @@
+# encoding: utf-8
+# @Time   : 2023/4/19
+# @Author : Spike
+# @Descr   :
+from toolbox import update_ui
+from toolbox import CatchException, report_execption, write_results_to_file, get_log_folder
+from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+
+
+@CatchException
+def 猜你想问(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    if txt:
+        show_say = txt
+        prompt = txt+'\n回答完问题后,再列出用户可能提出的三个问题。'
+    else:
+        prompt = history[-1]+"\n分析上述回答,再列出用户可能提出的三个问题。"
+        show_say = '分析上述回答,再列出用户可能提出的三个问题。'
+    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
+        inputs=prompt,
+        inputs_show_user=show_say,
+        llm_kwargs=llm_kwargs,
+        chatbot=chatbot,
+        history=history,
+        sys_prompt=system_prompt
+    )
+    chatbot[-1] = (show_say, gpt_say)
+    history.extend([show_say, gpt_say])
+    yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
+
+
+@CatchException
+def 清除缓存(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    chatbot.append(['清除本地缓存数据', '执行中. 删除 gpt_log & private_upload'])
+    yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
+
+    import shutil, os
+    gpt_log_dir = os.path.join(os.path.dirname(__file__), '..', 'gpt_log')
+    private_upload_dir = os.path.join(os.path.dirname(__file__), '..', 'private_upload')
+    shutil.rmtree(gpt_log_dir, ignore_errors=True)
+    shutil.rmtree(private_upload_dir, ignore_errors=True)
+
+    chatbot.append(['清除本地缓存数据', '执行完成'])
+    yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
docker-compose.yml
CHANGED
@@ -16,6 +16,7 @@ services:
       AVAIL_LLM_MODELS: ' ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "newbing"] '
       WEB_PORT: ' 22303 '
       ADD_WAIFU: ' True '
+      # THEME: ' Chuanhu-Small-and-Beautiful '
       # DEFAULT_WORKER_NUM: ' 10 '
       # AUTHENTICATION: ' [("username", "passwd"), ("username2", "passwd2")] '

@@ -28,7 +29,7 @@ services:


 ### ===================================================
-### 【方案二】 如果需要运行ChatGLM
+### 【方案二】 如果需要运行ChatGLM + Qwen + MOSS等本地模型
 ### ===================================================
 version: '3'
 services:
@@ -36,11 +37,11 @@ services:
     image: ghcr.io/binary-husky/gpt_academic_chatglm_moss:master # (Auto Built by Dockerfile: docs/Dockerfile+ChatGLM)
     environment:
       # 请查阅 `config.py` 以查看所有的配置信息
-      API_KEY: ' sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+      API_KEY: ' sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx '
       USE_PROXY: ' True '
       proxies: ' { "http": "socks5h://localhost:10880", "https": "socks5h://localhost:10880", } '
       LLM_MODEL: ' gpt-3.5-turbo '
-      AVAIL_LLM_MODELS: ' ["chatglm", "moss", "gpt-3.5-turbo", "gpt-4", "newbing"]
+      AVAIL_LLM_MODELS: ' ["chatglm", "qwen", "moss", "gpt-3.5-turbo", "gpt-4", "newbing"] '
       LOCAL_MODEL_DEVICE: ' cuda '
       DEFAULT_WORKER_NUM: ' 10 '
       WEB_PORT: ' 12303 '
@@ -57,6 +58,10 @@ services:
     command: >
       bash -c "python3 -u main.py"

+    # P.S. 通过对 command 进行微调,可以便捷地安装额外的依赖
+    # command: >
+    #   bash -c "pip install -r request_llm/requirements_qwen.txt && python3 -u main.py"
+
 ### ===================================================
 ### 【方案三】 如果需要运行ChatGPT + LLAMA + 盘古 + RWKV本地模型
 ### ===================================================
docs/GithubAction+ChatGLM+Moss
CHANGED
@@ -18,6 +18,7 @@ WORKDIR /gpt/gpt_academic
 RUN git clone https://github.com/OpenLMLab/MOSS.git request_llm/moss
 RUN python3 -m pip install -r requirements.txt
 RUN python3 -m pip install -r request_llm/requirements_moss.txt
+RUN python3 -m pip install -r request_llm/requirements_qwen.txt
 RUN python3 -m pip install -r request_llm/requirements_chatglm.txt
 RUN python3 -m pip install -r request_llm/requirements_newbing.txt
request_llm/bridge_all.py
CHANGED
@@ -19,6 +19,12 @@ from .bridge_chatgpt import predict as chatgpt_ui
 from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
 from .bridge_chatglm import predict as chatglm_ui

+from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
+from .bridge_chatglm import predict as chatglm_ui
+
+from .bridge_qianfan import predict_no_ui_long_connection as qianfan_noui
+from .bridge_qianfan import predict as qianfan_ui
+
 colors = ['#FF00FF', '#00FFFF', '#FF0000', '#990099', '#009999', '#990044']

 class LazyloadTiktoken(object):
@@ -165,7 +171,14 @@ model_info = {
         "tokenizer": tokenizer_gpt35,
         "token_cnt": get_token_num_gpt35,
     },
-
+    "qianfan": {
+        "fn_with_ui": qianfan_ui,
+        "fn_without_ui": qianfan_noui,
+        "endpoint": None,
+        "max_token": 2000,
+        "tokenizer": tokenizer_gpt35,
+        "token_cnt": get_token_num_gpt35,
+    },
 }

 # -=-=-=-=-=-=- 以下部分是新加入的模型,可能附带额外依赖 -=-=-=-=-=-=-
@@ -361,7 +374,7 @@ if "chatgpt_website" in AVAIL_LLM_MODELS: # 接入一些逆向工程https://gi
             "chatgpt_website": {
                 "fn_with_ui": chatgpt_website_ui,
                 "fn_without_ui": chatgpt_website_noui,
-                "endpoint":
+                "endpoint": openai_endpoint,
                 "max_token": 4096,
                 "tokenizer": tokenizer_gpt35,
                 "token_cnt": get_token_num_gpt35,
@@ -385,6 +398,22 @@ if "spark" in AVAIL_LLM_MODELS:   # 讯飞星火认知大模型
         })
     except:
         print(trimmed_format_exc())
+if "llama2" in AVAIL_LLM_MODELS: # llama2
+    try:
+        from .bridge_llama2 import predict_no_ui_long_connection as llama2_noui
+        from .bridge_llama2 import predict as llama2_ui
+        model_info.update({
+            "llama2": {
+                "fn_with_ui": llama2_ui,
+                "fn_without_ui": llama2_noui,
+                "endpoint": None,
+                "max_token": 4096,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            }
+        })
+    except:
+        print(trimmed_format_exc())
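Every entry registered in model_info follows the same shape, which is what lets the rest of the framework dispatch on the model name alone. A simplified sketch of that lookup pattern (the handler below is a placeholder, not the real qianfan bridge):

```python
# simplified sketch of the model_info dispatch pattern; the handler is a stand-in
def fake_noui_handler(inputs, llm_kwargs, history, sys_prompt):
    return f"[{llm_kwargs['llm_model']}] echo: {inputs}"

model_info = {
    "qianfan": {
        "fn_without_ui": fake_noui_handler,
        "endpoint": None,        # the qianfan bridge builds its own URL, so no fixed endpoint
        "max_token": 2000,
    },
}

def dispatch(inputs, llm_kwargs, history=[], sys_prompt=""):
    method = model_info[llm_kwargs['llm_model']]["fn_without_ui"]   # look up the bridge by model name
    return method(inputs, llm_kwargs, history, sys_prompt)

print(dispatch("你好", {'llm_model': 'qianfan'}))
```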
request_llm/bridge_chatgpt.py
CHANGED
@@ -177,14 +177,13 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
                 yield from update_ui(chatbot=chatbot, history=history, msg="非Openai官方接口返回了错误:" + chunk.decode()) # 刷新界面
                 return

-
-        if is_head_of_the_stream and (r'"object":"error"' not in
+        chunk_decoded = chunk.decode()
+        if is_head_of_the_stream and (r'"object":"error"' not in chunk_decoded) and (r"choices" not in chunk_decoded):
             # 数据流的第一帧不携带content
             is_head_of_the_stream = False; continue

         if chunk:
             try:
-                chunk_decoded = chunk.decode()
                 # 前者是API2D的结束条件,后者是OPENAI的结束条件
                 if ('data: [DONE]' in chunk_decoded) or (len(json.loads(chunk_decoded[6:])['choices'][0]["delta"]) == 0):
                     # 判定为数据流的结束,gpt_replying_buffer也写完了
@@ -192,7 +191,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
                     break
                 # 处理数据流的主体
                 chunkjson = json.loads(chunk_decoded[6:])
-                status_text = f"finish_reason: {chunkjson['choices'][0]
+                status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}"
                 # 如果这里抛出异常,一般是文本过长,详情见get_full_error的输出
                 gpt_replying_buffer = gpt_replying_buffer + json.loads(chunk_decoded[6:])['choices'][0]["delta"]["content"]
                 history[-1] = gpt_replying_buffer
@@ -216,7 +215,6 @@ def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
         history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
                                max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token'])) # history至少释放二分之一
         chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
-        # history = []    # 清除历史
     elif "does not exist" in error_msg:
         chatbot[-1] = (chatbot[-1][0], f"[Local Message] Model {llm_kwargs['llm_model']} does not exist. 模型不存在, 或者您没有获得体验资格.")
     elif "Incorrect API key" in error_msg:
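The reworked stream loop keys on two end-of-stream markers: API2D terminates with a literal `data: [DONE]` chunk, while OpenAI sends a final chunk whose delta is empty. A small self-contained sketch of that parsing logic over canned SSE chunks:

```python
import json

def stream_finished(chunk_decoded: str) -> bool:
    # API2D end marker, or an OpenAI chunk whose delta is empty
    if 'data: [DONE]' in chunk_decoded:
        return True
    payload = json.loads(chunk_decoded[6:])      # strip the leading 'data: '
    return len(payload['choices'][0]['delta']) == 0

chunks = [
    'data: {"choices":[{"delta":{"content":"Hel"},"finish_reason":null}]}',
    'data: {"choices":[{"delta":{"content":"lo"},"finish_reason":"stop"}]}',
    'data: {"choices":[{"delta":{}}]}',
]
buffer = ""
for chunk_decoded in chunks:
    if stream_finished(chunk_decoded):
        break
    buffer += json.loads(chunk_decoded[6:])['choices'][0]['delta']['content']
print(buffer)   # -> Hello
```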
request_llm/bridge_chatgpt_website.py
CHANGED
@@ -118,16 +118,6 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
     chatbot 为WebUI中显示的对话列表,修改它,然后yeild出去,可以直接修改对话界面内容
     additional_fn代表点击的哪个按钮,按钮见functional.py
     """
-    if is_any_api_key(inputs):
-        chatbot._cookies['api_key'] = inputs
-        chatbot.append(("输入已识别为openai的api_key", what_keys(inputs)))
-        yield from update_ui(chatbot=chatbot, history=history, msg="api_key已导入") # 刷新界面
-        return
-    elif not is_any_api_key(chatbot._cookies['api_key']):
-        chatbot.append((inputs, "缺少api_key。\n\n1. 临时解决方案:直接在输入区键入api_key,然后回车提交。\n\n2. 长效解决方案:在config.py中配置。"))
-        yield from update_ui(chatbot=chatbot, history=history, msg="缺少api_key") # 刷新界面
-        return
-
     if additional_fn is not None:
         from core_functional import handle_core_functionality
         inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
@@ -245,14 +235,9 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
     if not is_any_api_key(llm_kwargs['api_key']):
         raise AssertionError("你提供了错误的API_KEY。\n\n1. 临时解决方案:直接在输入区键入api_key,然后回车提交。\n\n2. 长效解决方案:在config.py中配置。")

-    api_key = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])
-
     headers = {
         "Content-Type": "application/json",
-        "Authorization": f"Bearer {api_key}"
     }
-    if API_ORG.startswith('org-'): headers.update({"OpenAI-Organization": API_ORG})
-    if llm_kwargs['llm_model'].startswith('azure-'): headers.update({"api-key": api_key})

     conversation_cnt = len(history) // 2

request_llm/bridge_llama2.py
ADDED
@@ -0,0 +1,91 @@
+model_name = "LLaMA"
+cmd_to_install = "`pip install -r request_llm/requirements_chatglm.txt`"
+
+
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+from toolbox import update_ui, get_conf, ProxyNetworkActivate
+from multiprocessing import Process, Pipe
+from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns, SingletonLocalLLM
+from threading import Thread
+
+
+# ------------------------------------------------------------------------------------------------------------------------
+# 🔌💻 Local Model
+# ------------------------------------------------------------------------------------------------------------------------
+@SingletonLocalLLM
+class GetONNXGLMHandle(LocalLLMHandle):
+
+    def load_model_info(self):
+        # 🏃♂️🏃♂️🏃♂️ 子进程执行
+        self.model_name = model_name
+        self.cmd_to_install = cmd_to_install
+
+    def load_model_and_tokenizer(self):
+        # 🏃♂️🏃♂️🏃♂️ 子进程执行
+        import os, glob
+        import os
+        import platform
+        huggingface_token, device = get_conf('HUGGINGFACE_ACCESS_TOKEN', 'LOCAL_MODEL_DEVICE')
+        assert len(huggingface_token) != 0, "没有填写 HUGGINGFACE_ACCESS_TOKEN"
+        with open(os.path.expanduser('~/.cache/huggingface/token'), 'w') as f:
+            f.write(huggingface_token)
+        model_id = 'meta-llama/Llama-2-7b-chat-hf'
+        with ProxyNetworkActivate():
+            self._tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=huggingface_token)
+            # use fp16
+            model = AutoModelForCausalLM.from_pretrained(model_id, use_auth_token=huggingface_token).eval()
+            if device.startswith('cuda'): model = model.half().to(device)
+            self._model = model
+
+        return self._model, self._tokenizer
+
+    def llm_stream_generator(self, **kwargs):
+        # 🏃♂️🏃♂️🏃♂️ 子进程执行
+        def adaptor(kwargs):
+            query = kwargs['query']
+            max_length = kwargs['max_length']
+            top_p = kwargs['top_p']
+            temperature = kwargs['temperature']
+            history = kwargs['history']
+            console_slience = kwargs.get('console_slience', True)
+            return query, max_length, top_p, temperature, history, console_slience
+
+        def convert_messages_to_prompt(query, history):
+            prompt = ""
+            for a, b in history:
+                prompt += f"\n[INST]{a}[/INST]"
+                prompt += "\n{b}" + b
+            prompt += f"\n[INST]{query}[/INST]"
+            return prompt
+
+        query, max_length, top_p, temperature, history, console_slience = adaptor(kwargs)
+        prompt = convert_messages_to_prompt(query, history)
+        # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=-
+        # code from transformers.llama
+        streamer = TextIteratorStreamer(self._tokenizer)
+        # Run the generation in a separate thread, so that we can fetch the generated text in a non-blocking way.
+        inputs = self._tokenizer([prompt], return_tensors="pt")
+        prompt_tk_back = self._tokenizer.batch_decode(inputs['input_ids'])[0]
+
+        generation_kwargs = dict(inputs.to(self._model.device), streamer=streamer, max_new_tokens=max_length)
+        thread = Thread(target=self._model.generate, kwargs=generation_kwargs)
+        thread.start()
+        generated_text = ""
+        for new_text in streamer:
+            generated_text += new_text
+            if not console_slience: print(new_text, end='')
+            yield generated_text.lstrip(prompt_tk_back).rstrip("</s>")
+        if not console_slience: print()
+        # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=-
+
+    def try_to_import_special_deps(self, **kwargs):
+        # import something that will raise error if the user does not install requirement_*.txt
+        # 🏃♂️🏃♂️🏃♂️ 主进程执行
+        import importlib
+        importlib.import_module('transformers')
+
+
+# ------------------------------------------------------------------------------------------------------------------------
+# 🔌💻 GPT-Academic Interface
+# ------------------------------------------------------------------------------------------------------------------------
+predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetONNXGLMHandle, model_name)
request_llm/bridge_qianfan.py
ADDED
@@ -0,0 +1,164 @@
+
+import time, requests, json
+from multiprocessing import Process, Pipe
+from functools import wraps
+from datetime import datetime, timedelta
+from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, get_conf
+
+model_name = '千帆大模型平台'
+timeout_bot_msg = '[Local Message] Request timeout. Network error.'
+
+def cache_decorator(timeout):
+    cache = {}
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            key = (func.__name__, args, frozenset(kwargs.items()))
+            # Check if result is already cached and not expired
+            if key in cache:
+                result, timestamp = cache[key]
+                if datetime.now() - timestamp < timedelta(seconds=timeout):
+                    return result
+
+            # Call the function and cache the result
+            result = func(*args, **kwargs)
+            cache[key] = (result, datetime.now())
+            return result
+        return wrapper
+    return decorator
+
+@cache_decorator(timeout=3600)
+def get_access_token():
+    """
+    使用 AK,SK 生成鉴权签名(Access Token)
+    :return: access_token,或是None(如果错误)
+    """
+    # if (access_token_cache is None) or (time.time() - last_access_token_obtain_time > 3600):
+    BAIDU_CLOUD_API_KEY, BAIDU_CLOUD_SECRET_KEY = get_conf('BAIDU_CLOUD_API_KEY', 'BAIDU_CLOUD_SECRET_KEY')
+
+    if len(BAIDU_CLOUD_SECRET_KEY) == 0: raise RuntimeError("没有配置BAIDU_CLOUD_SECRET_KEY")
+    if len(BAIDU_CLOUD_API_KEY) == 0: raise RuntimeError("没有配置BAIDU_CLOUD_API_KEY")
+
+    url = "https://aip.baidubce.com/oauth/2.0/token"
+    params = {"grant_type": "client_credentials", "client_id": BAIDU_CLOUD_API_KEY, "client_secret": BAIDU_CLOUD_SECRET_KEY}
+    access_token_cache = str(requests.post(url, params=params).json().get("access_token"))
+    return access_token_cache
+    # else:
+    #     return access_token_cache
+
+
+def generate_message_payload(inputs, llm_kwargs, history, system_prompt):
+    conversation_cnt = len(history) // 2
+    messages = [{"role": "user", "content": system_prompt}]
+    messages.append({"role": "assistant", "content": 'Certainly!'})
+    if conversation_cnt:
+        for index in range(0, 2*conversation_cnt, 2):
+            what_i_have_asked = {}
+            what_i_have_asked["role"] = "user"
+            what_i_have_asked["content"] = history[index]
+            what_gpt_answer = {}
+            what_gpt_answer["role"] = "assistant"
+            what_gpt_answer["content"] = history[index+1]
+            if what_i_have_asked["content"] != "":
+                if what_gpt_answer["content"] == "": continue
+                if what_gpt_answer["content"] == timeout_bot_msg: continue
+                messages.append(what_i_have_asked)
+                messages.append(what_gpt_answer)
+            else:
+                messages[-1]['content'] = what_gpt_answer['content']
+    what_i_ask_now = {}
+    what_i_ask_now["role"] = "user"
+    what_i_ask_now["content"] = inputs
+    messages.append(what_i_ask_now)
+    return messages
+
+
+def generate_from_baidu_qianfan(inputs, llm_kwargs, history, system_prompt):
+    BAIDU_CLOUD_QIANFAN_MODEL, = get_conf('BAIDU_CLOUD_QIANFAN_MODEL')
+
+    url_lib = {
+        "ERNIE-Bot":        "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions" ,
+        "ERNIE-Bot-turbo":  "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/eb-instant" ,
+        "BLOOMZ-7B":        "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/bloomz_7b1",
+
+        "Llama-2-70B-Chat": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/llama_2_70b",
+        "Llama-2-13B-Chat": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/llama_2_13b",
+        "Llama-2-7B-Chat":  "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/llama_2_7b",
+    }
+
+    url = url_lib[BAIDU_CLOUD_QIANFAN_MODEL]
+
+    url += "?access_token=" + get_access_token()
+
+
+    payload = json.dumps({
+        "messages": generate_message_payload(inputs, llm_kwargs, history, system_prompt),
+        "stream": True
+    })
+    headers = {
+        'Content-Type': 'application/json'
+    }
+    response = requests.request("POST", url, headers=headers, data=payload, stream=True)
+    buffer = ""
+    for line in response.iter_lines():
+        if len(line) == 0: continue
+        try:
+            dec = line.decode().lstrip('data:')
+            dec = json.loads(dec)
+            incoming = dec['result']
+            buffer += incoming
+            yield buffer
+        except:
+            if ('error_code' in dec) and ("max length" in dec['error_msg']):
+                raise ConnectionAbortedError(dec['error_msg'])  # 上下文太长导致 token 溢出
+            elif ('error_code' in dec):
+                raise RuntimeError(dec['error_msg'])
+
+
+def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
+    """
+    ⭐多线程方法
+    函数的说明请见 request_llm/bridge_all.py
+    """
+    watch_dog_patience = 5
+    response = ""
+
+    for response in generate_from_baidu_qianfan(inputs, llm_kwargs, history, sys_prompt):
+        if len(observe_window) >= 1:
+            observe_window[0] = response
+        if len(observe_window) >= 2:
+            if (time.time()-observe_window[1]) > watch_dog_patience: raise RuntimeError("程序终止。")
+    return response
+
+def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
+    """
+    ⭐单线程方法
+    函数的说明请见 request_llm/bridge_all.py
+    """
+    chatbot.append((inputs, ""))
+
+    if additional_fn is not None:
+        from core_functional import handle_core_functionality
+        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
+
+    yield from update_ui(chatbot=chatbot, history=history)
+    # 开始接收回复
+    try:
+        for response in generate_from_baidu_qianfan(inputs, llm_kwargs, history, system_prompt):
+            chatbot[-1] = (inputs, response)
+            yield from update_ui(chatbot=chatbot, history=history)
+    except ConnectionAbortedError as e:
+        from .bridge_all import model_info
+        if len(history) >= 2: history[-1] = ""; history[-2] = "" # 清除当前溢出的输入:history[-2] 是本次输入, history[-1] 是本次输出
+        history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
+                               max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token'])) # history至少释放二分之一
+        chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
+        yield from update_ui(chatbot=chatbot, history=history, msg="异常") # 刷新界面
+        return
+
+    # 总结输出
+    response = f"[Local Message]: {model_name}响应异常 ..."
+    if response == f"[Local Message]: 等待{model_name}响应中 ...":
+        response = f"[Local Message]: {model_name}响应异常 ..."
+    history.extend([inputs, response])
+    yield from update_ui(chatbot=chatbot, history=history)
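cache_decorator memoizes get_access_token for an hour so the Baidu OAuth endpoint is not hit on every request. A quick standalone illustration of the same decorator, with a simulated slow call in place of the real token fetch:

```python
import time
from functools import wraps
from datetime import datetime, timedelta

def cache_decorator(timeout):
    cache = {}
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            key = (func.__name__, args, frozenset(kwargs.items()))
            if key in cache:
                result, timestamp = cache[key]
                if datetime.now() - timestamp < timedelta(seconds=timeout):
                    return result            # still fresh: skip the expensive call
            result = func(*args, **kwargs)
            cache[key] = (result, datetime.now())
            return result
        return wrapper
    return decorator

@cache_decorator(timeout=2)
def fetch_token():
    time.sleep(0.5)                          # simulated OAuth round-trip
    return f"token-{time.time():.0f}"

print(fetch_token())   # slow: populates the cache
print(fetch_token())   # instant: served from the cache until the 2-second timeout expires
```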
request_llm/local_llm_class.py
CHANGED
@@ -128,7 +128,7 @@ def get_local_llm_predict_fns(LLMSingletonClass, model_name):

         # chatglm 没有 sys_prompt 接口,因此把prompt加入 history
         history_feedin = []
-        history_feedin.append([
+        history_feedin.append([sys_prompt, "Certainly!"])
         for i in range(len(history)//2):
             history_feedin.append([history[2*i], history[2*i+1]] )

@@ -161,7 +161,7 @@ def get_local_llm_predict_fns(LLMSingletonClass, model_name):

         # 处理历史信息
         history_feedin = []
-        history_feedin.append([
+        history_feedin.append([system_prompt, "Certainly!"])
         for i in range(len(history)//2):
             history_feedin.append([history[2*i], history[2*i+1]] )

requirements.txt
CHANGED
@@ -17,5 +17,6 @@ openai
 numpy
 arxiv
 rich
-websocket-client
 pypdf2==2.12.1
+websocket-client
+scipdf_parser==0.3
tests/test_plugins.py
CHANGED
@@ -9,9 +9,9 @@ validate_path() # 返回项目根路径
 from tests.test_utils import plugin_test

 if __name__ == "__main__":
-    plugin_test(plugin='crazy_functions.命令行助手->命令行助手', main_input='查看当前的docker容器列表')
+    # plugin_test(plugin='crazy_functions.命令行助手->命令行助手', main_input='查看当前的docker容器列表')

-    plugin_test(plugin='crazy_functions.解析项目源代码->解析一个Python项目', main_input="crazy_functions/test_project/python/dqn")
+    # plugin_test(plugin='crazy_functions.解析项目源代码->解析一个Python项目', main_input="crazy_functions/test_project/python/dqn")

     # plugin_test(plugin='crazy_functions.解析项目源代码->解析一个C项目', main_input="crazy_functions/test_project/cpp/cppipc")

@@ -19,7 +19,7 @@ if __name__ == "__main__":

     # plugin_test(plugin='crazy_functions.批量Markdown翻译->Markdown中译英', main_input="README.md")

-
+    plugin_test(plugin='crazy_functions.批量翻译PDF文档_多线程->批量翻译PDF文档', main_input='crazy_functions/test_project/pdf_and_word/aaai.pdf')

     # plugin_test(plugin='crazy_functions.谷歌检索小助手->谷歌检索小助手', main_input="https://scholar.google.com/scholar?hl=en&as_sdt=0%2C5&q=auto+reinforcement+learning&btnG=")

tests/test_utils.py
CHANGED
@@ -22,10 +22,12 @@ def silence_stdout(func):
     def wrapper(*args, **kwargs):
         _original_stdout = sys.stdout
         sys.stdout = open(os.devnull, 'w')
+        sys.stdout.reconfigure(encoding='utf-8')
         for q in func(*args, **kwargs):
             sys.stdout = _original_stdout
             yield q
             sys.stdout = open(os.devnull, 'w')
+            sys.stdout.reconfigure(encoding='utf-8')
         sys.stdout.close()
         sys.stdout = _original_stdout
     return wrapper
@@ -35,6 +37,7 @@ def silence_stdout_fn(func):
     def wrapper(*args, **kwargs):
         _original_stdout = sys.stdout
         sys.stdout = open(os.devnull, 'w')
+        sys.stdout.reconfigure(encoding='utf-8')
         result = func(*args, **kwargs)
         sys.stdout.close()
         sys.stdout = _original_stdout
version
CHANGED
@@ -1,5 +1,5 @@
 {
-  "version": 3.
+  "version": 3.49,
   "show_feature": true,
-  "new_feature": "接入阿里通义千问、讯飞星火、上海AI-Lab书生 <-> 优化一键升级 <-> 提高arxiv翻译速度和成功率 <-> 支持自定义APIKEY格式 <-> 临时修复theme的文件丢失问题 <-> 新增实时语音对话插件(自动断句,脱手对话) <-> 支持加载自定义的ChatGLM2微调模型 <-> 动态ChatBot窗口高度 <-> 修复Azure接口的BUG <-> 完善多语言模块
+  "new_feature": "支持借助GROBID实现PDF高精度翻译 <-> 接入百度千帆平台和文心一言 <-> 接入阿里通义千问、讯飞星火、上海AI-Lab书生 <-> 优化一键升级 <-> 提高arxiv翻译速度和成功率 <-> 支持自定义APIKEY格式 <-> 临时修复theme的文件丢失问题 <-> 新增实时语音对话插件(自动断句,脱手对话) <-> 支持加载自定义的ChatGLM2微调模型 <-> 动态ChatBot窗口高度 <-> 修复Azure接口的BUG <-> 完善多语言模块"
 }