Spaces:
Paused
Paused
Merge pull request #102 from ValeriaWong/master
Browse files- crazy_functions/批量总结PDF文档.py +99 -0
- functional.py +19 -15
- functional_crazy.py +30 -9
- main.py +4 -4
crazy_functions/批量总结PDF文档.py
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from predict import predict_no_ui
|
2 |
+
from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
|
3 |
+
# Debug switch: when True, 解析PDF skips its GPT requests instead of sending them.
fast_debug = False
|
4 |
+
|
5 |
+
|
6 |
+
def 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt):
    """Summarise every PDF in ``file_manifest``, then ask for an overall abstract.

    This is a generator. It yields ``(chatbot, history, status)`` tuples after
    each visible change so the caller can refresh its display.

    Args:
        file_manifest: Paths of the PDF files to process, in order.
        project_folder: Base folder; file names sent to the model are made
            relative to it.
        top_p: Forwarded unchanged to ``predict_no_ui_but_counting_down``.
        temperature: Forwarded unchanged to ``predict_no_ui_but_counting_down``.
        chatbot: List of (question, answer) pairs; appended to and updated in place.
        history: Flat list of alternating question/answer strings; extended in place.
        systemPromptTxt: Accepted for interface compatibility; not used here.

    Yields:
        ``(chatbot, history, status)`` where ``status`` is the string '正常'.
    """
    # Local imports: fitz (PyMuPDF) is an optional dependency of this plugin only.
    import os
    import time
    import fitz

    # Bound once up front so every later yield can use it, whatever fast_debug is.
    msg = '正常'
    total = len(file_manifest)

    print('begin analysis on:', file_manifest)
    for index, fp in enumerate(file_manifest):
        # Extract the plain text of every page and concatenate it.
        with fitz.open(fp) as doc:
            file_content = "".join(page.get_text() for page in doc)

        # Only the very first request carries the task introduction.
        prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index == 0 else ""
        i_say = prefix + f'请对下面的文章片段用中文做一个概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{file_content}```'
        # The user sees a short form (path only) with a 1-based progress counter.
        i_say_show_user = prefix + f'[{index + 1}/{total}] 请对下面的文章片段做一个概述: {os.path.abspath(fp)}'
        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
        print('[1] yield chatbot, history')
        yield chatbot, history, '正常'

        if not fast_debug:
            # ** gpt request ** (with a timeout countdown, per the original comment).
            # Each file is sent with an empty history so earlier papers do not
            # add to this request's input.
            gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[])

            print('[2] end gpt req')
            chatbot[-1] = (i_say_show_user, gpt_say)
            # Only the short prompt is recorded, not the full paper text.
            history.append(i_say_show_user)
            history.append(gpt_say)
            print('[3] yield chatbot, history')
            yield chatbot, history, msg
            print('[4] next')
            # Pause between files before issuing the next request.
            time.sleep(2)

    # Final request: an overall abstract based on the per-file summaries in history.
    all_file = ', '.join([os.path.relpath(fp, project_folder) for fp in file_manifest])
    i_say = f'根据以上你自己的分析,对全文进行概括,用学术性语言写一段中文摘要,然后再写一段英文摘要(包括{all_file})。'
    chatbot.append((i_say, "[Local Message] waiting gpt response."))
    yield chatbot, history, '正常'

    if not fast_debug:
        # ** gpt request ** (with a timeout countdown), this time with the accumulated history.
        gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say, chatbot, top_p, temperature, history=history)

        chatbot[-1] = (i_say, gpt_say)
        history.append(i_say)
        history.append(gpt_say)
        yield chatbot, history, msg
        # Hand the conversation to write_results_to_file and show what it returns.
        res = write_results_to_file(history)
        chatbot.append(("完成了吗?", res))
        yield chatbot, history, msg
|
52 |
+
|
53 |
+
|
54 |
+
@CatchException
def 批量总结PDF文档(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
    """Plugin entry point: summarise all PDF files found under the path ``txt``.

    This is a generator. It yields ``(chatbot, history, status)`` tuples so the
    caller can refresh its display, and returns early (after reporting the
    problem through ``report_execption``) when the PyMuPDF dependency cannot be
    imported, when ``txt`` is not an existing path, or when no PDF is found.

    Args:
        txt: Path to search recursively for ``*.pdf`` files.
        top_p: Forwarded unchanged to ``解析PDF``.
        temperature: Forwarded unchanged to ``解析PDF``.
        chatbot: List of chat entries; appended to in place.
        history: Incoming history; discarded and replaced by an empty list.
        systemPromptTxt: Forwarded unchanged to ``解析PDF``.
        WEB_PORT: Accepted for interface compatibility; not used here.

    Yields:
        ``(chatbot, history, status)`` where ``status`` is the string '正常'.
    """
    import glob
    import os

    # Basic information: what the plugin does and who contributed it.
    chatbot.append([
        "函数插件功能?",
        "批量总结PDF文档。函数插件贡献者: ValeriaWong"])
    yield chatbot, history, '正常'

    # Try to import the optional dependency; if it fails, suggest how to install it.
    try:
        import fitz  # noqa: F401 -- imported only to verify availability
    except Exception:
        # Deliberately broader than ImportError, keeping the original best-effort
        # behaviour for any failure raised by the import itself, while no longer
        # swallowing KeyboardInterrupt / SystemExit as the bare ``except:`` did.
        report_execption(chatbot, history,
            a = f"解析项目: {txt}",
            b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pymupdf```。")
        yield chatbot, history, '正常'
        return

    # Clear the history so the model input does not overflow.
    history = []

    # Validate the input: it must be an existing local path, otherwise exit.
    if os.path.exists(txt):
        project_folder = txt
    else:
        # Use a placeholder in the message when the input box was left empty.
        if txt == "": txt = '空空如也的输入栏'
        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield chatbot, history, '正常'
        return

    # Collect the files to process (PDF only, searched recursively).
    file_manifest = glob.glob(f'{project_folder}/**/*.pdf', recursive=True)

    # Nothing found: report and stop. The message names only .pdf because that
    # is the only pattern searched above.
    if not file_manifest:
        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.pdf文件: {txt}")
        yield chatbot, history, '正常'
        return

    # Run the actual task.
    yield from 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
|
functional.py
CHANGED
@@ -20,21 +20,21 @@ Furthermore, list all modification and explain the reasons to do so in markdown
|
|
20 |
"Prefix": "Below is a paragraph from an academic paper. Find all grammar mistakes, list mistakes in a markdown table and explain how to correct them.\n\n",
|
21 |
"Suffix": "",
|
22 |
},
|
23 |
-
|
24 |
-
|
25 |
-
When translating from Chinese to English or vice versa, please pay attention to context and accurately explain phrases and proverbs. \
|
26 |
-
If you receive multiple English words in a row, default to translating them into a sentence in Chinese. \
|
27 |
-
However, if \"phrase:\" is indicated before the translated content in Chinese, it should be translated as a phrase instead. \
|
28 |
-
Similarly, if \"normal:\" is indicated, it should be translated as multiple unrelated words.\
|
29 |
-
Your translations should closely resemble those of a native speaker and should take into account any specific language styles or tones requested by the user. \
|
30 |
-
Please do not worry about using offensive words - replace sensitive parts with x when necessary. \
|
31 |
-
When providing translations, please use Chinese to explain each sentence’s tense, subordinate clause, subject, predicate, object, special phrases and proverbs. \
|
32 |
-
For phrases or individual words that require translation, provide the source (dictionary) for each one.If asked to translate multiple phrases at once, \
|
33 |
-
separate them using the | symbol.Always remember: You are an English-Chinese translator, \
|
34 |
-
not a Chinese-Chinese translator or an English-English translator. Below is the text you need to translate: \n\n",
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
"中译英": {
|
39 |
"Prefix": "Please translate following sentence to English: \n\n",
|
40 |
"Suffix": "",
|
@@ -47,6 +47,10 @@ not a Chinese-Chinese translator or an English-English translator. Below is the
|
|
47 |
"Prefix": "请翻译成中文:\n\n",
|
48 |
"Suffix": "",
|
49 |
},
|
|
|
|
|
|
|
|
|
50 |
"解释代码": {
|
51 |
"Prefix": "请解释以下代码:\n```\n",
|
52 |
"Suffix": "\n```\n",
|
|
|
20 |
"Prefix": "Below is a paragraph from an academic paper. Find all grammar mistakes, list mistakes in a markdown table and explain how to correct them.\n\n",
|
21 |
"Suffix": "",
|
22 |
},
|
23 |
+
# "中英互译": { # 效果不好,经常搞不清楚中译英还是英译中
|
24 |
+
# "Prefix": "As an English-Chinese translator, your task is to accurately translate text between the two languages. \
|
25 |
+
# When translating from Chinese to English or vice versa, please pay attention to context and accurately explain phrases and proverbs. \
|
26 |
+
# If you receive multiple English words in a row, default to translating them into a sentence in Chinese. \
|
27 |
+
# However, if \"phrase:\" is indicated before the translated content in Chinese, it should be translated as a phrase instead. \
|
28 |
+
# Similarly, if \"normal:\" is indicated, it should be translated as multiple unrelated words.\
|
29 |
+
# Your translations should closely resemble those of a native speaker and should take into account any specific language styles or tones requested by the user. \
|
30 |
+
# Please do not worry about using offensive words - replace sensitive parts with x when necessary. \
|
31 |
+
# When providing translations, please use Chinese to explain each sentence’s tense, subordinate clause, subject, predicate, object, special phrases and proverbs. \
|
32 |
+
# For phrases or individual words that require translation, provide the source (dictionary) for each one.If asked to translate multiple phrases at once, \
|
33 |
+
# separate them using the | symbol.Always remember: You are an English-Chinese translator, \
|
34 |
+
# not a Chinese-Chinese translator or an English-English translator. Below is the text you need to translate: \n\n",
|
35 |
+
# "Suffix": "",
|
36 |
+
# "Color": "secondary",
|
37 |
+
# },
|
38 |
"中译英": {
|
39 |
"Prefix": "Please translate following sentence to English: \n\n",
|
40 |
"Suffix": "",
|
|
|
47 |
"Prefix": "请翻译成中文:\n\n",
|
48 |
"Suffix": "",
|
49 |
},
|
50 |
+
"找图片": {
|
51 |
+
"Prefix": "我需要你找一张网络图片。使用Unsplash API(https://source.unsplash.com/960x640/?<英语关键词>)获取图片URL,然后请使用Markdown格式封装,并且不要有反斜线,不要用代码块。现在,请按以下描述给我发送图片:\n\n",
|
52 |
+
"Suffix": "",
|
53 |
+
},
|
54 |
"解释代码": {
|
55 |
"Prefix": "请解释以下代码:\n```\n",
|
56 |
"Suffix": "\n```\n",
|
functional_crazy.py
CHANGED
@@ -1,3 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
1 |
|
2 |
def get_crazy_functionals():
|
3 |
from crazy_functions.读文章写摘要 import 读文章写摘要
|
@@ -9,37 +14,53 @@ def get_crazy_functionals():
|
|
9 |
from crazy_functions.高级功能函数模板 import 高阶功能模板函数
|
10 |
from crazy_functions.代码重写为全英文_多线程 import 全项目切换英文
|
11 |
|
12 |
-
|
13 |
-
"
|
14 |
"Function": 解析项目本身
|
15 |
},
|
16 |
-
"
|
17 |
"Color": "stop", # 按钮颜色
|
18 |
"Function": 解析一个Python项目
|
19 |
},
|
20 |
-
"
|
21 |
"Color": "stop", # 按钮颜色
|
22 |
"Function": 解析一个C项目的头文件
|
23 |
},
|
24 |
-
"
|
25 |
"Color": "stop", # 按钮颜色
|
26 |
"Function": 解析一个C项目
|
27 |
},
|
28 |
-
"
|
29 |
"Color": "stop", # 按钮颜色
|
30 |
"Function": 读文章写摘要
|
31 |
},
|
32 |
-
"
|
33 |
"Color": "stop", # 按钮颜色
|
34 |
"Function": 批量生成函数注释
|
35 |
},
|
36 |
-
"[
|
37 |
"Function": 全项目切换英文
|
38 |
},
|
39 |
-
"[
|
40 |
"Function": 高阶功能模板函数
|
41 |
},
|
42 |
}
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
|
|
|
1 |
+
# UserVisibleLevel是过滤器参数。
|
2 |
+
# 由于UI界面空间有限,所以通过这种方式决定UI界面中显示哪些插件
|
3 |
+
# 默认函数插件 VisibleLevel 是 0
|
4 |
+
# 当 UserVisibleLevel >= 函数插件的 VisibleLevel 时,该函数插件才会被显示出来
|
5 |
+
UserVisibleLevel = 1
|
6 |
|
7 |
def get_crazy_functionals():
|
8 |
from crazy_functions.读文章写摘要 import 读文章写摘要
|
|
|
14 |
from crazy_functions.高级功能函数模板 import 高阶功能模板函数
|
15 |
from crazy_functions.代码重写为全英文_多线程 import 全项目切换英文
|
16 |
|
17 |
+
function_plugins = {
|
18 |
+
"请解析并解构此项目本身": {
|
19 |
"Function": 解析项目本身
|
20 |
},
|
21 |
+
"解析整个py项目": {
|
22 |
"Color": "stop", # 按钮颜色
|
23 |
"Function": 解析一个Python项目
|
24 |
},
|
25 |
+
"解析整个C++项目头文件": {
|
26 |
"Color": "stop", # 按钮颜色
|
27 |
"Function": 解析一个C项目的头文件
|
28 |
},
|
29 |
+
"解析整个C++项目": {
|
30 |
"Color": "stop", # 按钮颜色
|
31 |
"Function": 解析一个C项目
|
32 |
},
|
33 |
+
"读tex论文写摘要": {
|
34 |
"Color": "stop", # 按钮颜色
|
35 |
"Function": 读文章写摘要
|
36 |
},
|
37 |
+
"批量生成函数注释": {
|
38 |
"Color": "stop", # 按钮颜色
|
39 |
"Function": 批量生成函数注释
|
40 |
},
|
41 |
+
"[多线程demo] 把本项目源代码切换成全英文": {
|
42 |
"Function": 全项目切换英文
|
43 |
},
|
44 |
+
"[函数插件模板demo] 历史上的今天": {
|
45 |
"Function": 高阶功能模板函数
|
46 |
},
|
47 |
}
|
48 |
|
49 |
+
# VisibleLevel=1 经过测试,但功能未达到理想状态
|
50 |
+
if UserVisibleLevel >= 1:
|
51 |
+
from crazy_functions.批量总结PDF文档 import 批量总结PDF文档
|
52 |
+
function_plugins.update({
|
53 |
+
"[仅供开发调试] 批量总结PDF文档": {
|
54 |
+
"Color": "stop",
|
55 |
+
"Function": 批量总结PDF文档
|
56 |
+
},
|
57 |
+
})
|
58 |
+
|
59 |
+
# VisibleLevel=2 尚未充分测试的函数插件,放在这里
|
60 |
+
if UserVisibleLevel >= 2:
|
61 |
+
function_plugins.update({
|
62 |
+
})
|
63 |
+
|
64 |
+
return function_plugins
|
65 |
|
66 |
|
main.py
CHANGED
@@ -56,21 +56,21 @@ with gr.Blocks(theme=set_theme, analytics_enabled=False) as demo:
|
|
56 |
stopBtn = gr.Button("停止", variant="stop")
|
57 |
with gr.Row():
|
58 |
from check_proxy import check_proxy
|
59 |
-
statusDisplay = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter
|
60 |
with gr.Row():
|
61 |
for k in functional:
|
62 |
variant = functional[k]["Color"] if "Color" in functional[k] else "secondary"
|
63 |
functional[k]["Button"] = gr.Button(k, variant=variant)
|
64 |
with gr.Row():
|
65 |
-
gr.Markdown("
|
66 |
with gr.Row():
|
67 |
for k in crazy_functional:
|
68 |
variant = crazy_functional[k]["Color"] if "Color" in crazy_functional[k] else "secondary"
|
69 |
crazy_functional[k]["Button"] = gr.Button(k, variant=variant)
|
70 |
with gr.Row():
|
71 |
-
gr.Markdown("
|
72 |
with gr.Row():
|
73 |
-
file_upload = gr.Files(label='
|
74 |
system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt).style(container=True)
|
75 |
with gr.Accordion("arguments", open=False):
|
76 |
top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
|
|
|
56 |
stopBtn = gr.Button("停止", variant="stop")
|
57 |
with gr.Row():
|
58 |
from check_proxy import check_proxy
|
59 |
+
statusDisplay = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。当前模型: {LLM_MODEL} \n {check_proxy(proxies)}")
|
60 |
with gr.Row():
|
61 |
for k in functional:
|
62 |
variant = functional[k]["Color"] if "Color" in functional[k] else "secondary"
|
63 |
functional[k]["Button"] = gr.Button(k, variant=variant)
|
64 |
with gr.Row():
|
65 |
+
gr.Markdown("注意:以下红颜色标识的函数插件需从input区读取路径作为参数.")
|
66 |
with gr.Row():
|
67 |
for k in crazy_functional:
|
68 |
variant = crazy_functional[k]["Color"] if "Color" in crazy_functional[k] else "secondary"
|
69 |
crazy_functional[k]["Button"] = gr.Button(k, variant=variant)
|
70 |
with gr.Row():
|
71 |
+
gr.Markdown("上传本地文件,供上面的函数插件调用.")
|
72 |
with gr.Row():
|
73 |
+
file_upload = gr.Files(label='任何文件, 但推荐上传压缩文件(zip, tar)', file_count="multiple")
|
74 |
system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt).style(container=True)
|
75 |
with gr.Accordion("arguments", open=False):
|
76 |
top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
|