Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,52 +1,91 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
import openai
|
5 |
from openai.error import RateLimitError
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
|
8 |
openai.api_key = ""
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
"""
|
18 |
|
|
|
19 |
def get_chatgpt_reply(query, context=[]):
|
20 |
context += [query]
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
responses = [(u,b) for u,b in zip(context[::2], context[1::2])]
|
31 |
return responses, context
|
32 |
|
33 |
|
34 |
def get_chatgpt_summary(content):
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
response = completion.choices[0].message["content"]
|
43 |
-
except RateLimitError:
|
44 |
-
response = "服务器请求次数过多,请稍后重新尝试"
|
45 |
-
return response
|
46 |
|
47 |
|
48 |
import gradio as gr
|
49 |
from docx import Document
|
|
|
50 |
|
51 |
|
52 |
def upload_file(file):
|
@@ -55,16 +94,17 @@ def upload_file(file):
|
|
55 |
for para in doc.paragraphs:
|
56 |
content += para.text
|
57 |
content += '\n'
|
|
|
|
|
|
|
|
|
|
|
58 |
return content
|
59 |
|
60 |
-
def download_file(content):
    """Write ``content`` into a new Word document saved as ``会议纪要.docx``.

    Args:
        content: Text placed in the document as a single paragraph.

    Returns:
        None. The document is saved relative to the current working directory.
    """
    minutes = Document()
    minutes.add_paragraph(content)
    minutes.save("会议纪要.docx")
|
65 |
|
66 |
def set_api_key(api_key):
    """Store the user-supplied OpenAI API key on the ``openai`` module.

    Args:
        api_key: Key text entered in the UI. ``None`` is treated as an empty
            key, and surrounding whitespace (common when pasting) is removed.

    Returns:
        None.
    """
    # Normalize before storing so a stray newline or space is not sent as part
    # of the key.
    openai.api_key = (api_key or "").strip()
    return None
|
69 |
|
70 |
|
@@ -75,40 +115,37 @@ with gr.Blocks(theme=gr.themes.Default(text_size='lg', radius_size='sm')) as dem
|
|
75 |
desc = gr.Markdown("<center>让AI帮你整理会议纪要\n\n支持.docx文件</center>")
|
76 |
|
77 |
with gr.Column():
|
78 |
-
#
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
with gr.Row():
|
83 |
with gr.Column():
|
84 |
-
#
|
85 |
-
|
86 |
-
|
87 |
-
api_btn.click(fn=set_api_key, inputs=api_input, outputs=None)
|
88 |
# 文字展示
|
89 |
with gr.Tab("原文"):
|
90 |
# 原文
|
91 |
content_box = gr.Textbox(label="文稿内容")
|
92 |
-
download_btn = gr.Button(value="下载")
|
93 |
with gr.Tab("总结"):
|
94 |
# 总结
|
95 |
summary_box = gr.Textbox(label="总结内容")
|
96 |
-
download_btn = gr.Button(value="下载")
|
97 |
with gr.Column():
|
98 |
# 对话交互
|
99 |
-
chatbot = gr.Chatbot(label="对话内容").style(height=
|
100 |
state = gr.State([])
|
101 |
txt = gr.Textbox(label="用户", placeholder="请输入内容")
|
102 |
with gr.Row():
|
103 |
-
summary = gr.Button(value="
|
104 |
clear = gr.Button(value="清空")
|
105 |
|
106 |
summary.click(fn=get_chatgpt_summary, inputs=content_box, outputs=summary_box)
|
107 |
txt.submit(get_chatgpt_reply, [txt, state], [chatbot, state])
|
108 |
clear.click(lambda: None, None, chatbot, queue=False)
|
109 |
|
|
|
110 |
upload_btn.click(fn=upload_file, inputs=file_input, outputs=content_box)
|
111 |
-
download_btn.click(fn=download_file, inputs=content_box, outputs=None)
|
112 |
|
113 |
|
114 |
demo.launch()
|
|
|
1 |
+
# 长文本总结
|
2 |
+
from langchain.chains.summarize import load_summarize_chain
|
3 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
4 |
+
from langchain import OpenAI
|
5 |
+
from langchain import PromptTemplate
|
6 |
+
from langchain.docstore.document import Document as LangDoc
|
7 |
+
|
8 |
+
|
9 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=100)
|
10 |
+
|
11 |
+
|
12 |
import openai
|
13 |
from openai.error import RateLimitError
|
14 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
15 |
+
from langchain.vectorstores import Chroma
|
16 |
+
from langchain.prompts.chat import (
|
17 |
+
ChatPromptTemplate,
|
18 |
+
SystemMessagePromptTemplate,
|
19 |
+
HumanMessagePromptTemplate
|
20 |
+
)
|
21 |
+
from langchain.chains import ConversationalRetrievalChain
|
22 |
+
from langchain.chat_models import ChatOpenAI
|
23 |
+
from langchain.chains import RetrievalQA
|
24 |
+
from langchain.output_parsers import RegexParser
|
25 |
|
26 |
|
27 |
openai.api_key = ""
|
28 |
+
|
29 |
+
summary_prompt = (
|
30 |
+
"总结以下会议记录中所探讨的主要话题,忽略细节\n"
|
31 |
+
"会议记录:{text}\n"
|
32 |
+
"在输出时,请注意以下几点:\n"
|
33 |
+
"1. 输出内容中避免口语化内容\n"
|
34 |
+
"2. 每个话题用序号标注\n"
|
35 |
+
"3. 不输出无关信息"
|
36 |
+
)
|
37 |
+
|
38 |
+
qa_prompt = """
|
39 |
+
结合下面的信息,用中文回答最后的问题。如果你不知道答案,说“我不知道”,不可以编造答案。
|
40 |
+
除了回答问题外,还需要输出一个分数,表示你对这个问题的回答的自信程度。分数越高,你越自信。按照以下的格式输出:
|
41 |
+
|
42 |
+
回答:[回答内容]
|
43 |
+
分数:[0到100间的数字]
|
44 |
+
|
45 |
+
开始回答:
|
46 |
+
{context}
|
47 |
+
|
48 |
+
问题:{question}
|
49 |
"""
|
50 |
|
51 |
+
|
52 |
def get_chatgpt_reply(query, context=None):
    """Answer ``query`` from the persisted vector store and extend the chat history.

    Args:
        query: The user's question.
        context: Flat chat history alternating user message and bot reply.
            ``None`` (the default) starts a new, empty history.

    Returns:
        A ``(responses, context)`` tuple. ``responses`` is the history paired
        up as ``(user, bot)`` tuples; ``context`` is the updated flat history.
    """
    # Work on a fresh list. A mutable default (``context=[]``) extended in
    # place is shared by every call that omits the argument, so history would
    # leak from one call into the next.
    history = [] if context is None else list(context)
    history.append(query)

    llm_chat = ChatOpenAI(model_name="gpt-3.5-turbo", max_tokens=2000, temperature=0.3)
    embeddings = OpenAIEmbeddings()
    docsearch = Chroma(persist_directory="./VectorDB", embedding_function=embeddings)

    # qa_prompt asks the model for an answer followed by a confidence score;
    # this parser extracts both fields from the reply.
    # NOTE(review): ``.`` does not match newlines here, so a multi-line answer
    # looks like it would be captured only from its last line -- confirm.
    output_parser = RegexParser(
        regex=r"(.*)\n*分数:([0-9]*).*",
        output_keys=["answer", "score"],
    )
    PROMPT = PromptTemplate(
        template=qa_prompt,
        input_variables=["context", "question"],
        output_parser=output_parser,
    )
    chain_type_kwargs = {"prompt": PROMPT}

    qa = RetrievalQA.from_chain_type(
        llm_chat,
        chain_type="map_rerank",
        retriever=docsearch.as_retriever(),
        chain_type_kwargs=chain_type_kwargs,
    )
    result = qa.run(query)

    history.append(result)
    # Even positions hold user messages, odd positions hold the bot replies.
    responses = list(zip(history[::2], history[1::2]))
    return responses, history
|
74 |
|
75 |
|
76 |
def get_chatgpt_summary(content):
    """Summarize ``content`` chunk by chunk and join the partial summaries.

    The text is split with the module-level ``text_splitter``. Each chunk is
    summarized on its own with ``summary_prompt``, and the per-chunk results
    are joined with a line of asterisks.

    Args:
        content: Full text to summarize.

    Returns:
        The per-chunk summaries joined by ``"\\n*******\\n"``.
    """
    chunks = [LangDoc(page_content=piece) for piece in text_splitter.split_text(content)]
    llm = OpenAI(model_name="gpt-3.5-turbo", max_tokens=300, temperature=0.2)
    prompt = PromptTemplate(input_variables=["text"], template=summary_prompt)
    summarizer = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)

    partial_summaries = []
    for chunk in chunks:
        # Each chunk is handed to the chain as its own single-document list.
        partial_summaries.append(summarizer.run([chunk]))
    return "\n*******\n".join(partial_summaries)
|
|
|
|
|
|
|
|
|
84 |
|
85 |
|
86 |
import gradio as gr
|
87 |
from docx import Document
|
88 |
+
import os
|
89 |
|
90 |
|
91 |
def upload_file(file):
|
|
|
94 |
for para in doc.paragraphs:
|
95 |
content += para.text
|
96 |
content += '\n'
|
97 |
+
texts = text_splitter.split_text(content)
|
98 |
+
docs = [LangDoc(page_content=t) for t in texts]
|
99 |
+
embeddings = OpenAIEmbeddings()
|
100 |
+
docsearch = Chroma.from_documents(docs, embeddings, persist_directory="./VectorDB")
|
101 |
+
docsearch.persist()
|
102 |
return content
|
103 |
|
|
|
|
|
|
|
|
|
|
|
104 |
|
105 |
def set_api_key(api_key):
    """Store the user-supplied OpenAI API key for this process.

    The key is set on the ``openai`` module and exported as the
    ``OPENAI_API_KEY`` environment variable (presumably what the LangChain
    OpenAI wrappers read -- verify against the LangChain version in use).

    Args:
        api_key: Key text entered in the UI. ``None`` is treated as an empty
            key, and surrounding whitespace (common when pasting) is removed.

    Returns:
        None.
    """
    # ``os.environ`` only accepts strings, so normalize first: assigning None
    # would raise TypeError, and a stray newline would be sent as part of the key.
    key = (api_key or "").strip()
    openai.api_key = key
    os.environ["OPENAI_API_KEY"] = key
    return None
|
109 |
|
110 |
|
|
|
115 |
desc = gr.Markdown("<center>让AI帮你整理会议纪要\n\n支持.docx文件</center>")
|
116 |
|
117 |
with gr.Column():
|
118 |
+
# api key
|
119 |
+
api_input = gr.Textbox(label="API Key", placeholder="请输入API Key", type="password")
|
120 |
+
api_btn = gr.Button(value="设置")
|
121 |
+
api_btn.click(fn=set_api_key, inputs=api_input, outputs=None)
|
122 |
with gr.Row():
|
123 |
with gr.Column():
|
124 |
+
# 文件上传
|
125 |
+
file_input = gr.File(file_types=[".docx"], label="原始文稿", interactive=True)
|
126 |
+
upload_btn = gr.Button(value="上传")
|
|
|
127 |
# 文字展示
|
128 |
with gr.Tab("原文"):
|
129 |
# 原文
|
130 |
content_box = gr.Textbox(label="文稿内容")
|
|
|
131 |
with gr.Tab("总结"):
|
132 |
# 总结
|
133 |
summary_box = gr.Textbox(label="总结内容")
|
|
|
134 |
with gr.Column():
|
135 |
# 对话交互
|
136 |
+
chatbot = gr.Chatbot(label="对话内容").style(height=500)
|
137 |
state = gr.State([])
|
138 |
txt = gr.Textbox(label="用户", placeholder="请输入内容")
|
139 |
with gr.Row():
|
140 |
+
summary = gr.Button(value="一键总结")
|
141 |
clear = gr.Button(value="清空")
|
142 |
|
143 |
summary.click(fn=get_chatgpt_summary, inputs=content_box, outputs=summary_box)
|
144 |
txt.submit(get_chatgpt_reply, [txt, state], [chatbot, state])
|
145 |
clear.click(lambda: None, None, chatbot, queue=False)
|
146 |
|
147 |
+
# 上传文件,langchain解析
|
148 |
upload_btn.click(fn=upload_file, inputs=file_input, outputs=content_box)
|
|
|
149 |
|
150 |
|
151 |
demo.launch()
|