Iris committed on
Commit
0b4ef96
·
1 Parent(s): d077e20

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -49
app.py CHANGED
@@ -1,52 +1,91 @@
1
- import os
2
- os.system('pip install openai')
3
- os.system('pip install python-docx')
 
 
 
 
 
 
 
 
4
  import openai
5
  from openai.error import RateLimitError
 
 
 
 
 
 
 
 
 
 
 
6
 
7
 
8
  openai.api_key = ""
9
# System prompt for the single-call meeting summary: it asks the model to
# extract the main topics of the transcript and summarise the related records
# for each topic, avoiding colloquial wording and using a hierarchical list.
prompt = "\n".join(
    [
        "",
        "我需要你帮助我完成会议记录的总结和梳理。我会给你提供一长段会议记录,其中包含多个参会者的对话记录。你需要完成如下的任务:",
        "1. 提取出会议主要讨论的几个主题或者要点。",
        "2. 针对会议的每一个主题,请你找到所有相关的会议记录,并将它们梳理总结,为我生成有条理的摘要。",
        "",
        "在输出时,请注意以下几点:",
        "1. 输出内容中避免口语化内容",
        "2. 输出格式是层级列表",
        "",
    ]
)
18
 
 
19
  def get_chatgpt_reply(query, context=[]):
20
  context += [query]
21
- try:
22
- completion = openai.ChatCompletion.create(
23
- model="gpt-3.5-turbo",
24
- messages=[{"role": "user", "content": '\n\n'.join(context)[:4096]}]
25
- )
26
- response = completion.choices[0].message["content"]
27
- except RateLimitError:
28
- response = "服务器请求次数过多,请稍后重新尝试"
29
- context += [response]
 
 
 
 
 
 
 
 
 
30
  responses = [(u,b) for u,b in zip(context[::2], context[1::2])]
31
  return responses, context
32
 
33
 
34
  def get_chatgpt_summary(content):
35
- try:
36
- completion = openai.ChatCompletion.create(
37
- model="gpt-3.5-turbo",
38
- messages=[{"role": "system", "content": prompt},
39
- {"role": "user", "content": content}],
40
- temperature=0.5
41
- )
42
- response = completion.choices[0].message["content"]
43
- except RateLimitError:
44
- response = "服务器请求次数过多,请稍后重新尝试"
45
- return response
46
 
47
 
48
  import gradio as gr
49
  from docx import Document
 
50
 
51
 
52
  def upload_file(file):
@@ -55,16 +94,17 @@ def upload_file(file):
55
  for para in doc.paragraphs:
56
  content += para.text
57
  content += '\n'
 
 
 
 
 
58
  return content
59
 
60
def download_file(content):
    """Save *content* as a single paragraph in "会议纪要.docx".

    The file name is fixed and relative, so it is written to the current
    working directory. Returns None.
    """
    document = Document()
    document.add_paragraph(content)
    document.save("会议纪要.docx")
65
 
66
  def set_api_key(api_key):
67
  openai.api_key = api_key
 
68
  return None
69
 
70
 
@@ -75,40 +115,37 @@ with gr.Blocks(theme=gr.themes.Default(text_size='lg', radius_size='sm')) as dem
75
  desc = gr.Markdown("<center>让AI帮你整理会议纪要\n\n支持.docx文件</center>")
76
 
77
  with gr.Column():
78
- # 文件上传
79
- file_input = gr.File(file_types=[".docx"], label="原始文稿", interactive=True)
80
- upload_btn = gr.Button(value="上传")
81
-
82
  with gr.Row():
83
  with gr.Column():
84
- # api key
85
- api_input = gr.Textbox(label="API Key", placeholder="请输入API Key", type="password")
86
- api_btn = gr.Button(value="设置")
87
- api_btn.click(fn=set_api_key, inputs=api_input, outputs=None)
88
  # 文字展示
89
  with gr.Tab("原文"):
90
  # 原文
91
  content_box = gr.Textbox(label="文稿内容")
92
- download_btn = gr.Button(value="下载")
93
  with gr.Tab("总结"):
94
  # 总结
95
  summary_box = gr.Textbox(label="总结内容")
96
- download_btn = gr.Button(value="下载")
97
  with gr.Column():
98
  # 对话交互
99
- chatbot = gr.Chatbot(label="对话内容").style(height=400)
100
  state = gr.State([])
101
  txt = gr.Textbox(label="用户", placeholder="请输入内容")
102
  with gr.Row():
103
- summary = gr.Button(value="一键梳理")
104
  clear = gr.Button(value="清空")
105
 
106
  summary.click(fn=get_chatgpt_summary, inputs=content_box, outputs=summary_box)
107
  txt.submit(get_chatgpt_reply, [txt, state], [chatbot, state])
108
  clear.click(lambda: None, None, chatbot, queue=False)
109
 
 
110
  upload_btn.click(fn=upload_file, inputs=file_input, outputs=content_box)
111
- download_btn.click(fn=download_file, inputs=content_box, outputs=None)
112
 
113
 
114
  demo.launch()
 
1
+ # 长文本总结
2
+ from langchain.chains.summarize import load_summarize_chain
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain import OpenAI
5
+ from langchain import PromptTemplate
6
+ from langchain.docstore.document import Document as LangDoc
7
+
8
+
9
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=100)
10
+
11
+
12
  import openai
13
  from openai.error import RateLimitError
14
+ from langchain.embeddings.openai import OpenAIEmbeddings
15
+ from langchain.vectorstores import Chroma
16
+ from langchain.prompts.chat import (
17
+ ChatPromptTemplate,
18
+ SystemMessagePromptTemplate,
19
+ HumanMessagePromptTemplate
20
+ )
21
+ from langchain.chains import ConversationalRetrievalChain
22
+ from langchain.chat_models import ChatOpenAI
23
+ from langchain.chains import RetrievalQA
24
+ from langchain.output_parsers import RegexParser
25
 
26
 
27
  openai.api_key = ""
28
+
29
# Template for summarising one chunk of the transcript; ``{text}`` is the
# placeholder that receives the chunk. It asks for the main topics only,
# numbered, without colloquial wording or unrelated information.
summary_prompt = "\n".join(
    [
        "总结以下会议记录中所探讨的主要话题,忽略细节",
        "会议记录:{text}",
        "在输出时,请注意以下几点:",
        "1. 输出内容中避免口语化内容",
        "2. 每个话题用序号标注",
        "3. 不输出无关信息",
    ]
)
37
+
38
# Question-answering template with ``{context}`` and ``{question}``
# placeholders. It tells the model to answer in Chinese, to say it does not
# know rather than invent an answer, and to finish with a 0-100 confidence
# score on a "分数:" line.
qa_prompt = (
    "\n"
    "结合下面的信息,用中文回答最后的问题。如果你不知道答案,说“我不知道”,不可以编造答案。\n"
    "除了回答问题外,还需要输出一个分数,表示你对这个问题的回答的自信程度。分数越高,你越自信。按照以下的格式输出:\n"
    "\n"
    "回答:[回答内容]\n"
    "分数:[0到100间的数字]\n"
    "\n"
    "开始回答:\n"
    "{context}\n"
    "\n"
    "问题:{question}\n"
)
50
 
51
+
52
  def get_chatgpt_reply(query, context=[]):
53
  context += [query]
54
+
55
+ llm_chat = ChatOpenAI(model_name="gpt-3.5-turbo", max_tokens=2000, temperature=0.3)
56
+ embeddings = OpenAIEmbeddings()
57
+ docsearch = Chroma(persist_directory="./VectorDB", embedding_function=embeddings)
58
+
59
+ output_parser = RegexParser(
60
+ regex=r"(.*)\n*分数:([0-9]*).*",
61
+ output_keys=["answer", "score"],
62
+ )
63
+ PROMPT = PromptTemplate(
64
+ template=qa_prompt, input_variables=["context", "question"], output_parser=output_parser
65
+ )
66
+ chain_type_kwargs = {"prompt": PROMPT}
67
+
68
+ qa = RetrievalQA.from_chain_type(llm_chat, chain_type="map_rerank", retriever=docsearch.as_retriever(), chain_type_kwargs=chain_type_kwargs)
69
+ result = qa.run(query)
70
+
71
+ context += [result]
72
  responses = [(u,b) for u,b in zip(context[::2], context[1::2])]
73
  return responses, context
74
 
75
 
76
  def get_chatgpt_summary(content):
77
+ texts = text_splitter.split_text(content)
78
+ docs = [LangDoc(page_content=t) for t in texts]
79
+ llm_summary = OpenAI(model_name="gpt-3.5-turbo", max_tokens=300, temperature=0.2)
80
+ each_round_template = PromptTemplate(input_variables=["text"], template=summary_prompt)
81
+ chain_summary = load_summarize_chain(llm_summary, chain_type="stuff", prompt=each_round_template)
82
+ summary = "\n*******\n".join([chain_summary.run([doc]) for doc in docs])
83
+ return summary
 
 
 
 
84
 
85
 
86
  import gradio as gr
87
  from docx import Document
88
+ import os
89
 
90
 
91
  def upload_file(file):
 
94
  for para in doc.paragraphs:
95
  content += para.text
96
  content += '\n'
97
+ texts = text_splitter.split_text(content)
98
+ docs = [LangDoc(page_content=t) for t in texts]
99
+ embeddings = OpenAIEmbeddings()
100
+ docsearch = Chroma.from_documents(docs, embeddings, persist_directory="./VectorDB")
101
+ docsearch.persist()
102
  return content
103
 
 
 
 
 
 
104
 
105
  def set_api_key(api_key):
106
  openai.api_key = api_key
107
+ os.environ["OPENAI_API_KEY"] = api_key
108
  return None
109
 
110
 
 
115
  desc = gr.Markdown("<center>让AI帮你整理会议纪要\n\n支持.docx文件</center>")
116
 
117
  with gr.Column():
118
+ # api key
119
+ api_input = gr.Textbox(label="API Key", placeholder="请输入API Key", type="password")
120
+ api_btn = gr.Button(value="设置")
121
+ api_btn.click(fn=set_api_key, inputs=api_input, outputs=None)
122
  with gr.Row():
123
  with gr.Column():
124
+ # 文件上传
125
+ file_input = gr.File(file_types=[".docx"], label="原始文稿", interactive=True)
126
+ upload_btn = gr.Button(value="上传")
 
127
  # 文字展示
128
  with gr.Tab("原文"):
129
  # 原文
130
  content_box = gr.Textbox(label="文稿内容")
 
131
  with gr.Tab("总结"):
132
  # 总结
133
  summary_box = gr.Textbox(label="总结内容")
 
134
  with gr.Column():
135
  # 对话交互
136
+ chatbot = gr.Chatbot(label="对话内容").style(height=500)
137
  state = gr.State([])
138
  txt = gr.Textbox(label="用户", placeholder="请输入内容")
139
  with gr.Row():
140
+ summary = gr.Button(value="一键总结")
141
  clear = gr.Button(value="清空")
142
 
143
  summary.click(fn=get_chatgpt_summary, inputs=content_box, outputs=summary_box)
144
  txt.submit(get_chatgpt_reply, [txt, state], [chatbot, state])
145
  clear.click(lambda: None, None, chatbot, queue=False)
146
 
147
+ # 上传文件,langchain解析
148
  upload_btn.click(fn=upload_file, inputs=file_input, outputs=content_box)
 
149
 
150
 
151
  demo.launch()