Spaces:
Runtime error
Runtime error
File size: 5,308 Bytes
0b4ef96 8544ad2 0b4ef96 8544ad2 f739b4d 0b4ef96 8544ad2 0b4ef96 8544ad2 0b4ef96 8544ad2 0b4ef96 8544ad2 0b4ef96 8544ad2 0b4ef96 8544ad2 d48ccdd 0b4ef96 d48ccdd 8544ad2 0b4ef96 8544ad2 f739b4d 0b4ef96 f739b4d 8544ad2 0b4ef96 8544ad2 0b4ef96 8544ad2 0b4ef96 8544ad2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
# 长文本总结
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import OpenAI
from langchain import PromptTemplate
from langchain.docstore.document import Document as LangDoc
# Shared splitter used for both indexing and summarizing:
# ~1500-character chunks with a 100-character overlap between neighbors.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=100)
import openai
from openai.error import RateLimitError
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.prompts.chat import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
HumanMessagePromptTemplate
)
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.output_parsers import RegexParser
# Placeholder; the real key is supplied at runtime via set_api_key().
openai.api_key = ""
# Per-chunk meeting-summary prompt (user-facing Chinese text — runtime
# behavior, kept byte-identical). Asks for numbered topics, no filler.
summary_prompt = (
    "总结以下会议记录中所探讨的主要话题,忽略细节\n"
    "会议记录:{text}\n"
    "在输出时,请注意以下几点:\n"
    "1. 输出内容中避免口语化内容\n"
    "2. 每个话题用序号标注\n"
    "3. 不输出无关信息"
)
# Retrieval-QA prompt: answer in Chinese from {context}, admit ignorance,
# and emit a confidence score — the "回答/分数" format is what the
# RegexParser in get_chatgpt_reply() expects to parse.
qa_prompt = """
结合下面的信息,用中文回答最后的问题。如果你不知道答案,说“我不知道”,不可以编造答案。
除了回答问题外,还需要输出一个分数,表示你对这个问题的回答的自信程度。分数越高,你越自信。按照以下的格式输出:
回答:[回答内容]
分数:[0到100间的数字]
开始回答:
{context}
问题:{question}
"""
def get_chatgpt_reply(query, context=None):
    """Answer `query` with retrieval-QA over the persisted Chroma store.

    Args:
        query: The user's question.
        context: Flat chat history ``[user, bot, user, bot, ...]`` (the
            Gradio State list). A fresh list is created when omitted.

    Returns:
        (responses, context): `responses` as (user, bot) pairs for the
        Chatbot widget, plus the updated flat history.
    """
    # Bug fix: the original declared `context=[]`, a mutable default that
    # is shared across calls, leaking history between independent sessions.
    if context is None:
        context = []
    context.append(query)
    llm_chat = ChatOpenAI(model_name="gpt-3.5-turbo", max_tokens=2000, temperature=0.3)
    embeddings = OpenAIEmbeddings()
    # Reopen the vector store persisted by upload_file().
    docsearch = Chroma(persist_directory="./VectorDB", embedding_function=embeddings)
    # Splits the model output into the answer text and the trailing
    # "分数: NN" confidence score demanded by qa_prompt.
    output_parser = RegexParser(
        regex=r"(.*)\n*分数:([0-9]*).*",
        output_keys=["answer", "score"],
    )
    PROMPT = PromptTemplate(
        template=qa_prompt, input_variables=["context", "question"], output_parser=output_parser
    )
    qa = RetrievalQA.from_chain_type(
        llm_chat,
        chain_type="map_rerank",
        retriever=docsearch.as_retriever(),
        chain_type_kwargs={"prompt": PROMPT},
    )
    result = qa.run(query)
    context.append(result)
    # Pair the flat history into (user, bot) tuples for gr.Chatbot.
    responses = [(u, b) for u, b in zip(context[::2], context[1::2])]
    return responses, context
def get_chatgpt_summary(content):
    """Summarize long text chunk-by-chunk and join the per-chunk results.

    Splits `content` with the module-level text_splitter, runs a "stuff"
    summarize chain over each chunk, and joins the summaries with a
    ``*******`` divider line.
    """
    chunks = text_splitter.split_text(content)
    chunk_docs = [LangDoc(page_content=chunk) for chunk in chunks]
    llm_summary = OpenAI(model_name="gpt-3.5-turbo", max_tokens=300, temperature=0.2)
    each_round_template = PromptTemplate(input_variables=["text"], template=summary_prompt)
    chain_summary = load_summarize_chain(llm_summary, chain_type="stuff", prompt=each_round_template)
    parts = []
    for chunk_doc in chunk_docs:
        parts.append(chain_summary.run([chunk_doc]))
    return "\n*******\n".join(parts)
import gradio as gr
from docx import Document
import os
def upload_file(file):
    """Parse an uploaded .docx, index it into Chroma, and return its text.

    Args:
        file: Gradio file object; only ``file.name`` (the temp path) is used.

    Returns:
        The full document text, one paragraph per line, for display in
        the "原文" textbox.
    """
    doc = Document(file.name)
    # Single join instead of the original quadratic `content += ...` loop;
    # each paragraph still ends with a newline, matching prior output.
    content = "".join(para.text + "\n" for para in doc.paragraphs)
    texts = text_splitter.split_text(content)
    docs = [LangDoc(page_content=t) for t in texts]
    embeddings = OpenAIEmbeddings()
    # Build (or overwrite) the on-disk vector store read by get_chatgpt_reply().
    docsearch = Chroma.from_documents(docs, embeddings, persist_directory="./VectorDB")
    docsearch.persist()
    return content
def set_api_key(api_key):
    """Install the user-supplied OpenAI key for both access paths.

    Sets it in the process environment (read by the langchain wrappers)
    and directly on the openai client module. Returns None so the Gradio
    click handler produces no output.
    """
    os.environ["OPENAI_API_KEY"] = api_key
    openai.api_key = api_key
    return None
# Gradio UI: docx upload + one-click summary + retrieval-QA chat.
# (Widget labels are user-facing Chinese text and are left untouched.)
with gr.Blocks(theme=gr.themes.Default(text_size='lg', radius_size='sm')) as demo:
    with gr.Column():
        # App title and short product description
        title = gr.Markdown("# <center>ChatMeeting</center>")
        desc = gr.Markdown("<center>让AI帮你整理会议纪要\n\n支持.docx文件</center>")
    with gr.Column():
        # OpenAI API key entry; applied by set_api_key() on click
        api_input = gr.Textbox(label="API Key", placeholder="请输入API Key", type="password")
        api_btn = gr.Button(value="设置")
        api_btn.click(fn=set_api_key, inputs=api_input, outputs=None)
    with gr.Row():
        with gr.Column():
            # .docx upload widget + trigger button
            file_input = gr.File(file_types=[".docx"], label="原始文稿", interactive=True)
            upload_btn = gr.Button(value="上传")
            # Tabbed text display: raw transcript vs. generated summary
            with gr.Tab("原文"):
                # full original transcript
                content_box = gr.Textbox(label="文稿内容")
            with gr.Tab("总结"):
                # summary produced by get_chatgpt_summary()
                summary_box = gr.Textbox(label="总结内容")
        with gr.Column():
            # Chat panel: history widget plus hidden flat-history state
            chatbot = gr.Chatbot(label="对话内容").style(height=500)
            state = gr.State([])
            txt = gr.Textbox(label="用户", placeholder="请输入内容")
            with gr.Row():
                summary = gr.Button(value="一键总结")
                clear = gr.Button(value="清空")
    # Event wiring: summarize, chat submit, clear chat history
    summary.click(fn=get_chatgpt_summary, inputs=content_box, outputs=summary_box)
    txt.submit(get_chatgpt_reply, [txt, state], [chatbot, state])
    clear.click(lambda: None, None, chatbot, queue=False)
    # Upload triggers docx parsing and Chroma indexing via upload_file()
    upload_btn.click(fn=upload_file, inputs=file_input, outputs=content_box)
# Bug fix: the line carried a stray trailing " |" (web-scrape gutter
# artifact) that made the file a syntax error.
demo.launch()