File size: 7,390 Bytes
469385b b5e8566 469385b b5e8566 469385b b5e8566 469385b b5e8566 469385b b5e8566 469385b dfb2496 469385b dfb2496 469385b e03e141 b5e8566 469385b dfb2496 e03e141 dfb2496 469385b dfb2496 469385b dfb2496 469385b dfb2496 469385b 23bffea |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 |
import os
import openai
import gradio as gr
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader, PyPDFLoader
from langchain.vectorstores import Chroma
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.chat_models import ChatOpenAI
import shutil # 用於文件複製
# Pick up an OpenAI API key from the environment when one is configured;
# otherwise each user supplies a key through the UI.
api_key_env = os.environ.get("OPENAI_API_KEY")
if not api_key_env:
    print("未設置固定的 OpenAI API 密鑰。將使用使用者提供的密鑰。")
else:
    openai.api_key = api_key_env

# Directory that holds the vector database; create it if needed and set its
# permission bits to rwxr-xr-x.
VECTORDB_DIR = os.path.abspath("./data")
os.makedirs(name=VECTORDB_DIR, exist_ok=True)
os.chmod(VECTORDB_DIR, 0o755)
# 定義載入和處理 PDF 文件的函數
# Loader names accepted by ``load_and_process_documents``.
_SUPPORTED_LOADERS = ("PyMuPDFLoader", "PyPDFLoader")


def _build_pdf_loader(loader_type, file_path):
    """Instantiate the PDF loader named by *loader_type* for *file_path*."""
    if loader_type == "PyMuPDFLoader":
        return PyMuPDFLoader(file_path)
    return PyPDFLoader(file_path)


def load_and_process_documents(file_paths, loader_type='PyMuPDFLoader', api_key=None):
    """Load PDF files, split them into chunks and index them in Chroma.

    Args:
        file_paths: Iterable of paths to PDF files. Paths that do not exist
            or fail to load are skipped (and reported on stdout).
        loader_type: ``'PyMuPDFLoader'`` or ``'PyPDFLoader'``.
        api_key: OpenAI API key used to build the embeddings.

    Returns:
        The Chroma vector store built from the split documents, persisted
        under ``VECTORDB_DIR``.

    Raises:
        ValueError: If ``api_key`` is missing, ``loader_type`` is not
            supported, no document could be loaded, splitting yields nothing,
            or the embeddings / vector store cannot be initialised.
    """
    if not api_key:
        raise ValueError("未提供 OpenAI API 密鑰。")
    # Fail fast with a precise message instead of skipping every file and
    # ending up with a misleading "no PDF found" error.
    if loader_type not in _SUPPORTED_LOADERS:
        raise ValueError(f"不支援的載入器類型: {loader_type}")

    documents = []
    skipped = []  # (path, reason) for every file that could not be used
    for file_path in file_paths or []:
        if not os.path.exists(file_path):
            skipped.append((file_path, "檔案不存在"))
            continue
        try:
            loaded_docs = _build_pdf_loader(loader_type, file_path).load()
        except Exception as e:
            # Best effort: one unreadable PDF must not abort the whole batch,
            # but the reason is kept so it can be reported.
            skipped.append((file_path, str(e)))
            continue
        if loaded_docs:
            documents.extend(loaded_docs)

    for file_path, reason in skipped:
        print(f"略過文件 {file_path}: {reason}")

    if not documents:
        message = "沒有找到任何 PDF 文件或 PDF 文件無法載入。"
        if skipped:
            # Only base names are exposed: this message reaches the UI.
            details = "; ".join(
                f"{os.path.basename(path)}: {reason}" for path, reason in skipped
            )
            message = f"{message} ({details})"
        raise ValueError(message)

    # Split long texts into chunks.
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
    documents = text_splitter.split_documents(documents)
    if not documents:
        raise ValueError("分割後的文檔列表為空。請檢查 PDF 文件內容。")

    # Build the embeddings with the caller-supplied API key.
    try:
        embeddings = OpenAIEmbeddings(openai_api_key=api_key)
    except Exception as e:
        raise ValueError(f"初始化 OpenAIEmbeddings 時出現錯誤: {e}") from e

    # Build the vector database.
    try:
        vectordb = Chroma.from_documents(
            documents,
            embedding=embeddings,
            persist_directory=VECTORDB_DIR,
        )
    except Exception as e:
        raise ValueError(f"初始化 Chroma 向量資料庫時出現錯誤: {e}") from e
    return vectordb
# 定義聊天處理函數
# Role instructions prepended to every user question.
_QUERY_PREFACE = (
    "\n"
    "指令: 以繁體中文回答問題,200字以內。你是一位勞動法專家,針對員工權益與合同條款等法律問題進行回應。\n"
    "非相關問題,請回應:「目前僅支援勞動法相關問題。」。\n"
)


def _to_chain_history(chat_history):
    """Convert chat turns into the ``(human, ai)`` tuples passed to the chain."""
    # Turns may arrive as two-item lists; a missing side becomes "".
    return [(human or "", ai or "") for human, ai in chat_history]


def handle_query(user_message, chat_history, vectordb, api_key):
    """Answer one user question against the vector store.

    Args:
        user_message: The question typed by the user. An empty value leaves
            the history unchanged.
        chat_history: Previous turns as ``(user, answer)`` pairs (tuples or
            two-item lists); ``None`` is treated as an empty history.
        vectordb: Vector store exposing ``as_retriever``.
        api_key: OpenAI API key used for the chat model.

    Returns:
        A new history list with the new turn appended. On failure the
        appended turn is ``("系統", "出現錯誤: ...")`` rather than raising.
    """
    history = list(chat_history) if chat_history else []
    if not user_message:
        return history
    try:
        # Prefix the question with the role instructions.
        query = f"{_QUERY_PREFACE} 查詢內容:{user_message}"
        # Build the chain with the caller-supplied API key.
        pdf_qa = ConversationalRetrievalChain.from_llm(
            ChatOpenAI(temperature=0.7, model="gpt-4", openai_api_key=api_key),
            retriever=vectordb.as_retriever(search_kwargs={'k': 6}),
            return_source_documents=True,
        )
        # Normalise the history to tuples before handing it to the chain, so
        # list-shaped turns are not rejected on follow-up questions.
        result = pdf_qa.invoke(
            {"question": query, "chat_history": _to_chain_history(history)}
        )
        if "answer" in result:
            answer = result["answer"]
        else:
            answer = "抱歉,未能獲得有效回應。"
        return history + [(user_message, answer)]
    except Exception as e:
        # Surface the failure in the chat window instead of crashing the UI.
        return history + [("系統", f"出現錯誤: {str(e)}")]
# 定義保存 API 密鑰的函數
def save_api_key(api_key, state):
    """Validate the user's OpenAI API key and store it in the session state.

    Args:
        api_key: Key text from the input box. Surrounding whitespace is
            stripped; ``None`` is treated as empty.
        state: Session state dict; ``state['api_key']`` is set on success.

    Returns:
        A ``(status_message, state)`` tuple. The state is left untouched when
        the key does not start with ``"sk-"``.
    """
    # Pasted keys often carry stray spaces or a trailing newline, which would
    # otherwise be stored verbatim or fail the prefix check.
    cleaned_key = (api_key or "").strip()
    if not cleaned_key.startswith("sk-"):
        return "請輸入有效的 OpenAI API 密鑰。", state
    state['api_key'] = cleaned_key
    return "API 密鑰已成功保存。您現在可以上傳 PDF 文件並開始提問。", state
# 定義 Gradio 的處理函數
def process_files(files, state):
    """Save uploaded PDFs to disk and build the vector store for the session.

    Args:
        files: List of uploaded file contents as ``bytes``.
        state: Session state dict; reads ``'api_key'`` and, on success,
            writes ``'vectordb'``.

    Returns:
        A ``(status_message, state)`` tuple. Errors are reported in the
        message rather than raised.
    """
    import tempfile  # local import: only this handler needs it

    if not files:
        return "請上傳至少一個 PDF 文件。", state

    upload_dir = None
    try:
        api_key = state.get('api_key', None)
        if not api_key:
            return "請先輸入並保存您的 OpenAI API 密鑰。", state

        # Each call writes into its own directory so that concurrent uploads
        # cannot overwrite one another's ``uploaded_{idx}.pdf`` files.
        upload_dir = tempfile.mkdtemp(prefix="upload_", dir=VECTORDB_DIR)
        saved_file_paths = []
        for idx, file_data in enumerate(files):
            save_path = os.path.join(upload_dir, f"uploaded_{idx}.pdf")
            with open(save_path, "wb") as f:
                f.write(file_data)
            saved_file_paths.append(save_path)

        state['vectordb'] = load_and_process_documents(
            saved_file_paths, loader_type='PyMuPDFLoader', api_key=api_key
        )
        return "PDF 文件已成功上傳並處理。您現在可以開始提問。", state
    except Exception as e:
        return f"處理文件時出現錯誤: {e}", state
    finally:
        # Nothing in this function needs the PDFs once processing has
        # finished, so the per-call directory is removed.
        if upload_dir is not None:
            shutil.rmtree(upload_dir, ignore_errors=True)
def chat_interface(user_message, chat_history, state):
    """Gradio handler for a submitted question.

    Args:
        user_message: Text from the question box.
        chat_history: Current chatbot history.
        state: Session state dict holding ``'vectordb'`` and ``'api_key'``.

    Returns:
        ``(chat_history, state, textbox_value)``. When a prerequisite is
        missing, the history is returned unchanged and the third item carries
        the hint message; otherwise it is ``""``.
    """
    vectordb = state.get('vectordb', None)
    api_key = state.get('api_key', None)
    # Identity check on purpose: truthiness of a store object would invoke its
    # ``__len__`` (where defined) and conflate "missing" with "empty".
    if vectordb is None:
        return chat_history, state, "請先上傳 PDF 文件以進行處理。"
    if not api_key:
        return chat_history, state, "請先輸入並保存您的 OpenAI API 密鑰。"
    updated_history = handle_query(user_message, chat_history, vectordb, api_key)
    return updated_history, state, ""
# 設計 Gradio 介面
with gr.Blocks(css="body { background-color: #EBD6D6; }") as demo:
    # Page title.
    gr.Markdown("<h1 style='text-align: center;'>勞動法智能諮詢系統</h1>")

    # State dict threaded through every event handler below.
    state = gr.State({"vectordb": None, "api_key": None})

    # --- API key entry ---
    api_key_input = gr.Textbox(
        type="password",
        label="輸入您的 OpenAI API 密鑰",
        placeholder="sk-...",
        interactive=True,
    )
    save_api_key_btn = gr.Button("保存 API 密鑰")
    api_key_status = gr.Textbox(interactive=False, label="狀態")

    # --- PDF upload ---
    gr.Markdown("<span style='font-size: 1.5em; font-weight: bold;'>請上傳勞動法相關文檔,讓我協助解決您的職場問題!🤖</span>")
    upload = gr.File(
        label="上傳勞動法 PDF 文件",
        file_types=[".pdf"],
        file_count="multiple",
        type="binary",
        interactive=True,
    )
    upload_btn = gr.Button("上傳並處理")
    upload_status = gr.Textbox(interactive=False, label="上傳狀態")

    # --- Chat ---
    gr.Markdown("### 勞動法小幫手")
    chatbot = gr.Chatbot()
    txt = gr.Textbox(placeholder="請輸入您的法律問題...", show_label=False)
    submit_btn = gr.Button("提問")

    # --- Event wiring ---
    save_api_key_btn.click(
        fn=save_api_key,
        inputs=[api_key_input, state],
        outputs=[api_key_status, state],
    )
    upload_btn.click(
        fn=process_files,
        inputs=[upload, state],
        outputs=[upload_status, state],
    )
    # The submit button and pressing Enter in the textbox share one handler.
    _chat_event = dict(
        fn=chat_interface,
        inputs=[txt, chatbot, state],
        outputs=[chatbot, state, txt],
    )
    submit_btn.click(**_chat_event)
    txt.submit(**_chat_event)
# Start the Gradio app.
demo.launch()