Update app.py
app.py
CHANGED
@@ -8,90 +8,134 @@ from langchain.chains import ConversationalRetrievalChain
 from langchain_g4f import G4FLLM
 from g4f import Provider, models
 
-#
+# Configuration
 DB_FAISS_PATH = 'vectorstore/db_faiss'
+EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
+LLM_MODEL = models.gpt_35_turbo
+LLM_PROVIDER = Provider.Aichat
+
+# UI Configuration
+def configure_ui():
+    """Configure Streamlit UI settings"""
+    st.set_page_config(page_title="Zendo AI Assistant", page_icon="📄")
+    hide_streamlit_style = """
+        <style>
+        #MainMenu {visibility: hidden;}
+        footer {visibility: hidden;}
+        .stTextInput input {font-size: 16px;}
+        </style>
+    """
+    st.markdown(hide_streamlit_style, unsafe_allow_html=True)
+
+# Initialize session state
+def init_session_state():
+    """Initialize session state variables"""
+    if 'history' not in st.session_state:
+        st.session_state['history'] = []
+    if 'generated' not in st.session_state:
+        st.session_state['generated'] = ["こんにちは!Zendoアシスタントです。PDFの内容について何でも聞いてください 🤗"]
+    if 'past' not in st.session_state:
+        st.session_state['past'] = ["ようこそ!"]
 
-# Load LLM
+# Load LLM model
 def load_llm():
+    """Load the language model"""
+    return G4FLLM(
+        model=LLM_MODEL,
+        provider=LLM_PROVIDER,
     )
-    return llm
-
-# Hide default Streamlit style
-hide_streamlit_style = """
-    <style>
-    #MainMenu {visibility: hidden;}
-    footer {visibility: hidden;}
-    </style>
-"""
-st.markdown(hide_streamlit_style, unsafe_allow_html=True)
-
-# Set the title for the Streamlit app
-st.title("📄 PDF Chatbot - Zendo美女チャットボックス")
-
-# Upload PDF file
-uploaded_file = st.file_uploader("Tải lên tệp PDF của bạn", type="pdf")
 
+# Process uploaded PDF
+def process_pdf(uploaded_file):
+    """Process the uploaded PDF file"""
     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmpfile:
        tmpfile.write(uploaded_file.getvalue())
        tmpfile_path = tmpfile.name
-
-    # Load PDF using PyPDFLoader
+
     loader = PyPDFLoader(tmpfile_path)
     pdf_data = loader.load()
+
+    embeddings = HuggingFaceEmbeddings(
+        model_name=EMBEDDING_MODEL,
+        model_kwargs={'device': 'cpu'}
+    )
+
     db = FAISS.from_documents(pdf_data, embeddings)
     db.save_local(DB_FAISS_PATH)
+    return db
+
+# Chat function
+def conversational_chat(query, chain):
+    """Handle conversational chat with memory"""
+    result = chain({
+        "question": query,
+        "chat_history": st.session_state['history']
+    })
+    st.session_state['history'].append((query, result["answer"]))
+    return result["answer"]
+
+# Main application
+def main():
+    configure_ui()
+    init_session_state()
+
+    st.title("📄 Zendo AI Assistant - PDFチャットボット")
+
+    # Language selection
+    col1, col2 = st.columns([1, 3])
+    with col1:
+        language = st.selectbox("言語/Language", ["日本語", "English", "Tiếng Việt"])
+
+    # Upload PDF
+    uploaded_file = st.file_uploader(
+        "PDFファイルをアップロードしてください (Upload PDF file)",
+        type="pdf",
+        help="PDFをアップロードすると、その内容について質問できます"
+    )
+
+    if uploaded_file:
+        with st.spinner("PDFを処理中...少々お待ちください"):
+            db = process_pdf(uploaded_file)
+            llm = load_llm()
+            chain = ConversationalRetrievalChain.from_llm(
+                llm=llm,
+                retriever=db.as_retriever()
+            )
+        st.success("PDFの処理が完了しました!質問をどうぞ")
+
+        # Chat interface
+        response_container = st.container()
+
+        with st.form(key='chat_form', clear_on_submit=True):
+            user_input = st.text_input(
+                "メッセージを入力...",
+                key='input',
+                placeholder="PDFについて質問してください"
+            )
+            submit_button = st.form_submit_button(label='送信')
+
+        if submit_button and user_input:
+            output = conversational_chat(user_input, chain)
+            st.session_state['past'].append(user_input)
+            st.session_state['generated'].append(output)
+
+        # Display chat history
+        if st.session_state['generated']:
+            with response_container:
+                for i in range(len(st.session_state['generated'])):
+                    message(
+                        st.session_state["past"][i],
+                        is_user=True,
+                        key=str(i) + '_user',
+                        avatar_style="big-smile"
+                    )
+                    message(
+                        st.session_state["generated"][i],
+                        key=str(i),
+                        avatar_style="thumbs"
+                    )
+    else:
+        st.info("PDFファイルをアップロードしてチャットを開始してください")
+
+if __name__ == "__main__":
+    main()
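For anyone who wants to smoke-test the refactored helpers outside the Streamlit UI, they compose roughly as below. This is a minimal sketch, not part of the change: it assumes app.py is importable as a module with its unchanged imports above line 8 intact, and "sample.pdf" and the question string are placeholders.

# Minimal sketch (illustrative only): drive the refactored helpers without Streamlit.
import io

from langchain.chains import ConversationalRetrievalChain

from app import load_llm, process_pdf

# BytesIO stands in for Streamlit's UploadedFile, which also exposes .getvalue().
with open("sample.pdf", "rb") as f:
    pdf_bytes = io.BytesIO(f.read())

# Embeds the PDF pages and saves a FAISS index under DB_FAISS_PATH.
db = process_pdf(pdf_bytes)

chain = ConversationalRetrievalChain.from_llm(
    llm=load_llm(),
    retriever=db.as_retriever(),
)

# Same call style the app uses in conversational_chat().
result = chain({"question": "What is this document about?", "chat_history": []})
print(result["answer"])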