Commit 02e27e1
Parent(s): 67691d2
Upload 6 files

Changed files:
- app.py +64 -22
- common.py +26 -58
- pages/Chatbot.py +12 -5
- pages/ImportAllFile.py +70 -0
- requirements.txt +20 -20
app.py
CHANGED
@@ -3,11 +3,12 @@ import os
 import pickle
 import faiss
 import common
-[removed line; content not captured]
+import glob
 from multiprocessing import Lock
 from multiprocessing.managers import BaseManager
+from pathlib import Path
 from llama_index.callbacks import CallbackManager, LlamaDebugHandler
-from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, StorageContext, load_index_from_storage
+from llama_index import Document, VectorStoreIndex, SimpleDirectoryReader, ServiceContext, StorageContext, load_index_from_storage
 from llama_index.node_parser import SimpleNodeParser
 from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
 from llama_index.constants import DEFAULT_CHUNK_OVERLAP
@@ -16,6 +17,11 @@ from llama_index.graph_stores import SimpleGraphStore
 from llama_index.storage.docstore import SimpleDocumentStore
 from llama_index.storage.index_store import SimpleIndexStore
 from msal_streamlit_authentication import msal_authentication
+from llama_hub.file.cjk_pdf.base import CJKPDFReader
+from llama_hub.file.pptx.base import PptxReader
+from llama_hub.file.pandas_excel.base import PandasExcelReader
+from llama_hub.file.docx.base import DocxReader
+from llama_index.llms import OpenAI
 import tiktoken
 from llama_index.callbacks import CallbackManager, LlamaDebugHandler
 from dotenv import load_dotenv
@@ -35,14 +41,15 @@ AUTHORITY = f"https://login.microsoftonline.com/{TENANT_ID}"
 REDIRECT_URI = os.environ["REDIRECT_URI"]
 SCOPES = ["openid", "profile", "User.Read"]
 
-index_name = [value not captured]
-pkl_name = [value not captured]
+INDEX_NAME = os.environ["INDEX_NAME"]
+PKL_NAME = os.environ["PKL_NAME"]
 st.session_state.llama_debug_handler = LlamaDebugHandler()
 from log import logger
 
 def initialize_index():
     logger.info("initialize_index start")
-    text_splitter = TokenTextSplitter([arguments not captured]
+    llm = OpenAI(model='gpt-4', temperature=0.8, max_tokens=256)
+    text_splitter = TokenTextSplitter(separator="。", chunk_size=1500
         , chunk_overlap=DEFAULT_CHUNK_OVERLAP
         , tokenizer=tiktoken.encoding_for_model("gpt-4").encode)
     node_parser = SimpleNodeParser(text_splitter=text_splitter)
@@ -51,34 +58,57 @@ def initialize_index():
     faiss_index = faiss.IndexFlatL2(d)
     # デバッグ用
     callback_manager = CallbackManager([st.session_state.llama_debug_handler])
-    service_context = ServiceContext.from_defaults(node_parser=node_parser,callback_manager=callback_manager)
+    service_context = ServiceContext.from_defaults(llm=llm,node_parser=node_parser,callback_manager=callback_manager)
     lock = Lock()
     with lock:
-        if os.path.exists(index_name):
+        if os.path.exists(INDEX_NAME):
             logger.info("start import index")
             storage_context = StorageContext.from_defaults(
-                docstore=SimpleDocumentStore.from_persist_dir(persist_dir=index_name),
-                graph_store=SimpleGraphStore.from_persist_dir(persist_dir=index_name),
-                vector_store=FaissVectorStore.from_persist_dir(persist_dir=index_name),
-                index_store=SimpleIndexStore.from_persist_dir(persist_dir=index_name),
+                docstore=SimpleDocumentStore.from_persist_dir(persist_dir=INDEX_NAME),
+                graph_store=SimpleGraphStore.from_persist_dir(persist_dir=INDEX_NAME),
+                vector_store=FaissVectorStore.from_persist_dir(persist_dir=INDEX_NAME),
+                index_store=SimpleIndexStore.from_persist_dir(persist_dir=INDEX_NAME),
             )
             st.session_state.index = load_index_from_storage(storage_context=storage_context,service_context=service_context)
+            with open(PKL_NAME, "rb") as f:
+                st.session_state.stored_docs = pickle.load(f)
             common.setChatEngine()
         else:
             logger.info("start create index")
-            documents = [right-hand side not captured]
+            documents = list()
+            files = glob.glob("./documents/*")
             vector_store = FaissVectorStore(faiss_index=faiss_index)
             storage_context = StorageContext.from_defaults(vector_store=vector_store)
-            st.session_state.index = VectorStoreIndex.from_documents(documents, storage_context=storage_context,service_context=service_context)
-            st.session_state.index.storage_context.persist(persist_dir=index_name)
-            common.setChatEngine()
-            if os.path.exists(pkl_name):
-                logger.info(pkl_name)
-                with open(pkl_name, "rb") as f:
-                    st.session_state.stored_docs = pickle.load(f)
-            else:
             st.session_state.stored_docs=list()
-[removed line; content not captured]
+            for file in files:
+                loader=None
+                noextpath,extension = os.path.splitext(file)
+                logger.info(file)
+                document = Document()
+                if extension == ".txt" or ".md":
+                    document = SimpleDirectoryReader(input_files=[file], filename_as_id=True).load_data()[0]
+                else:
+                    if extension == ".pdf":
+                        loader = CJKPDFReader()
+                    elif extension == ".pptx":
+                        loader = PptxReader()
+                    elif extension == ".xlsx":
+                        loader = PandasExcelReader(pandas_config={"header": 0})
+                    elif extension == ".docx":
+                        loader = DocxReader()
+                    else:
+                        logger.error("Can`t read file:" + file)
+                        continue
+                    document = loader.load_data(file=Path(file))[0]
+                document.metadata={'filename': os.path.basename(file)}
+                documents.append(document)
+                st.session_state.stored_docs.append(os.path.basename(file))
+            st.session_state.index = VectorStoreIndex.from_documents(documents=documents,storage_context=storage_context,service_context=service_context)
+            st.session_state.index.storage_context.persist(persist_dir=INDEX_NAME)
+            with open(PKL_NAME, "wb") as f:
+                print("pickle")
+                pickle.dump(st.session_state.stored_docs, f)
+            common.setChatEngine()
 
 def logout():
     st.session_state["login_token"] = None
@@ -110,4 +140,16 @@ st.session_state["login_token"] = msal_authentication(
 if st.session_state.login_token:
     initialize_index()
     st.write("ようこそ", st.session_state.login_token["account"]["name"])
-    st.write("サイドメニューからファイルインポート又はChatbotへの質問を開始してください。")
+    st.write("サイドメニューからファイルインポート又はChatbotへの質問を開始してください。")
+    st.markdown("""
+## 使い方
+- **Chatbot**
+  初期からインポートされているファイルとImportXXFileでインポートしたファイルの内容に関する質問に対して、GenerativeAIが回答します。
+
+- **ChatbotWebRead**
+  入力したURLのサイトの情報に関して、GenerativeAIが回答します。
+  ImportXXFileの内容は登録されていません。
+
+- **ImportAllFile**
+  テキストファイル,mdファイル,Excel,PDF,PowerPoint,Wordをインポートできます。
+""")
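Note on the indexing loop added above: the check `if extension == ".txt" or ".md":` is always truthy in Python, because the non-empty string `".md"` evaluates to true on its own, so every file takes the plain-text path and the llama_hub loaders are never reached. A minimal corrected sketch of the same dispatch, assuming the same readers and the llama_index 0.8.x API used in this commit:

import os
from pathlib import Path

from llama_index import SimpleDirectoryReader
from llama_hub.file.cjk_pdf.base import CJKPDFReader
from llama_hub.file.docx.base import DocxReader
from llama_hub.file.pandas_excel.base import PandasExcelReader
from llama_hub.file.pptx.base import PptxReader

# Readers keyed by extension; built lazily so only the needed one is constructed.
LOADERS = {
    ".pdf": lambda: CJKPDFReader(),
    ".pptx": lambda: PptxReader(),
    ".xlsx": lambda: PandasExcelReader(pandas_config={"header": 0}),
    ".docx": lambda: DocxReader(),
}

def load_document(path: str):
    """Load one file into a llama_index Document, choosing a reader by extension."""
    _, extension = os.path.splitext(path)
    if extension in (".txt", ".md"):
        # Membership test instead of `extension == ".txt" or ".md"`,
        # which is always truthy.
        document = SimpleDirectoryReader(input_files=[path], filename_as_id=True).load_data()[0]
    elif extension in LOADERS:
        document = LOADERS[extension]().load_data(file=Path(path))[0]
    else:
        raise ValueError("unsupported file type: " + path)
    document.metadata = {"filename": os.path.basename(path)}
    return document

With a helper like this, the loop body reduces to appending `load_document(file)` for each path that does not raise.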
common.py
CHANGED
@@ -18,6 +18,7 @@ from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
 from llama_index.constants import DEFAULT_CHUNK_OVERLAP
 from llama_index.response_synthesizers import get_response_synthesizer
 from llama_index.callbacks import CallbackManager
+from llama_index.llms import OpenAI
 from log import logger
 
 # 接続元制御
@@ -69,12 +70,13 @@ def check_login():
     st.stop()
 
 
-index_name = [value not captured]
-pkl_name = [value not captured]
+INDEX_NAME = os.environ["INDEX_NAME"]
+PKL_NAME = os.environ["PKL_NAME"]
 # デバッグ用
-text_splitter = TokenTextSplitter([arguments not captured]
+llm = OpenAI(model='gpt-4', temperature=0.8, max_tokens=256)
+text_splitter = TokenTextSplitter(separator="。", chunk_size=1500
     , chunk_overlap=DEFAULT_CHUNK_OVERLAP
-    , tokenizer=tiktoken.encoding_for_model("gpt-[rest not captured]
+    , tokenizer=tiktoken.encoding_for_model("gpt-4").encode)
 node_parser = SimpleNodeParser(text_splitter=text_splitter)
 custom_prompt = Prompt("""\
 以下はこれまでの会話履歴と、ドキュメントを検索して回答する必要がある、ユーザーからの会話文です。
@@ -91,59 +93,10 @@ custom_prompt = Prompt("""\
 """)
 
 chat_history = []
-def fileImportChatEngine(uploaded_file):
-    filepath = None
-    try:
-        filepath = os.path.join('documents', os.path.basename( uploaded_file.name))
-        logger.info(filepath)
-        with open(filepath, 'wb') as f:
-            f.write(uploaded_file.getvalue())
-            f.close()
-        document = SimpleDirectoryReader(input_files=[filepath]).load_data()[0]
-        st.session_state.stored_docs.append(uploaded_file.name)
-        logger.info(st.session_state.stored_docs)
-        st.session_state.index.insert(document=document)
-        st.session_state.index.storage_context.persist(persist_dir=index_name)
-        setChatEngine()
-        with open(pkl_name, "wb") as f:
-            print("pickle")
-            pickle.dump(st.session_state.stored_docs, f)
-        st.session_state["file_uploader_key"] += 1
-        st.experimental_rerun()
-    except Exception as e:
-        # cleanup temp file
-        logger.error(e)
-        if filepath is not None and os.path.exists(filepath):
-            os.remove(filepath)
-
-def fileImportChatEngineCustomloader(uploaded_file,loader):
-    filepath = None
-    try:
-        filepath = os.path.join('documents', os.path.basename( uploaded_file.name))
-        logger.info(filepath)
-        with open(filepath, 'wb') as f:
-            f.write(uploaded_file.getvalue())
-            f.close()
-        document = loader.load_data(file=Path(filepath))[0]
-        st.session_state.stored_docs.append(uploaded_file.name)
-        logger.info(st.session_state.stored_docs)
-        st.session_state.index.insert(document=document)
-        st.session_state.index.storage_context.persist(persist_dir=index_name)
-        setChatEngine()
-        with open(pkl_name, "wb") as f:
-            print("pickle")
-            pickle.dump(st.session_state.stored_docs, f)
-        st.session_state["file_uploader_key"] += 1
-        st.experimental_rerun()
-    except Exception as e:
-        # cleanup temp file
-        logger.error(e)
-        if filepath is not None and os.path.exists(filepath):
-            os.remove(filepath)
 
 def setChatEngine():
     callback_manager = CallbackManager([st.session_state.llama_debug_handler])
-    service_context = ServiceContext.from_defaults(node_parser=node_parser,callback_manager=callback_manager)
+    service_context = ServiceContext.from_defaults(llm=llm,node_parser=node_parser,callback_manager=callback_manager)
     response_synthesizer = get_response_synthesizer(response_mode='refine')
     st.session_state.query_engine = st.session_state.index.as_query_engine(
         response_synthesizer=response_synthesizer,
@@ -162,10 +115,11 @@ def setChatEngine():
 # HumanMessagePromptTemplate,
 # SystemMessagePromptTemplate,
 # )
+# from llama_index.prompts.prompts import RefinePrompt, QuestionAnswerPrompt
 # from llama_index.prompts import Prompt
 # chat_text_qa_msgs = [
 #     SystemMessagePromptTemplate.from_template(
-#         "[rest not captured]
+#         "文脈が役に立たない場合でも、必ず日本語で質問に答えてください。"
 #     ),
 #     HumanMessagePromptTemplate.from_template(
 #         "以下に、コンテキスト情報を提供します。 \n"
@@ -174,13 +128,26 @@ def setChatEngine():
 #         "\n---------------------\n"
 #         "回答には以下を含めてください。\n"
 #         "・最初に問い合わせへのお礼してください\n"
-#         "[rest not captured]
+#         "・回答には出典のドキュメント名を含めるようにしてください。\n"
 #         "・質問内容を要約してください\n"
 #         "・最後に不明な点がないか確認してください \n"
 #         "この情報を踏まえて、次の質問に回答してください: {query_str}\n"
-#         "[rest not captured]
+#         "答えを知らない場合は、「わからない」と回答してください。また、必ず日本語で回答してください。"
 #     ),
 # ]
+# REFINE_PROMPT = ("元の質問は次のとおりです: {query_str} \n"
+#     "既存の回答を提供しました: {existing_answer} \n"
+#     "既存の答えを洗練する機会があります \n"
+#     "(必要な場合のみ)以下にコンテキストを追加します。 \n"
+#     "------------\n"
+#     "{context_msg}\n"
+#     "------------\n"
+#     "新しいコンテキストを考慮して、元の答えをより良く洗練して質問に答えてください。\n"
+#     "回答には出典のドキュメント名を含めるようにしてください。\n"
+#     "コンテキストが役に立たない場合は、元の回答と同じものを返します。"
+#     "どのような場合でも、返答は日本語で行います。")
+# refine_prompt = RefinePrompt(REFINE_PROMPT)
+
 # def setChatEngine():
 #     callback_manager = CallbackManager([st.session_state.llama_debug_handler])
 #     service_context = ServiceContext.from_defaults(node_parser=node_parser,callback_manager=callback_manager)
@@ -188,7 +155,8 @@ def setChatEngine():
 #     st.session_state.chat_engine = st.session_state.index.as_chat_engine(
 #         response_synthesizer=response_synthesizer,
 #         service_context=service_context,
-#         chat_mode="[rest not captured]
+#         chat_mode="condense_question",
 #         text_qa_template= Prompt.from_langchain_prompt(ChatPromptTemplate.from_messages(chat_text_qa_msgs)),
+#         refine_template=refine_prompt,
 #         verbose=True
 #     )
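Note on the commented-out block above: it preserves an earlier attempt at a chat engine with custom Japanese QA and refine prompts. Re-enabling it would look roughly like the sketch below. This is a sketch only: `REFINE_PROMPT` is the template string from the comments, `RefinePrompt` is the 0.8.x class the comments import, and whether `as_chat_engine` forwards these keyword arguments unchanged in this exact llama_index version is an assumption worth verifying.

import streamlit as st
from llama_index.prompts.prompts import RefinePrompt

# REFINE_PROMPT is the Japanese refine template from the commented block above.
refine_prompt = RefinePrompt(REFINE_PROMPT)

st.session_state.chat_engine = st.session_state.index.as_chat_engine(
    chat_mode="condense_question",  # condense follow-ups into standalone questions
    refine_template=refine_prompt,  # apply the custom refine instructions
    verbose=True,
)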
pages/Chatbot.py
CHANGED
@@ -3,8 +3,8 @@ import streamlit as st
 import common
 import os
 
-[removed line; content not captured]
-[removed line; content not captured]
+INDEX_NAME = os.environ["INDEX_NAME"]
+PKL_NAME = os.environ["PKL_NAME"]
 from log import logger
 common.check_login()
 
@@ -12,8 +12,9 @@ st.title("💬 Chatbot")
 if st.button("リセット",use_container_width=True):
     st.session_state.chat_engine.reset()
     st.session_state.messages = [{"role": "assistant", "content": "お困りごとはございますか?"}]
-    st.experimental_rerun()
     logger.info("reset")
+    st.experimental_rerun()
+
 
 if "messages" not in st.session_state:
     st.session_state["messages"] = [{"role": "assistant", "content": "お困りごとはございますか?"}]
@@ -25,7 +26,13 @@ if prompt := st.chat_input():
     st.session_state.messages.append({"role": "user", "content": prompt})
     st.chat_message("user").write(prompt)
     response = st.session_state.chat_engine.chat(prompt)
-[removed line; content not captured]
-[removed line; content not captured]
+    fname = " ※参照:"
+    for node in response.source_nodes:
+        logger.info(node)
+        if node.node.metadata is not None:
+            if "filename" in node.node.metadata:
+                fname = fname + " "+str(node.node.metadata["filename"])
+    msg = str(response) + str(fname)
+    logger.info(msg)
     st.session_state.messages.append({"role": "assistant", "content": msg})
     st.chat_message("assistant").write(msg)
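Note on the source attribution added above: the loop appends one `filename` entry per source node, so a file cited by several nodes is listed several times. A small helper, sketched under the same llama_index 0.8.x response shape, does the same thing while skipping duplicates:

def source_filenames(response) -> str:
    """Collect unique 'filename' metadata values from a chat response's source nodes."""
    names = []
    for node in response.source_nodes:
        metadata = node.node.metadata or {}
        filename = metadata.get("filename")
        if filename is not None and str(filename) not in names:
            names.append(str(filename))
    return " ※参照: " + " ".join(names) if names else ""

Usage would then be `msg = str(response) + source_filenames(response)`, with the two lines that record and display `msg` unchanged.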
pages/ImportAllFile.py
ADDED
@@ -0,0 +1,70 @@
+import streamlit as st
+import common
+import os
+import pickle
+from llama_hub.file.cjk_pdf.base import CJKPDFReader
+from llama_hub.file.pptx.base import PptxReader
+from llama_hub.file.pandas_excel.base import PandasExcelReader
+from llama_hub.file.docx.base import DocxReader
+from llama_index import Document, SimpleDirectoryReader
+from pathlib import Path
+from log import logger
+INDEX_NAME = os.environ["INDEX_NAME"]
+PKL_NAME = os.environ["PKL_NAME"]
+
+common.check_login()
+
+if "file_uploader_key" not in st.session_state:
+    st.session_state["file_uploader_key"] = 0
+
+st.title("📝 ImportAllFile")
+
+uploaded_file = st.file_uploader("Upload an article", type=("txt", "md", "pdf", "xlsx", "docx", "pptx"),key=st.session_state["file_uploader_key"])
+if st.button("import",use_container_width=True):
+    filepath = os.path.join('documents', os.path.basename( uploaded_file.name))
+    try:
+        with open(filepath, 'wb') as f:
+            f.write(uploaded_file.getvalue())
+            f.close()
+
+        loader=None
+        noextpath,extension = os.path.splitext(filepath)
+        logger.info(filepath)
+        document = Document()
+        if extension == ".txt" or ".md":
+            document = SimpleDirectoryReader(input_files=[filepath], filename_as_id=True).load_data()[0]
+        else:
+            if extension == ".pdf":
+                loader = CJKPDFReader()
+            elif extension == ".pptx":
+                loader = PptxReader()
+            elif extension == ".xlsx":
+                loader = PandasExcelReader(pandas_config={"header": 0})
+            elif extension == ".docx":
+                loader = DocxReader()
+            else:
+                logger.error("Can`t read file:" + uploaded_file.name)
+            document = loader.load_data(file=Path(filepath))[0]
+        document.metadata={'filename': os.path.basename(uploaded_file.name)}
+        st.session_state.stored_docs.append(uploaded_file.name)
+        logger.info(st.session_state.stored_docs)
+        st.session_state.index.insert(document=document)
+        st.session_state.index.storage_context.persist(persist_dir=INDEX_NAME)
+        os.remove(filepath)
+        common.setChatEngine()
+        with open(PKL_NAME, "wb") as f:
+            print("pickle")
+            pickle.dump(st.session_state.stored_docs, f)
+        st.session_state["file_uploader_key"] += 1
+        st.experimental_rerun()
+    except Exception as e:
+        # cleanup temp file
+        logger.error(e)
+        if filepath is not None and os.path.exists(filepath):
+            os.remove(filepath)
+
+st.subheader("Import File List")
+if "stored_docs" in st.session_state:
+    logger.info(st.session_state.stored_docs)
+    for docname in st.session_state.stored_docs:
+        st.write(docname)
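Note on the flow in this new page: when no loader matches the extension, the code logs an error but, unlike app.py (which uses `continue`), falls straight through to `loader.load_data(...)` with `loader` still `None`; the resulting `AttributeError` is what routes execution to the cleanup in the `except` block. An explicit guard would make that intent visible. A hedged sketch follows; `abort_if_unsupported` is a hypothetical helper name, not something in the repo:

import os
import streamlit as st
from log import logger  # this repo's logging module

def abort_if_unsupported(loader, filepath: str, display_name: str) -> None:
    """Stop the Streamlit run instead of calling load_data on a None loader."""
    if loader is None:
        logger.error("Can't read file: " + display_name)
        if os.path.exists(filepath):
            os.remove(filepath)  # drop the temporary copy under documents/
        st.stop()  # halts this script run; Streamlit reruns on the next interaction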
requirements.txt
CHANGED
@@ -1,23 +1,23 @@
-streamlit
-langchain
-openai
-duckduckgo-search
-anthropic
-nltk
+streamlit==1.25.0
+langchain==0.0.266
+openai==0.27.9
+duckduckgo-search==3.8.5
+anthropic==0.3.10
+nltk==3.8.1
 llama-index==0.8.4
 pypdf==3.9.0
 faiss-cpu==1.7.4
-html2text
-streamlit-authenticator
-extra_streamlit_components
-requests_oauthlib
-python-dotenv
-torch
-transformers
-python-pptx
-Pillow
-openpyxl
-llama_hub
-msal-streamlit-authentication
-pdfminer.six
-docx2txt
+html2text==2020.1.16
+streamlit-authenticator==0.2.2
+extra_streamlit_components==0.1.56
+requests_oauthlib==1.3.1
+python-dotenv==1.0.0
+torch==2.0.1
+transformers==4.32.0
+python-pptx==0.6.21
+Pillow==9.5.0
+openpyxl==3.1.2
+llama_hub==0.0.25
+msal-streamlit-authentication==1.0.9
+pdfminer.six==20221105
+docx2txt==0.8