Commit 8b16906
Upload 6 files
Parent(s): 1301e19

- app.py +156 -175
- common.py +161 -51
- log.py +5 -0
- pages/Chatbot.py +14 -26
- pages/ChatbotWebRead.py +20 -21
- pages/ImportAllFile.py +76 -0
app.py
CHANGED
@@ -1,175 +1,156 @@
-import streamlit as st
-import os
-import pickle
-import faiss
-[old line 5 is truncated in this view]
-
-from multiprocessing import Lock
-from multiprocessing.managers import BaseManager
-[old lines 9-11 are truncated in this view]
-from llama_index.node_parser import SimpleNodeParser
-from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
-from llama_index.constants import DEFAULT_CHUNK_OVERLAP
-[old lines 15-156 are blank or truncated in this view]
-    config['preauthorized'],
-)
-
-name, authentication_status, username = authenticator.login('Login', 'main')
-
-
-if 'authentication_status' not in st.session_state:
-    st.session_state['authentication_status'] = None
-
-if st.session_state["authentication_status"]:
-    authenticator.logout('Logout', 'main')
-    st.write(f'Login succeeded')
-    initialize_index()
-    # Post-login processing goes here.
-elif st.session_state["authentication_status"] is False:
-    st.error('Incorrect username or password')
-elif st.session_state["authentication_status"] is None:
-    st.warning('Please enter your username and password')
-
+import streamlit as st
+import os
+import pickle
+import faiss
+import common
+import glob
+from multiprocessing import Lock
+from multiprocessing.managers import BaseManager
+from pathlib import Path
+from llama_index.callbacks import CallbackManager, LlamaDebugHandler
+from llama_index import Document, VectorStoreIndex, SimpleDirectoryReader, ServiceContext, StorageContext, load_index_from_storage
+from llama_index.node_parser import SimpleNodeParser
+from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
+from llama_index.constants import DEFAULT_CHUNK_OVERLAP
+from llama_index.vector_stores.faiss import FaissVectorStore
+from llama_index.graph_stores import SimpleGraphStore
+from llama_index.storage.docstore import SimpleDocumentStore
+from llama_index.storage.index_store import SimpleIndexStore
+from msal_streamlit_authentication import msal_authentication
+from llama_hub.file.cjk_pdf.base import CJKPDFReader
+from llama_hub.file.pptx.base import PptxReader
+from llama_hub.file.pandas_excel.base import PandasExcelReader
+from llama_hub.file.docx.base import DocxReader
+from llama_index.llms import OpenAI
+import tiktoken
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# Source-IP access control
+ALLOW_IP_ADDRESS = os.environ["ALLOW_IP_ADDRESS"]
+
+# Azure AD app registration details
+CLIENT_ID = os.environ["CLIENT_ID"]
+CLIENT_SECRET = os.environ["CLIENT_SECRET"]
+TENANT_ID = os.environ["TENANT_ID"]
+
+# Azure API
+AUTHORITY = f"https://login.microsoftonline.com/{TENANT_ID}"
+REDIRECT_URI = os.environ["REDIRECT_URI"]
+SCOPES = ["openid", "profile", "User.Read"]
+
+INDEX_NAME = os.environ["INDEX_NAME"]
+PKL_NAME = os.environ["PKL_NAME"]
+st.session_state.llama_debug_handler = LlamaDebugHandler()
+from log import logger
+
+def initialize_index():
+    logger.info("initialize_index start")
+    llm = OpenAI(model='gpt-3.5-turbo', temperature=0.8, max_tokens=256)
+    text_splitter = TokenTextSplitter(separator="。", chunk_size=1500,
+                                      chunk_overlap=DEFAULT_CHUNK_OVERLAP,
+                                      tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode)
+    node_parser = SimpleNodeParser(text_splitter=text_splitter)
+    d = 1536
+    k = 2
+    faiss_index = faiss.IndexFlatL2(d)
+    # For debugging
+    callback_manager = CallbackManager([st.session_state.llama_debug_handler])
+    service_context = ServiceContext.from_defaults(llm=llm, node_parser=node_parser, callback_manager=callback_manager)
+    lock = Lock()
+    with lock:
+        if os.path.exists(INDEX_NAME):
+            logger.info("start import index")
+            storage_context = StorageContext.from_defaults(
+                docstore=SimpleDocumentStore.from_persist_dir(persist_dir=INDEX_NAME),
+                graph_store=SimpleGraphStore.from_persist_dir(persist_dir=INDEX_NAME),
+                vector_store=FaissVectorStore.from_persist_dir(persist_dir=INDEX_NAME),
+                index_store=SimpleIndexStore.from_persist_dir(persist_dir=INDEX_NAME),
+            )
+            st.session_state.index = load_index_from_storage(storage_context=storage_context, service_context=service_context)
+            with open(PKL_NAME, "rb") as f:
+                st.session_state.stored_docs = pickle.load(f)
+            common.setChatEngine()
+        else:
+            logger.info("start create index")
+            documents = list()
+            files = glob.glob("./documents/*")
+            vector_store = FaissVectorStore(faiss_index=faiss_index)
+            storage_context = StorageContext.from_defaults(vector_store=vector_store)
+            st.session_state.stored_docs = list()
+            for file in files:
+                loader = None
+                noextpath, extension = os.path.splitext(file)
+                logger.info(file)
+                document = Document()
+                if extension == ".txt" or extension == ".md":
+                    document = SimpleDirectoryReader(input_files=[file], filename_as_id=True).load_data()[0]
+                else:
+                    if extension == ".pdf":
+                        loader = CJKPDFReader()
+                    elif extension == ".pptx":
+                        loader = PptxReader()
+                    elif extension == ".xlsx":
+                        loader = PandasExcelReader(pandas_config={"header": 0})
+                    elif extension == ".docx":
+                        loader = DocxReader()
+                    else:
+                        logger.error("Can't read file: " + file)
+                        continue
+                    document = loader.load_data(file=Path(file))[0]
+                document.metadata = {'filename': os.path.basename(file)}
+                documents.append(document)
+                st.session_state.stored_docs.append(os.path.basename(file))
+            st.session_state.index = VectorStoreIndex.from_documents(documents=documents, storage_context=storage_context, service_context=service_context)
+            st.session_state.index.storage_context.persist(persist_dir=INDEX_NAME)
+            with open(PKL_NAME, "wb") as f:
+                print("pickle")
+                pickle.dump(st.session_state.stored_docs, f)
+            common.setChatEngine()
+
+def logout():
+    st.session_state["login_token"] = None
+
+# Main
+st.session_state["login_token"] = msal_authentication(
+    auth={
+        "clientId": CLIENT_ID,
+        "authority": AUTHORITY,
+        "redirectUri": REDIRECT_URI,
+        "postLogoutRedirectUri": ""
+    },  # Corresponds to the 'auth' configuration for an MSAL Instance
+    cache={
+        "cacheLocation": "sessionStorage",
+        "storeAuthStateInCookie": False
+    },  # Corresponds to the 'cache' configuration for an MSAL Instance
+    login_request={
+        "scopes": SCOPES
+    },  # Optional
+    logout_request={},  # Optional
+    login_button_text="Login",  # Optional, defaults to "Login"
+    logout_button_text="Logout",  # Optional, defaults to "Logout"
+    class_name="css_button_class_selector",  # Optional, defaults to None. Corresponds to HTML class.
+    html_id="html_id_for_button",  # Optional, defaults to None. Corresponds to HTML id.
+    # key=1  # Optional if only a single instance is needed
+)
+# st.write("Received login token:", st.session_state.login_token)
+
+if st.session_state.login_token:
+    initialize_index()
+    st.write("Welcome,", st.session_state.login_token["account"]["name"])
+    st.write("Use the side menu to import files or start asking the Chatbot questions.")
+    st.markdown("""
+    ## Usage
+    - **Chatbot**
+      Generative AI answers questions about the files imported from the start and the files imported with ImportXXFile.
+      Note: a response may occasionally fail to come back correctly. The referenced files are listed with each answer, so use them as a guide.
+
+    - **ChatbotWebRead**
+      Generative AI answers questions about the site at the URL you enter.
+      Content from ImportAllFile is not included.
+
+    - **ImportAllFile**
+      You can import text files, md files, Excel, PDF, PowerPoint, and Word.
+    """)
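The heart of this commit is the load-or-build branch in initialize_index(): the documents are embedded into a FAISS-backed index once, persisted under INDEX_NAME, and reloaded on later starts. Below is a minimal sketch of that persist/reload round trip with the same legacy llama_index API used above; the ./documents and ./storage paths are illustrative, not the app's configured values.

import faiss
from llama_index import VectorStoreIndex, SimpleDirectoryReader, StorageContext, load_index_from_storage
from llama_index.vector_stores.faiss import FaissVectorStore

# First run: embed the documents into a FAISS index and persist everything to disk.
storage_context = StorageContext.from_defaults(
    vector_store=FaissVectorStore(faiss_index=faiss.IndexFlatL2(1536)))  # 1536 = embedding width used above
index = VectorStoreIndex.from_documents(
    SimpleDirectoryReader("./documents").load_data(), storage_context=storage_context)
index.storage_context.persist(persist_dir="./storage")

# Later runs: reload from disk instead of re-embedding.
storage_context = StorageContext.from_defaults(
    vector_store=FaissVectorStore.from_persist_dir(persist_dir="./storage"),
    persist_dir="./storage")
index = load_index_from_storage(storage_context=storage_context)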
common.py
CHANGED
@@ -1,51 +1,161 @@
-[the 51 removed lines of the old common.py are blank or truncated in this view; only fragments of the old imports survive]
+import streamlit as st
+import os
+import pickle
+import ipaddress
+import tiktoken
+
+from pathlib import Path
+from streamlit import runtime
+from streamlit.runtime.scriptrunner import get_script_run_ctx
+from streamlit.web.server.websocket_headers import _get_websocket_headers
+from llama_index import SimpleDirectoryReader
+from llama_index import Prompt
+from llama_index.chat_engine import CondenseQuestionChatEngine
+from llama_index.response_synthesizers import get_response_synthesizer
+from llama_index import ServiceContext
+from llama_index.node_parser import SimpleNodeParser
+from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
+from llama_index.constants import DEFAULT_CHUNK_OVERLAP
+from llama_index.callbacks import CallbackManager
+from llama_index.llms import OpenAI
+from log import logger
+
+# Source-IP access control
+ALLOW_IP_ADDRESS = os.environ["ALLOW_IP_ADDRESS"]
+
+# Azure AD app registration details
+CLIENT_ID = os.environ["CLIENT_ID"]
+CLIENT_SECRET = os.environ["CLIENT_SECRET"]
+TENANT_ID = os.environ["TENANT_ID"]
+
+# Azure API
+REDIRECT_URI = os.environ["REDIRECT_URI"]
+AUTHORITY = f"https://login.microsoftonline.com/{TENANT_ID}"
+SCOPES = ["openid", "profile", "User.Read"]
+
+# Get the client IP
+def get_remote_ip():
+    ctx = get_script_run_ctx()
+    session_info = runtime.get_instance().get_client(ctx.session_id)
+    headers = _get_websocket_headers()
+    return session_info.request.remote_ip, headers.get("X-Forwarded-For")
+
+# Decide whether the client IP is allowed
+def is_allow_ip_address():
+    remote_ip, x_forwarded_for = get_remote_ip()
+    logger.info("remote_ip:" + remote_ip)
+    if x_forwarded_for is not None:
+        remote_ip = x_forwarded_for
+    # localhost
+    if remote_ip == "::1":
+        return True
+
+    # Private IP
+    ipaddr = ipaddress.IPv4Address(remote_ip)
+    logger.info("ipaddr:" + str(ipaddr))
+    if ipaddr.is_private:
+        return True
+
+    # Otherwise, check against the allow list
+    return remote_ip in ALLOW_IP_ADDRESS
+
+# Login check
+def check_login():
+    if not is_allow_ip_address():
+        st.title("HTTP 403 Forbidden")
+        st.stop()
+    if "login_token" not in st.session_state or not st.session_state.login_token:
+        st.warning("**Please log in**")
+        st.stop()
+
+
+INDEX_NAME = os.environ["INDEX_NAME"]
+PKL_NAME = os.environ["PKL_NAME"]
+# For debugging
+llm = OpenAI(model='gpt-3.5-turbo', temperature=0.8, max_tokens=256)
+text_splitter = TokenTextSplitter(separator="。", chunk_size=1500,
+                                  chunk_overlap=DEFAULT_CHUNK_OVERLAP,
+                                  tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode)
+node_parser = SimpleNodeParser(text_splitter=text_splitter)
+custom_prompt = Prompt("""\
+Below are the conversation history so far and a new message from the user that must be answered by searching the documents.
+Create a search query based on the conversation and the new message.
+If greeted, return a greeting.
+If you do not know the answer, reply "I don't know."
+Give all answers in Japanese.
+Conversation history:
+{chat_history}
+New message:
+{question}
+Search query:
+""")
+
+chat_history = []
+
+def setChatEngine():
+    callback_manager = CallbackManager([st.session_state.llama_debug_handler])
+    service_context = ServiceContext.from_defaults(llm=llm, node_parser=node_parser, callback_manager=callback_manager)
+    response_synthesizer = get_response_synthesizer(response_mode='refine')
+    st.session_state.query_engine = st.session_state.index.as_query_engine(
+        response_synthesizer=response_synthesizer,
+        service_context=service_context,
+    )
+    st.session_state.chat_engine = CondenseQuestionChatEngine.from_defaults(
+        query_engine=st.session_state.query_engine,
+        condense_question_prompt=custom_prompt,
+        chat_history=chat_history,
+        verbose=True
+    )
+
+# Notes for chat mode react
+# from langchain.prompts.chat import (
+#     ChatPromptTemplate,
+#     HumanMessagePromptTemplate,
+#     SystemMessagePromptTemplate,
+# )
+# from llama_index.prompts.prompts import RefinePrompt, QuestionAnswerPrompt
+# from llama_index.prompts import Prompt
+# chat_text_qa_msgs = [
+#     SystemMessagePromptTemplate.from_template(
+#         "Always answer the question in Japanese, even if the context is not helpful."
+#     ),
+#     HumanMessagePromptTemplate.from_template(
+#         "Context information is provided below. \n"
+#         "---------------------\n"
+#         "{context_str}"
+#         "\n---------------------\n"
+#         "Include the following in your answer:\n"
+#         "- First, thank the user for the inquiry\n"
+#         "- Include the source document name in the answer\n"
+#         "- Summarize the question\n"
+#         "- Finally, confirm whether anything remains unclear \n"
+#         "Given this information, answer the following question: {query_str}\n"
+#         "If you do not know the answer, reply 'I don't know.' Always answer in Japanese."
+#     ),
+# ]
+# REFINE_PROMPT = ("The original question is as follows: {query_str} \n"
+#                  "We have provided an existing answer: {existing_answer} \n"
+#                  "There is an opportunity to refine the existing answer \n"
+#                  "(only if needed) with some more context below. \n"
+#                  "------------\n"
+#                  "{context_msg}\n"
+#                  "------------\n"
+#                  "Given the new context, refine the original answer to better answer the question.\n"
+#                  "Include the source document name in the answer.\n"
+#                  "If the context is not helpful, return the original answer unchanged."
+#                  "In all cases, respond in Japanese.")
+# refine_prompt = RefinePrompt(REFINE_PROMPT)
+
+# def setChatEngine():
+#     callback_manager = CallbackManager([st.session_state.llama_debug_handler])
+#     service_context = ServiceContext.from_defaults(node_parser=node_parser, callback_manager=callback_manager)
+#     response_synthesizer = get_response_synthesizer(response_mode='refine')
+#     st.session_state.chat_engine = st.session_state.index.as_chat_engine(
+#         response_synthesizer=response_synthesizer,
+#         service_context=service_context,
+#         chat_mode="condense_question",
+#         text_qa_template=Prompt.from_langchain_prompt(ChatPromptTemplate.from_messages(chat_text_qa_msgs)),
+#         refine_template=refine_prompt,
+#         verbose=True
+#     )
log.py
ADDED
@@ -0,0 +1,5 @@
+import logging
+
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger(__name__)
+
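Every module in this commit pulls the same preconfigured logger via `from log import logger`, so the single basicConfig call above controls output app-wide. Usage is just:

from log import logger

logger.info("initialize_index start")  # emitted through the DEBUG-level basicConfig above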
pages/Chatbot.py
CHANGED
@@ -1,39 +1,20 @@
 
 import streamlit as st
-import logging
 import common
+import os
-
-
-index_name = "./data/storage"
-pkl_name = "./data/stored_documents.pkl"
-
-custom_prompt = Prompt("""\
-Below are the conversation history so far and a new message from the user that must be answered by searching the documents.
-Create a search query based on the conversation and the new message. Answers are given in Japanese.
-If the new message is a greeting, return a greeting.
-If the new message is a question, return the answer found by the search.
-If you do not know the answer, honestly say you do not know.
-Conversation history:
-{chat_history}
-New message:
-{question}
-Search query:
-""")
-
-chat_history = []
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("__name__")
-logger.debug("debug log")
 
+INDEX_NAME = os.environ["INDEX_NAME"]
+PKL_NAME = os.environ["PKL_NAME"]
+from log import logger
 common.check_login()
 
 st.title("💬 Chatbot")
 if st.button("Reset", use_container_width=True):
     st.session_state.chat_engine.reset()
     st.session_state.messages = [{"role": "assistant", "content": "How can I help you?"}]
-    st.experimental_rerun()
     logger.info("reset")
+    st.experimental_rerun()
+
 
 if "messages" not in st.session_state:
     st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]
@@ -45,6 +26,13 @@ if prompt := st.chat_input():
     st.session_state.messages.append({"role": "user", "content": prompt})
     st.chat_message("user").write(prompt)
     response = st.session_state.chat_engine.chat(prompt)
-    [removed line truncated in this view]
+    fname = " Reference:"
+    for node in response.source_nodes:
+        logger.info(node)
+        if node.node.metadata is not None:
+            if "filename" in node.node.metadata:
+                fname = fname + " " + str(node.node.metadata["filename"])
+    msg = str(response) + str(fname)
+    logger.info(msg)
     st.session_state.messages.append({"role": "assistant", "content": msg})
     st.chat_message("assistant").write(msg)
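The lines between the two hunks (new lines 21-25) are unchanged and therefore hidden by the diff; for this st.chat_input pattern they would typically be the history-replay loop, roughly as follows (a sketch of the usual pattern, not the file's verbatim code):

# Re-render the stored conversation on every Streamlit rerun.
for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["content"])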
pages/ChatbotWebRead.py
CHANGED
@@ -1,19 +1,21 @@
 
 import streamlit as st
 import faiss
-import logging
-
-[two truncated llama_index import lines]
-from llama_index.chat_engine import CondenseQuestionChatEngine;
+import langchain
+from llama_index.callbacks import CallbackManager
+from llama_index import ServiceContext, VectorStoreIndex
+from llama_index.chat_engine import CondenseQuestionChatEngine
 from llama_index.node_parser import SimpleNodeParser
 from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
 from llama_index.constants import DEFAULT_CHUNK_OVERLAP
 from llama_index.response_synthesizers import get_response_synthesizer
-[truncated llama_index import line]
+from llama_index import SimpleWebPageReader
 
+# from llama_index.prompts import Prompt
+from llama_index import Prompt
 import tiktoken
 import common
+langchain.verbose = True
 
 custom_prompt = Prompt("""\
 Below are the conversation history so far and a new message from the user that must be answered by searching the documents.
@@ -27,13 +29,9 @@ custom_prompt = Prompt("""\
 {question}
 Search query:
 """)
-
 chat_history = []
 
-
-logger = logging.getLogger("__name__")
-logger.debug("debug log")
-
+from log import logger
 
 common.check_login()
 
@@ -45,27 +43,29 @@ URLtext = st.text_input(
 )
 
 if st.button("URL reading", use_container_width=True):
-    text_splitter = TokenTextSplitter( [truncated]
+    text_splitter = TokenTextSplitter( chunk_size=1500
     , chunk_overlap=DEFAULT_CHUNK_OVERLAP
     , tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode)
     node_parser = SimpleNodeParser(text_splitter=text_splitter)
     d = 1536
    k = 2
     faiss_index = faiss.IndexFlatL2(d)
-
-
-    callback_manager = CallbackManager([llama_debug_handler])
+
+    callback_manager = CallbackManager([st.session_state.llama_debug_handler])
     service_context = ServiceContext.from_defaults(node_parser=node_parser, callback_manager=callback_manager)
 
     webDocuments = SimpleWebPageReader(html_to_text=True).load_data(
         [URLtext]
     )
     logger.info(webDocuments)
-    webIndex = [truncated]
-    response_synthesizer = get_response_synthesizer(response_mode='[truncated]
-    webQuery_engine = webIndex.as_query_engine(
+    webIndex = VectorStoreIndex.from_documents(webDocuments, service_context=service_context)
+    response_synthesizer = get_response_synthesizer(response_mode='refine')
+    st.session_state.webQuery_engine = webIndex.as_query_engine(
+        response_synthesizer=response_synthesizer,
+        service_context=service_context,
+    )
     st.session_state.web_chat_engine = CondenseQuestionChatEngine.from_defaults(
-        query_engine=webQuery_engine,
+        query_engine=st.session_state.webQuery_engine,
         condense_question_prompt=custom_prompt,
         chat_history=chat_history,
         verbose=True
@@ -87,8 +87,7 @@ if prompt := st.chat_input(disabled = not URLtext):
     st.session_state.webmessages.append({"role": "user", "content": prompt})
     st.chat_message("user").write(prompt)
     response = st.session_state.web_chat_engine.chat(prompt)
+    logger.debug(st.session_state.llama_debug_handler.get_llm_inputs_outputs())
     msg = str(response)
     st.session_state.webmessages.append({"role": "assistant", "content": msg})
     st.chat_message("assistant").write(msg)
-
-
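ChatbotWebRead builds a separate, throwaway in-memory index per URL rather than touching the persisted document index. The same flow in isolation, using only calls that appear above (the URL is illustrative):

from llama_index import SimpleWebPageReader, VectorStoreIndex

# Fetch one page as text, embed it into a fresh in-memory index, and query it.
docs = SimpleWebPageReader(html_to_text=True).load_data(["https://example.com"])
index = VectorStoreIndex.from_documents(docs)
print(index.as_query_engine().query("What is this page about?"))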
pages/ImportAllFile.py
ADDED
@@ -0,0 +1,76 @@
+import streamlit as st
+import common
+import os
+import pickle
+from llama_hub.file.cjk_pdf.base import CJKPDFReader
+from llama_hub.file.pptx.base import PptxReader
+from llama_hub.file.pandas_excel.base import PandasExcelReader
+from llama_hub.file.docx.base import DocxReader
+from llama_index import Document, SimpleDirectoryReader
+from pathlib import Path
+from log import logger
+INDEX_NAME = os.environ["INDEX_NAME"]
+PKL_NAME = os.environ["PKL_NAME"]
+
+common.check_login()
+
+if "file_uploader_key" not in st.session_state:
+    st.session_state["file_uploader_key"] = 0
+
+st.title("📝 ImportAllFile")
+
+uploaded_file = st.file_uploader("Upload an article", type=("txt", "md", "pdf", "xlsx", "docx", "pptx"), key=st.session_state["file_uploader_key"])
+if st.button("import", use_container_width=True):
+    filepath = os.path.join('documents', os.path.basename(uploaded_file.name))
+    try:
+        with open(filepath, 'wb') as f:
+            f.write(uploaded_file.getvalue())
+
+        loader = None
+        noextpath, extension = os.path.splitext(filepath)
+        logger.info(filepath)
+        document = Document()
+        if extension == ".txt" or extension == ".md":
+            logger.info("extension")
+            document = SimpleDirectoryReader(input_files=[filepath], filename_as_id=True).load_data()[0]
+        else:
+            logger.info("else")
+            if extension == ".pdf":
+                logger.info("CJKPDFReader")
+                loader = CJKPDFReader()
+            elif extension == ".pptx":
+                logger.info("PptxReader")
+                loader = PptxReader()
+            elif extension == ".xlsx":
+                logger.info("PandasExcelReader")
+                loader = PandasExcelReader(pandas_config={"header": 0})
+            elif extension == ".docx":
+                logger.info("DocxReader")
+                loader = DocxReader()
+            else:
+                # Unsupported extension: loader stays None, so the load below raises and lands in the except block.
+                logger.error("Can't read file: " + uploaded_file.name)
+            document = loader.load_data(file=Path(filepath))[0]
+        document.metadata = {'filename': os.path.basename(uploaded_file.name)}
+        st.session_state.stored_docs.append(uploaded_file.name)
+        logger.info(st.session_state.stored_docs)
+        st.session_state.index.insert(document=document)
+        st.session_state.index.storage_context.persist(persist_dir=INDEX_NAME)
+        os.remove(filepath)
+        common.setChatEngine()
+        with open(PKL_NAME, "wb") as f:
+            print("pickle")
+            pickle.dump(st.session_state.stored_docs, f)
+        st.session_state["file_uploader_key"] += 1
+        st.experimental_rerun()
+    except Exception as e:
+        # cleanup temp file
+        logger.error(e)
+        if filepath is not None and os.path.exists(filepath):
+            os.remove(filepath)
+
+st.subheader("Import File List")
+if "stored_docs" in st.session_state:
+    logger.info(st.session_state.stored_docs)
+    for docname in st.session_state.stored_docs:
+        st.write(docname)
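Unlike the bulk build in app.py, this page grows the live index one document at a time with insert() and then re-persists it. The essential calls, stripped of the Streamlit plumbing (a sketch assuming `index` is the loaded VectorStoreIndex; the text, filename, and path are illustrative):

from llama_index import Document

doc = Document(text="uploaded file contents", metadata={"filename": "example.docx"})
index.insert(document=doc)                              # incremental add, no full rebuild
index.storage_context.persist(persist_dir="./storage")  # write the updated index back to disk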