# Web-page header captured together with the file (not part of the program):
# qatool / app.py
# naotakigawa's picture
# log
# 231ac24
# raw
# history blame
# 9.22 kB
import streamlit as st
import os
import pickle
import faiss
import logging
from multiprocessing import Lock
from multiprocessing.managers import BaseManager
from llama_index.callbacks import CallbackManager, LlamaDebugHandler
from llama_index import VectorStoreIndex, Document,Prompt, SimpleDirectoryReader, ServiceContext, StorageContext, load_index_from_storage
from llama_index.chat_engine import CondenseQuestionChatEngine;
from llama_index.node_parser import SimpleNodeParser
from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
from llama_index.constants import DEFAULT_CHUNK_OVERLAP
from llama_index.response_synthesizers import get_response_synthesizer
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.graph_stores import SimpleGraphStore
from llama_index.storage.docstore import SimpleDocumentStore
from llama_index.storage.index_store import SimpleIndexStore
import tiktoken
from streamlit import runtime
from streamlit.runtime.scriptrunner import get_script_run_ctx
import ipaddress
from requests_oauthlib import OAuth2Session
from time import time
from dotenv import load_dotenv
from streamlit import net_util
# Pull settings from a .env file (python-dotenv) before reading os.environ.
load_dotenv()

# Source-address control: raw allow-list text consulted by the IP check.
ALLOW_IP_ADDRESS = os.environ["ALLOW_IP_ADDRESS"]

# Azure AD app registration details
CLIENT_ID = os.environ["CLIENT_ID"]
CLIENT_SECRET = os.environ["CLIENT_SECRET"]
TENANT_ID = os.environ["TENANT_ID"]

# Azure API endpoints, all derived from the tenant-specific authority URL.
AUTHORITY = "https://login.microsoftonline.com/" + TENANT_ID
# Redirect URI sent with the authorization request.
REDIRECT_PATH = os.environ["REDIRECT_PATH"]
TOKEN_URL = AUTHORITY + "/oauth2/v2.0/token"
AUTHORIZATION_URL = AUTHORITY + "/oauth2/v2.0/authorize"
# Scopes requested when the user signs in.
SCOPES = ["openid", "profile", "User.Read"]
# 認証用URL取得
def authorization_request():
    """Build the sign-in URL for the authorization endpoint.

    A new OAuth2 session is created from CLIENT_ID, REDIRECT_PATH and
    SCOPES, and asked for an authorization URL at AUTHORIZATION_URL.

    Returns:
        tuple: ``(authorization_url, state)`` as produced by
        ``OAuth2Session.authorization_url``.
    """
    session = OAuth2Session(
        client_id=CLIENT_ID,
        scope=SCOPES,
        redirect_uri=REDIRECT_PATH,
    )
    login_url, issued_state = session.authorization_url(AUTHORIZATION_URL)
    return login_url, issued_state
# 認証トークン取得
def token_request(authorization_response, state):
    """Exchange an authorization code for a token at TOKEN_URL.

    Args:
        authorization_response: Non-empty sequence whose first element is
            the authorization code (the caller passes the values of the
            ``code`` query parameter).
        state: OAuth ``state`` value given to the session; may be None.

    Returns:
        The token returned by ``OAuth2Session.fetch_token``.

    Raises:
        ValueError: If ``authorization_response`` is empty or None, i.e.
            there is no code to exchange.
    """
    if not authorization_response:
        raise ValueError("authorization_response must contain an authorization code")

    # The token request must carry the same redirect_uri that was sent with
    # the authorization request (see authorization_request), otherwise the
    # identity provider can reject the code exchange.
    oauth = OAuth2Session(CLIENT_ID, redirect_uri=REDIRECT_PATH, state=state)
    token = oauth.fetch_token(
        TOKEN_URL,
        code=authorization_response[0],
        authorization_response=authorization_response,
        client_secret=CLIENT_SECRET,
    )
    return token
# Directory the index storage is persisted to and loaded from.
index_name = "./data/storage"
# Pickle file from which the stored-documents list is loaded.
pkl_name = "./data/stored_documents.pkl"

# Prompt handed to the chat engine as its condense-question prompt; it
# receives the chat history and the new user message as template variables.
custom_prompt = Prompt("""\
以下はこれまでの会話履歴と、ドキュメントを検索して回答する必要がある、ユーザーからの会話文です。
会話と新しい会話文に基づいて、検索クエリを作成します。回答は日本語で行います。
新しい会話文が挨拶の場合、挨拶を返してください。
新しい会話文が質問の場合、検索した結果の回答を返してください。
答えがわからない場合は正直にわからないと回答してください。
会話履歴:
{chat_history}
新しい会話文:
{question}
Search query:
""")

# Initial (empty) chat history passed to the chat engine.
chat_history = []

logging.basicConfig(level=logging.INFO)
# Name the logger after the module itself; the quoted "__name__" used before
# created a logger literally called "__name__".
logger = logging.getLogger(__name__)
# Investigation log line; not emitted while the level is INFO.
logger.debug("調査用ログ")
def initialize_index():
    """Load or build the vector index and store the engines in session state.

    When the ``index_name`` directory exists, the doc store, graph store,
    FAISS vector store and index store are read from it and the index is
    loaded. Otherwise the files under ``./documents`` are read, indexed into
    a new FAISS store and persisted to ``index_name``. In both cases a query
    engine (response mode ``refine``) and a condense-question chat engine
    are created on top of the index.

    Afterwards the stored-documents list is loaded from ``pkl_name`` when
    that file exists, else it starts as an empty list.

    Side effects:
        Sets ``st.session_state.index``, ``.query_engine``, ``.chat_engine``
        and ``.stored_docs``; may write the index to ``index_name``.
    """
    logger.info("initialize_index start")

    text_splitter = TokenTextSplitter(
        separator="。",
        chunk_size=1500,
        chunk_overlap=DEFAULT_CHUNK_OVERLAP,
        tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
    )
    node_parser = SimpleNodeParser(text_splitter=text_splitter)

    # For debugging
    callback_manager = CallbackManager([LlamaDebugHandler()])
    service_context = ServiceContext.from_defaults(
        node_parser=node_parser,
        callback_manager=callback_manager,
    )

    # NOTE(review): the lock is created on every call, so it cannot exclude
    # other callers; kept as in the original - confirm whether a shared lock
    # was intended.
    lock = Lock()
    with lock:
        if os.path.exists(index_name):
            storage_context = StorageContext.from_defaults(
                docstore=SimpleDocumentStore.from_persist_dir(persist_dir=index_name),
                graph_store=SimpleGraphStore.from_persist_dir(persist_dir=index_name),
                vector_store=FaissVectorStore.from_persist_dir(persist_dir=index_name),
                index_store=SimpleIndexStore.from_persist_dir(persist_dir=index_name),
            )
            st.session_state.index = load_index_from_storage(
                storage_context=storage_context,
                service_context=service_context,
            )
        else:
            documents = SimpleDirectoryReader("./documents").load_data()
            # The FAISS index is only needed when building from scratch.
            # presumably 1536 is the embedding vector size - TODO confirm
            embedding_dim = 1536
            vector_store = FaissVectorStore(faiss_index=faiss.IndexFlatL2(embedding_dim))
            storage_context = StorageContext.from_defaults(vector_store=vector_store)
            st.session_state.index = VectorStoreIndex.from_documents(
                documents,
                storage_context=storage_context,
                service_context=service_context,
            )
            st.session_state.index.storage_context.persist(persist_dir=index_name)

        # Engine wiring is the same whether the index was loaded or built.
        response_synthesizer = get_response_synthesizer(response_mode='refine')
        st.session_state.query_engine = st.session_state.index.as_query_engine(
            response_synthesizer=response_synthesizer,
            service_context=service_context,
        )
        st.session_state.chat_engine = CondenseQuestionChatEngine.from_defaults(
            query_engine=st.session_state.query_engine,
            condense_question_prompt=custom_prompt,
            chat_history=chat_history,
            verbose=True,
        )

        if os.path.exists(pkl_name):
            # NOTE(review): pickle.load can execute arbitrary code from the
            # file; this is only safe while pkl_name is written exclusively
            # by trusted code.
            with open(pkl_name, "rb") as f:
                st.session_state.stored_docs = pickle.load(f)
        else:
            st.session_state.stored_docs = []
# 接続元IP取得
def get_remote_ip():
    """Return the remote IP recorded on the current session's client request.

    The session id comes from the current script-run context and is used to
    look the client up in the Streamlit runtime instance.
    """
    run_ctx = get_script_run_ctx()
    streamlit_runtime = runtime.get_instance()
    client = streamlit_runtime.get_client(run_ctx.session_id)
    return client.request.remote_ip
# 接続元IP許可判定
# Characters that can appear inside an IPv4/IPv6 address literal; anything
# else in the allow-list text is treated as a separator.
_ADDRESS_CHARS = frozenset("0123456789abcdefABCDEF.:")


def _normalize_ip(value):
    """Parse ``value`` as an IP address, unwrapping IPv4-mapped IPv6 forms.

    Raises:
        ValueError: If ``value`` is not a valid IPv4 or IPv6 address.
    """
    address = ipaddress.ip_address(value)
    mapped = getattr(address, "ipv4_mapped", None)
    return address if mapped is None else mapped


def _parse_allow_list(raw):
    """Return the set of addresses found in the allow-list text ``raw``.

    Tokens that are not valid IP addresses are ignored.
    """
    spaced = "".join(ch if ch in _ADDRESS_CHARS else " " for ch in raw)
    allowed = set()
    for token in spaced.split():
        try:
            allowed.add(_normalize_ip(token))
        except ValueError:
            continue
    return allowed


def is_allow_ip_address():
    """Decide whether the connecting client is allowed to use the app.

    A client is allowed when its address is a loopback address, lies in a
    private range, or exactly equals one of the addresses listed in
    ALLOW_IP_ADDRESS. IPv4 and IPv6 are both accepted; an address that
    cannot be parsed is denied instead of raising.

    Returns:
        bool: True if the client may proceed, False otherwise.
    """
    remote_ip = get_remote_ip()
    logger.info("remote_ip")
    logger.info(remote_ip)

    try:
        ipaddr = _normalize_ip(remote_ip)
    except ValueError:
        # Unknown or malformed source address: deny by default.
        return False
    logger.info("ipaddr")
    logger.info(ipaddr)

    # localhost and private addresses
    if ipaddr.is_loopback or ipaddr.is_private:
        return True

    # Everything else: whole-address comparison against the allow list
    # (a substring test would let "1.2.3.4" match "11.2.3.45").
    return ipaddr in _parse_allow_list(ALLOW_IP_ADDRESS)
def logout():
    """Clear the login-related entries in the Streamlit session state.

    Sets ``token``, ``token_expires`` and ``authorization_state`` to None.
    """
    for key in ("token", "token_expires", "authorization_state"):
        st.session_state[key] = None
# メイン
def app():
    """Render the login page: IP gate, Azure AD sign-in, then index setup.

    Steps on each run:
      1. Reset the token-related session entries.
      2. Show "HTTP 403 Forbidden" and stop when the client IP is not allowed.
      3. If the URL carries a ``code`` query parameter, exchange it for a
         token and remember its expiry.
      4. Without a valid token, show the login link; otherwise greet the
         user, offer a logout button and call ``initialize_index()``.

    NOTE(review): the source had lost its indentation; the final sidebar
    hint and the ``initialize_index()`` call are assumed to belong to the
    authenticated branch - confirm against the original file.
    """
    session = st.session_state

    # Initialisation
    session["token"] = None
    session["token_expires"] = time()
    session["authorization_state"] = None

    # Source IP check
    if not is_allow_ip_address():
        st.title("HTTP 403 Forbidden")
        return

    # Source address accepted
    st.title("Azure AD Login with Streamlit")

    # Values of the "code" GET parameter set by the post-login redirect
    code_values = st.experimental_get_query_params().get("code")

    # Code present but no token yet: fetch and store the token
    if code_values and session["token"] is None:
        issued = token_request(code_values, session["authorization_state"])
        session["token"] = issued
        session["token_expires"] = issued["expires_at"]

    # No token, or token expired: show the login link and stop
    if session["token"] is None or float(session["token_expires"]) <= time():
        login_url, new_state = authorization_request()
        session["authorization_state"] = new_state
        st.markdown(f'[Click here to log in]({login_url})', unsafe_allow_html=True)
        return

    # Authenticated
    token_type = session["token"]["token_type"]
    st.markdown(f"Logged in successfully. Welcome, {token_type}!")
    if st.button("logout", use_container_width=True):
        logout()
        st.experimental_set_query_params()
        st.experimental_rerun()
    st.text("サイドバーから利用するメニューをお選びください。")
    initialize_index()
if __name__ == "__main__":
    # Script entry point: run the login flow unless the session already
    # holds a token that has not expired.
    _session = st.session_state
    _login_required = (
        "token" not in _session
        or _session["token"] is None
        or float(_session["token_expires"]) <= time()
    )
    if _login_required:
        app()
    else:
        # Already signed in: show the title, a logout button and the hints.
        st.title("Azure AD Login with Streamlit")
        if st.button("logout", use_container_width=True):
            logout()
            st.experimental_set_query_params()
            st.experimental_rerun()
        st.text("ログイン済みです。")
        st.text("サイドバーから利用するメニューをお選びください。")