# Spaces: naotakigawa/qatool (status: Sleeping)
# File size: 9,215 Bytes

import streamlit as st
import os
import pickle
import faiss
import logging

from multiprocessing import Lock
from multiprocessing.managers import BaseManager
from llama_index.callbacks import CallbackManager, LlamaDebugHandler
from llama_index import VectorStoreIndex, Document,Prompt, SimpleDirectoryReader, ServiceContext, StorageContext, load_index_from_storage
from llama_index.chat_engine import CondenseQuestionChatEngine;
from llama_index.node_parser import SimpleNodeParser
from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
from llama_index.constants import DEFAULT_CHUNK_OVERLAP
from llama_index.response_synthesizers import get_response_synthesizer
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.graph_stores import SimpleGraphStore
from llama_index.storage.docstore import SimpleDocumentStore
from llama_index.storage.index_store import SimpleIndexStore
import tiktoken
from streamlit import runtime
from streamlit.runtime.scriptrunner import get_script_run_ctx
import ipaddress

from requests_oauthlib import OAuth2Session
from time import time
from dotenv import load_dotenv
from streamlit import net_util

# Load variables from a .env file (if any) before the os.environ reads below.
load_dotenv()

# Source-IP access control: raw allow-list text taken from the environment.
# Indexing os.environ raises KeyError at import time if the variable is unset.
ALLOW_IP_ADDRESS = os.environ["ALLOW_IP_ADDRESS"]

# Azure AD app registration details
CLIENT_ID = os.environ["CLIENT_ID"]
CLIENT_SECRET = os.environ["CLIENT_SECRET"]
TENANT_ID = os.environ["TENANT_ID"]

# Azure API
# Tenant-specific base URL from which the token and authorize endpoints are derived.
AUTHORITY = f"https://login.microsoftonline.com/{TENANT_ID}"
# Passed as the OAuth2 redirect_uri when building the authorization URL.
REDIRECT_PATH = os.environ["REDIRECT_PATH"]
TOKEN_URL = f"{AUTHORITY}/oauth2/v2.0/token"
AUTHORIZATION_URL = f"{AUTHORITY}/oauth2/v2.0/authorize"
# Scopes requested in the authorization request.
SCOPES = ["openid", "profile", "User.Read"]

# Build the URL used to start the login
def authorization_request():
    """Create the Azure AD authorization URL for the login link.

    Returns:
        A ``(authorization_url, state)`` pair as produced by
        ``OAuth2Session.authorization_url`` for ``AUTHORIZATION_URL``.
    """
    session = OAuth2Session(
        CLIENT_ID,
        redirect_uri=REDIRECT_PATH,
        scope=SCOPES,
    )
    login_url, login_state = session.authorization_url(AUTHORIZATION_URL)
    return login_url, login_state

# Exchange the authorization code for a token
def token_request(authorization_response, state):
    """Redeem an authorization code at the token endpoint.

    Args:
        authorization_response: Sequence of ``code`` query-parameter values
            from the redirect; the first element is used as the code.
        state: The OAuth2 ``state`` value to attach to the session (may be
            ``None``).

    Returns:
        The token mapping returned by ``OAuth2Session.fetch_token``.
    """
    # The authorization request was made with redirect_uri=REDIRECT_PATH, so
    # the token request has to repeat the same value (RFC 6749 section 4.1.3;
    # the Microsoft identity platform also documents it as required).
    oauth = OAuth2Session(CLIENT_ID, redirect_uri=REDIRECT_PATH, state=state)
    # Only ``code`` is passed: fetch_token parses ``authorization_response``
    # solely when no code is supplied, and it expects a URL string there,
    # not the list of query values this function receives.
    token = oauth.fetch_token(
        TOKEN_URL,
        code=authorization_response[0],
        client_secret=CLIENT_SECRET,
    )
    return token

# Directory the index is persisted to and reloaded from (see initialize_index).
index_name = "./data/storage"
# Pickle file whose contents are loaded into st.session_state.stored_docs.
pkl_name = "./data/stored_documents.pkl"

# Prompt passed to the chat engine as its condense-question prompt.
# It has two placeholders, {chat_history} and {question}. The text is sent
# as-is at runtime, so it is intentionally left in Japanese.
custom_prompt = Prompt("""\

  以下はこれまでの会話履歴と、ドキュメントを検索して回答する必要がある、ユーザーからの会話文です。

  会話と新しい会話文に基づいて、検索クエリを作成します。回答は日本語で行います。

  新しい会話文が挨拶の場合、挨拶を返してください。

  新しい会話文が質問の場合、検索した結果の回答を返してください。

  答えがわからない場合は正直にわからないと回答してください。

  会話履歴:

  {chat_history}

  新しい会話文:

  {question}

  Search query:

""")

# Initial (empty) chat history handed to the chat engine on construction.
chat_history = []

logging.basicConfig(level=logging.INFO)
# Name the logger after this module. The previous code passed the literal
# string "__name__", which created a logger literally called "__name__".
logger = logging.getLogger(__name__)
# Investigation log line; not emitted while the level configured above is INFO.
logger.debug("調査用ログ")

def initialize_index():
    """Load or build the vector index and store the engines in the session.

    If ``index_name`` exists on disk the persisted stores are loaded from it;
    otherwise the files under ``./documents`` are indexed into a new FAISS
    store and persisted to ``index_name``. In both cases the index, a query
    engine and a chat engine are written to ``st.session_state``, followed by
    ``stored_docs`` (unpickled from ``pkl_name`` when that file exists, else an
    empty list).
    """
    logger.info("initialize_index start")
    text_splitter = TokenTextSplitter(
        separator="。",
        chunk_size=1500,
        chunk_overlap=DEFAULT_CHUNK_OVERLAP,
        tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
    )
    node_parser = SimpleNodeParser(text_splitter=text_splitter)
    # Debug handler
    llama_debug_handler = LlamaDebugHandler()
    callback_manager = CallbackManager([llama_debug_handler])
    service_context = ServiceContext.from_defaults(
        node_parser=node_parser,
        callback_manager=callback_manager,
    )
    # NOTE(review): this lock is created on every call, so no two callers ever
    # share it and it cannot serialise concurrent initialisation. Kept as-is;
    # confirm whether a shared lock was intended.
    lock = Lock()
    with lock:
        if os.path.exists(index_name):
            # Reload every store from the persisted directory.
            storage_context = StorageContext.from_defaults(
                docstore=SimpleDocumentStore.from_persist_dir(persist_dir=index_name),
                graph_store=SimpleGraphStore.from_persist_dir(persist_dir=index_name),
                vector_store=FaissVectorStore.from_persist_dir(persist_dir=index_name),
                index_store=SimpleIndexStore.from_persist_dir(persist_dir=index_name),
            )
            st.session_state.index = load_index_from_storage(
                storage_context=storage_context,
                service_context=service_context,
            )
        else:
            # First run: build the index from the source documents and persist it.
            documents = SimpleDirectoryReader("./documents").load_data()
            # Vector dimensionality of the flat L2 index; presumably matches the
            # embedding model's output size -- TODO confirm.
            embedding_dim = 1536
            faiss_index = faiss.IndexFlatL2(embedding_dim)
            vector_store = FaissVectorStore(faiss_index=faiss_index)
            storage_context = StorageContext.from_defaults(vector_store=vector_store)
            st.session_state.index = VectorStoreIndex.from_documents(
                documents,
                storage_context=storage_context,
                service_context=service_context,
            )
            st.session_state.index.storage_context.persist(persist_dir=index_name)

        # Engine construction is the same whichever way the index was obtained.
        response_synthesizer = get_response_synthesizer(response_mode='refine')
        st.session_state.query_engine = st.session_state.index.as_query_engine(
            response_synthesizer=response_synthesizer,
            service_context=service_context,
        )
        st.session_state.chat_engine = CondenseQuestionChatEngine.from_defaults(
            query_engine=st.session_state.query_engine,
            condense_question_prompt=custom_prompt,
            chat_history=chat_history,
            verbose=True,
        )

        if os.path.exists(pkl_name):
            with open(pkl_name, "rb") as f:
                # NOTE(security): pickle.load executes arbitrary code from the
                # file; pkl_name must only ever be written by this application.
                st.session_state.stored_docs = pickle.load(f)
        else:
            st.session_state.stored_docs = []

# Get the connecting client's IP address
def get_remote_ip():
    """Return ``request.remote_ip`` of the client bound to the current script run.

    The client is looked up on the Streamlit runtime instance by the session
    id of the current script-run context.
    """
    script_ctx = get_script_run_ctx()
    streamlit_runtime = runtime.get_instance()
    client = streamlit_runtime.get_client(script_ctx.session_id)
    return client.request.remote_ip

# Decide whether the connecting IP is allowed
def is_allow_ip_address():
    """Return True when the connecting client may use the app.

    A client is allowed when its address is the IPv6 loopback ``::1``, is a
    private address (IPv4 or IPv6), or exactly equals one of the addresses
    listed in ``ALLOW_IP_ADDRESS``. An address that cannot be parsed is only
    checked against the allow-list, so it is denied unless listed verbatim.

    Returns:
        bool: True if access is permitted, False otherwise.
    """
    import re  # local import: only needed to tokenise the allow-list

    remote_ip = get_remote_ip()
    logger.info("remote_ip")
    logger.info(remote_ip)
    # localhost
    if remote_ip == "::1":
        return True

    # Private addresses. ip_address() accepts both IPv4 and IPv6, so an IPv6
    # client no longer raises here; an unparsable value skips this check
    # instead of crashing the page.
    try:
        ipaddr = ipaddress.ip_address(remote_ip)
    except ValueError:
        ipaddr = None
    if ipaddr is not None:
        logger.info("ipaddr")
        logger.info(ipaddr)
        if ipaddr.is_private:
            return True

    # Everything else: allow-list check. ALLOW_IP_ADDRESS is one string, so a
    # plain ``in`` would be a substring test ("1.2.3.4" would match an entry
    # "11.2.3.45"). Extract the address-like tokens, whatever delimiter the
    # setting uses, and require an exact match.
    allowed_addresses = set(re.findall(r"[0-9A-Fa-f:.]+", ALLOW_IP_ADDRESS))
    return remote_ip in allowed_addresses

def logout():
    """Clear the login-related entries of the Streamlit session state.

    Sets ``token``, ``token_expires`` and ``authorization_state`` to ``None``.
    """
    for session_key in ("token", "token_expires", "authorization_state"):
        st.session_state[session_key] = None

# Main page flow
def app():
    """Render the login page and, once authenticated, initialise the index.

    Steps, in order: reset the login entries in the session state, reject
    clients that fail the source-IP check with a 403 title, redeem a ``code``
    query parameter for a token when one is present, then either show the
    login link (no token or token expired) or the logged-in view with a
    logout button followed by ``initialize_index()``.
    """
    session = st.session_state

    # Initialisation
    session["token"] = None
    session["token_expires"] = time()
    session["authorization_state"] = None

    # Source-IP check: stop here for clients that are not allowed
    if not is_allow_ip_address():
        st.title("HTTP 403 Forbidden")
        return

    # Source IP accepted
    st.title("Azure AD Login with Streamlit")

    # "code" query parameter supplied by the redirect after authentication
    code_values = st.experimental_get_query_params().get("code")

    # Code present and no token yet: fetch and remember the token
    if code_values and session["token"] is None:
        issued_token = token_request(code_values, session["authorization_state"])
        session["token"] = issued_token
        session["token_expires"] = issued_token["expires_at"]

    # No token, or the token has expired: show the login link and stop
    login_required = (
        session["token"] is None
        or float(session["token_expires"]) <= time()
    )
    if login_required:
        authorization_url, session["authorization_state"] = authorization_request()
        st.markdown(f'[Click here to log in]({authorization_url})', unsafe_allow_html=True)
        return

    # Authenticated
    token_type = session["token"]["token_type"]
    st.markdown(f"Logged in successfully. Welcome, {token_type}!")
    logout_clicked = st.button("logout", use_container_width=True)
    if logout_clicked:
        logout()
        st.experimental_set_query_params()
        st.experimental_rerun()
    st.text("サイドバーから利用するメニューをお選びください。")
    initialize_index()

if __name__ == "__main__":
    # The login flow runs when the session has no token entry, the token is
    # None, or its expiry time has passed; otherwise the logged-in page is shown.
    session = st.session_state
    login_required = (
        "token" not in session
        or session["token"] is None
        or float(session["token_expires"]) <= time()
    )
    if not login_required:
        st.title("Azure AD Login with Streamlit")
        logout_clicked = st.button("logout", use_container_width=True)
        if logout_clicked:
            logout()
            st.experimental_set_query_params()
            st.experimental_rerun()
        st.text("ログイン済みです。")
        st.text("サイドバーから利用するメニューをお選びください。")
    else:
        app()