import os

import gradio as gr
from dotenv import load_dotenv
from llama_index.core import Settings, StorageContext, load_index_from_storage
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from llama_index.llms.azure_openai import AzureOpenAI

load_dotenv(override=True)

api_key = os.getenv("AZURE_OPENAI_API_KEY")
api_version = "2024-05-01-preview"
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")

llm = AzureOpenAI(
    model="gpt-4o",
    deployment_name="gpt-4o",
    api_key=api_key,
    azure_endpoint=azure_endpoint,
    api_version=api_version,
)

# You need to deploy your own embedding model as well as your own chat completion model
embed_model = AzureOpenAIEmbedding(
    model="text-embedding-3-small",
    deployment_name="text-embedding-3-small",
    api_key=api_key,
    azure_endpoint=azure_endpoint,
    api_version=api_version,
)

Settings.llm = llm
Settings.embed_model = embed_model

# Rebuild the storage context from the persisted index directory
storage_context = StorageContext.from_defaults(persist_dir="./index")

# Load the index and build a query engine over it
index = load_index_from_storage(storage_context)
query_engine = index.as_query_engine(similarity_top_k=10)


# Handle a chat message, folding the conversation history into the query
def echo(message, history):
    # history is a list of (user_message, bot_message) pairs managed by gr.ChatInterface
    context = "\n".join(
        f"User: {user_msg}\nBot: {bot_msg}" for user_msg, bot_msg in history
    )
    full_context = f"{context}\nUser: {message}"
    response = query_engine.query(full_context).response
    # gr.ChatInterface records (message, response) in the history itself,
    # so mutating `history` here is unnecessary; just return the reply.
    return response


demo = gr.ChatInterface(
    fn=echo,
    examples=[
        "光源氏はどのような人物ですか?",  # "What kind of person is Hikaru Genji?"
        "夕顔はどのような人物ですか?",  # "What kind of person is Yugao?"
    ],
    title="Llama Index Chatbot",
)

demo.launch()
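# ---------------------------------------------------------------------------
# Note: this script only loads an index; it assumes one has already been
# persisted to ./index. A minimal sketch of how such an index could be built
# beforehand (the ./data source directory is an assumption, not part of this
# script):
#
#   from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
#
#   documents = SimpleDirectoryReader("./data").load_data()
#   index = VectorStoreIndex.from_documents(documents)  # embeds via Settings.embed_model
#   index.storage_context.persist(persist_dir="./index")
# ---------------------------------------------------------------------------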