# Scraped-page artifacts (not code) — preserved as comments so the file parses:
# Spaces: Runtime error / Runtime error
# File size: 2,364 Bytes | commit 139f81c
# (line-number gutter 1..64 from the original page removed)
import os

import gradio as gr
import qdrant_client
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.core.indices.query.schema import QueryBundle
from llama_index.core.indices.vector_store.base import VectorStoreIndex
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.embeddings.gemini import GeminiEmbedding
from llama_index.llms.gemini import Gemini
from llama_index.vector_stores.qdrant import QdrantVectorStore
def embed_setup(embed_model_name="models/embedding-001",
                llm_model_name="models/gemini-pro",
                temperature=0.1):
    """Configure the global LlamaIndex ``Settings`` with Gemini models.

    Reads ``GEMINI_API_KEY`` from the environment (once, instead of twice).
    Model names and temperature are now parameters; the defaults match the
    previous hard-coded values, so existing callers are unaffected.

    Args:
        embed_model_name: Gemini embedding model identifier.
        llm_model_name: Gemini chat/completion model identifier.
        temperature: Sampling temperature for the Settings-level LLM.
    """
    api_key = os.getenv("GEMINI_API_KEY")
    Settings.embed_model = GeminiEmbedding(api_key=api_key,
                                           model_name=embed_model_name)
    Settings.llm = Gemini(api_key=api_key, temperature=temperature,
                          model_name=llm_model_name)
def qdrant_setup():
    """Connect to Qdrant using the ``QDRANT_URL`` / ``QDRANT_API_KEY`` env vars.

    Returns:
        A ready-to-use ``qdrant_client.QdrantClient`` instance.
    """
    return qdrant_client.QdrantClient(
        os.getenv("QDRANT_URL"),
        api_key=os.getenv("QDRANT_API_KEY"),
    )
def llm_setup():
    """Build a standalone Gemini LLM for query-time generation.

    Uses a higher temperature (0.6) than the Settings-level model, so
    responses are more varied. API key comes from ``GEMINI_API_KEY``.
    """
    return Gemini(
        api_key=os.getenv("GEMINI_API_KEY"),
        temperature=0.6,
        model_name="models/gemini-pro",
    )
def query_index(index, similarity_top_k=3, streaming=True):
    """Build a context-mode chat engine over *index*.

    Bug fix: ``similarity_top_k`` was accepted but never forwarded to
    ``as_chat_engine``, so it had no effect; it is now passed through to
    the underlying retriever.

    Args:
        index: A LlamaIndex vector index exposing ``as_chat_engine``.
        similarity_top_k: Number of context chunks to retrieve per query.
        streaming: NOTE(review): still unused — kept only for interface
            compatibility; ``as_chat_engine`` may not accept it as a
            kwarg, so forwarding it needs verification against the
            installed llama_index version.

    Returns:
        A chat engine with a 4000-token conversation memory buffer.
    """
    memory = ChatMemoryBuffer.from_defaults(token_limit=4000)
    chat_engine = index.as_chat_engine(
        chat_mode="context",
        memory=memory,
        similarity_top_k=similarity_top_k,
        system_prompt = (
            """You are an AI assistant named Gemini, created by Google. Your task is to provide helpful, accurate, and concise responses to user queries.
Context information is below:
----------------
{context_str}
----------------
Always answer based on the information in the context and be precise
Given this context, please respond to the following user query:
{query_str}
Also suggest 3 more questions based on the the context that the user can ask
Your response:"""
        ),)
    return chat_engine
def get_response(text, history=None):
    """Gradio ChatInterface callback: answer *text* via the module-level engine.

    Args:
        text: The user's latest message.
        history: Chat history supplied by Gradio — unused here, because
            conversation memory lives inside ``chat_engine`` itself.

    Returns:
        The engine's reply as a plain string.
    """
    reply = chat_engine.chat(text)
    return str(reply)
# --- Module-level wiring (runs on import; order matters) ---
embed_setup()  # must run first: sets the global embed model + LLM on Settings
client = qdrant_setup()  # Qdrant connection from QDRANT_URL / QDRANT_API_KEY
llm = llm_setup()  # query-time LLM (temperature 0.6)
# Vector store over the pre-populated collection named in COLLECTION_NAME.
vector_store = QdrantVectorStore(client = client,collection_name=os.getenv("COLLECTION_NAME"))
index = VectorStoreIndex.from_vector_store(llm = llm, vector_store = vector_store)
chat_engine = query_index(index) # initialize the query engine
# Gradio chat UI; get_response reads the module-level chat_engine above.
t = gr.ChatInterface(get_response, analytics_enabled=True)
t.launch(debug=True, share=True)  # share=True exposes a public tunnel URL
# (trailing "|" artifact from the scraped page removed)