import os
import shutil
import time
import gradio as gr
import qdrant_client
# Read the OpenAI API key from the environment.
openai_api_key = os.getenv('OPENAI_API_KEY')
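# Fail fast with a clear message if the key is missing, rather than erroring
# on the first OpenAI call (a minimal guard; it assumes the key is always
# supplied via the environment).
if not openai_api_key:
    raise EnvironmentError("OPENAI_API_KEY is not set; export it before launching the app.")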
# -------------------------------------------------------
# Configure LlamaIndex with OpenAI LLM and Embeddings
# -------------------------------------------------------
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings
Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0.4)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")
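# Note: text-embedding-ada-002 produces 1536-dimensional vectors, so the dense
# vector size of the Qdrant collection created below must also be 1536.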
# -------------------------------------------------------
# Import document readers, index, vector store, memory, etc.
# -------------------------------------------------------
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core.memory import ChatMemoryBuffer
# Global variables holding the index, chat engine, and Qdrant connection state.
chat_engine = None
index = None
query_engine = None
memory = None
client = None
vector_store = None
storage_context = None
# Define a persistent collection name.
collection_name = "paper"
# Use a persistent folder to store uploaded files.
upload_dir = "uploaded_files"
if not os.path.exists(upload_dir):
    os.makedirs(upload_dir)
# We do not clear the folder to keep previously uploaded files.
# -------------------------------------------------------
# Function to process uploaded files and update the index.
# -------------------------------------------------------
def process_upload(files):
    """
    Accepts a list of uploaded file paths, saves them to a persistent folder,
    loads the new documents, and builds or updates the vector index and chat engine.
    """
    global client, vector_store, storage_context, index, query_engine, memory, chat_engine

    # Copy files into the upload directory if they are not already present.
    new_file_paths = []
    for file_path in files:
        file_name = os.path.basename(file_path)
        dest = os.path.join(upload_dir, file_name)
        if not os.path.exists(dest):
            shutil.copy(file_path, dest)
            new_file_paths.append(dest)

    # If no new files were uploaded, notify the user.
    if not new_file_paths:
        return "No new documents to add."

    # Load only the new documents.
    new_documents = SimpleDirectoryReader(input_files=new_file_paths).load_data()

    # Initialize a persistent, embedded Qdrant client. (With a local `path`,
    # prefer_grpc has no effect; it only matters when connecting to a server.)
    client = qdrant_client.QdrantClient(
        path="./qdrant_db",
        prefer_grpc=True
    )

    # Ensure the collection exists.
    from qdrant_client.http import models
    existing_collections = {col.name for col in client.get_collections().collections}
    if collection_name not in existing_collections:
        client.create_collection(
            collection_name=collection_name,
            vectors_config={
                "text-dense": models.VectorParams(
                    size=1536,  # text-embedding-ada-002 produces 1536-dimensional vectors.
                    distance=models.Distance.COSINE
                )
            }
        )
        # Wait briefly for the collection creation to complete.
        time.sleep(1)

    # Initialize (or re-use) the vector store. enable_hybrid=True also stores
    # sparse vectors (llama-index uses a fastembed model by default), so the
    # fastembed package must be installed.
    vector_store = QdrantVectorStore(
        collection_name=collection_name,
        client=client,
        enable_hybrid=True,
        batch_size=20,
    )
    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    # Build the index if it doesn't exist; otherwise, update it in place.
    if index is None:
        # Load all documents from the persistent folder.
        index = VectorStoreIndex.from_documents(
            SimpleDirectoryReader(upload_dir).load_data(),
            storage_context=storage_context
        )
    else:
        # VectorStoreIndex exposes insert() for single documents (there is no
        # insert_documents method), so add the new documents one at a time.
        for doc in new_documents:
            index.insert(doc)

    # Reinitialize the query and chat engines so they reflect the updates.
    query_engine = index.as_query_engine(vector_store_query_mode="hybrid")
    memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
    chat_engine = index.as_chat_engine(
        chat_mode="context",
        memory=memory,
        system_prompt=(
            "You are an AI assistant who answers the user's questions; "
            "use the schema fields to generate appropriate and valid JSON queries."
        ),
    )
    return "Documents uploaded and index updated successfully!"
# -------------------------------------------------------
# Chat function that uses the built chat engine.
# -------------------------------------------------------
def chat_with_ai(user_input, chat_history):
    global chat_engine
    if chat_engine is None:
        return chat_history, "Please upload documents first."
    response = chat_engine.chat(user_input)

    # Collect the unique source file names cited by the retrieved nodes.
    references = response.source_nodes
    ref = []
    for node in references:
        file_name = node.metadata.get('file_name')
        if file_name and file_name not in ref:
            ref.append(file_name)

    # Append the sources (when available) so the user can see where the
    # answer came from.
    complete_response = str(response) + "\n\n"
    if ref:
        complete_response += "Sources: " + ", ".join(ref)
    chat_history.append((user_input, complete_response))
    return chat_history, ""
# -------------------------------------------------------
# Function to clear the chat history.
# -------------------------------------------------------
def clear_history():
    # Reset the visible chat, the input box, and the stored history state.
    # (The chat engine's ChatMemoryBuffer is not reset here.)
    return [], "", []
# -------------------------------------------------------
# Build the Gradio interface.
# -------------------------------------------------------
def gradio_interface():
    with gr.Blocks() as demo:
        gr.Markdown("# Chat Interface for LlamaIndex with File Upload")

        # Use Tabs to separate the file upload and chat interfaces.
        with gr.Tab("Upload Documents"):
            gr.Markdown("Upload PDF, Excel, CSV, DOC/DOCX, or TXT files below:")
            file_upload = gr.File(
                label="Upload Files",
                file_count="multiple",
                file_types=[".pdf", ".csv", ".txt", ".xlsx", ".xls", ".doc", ".docx"],
                type="filepath"  # returns file paths
            )
            upload_status = gr.Textbox(label="Upload Status", interactive=False)
            upload_button = gr.Button("Process Upload")
            upload_button.click(process_upload, inputs=file_upload, outputs=upload_status)

        with gr.Tab("Chat"):
            chatbot = gr.Chatbot(label="LlamaIndex Chatbot")
            user_input = gr.Textbox(
                placeholder="Ask a question...", label="Enter your question"
            )
            submit_button = gr.Button("Send")
            btn_clear = gr.Button("Clear History")
            # A State component to hold the chat history across turns.
            chat_history = gr.State([])

            submit_button.click(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
            user_input.submit(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
            # Also clear the stored history state, not just the visible widgets.
            btn_clear.click(clear_history, outputs=[chatbot, user_input, chat_history])
    return demo
# Launch the Gradio app.
gradio_interface().launch(debug=True)
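# To run locally, something like the following should work (the package list is
# the usual one for this stack and may need adjusting for your versions):
#   pip install gradio qdrant-client llama-index llama-index-vector-stores-qdrant fastembed
#   export OPENAI_API_KEY=...   # your key
#   python app.py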