Spaces:

anasmkh
/

QdrantVectorStore_Llamaindex

Sleeping

App Files Files Community

anasmkh committed on Feb 13

Commit

5bb0370

verified ·

1 Parent(s): f321ab3

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -11

app.py CHANGED Viewed

@@ -1,13 +1,16 @@
 import os
 import shutil
 import gradio as gr
 import qdrant_client
 from getpass import getpass
 openai_api_key = os.getenv('OPENAI_API_KEY')
 from llama_index.llms.openai import OpenAI
 from llama_index.embeddings.openai import OpenAIEmbedding
 from llama_index.core import Settings
@@ -31,6 +34,9 @@ client = None
 vector_store = None
 storage_context = None
 # -------------------------------------------------------
 # Function to process uploaded files and build the index.
 # -------------------------------------------------------
@@ -47,7 +53,7 @@ def process_upload(files):
         for f in os.listdir(upload_dir):
             os.remove(os.path.join(upload_dir, f))
-    # 'files' is a list of file paths (Gradio's File component with type="file")
     for file_path in files:
         file_name = os.path.basename(file_path)
         dest = os.path.join(upload_dir, file_name)
@@ -58,10 +64,30 @@ def process_upload(files):
     # Build the index and chat engine using Qdrant as the vector store.
     global client, vector_store, storage_context, index, query_engine, memory, chat_engine
-    client = qdrant_client.QdrantClient(location=":memory:")
     vector_store = QdrantVectorStore(
-        collection_name="paper",
         client=client,
         enable_hybrid=True,
         batch_size=20,
@@ -91,7 +117,6 @@ def process_upload(files):
 # -------------------------------------------------------
 def chat_with_ai(user_input, chat_history):
     global chat_engine
-    # Check if the chat engine is initialized.
     if chat_engine is None:
         return chat_history, "Please upload documents first."
@@ -99,7 +124,6 @@ def chat_with_ai(user_input, chat_history):
     references = response.source_nodes
     ref, pages = [], []
-    # Extract file names from the source nodes (if available)
     for node in references:
         file_name = node.metadata.get('file_name')
         if file_name and file_name not in ref:
@@ -125,15 +149,13 @@ def gradio_interface():
     with gr.Blocks() as demo:
         gr.Markdown("# Chat Interface for LlamaIndex with File Upload")
-        # Use Tabs to separate the file upload and chat interfaces.
         with gr.Tab("Upload Documents"):
             gr.Markdown("Upload PDF, Excel, CSV, DOC/DOCX, or TXT files below:")
-            # The file upload widget: we specify allowed file types.
             file_upload = gr.File(
                 label="Upload Files",
                 file_count="multiple",
                 file_types=[".pdf", ".csv", ".txt", ".xlsx", ".xls", ".doc", ".docx"],
-                type="filepath"  # returns file paths
             )
             upload_status = gr.Textbox(label="Upload Status", interactive=False)
             upload_button = gr.Button("Process Upload")
@@ -148,7 +170,6 @@ def gradio_interface():
             submit_button = gr.Button("Send")
             btn_clear = gr.Button("Clear History")
-            # A State to hold the chat history.
             chat_history = gr.State([])
             submit_button.click(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])

 import os
 import shutil
+import time
 import gradio as gr
 import qdrant_client
 from getpass import getpass
+# Set your OpenAI API key from environment variables.
 openai_api_key = os.getenv('OPENAI_API_KEY')
+# -------------------------------------------------------
+# Configure LlamaIndex with OpenAI LLM and Embeddings
+# -------------------------------------------------------
 from llama_index.llms.openai import OpenAI
 from llama_index.embeddings.openai import OpenAIEmbedding
 from llama_index.core import Settings
 vector_store = None
 storage_context = None
+# Define the collection name.
+collection_name = "paper"
 # -------------------------------------------------------
 # Function to process uploaded files and build the index.
 # -------------------------------------------------------
         for f in os.listdir(upload_dir):
             os.remove(os.path.join(upload_dir, f))
+    # 'files' is a list of file paths.
     for file_path in files:
         file_name = os.path.basename(file_path)
         dest = os.path.join(upload_dir, file_name)
     # Build the index and chat engine using Qdrant as the vector store.
     global client, vector_store, storage_context, index, query_engine, memory, chat_engine
+    # Use a persistent Qdrant client.
+    client = qdrant_client.QdrantClient(
+        path="./qdrant_db",
+        prefer_grpc=True
+    )
+    # Ensure the collection exists.
+    from qdrant_client.http import models
+    existing_collections = {col.name for col in client.get_collections().collections}
+    if collection_name not in existing_collections:
+        client.create_collection(
+            collection_name=collection_name,
+            vectors_config=models.VectorParams(
+                size=1536,  # text-embedding-ada-002 produces 1536-d vectors.
+                distance=models.Distance.COSINE
+            )
+        )
+        # Wait a moment for Qdrant to register the new collection.
+        time.sleep(1)
+    # Initialize the vector store.
     vector_store = QdrantVectorStore(
+        collection_name=collection_name,
         client=client,
         enable_hybrid=True,
         batch_size=20,
 # -------------------------------------------------------
 def chat_with_ai(user_input, chat_history):
     global chat_engine
     if chat_engine is None:
         return chat_history, "Please upload documents first."
     references = response.source_nodes
     ref, pages = [], []
     for node in references:
         file_name = node.metadata.get('file_name')
         if file_name and file_name not in ref:
     with gr.Blocks() as demo:
         gr.Markdown("# Chat Interface for LlamaIndex with File Upload")
         with gr.Tab("Upload Documents"):
             gr.Markdown("Upload PDF, Excel, CSV, DOC/DOCX, or TXT files below:")
             file_upload = gr.File(
                 label="Upload Files",
                 file_count="multiple",
                 file_types=[".pdf", ".csv", ".txt", ".xlsx", ".xls", ".doc", ".docx"],
+                type="filepath"
             )
             upload_status = gr.Textbox(label="Upload Status", interactive=False)
             upload_button = gr.Button("Process Upload")
             submit_button = gr.Button("Send")
             btn_clear = gr.Button("Clear History")
             chat_history = gr.State([])
             submit_button.click(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])