Spaces:

anasmkh
/

QdrantVectorStore_Llamaindex

Sleeping

App Files Files Community

anasmkh committed on Feb 13

Commit

95989dc

verified ·

1 Parent(s): 02a57c6

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -62

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import os
 import shutil
-import time
 import gradio as gr
 import qdrant_client
 from getpass import getpass
@@ -34,66 +33,37 @@ client = None
 vector_store = None
 storage_context = None
-# Define a persistent collection name.
-collection_name = "paper"
-# Use a persistent folder to store uploaded files.
-upload_dir = "uploaded_files"
-if not os.path.exists(upload_dir):
-    os.makedirs(upload_dir)
-# We do not clear the folder to keep previously uploaded files.
 # -------------------------------------------------------
-# Function to process uploaded files and update the index.
 # -------------------------------------------------------
 def process_upload(files):
     """
-    Accepts a list of uploaded file paths, saves them to a persistent folder,
-    loads new documents, and builds or updates the vector index and chat engine.
     """
-    global client, vector_store, storage_context, index, query_engine, memory, chat_engine
-    # Copy files into the upload directory if not already present.
-    new_file_paths = []
     for file_path in files:
         file_name = os.path.basename(file_path)
         dest = os.path.join(upload_dir, file_name)
-        if not os.path.exists(dest):
-            shutil.copy(file_path, dest)
-            new_file_paths.append(dest)
-    # If no new files are uploaded, notify the user.
-    if not new_file_paths:
-        return "No new documents to add."
-    # Load only the new documents.
-    new_documents = SimpleDirectoryReader(input_files=new_file_paths).load_data()
-    # Initialize a persistent Qdrant client.
-    client = qdrant_client.QdrantClient(
-        path="./qdrant_db",
-        prefer_grpc=True
-    )
-    # Ensure the collection exists.
-    from qdrant_client.http import models
-    existing_collections = {col.name for col in client.get_collections().collections}
-    if collection_name not in existing_collections:
-        client.create_collection(
-            collection_name=collection_name,
-            vectors_config={
-                "text-dense": models.VectorParams(
-                    size=1536,  # text-embedding-ada-002 produces 1536-dimensional vectors.
-                    distance=models.Distance.COSINE
-                )
-            }
-        )
-        # Wait briefly for the collection creation to complete.
-        time.sleep(1)
-    # Initialize (or re-use) the vector store.
     vector_store = QdrantVectorStore(
-        collection_name=collection_name,
         client=client,
         enable_hybrid=True,
         batch_size=20,
@@ -101,19 +71,12 @@ def process_upload(files):
     storage_context = StorageContext.from_defaults(vector_store=vector_store)
-    # Build the index if it doesn't exist; otherwise, update it.
-    if index is None:
-        # Load all documents from the persistent folder.
-        index = VectorStoreIndex.from_documents(
-            SimpleDirectoryReader(upload_dir).load_data(),
-            storage_context=storage_context
-        )
-    else:
-        index.insert_documents(new_documents)
-    # Reinitialize query and chat engines to reflect updates.
     query_engine = index.as_query_engine(vector_store_query_mode="hybrid")
     memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
     chat_engine = index.as_chat_engine(
         chat_mode="context",
         memory=memory,
@@ -123,26 +86,32 @@ def process_upload(files):
         ),
     )
-    return "Documents uploaded and index updated successfully!"
 # -------------------------------------------------------
 # Chat function that uses the built chat engine.
 # -------------------------------------------------------
 def chat_with_ai(user_input, chat_history):
     global chat_engine
     if chat_engine is None:
         return chat_history, "Please upload documents first."
     response = chat_engine.chat(user_input)
     references = response.source_nodes
-    ref = []
     for node in references:
         file_name = node.metadata.get('file_name')
         if file_name and file_name not in ref:
             ref.append(file_name)
     complete_response = str(response) + "\n\n"
-    chat_history.append((user_input, complete_response))
     return chat_history, ""
 # -------------------------------------------------------
@@ -161,6 +130,7 @@ def gradio_interface():
         # Use Tabs to separate the file upload and chat interfaces.
         with gr.Tab("Upload Documents"):
             gr.Markdown("Upload PDF, Excel, CSV, DOC/DOCX, or TXT files below:")
             file_upload = gr.File(
                 label="Upload Files",
                 file_count="multiple",

 import os
 import shutil
 import gradio as gr
 import qdrant_client
 from getpass import getpass
 vector_store = None
 storage_context = None
 # -------------------------------------------------------
+# Function to process uploaded files and build the index.
 # -------------------------------------------------------
 def process_upload(files):
     """
+    Accepts a list of uploaded file paths, saves them to a local folder,
+    loads them as documents, and builds the vector index and chat engine.
     """
+    upload_dir = "uploaded_files"
+    if not os.path.exists(upload_dir):
+        os.makedirs(upload_dir)
+    else:
+        # Clear any existing files in the folder.
+        for f in os.listdir(upload_dir):
+            os.remove(os.path.join(upload_dir, f))
+    # 'files' is expected to be a list of file paths (e.g. Gradio's File component with type="filepath")
     for file_path in files:
         file_name = os.path.basename(file_path)
         dest = os.path.join(upload_dir, file_name)
+        shutil.copy(file_path, dest)
+    # Load documents from the saved folder.
+    documents = SimpleDirectoryReader(upload_dir).load_data()
+    # Build the index and chat engine using Qdrant as the vector store.
+    global client, vector_store, storage_context, index, query_engine, memory, chat_engine
+    client = qdrant_client.QdrantClient(location=":memory:")
     vector_store = QdrantVectorStore(
+        collection_name="paper",
         client=client,
         enable_hybrid=True,
         batch_size=20,
     storage_context = StorageContext.from_defaults(vector_store=vector_store)
+    index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
     query_engine = index.as_query_engine(vector_store_query_mode="hybrid")
     memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
     chat_engine = index.as_chat_engine(
         chat_mode="context",
         memory=memory,
         ),
     )
+    return "Documents uploaded and index built successfully!"
 # -------------------------------------------------------
 # Chat function that uses the built chat engine.
 # -------------------------------------------------------
 def chat_with_ai(user_input, chat_history):
     global chat_engine
+    # Check if the chat engine is initialized.
     if chat_engine is None:
         return chat_history, "Please upload documents first."
     response = chat_engine.chat(user_input)
     references = response.source_nodes
+    ref, pages = [], []
+    # Extract file names from the source nodes (if available)
     for node in references:
         file_name = node.metadata.get('file_name')
         if file_name and file_name not in ref:
             ref.append(file_name)
     complete_response = str(response) + "\n\n"
+    if ref or pages:
+        chat_history.append((user_input, complete_response))
+    else:
+        chat_history.append((user_input, str(response)))
     return chat_history, ""
 # -------------------------------------------------------
         # Use Tabs to separate the file upload and chat interfaces.
         with gr.Tab("Upload Documents"):
             gr.Markdown("Upload PDF, Excel, CSV, DOC/DOCX, or TXT files below:")
+            # The file upload widget: we specify allowed file types.
             file_upload = gr.File(
                 label="Upload Files",
                 file_count="multiple",