Spaces:

anasmkh
/

QdrantVectorStore_Llamaindex

Sleeping

App Files Files Community

anasmkh committed on Feb 13

Commit

aff47dd

verified ·

1 Parent(s): c9eadbe

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -62

app.py CHANGED Viewed

@@ -24,7 +24,7 @@ from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageCon
 from llama_index.vector_stores.qdrant import QdrantVectorStore
 from llama_index.core.memory import ChatMemoryBuffer
-# Global variables to hold persistent objects.
 chat_engine = None
 index = None
 query_engine = None
@@ -33,122 +33,109 @@ client = None
 vector_store = None
 storage_context = None
-# Define a global collection name (you can change this as needed)
-collection_name = "paper"
 def process_upload(files):
     """
-    Process newly uploaded files by copying them into a persistent folder,
-    loading their content, and then either building a new index or inserting
-    new documents into the existing index.
     """
     upload_dir = "uploaded_files"
-    # Create the upload folder if it does not exist.
     if not os.path.exists(upload_dir):
         os.makedirs(upload_dir)
-    # Copy new files into the upload directory.
-    new_file_paths = []
     for file_path in files:
         file_name = os.path.basename(file_path)
         dest = os.path.join(upload_dir, file_name)
-        # Copy the file if it doesn't already exist.
-        if not os.path.exists(dest):
-            shutil.copy(file_path, dest)
-        new_file_paths.append(dest)
-    # Load only the newly uploaded documents.
-    # (SimpleDirectoryReader can accept a list of file paths via the 'input_files' parameter.)
-    documents = SimpleDirectoryReader(input_files=new_file_paths).load_data()
     global client, vector_store, storage_context, index, query_engine, memory, chat_engine
-    # Initialize Qdrant client if not already done.
-    if client is None:
-        client = qdrant_client.QdrantClient(
-            path="./qdrant_db",
-            prefer_grpc=True
-        )
-    # Ensure the collection exists.
-    from qdrant_client.http import models
-    existing_collections = {col.name for col in client.get_collections().collections}
-    if collection_name not in existing_collections:
-        client.create_collection(
-            collection_name=collection_name,
-            vectors_config=models.VectorParams(
-                size=1536,  # OpenAI's text-embedding-ada-002 produces 1536-d vectors.
-                distance=models.Distance.COSINE
-            )
-        )
-    # Initialize the vector store if not already done.
-    if vector_store is None:
-        vector_store = QdrantVectorStore(
-            collection_name=collection_name,
-            client=client,
-            enable_hybrid=True,
-            batch_size=20,
-        )
-    # Initialize storage context if not already done.
-    if storage_context is None:
-        storage_context = StorageContext.from_defaults(vector_store=vector_store)
-    # If no index exists yet, create one from the documents.
-    if index is None:
-        index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
-    else:
-        # Append the new documents to the existing index.
-        index.insert_documents(documents)
-    # (Optional) Reinitialize the query and chat engines so they reflect the updated index.
     query_engine = index.as_query_engine(vector_store_query_mode="hybrid")
     memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
     chat_engine = index.as_chat_engine(
         chat_mode="context",
         memory=memory,
-        system_prompt="You are an AI assistant who answers the user questions,"
     )
-    return "Documents uploaded and index updated successfully!"
 def chat_with_ai(user_input, chat_history):
     global chat_engine
     if chat_engine is None:
         return chat_history, "Please upload documents first."
     response = chat_engine.chat(user_input)
     references = response.source_nodes
-    ref = []
-    # Extract referenced file names from the response.
     for node in references:
         file_name = node.metadata.get('file_name')
         if file_name and file_name not in ref:
             ref.append(file_name)
     complete_response = str(response) + "\n\n"
-    if ref:
         chat_history.append((user_input, complete_response))
     else:
         chat_history.append((user_input, str(response)))
     return chat_history, ""
 def clear_history():
     return [], ""
 def gradio_interface():
     with gr.Blocks() as demo:
-        gr.Markdown("# AI Assistant")
         with gr.Tab("Upload Documents"):
             gr.Markdown("Upload PDF, Excel, CSV, DOC/DOCX, or TXT files below:")
             file_upload = gr.File(
                 label="Upload Files",
                 file_count="multiple",
                 file_types=[".pdf", ".csv", ".txt", ".xlsx", ".xls", ".doc", ".docx"],
-                type="filepath"  # Returns file paths.
             )
             upload_status = gr.Textbox(label="Upload Status", interactive=False)
             upload_button = gr.Button("Process Upload")
@@ -156,7 +143,7 @@ def gradio_interface():
             upload_button.click(process_upload, inputs=file_upload, outputs=upload_status)
         with gr.Tab("Chat"):
-            chatbot = gr.Chatbot(label="AI Assistant Chat Interface")
             user_input = gr.Textbox(
                 placeholder="Ask a question...", label="Enter your question"
             )
@@ -172,4 +159,5 @@ def gradio_interface():
     return demo
 gradio_interface().launch(debug=True)

 from llama_index.vector_stores.qdrant import QdrantVectorStore
 from llama_index.core.memory import ChatMemoryBuffer
+# Global variables to hold the index and chat engine.
 chat_engine = None
 index = None
 query_engine = None
 vector_store = None
 storage_context = None
+# -------------------------------------------------------
+# Function to process uploaded files and build the index.
+# -------------------------------------------------------
 def process_upload(files):
     """
+    Accepts a list of uploaded file paths, copies them into a local folder (after clearing
+    any files already there), loads them as documents, and rebuilds the vector index and chat engine.
     """
     upload_dir = "uploaded_files"
     if not os.path.exists(upload_dir):
         os.makedirs(upload_dir)
+    else:
+        # Clear any existing files in the folder.
+        for f in os.listdir(upload_dir):
+            os.remove(os.path.join(upload_dir, f))
+    # 'files' is a list of file paths (Gradio's File component with type="filepath")
     for file_path in files:
         file_name = os.path.basename(file_path)
         dest = os.path.join(upload_dir, file_name)
+        shutil.copy(file_path, dest)
+    # Load documents from the saved folder.
+    documents = SimpleDirectoryReader(upload_dir).load_data()
+    # Build the index and chat engine using Qdrant as the vector store.
     global client, vector_store, storage_context, index, query_engine, memory, chat_engine
+    client = qdrant_client.QdrantClient(location=":memory:")
+    vector_store = QdrantVectorStore(
+        collection_name="paper",
+        client=client,
+        enable_hybrid=True,
+        batch_size=20,
+    )
+    storage_context = StorageContext.from_defaults(vector_store=vector_store)
+    index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
     query_engine = index.as_query_engine(vector_store_query_mode="hybrid")
     memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
     chat_engine = index.as_chat_engine(
         chat_mode="context",
         memory=memory,
+        system_prompt=(
+            "You are an AI assistant who answers the user questions, "
+            "use the schema fields to generate appropriate and valid json queries"
+        ),
     )
+    return "Documents uploaded and index built successfully!"
+# -------------------------------------------------------
+# Chat function that uses the built chat engine.
+# -------------------------------------------------------
 def chat_with_ai(user_input, chat_history):
     global chat_engine
+    # Check if the chat engine is initialized.
     if chat_engine is None:
         return chat_history, "Please upload documents first."
     response = chat_engine.chat(user_input)
     references = response.source_nodes
+    ref, pages = [], []
+    # Extract file names from the source nodes (if available)
     for node in references:
         file_name = node.metadata.get('file_name')
         if file_name and file_name not in ref:
             ref.append(file_name)
     complete_response = str(response) + "\n\n"
+    if ref or pages:
         chat_history.append((user_input, complete_response))
     else:
         chat_history.append((user_input, str(response)))
     return chat_history, ""
+# -------------------------------------------------------
+# Function to clear the chat history.
+# -------------------------------------------------------
 def clear_history():
     return [], ""
+# -------------------------------------------------------
+# Build the Gradio interface.
+# -------------------------------------------------------
 def gradio_interface():
     with gr.Blocks() as demo:
+        gr.Markdown("# Chat Interface for LlamaIndex with File Upload")
+        # Use Tabs to separate the file upload and chat interfaces.
         with gr.Tab("Upload Documents"):
             gr.Markdown("Upload PDF, Excel, CSV, DOC/DOCX, or TXT files below:")
+            # The file upload widget: we specify allowed file types.
             file_upload = gr.File(
                 label="Upload Files",
                 file_count="multiple",
                 file_types=[".pdf", ".csv", ".txt", ".xlsx", ".xls", ".doc", ".docx"],
+                type="filepath"  # returns file paths
             )
             upload_status = gr.Textbox(label="Upload Status", interactive=False)
             upload_button = gr.Button("Process Upload")
             upload_button.click(process_upload, inputs=file_upload, outputs=upload_status)
         with gr.Tab("Chat"):
+            chatbot = gr.Chatbot(label="LlamaIndex Chatbot")
             user_input = gr.Textbox(
                 placeholder="Ask a question...", label="Enter your question"
             )
     return demo
+# Launch the Gradio app.
 gradio_interface().launch(debug=True)