Spaces:

anasmkh
/

QdrantVectorStore_Llamaindex

Running

App Files Files Community

anasmkh committed on Feb 12

Commit

29c811a

verified ·

1 Parent(s): af5fe62

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -86

app.py CHANGED Viewed

@@ -1,120 +1,144 @@
 import os
 from getpass import getpass
-openai_api_key = os.getenv('OPENAI_API_KEY')
-openai_api_key = openai_api_key
 from llama_index.llms.openai import OpenAI
 from llama_index.embeddings.openai import OpenAIEmbedding
-from llama_index.core import Settings
-Settings.llm = OpenAI(model="gpt-3.5-turbo",temperature=0.4)
-Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")
-from llama_index.core import SimpleDirectoryReader
-documents = SimpleDirectoryReader("new_file").load_data()
-from llama_index.core import VectorStoreIndex, StorageContext
 from llama_index.vector_stores.qdrant import QdrantVectorStore
 import qdrant_client
-client = qdrant_client.QdrantClient(
-    location=":memory:",
 )
 vector_store = QdrantVectorStore(
-    collection_name = "paper",
     client=client,
     enable_hybrid=True,
     batch_size=20,
 )
 storage_context = StorageContext.from_defaults(vector_store=vector_store)
-index = VectorStoreIndex.from_documents(
-    documents,
-    storage_context=storage_context,
-)
-query_engine = index.as_query_engine(
-    vector_store_query_mode="hybrid"
-)
-from llama_index.core.memory import ChatMemoryBuffer
-memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
 chat_engine = index.as_chat_engine(
     chat_mode="context",
-    memory=memory,
-    system_prompt=(
-        """You are an AI assistant who answers the user questions,
-           use the schema fields to generate appriopriate and valid json queries"""
-    ),
 )
-# def is_greeting(user_input):
-#     greetings = ["hello", "hi", "hey", "good morning", "good afternoon", "good evening", "greetings"]
-#     user_input_lower = user_input.lower().strip()
-#     return any(greet in user_input_lower for greet in greetings)
-# def is_bye(user_input):
-#     greetings = ["thanks", "thanks you", "thanks a lot", "good answer", "good bye", "bye bye"]
-#     user_input_lower = user_input.lower().strip()
-#     return any(greet in user_input_lower for greet in greetings)
-import gradio as gr
 def chat_with_ai(user_input, chat_history):
-    # if is_greeting(user_input):
-    #     response = 'hi, how can i help you?'
-    #     chat_history.append((user_input, response))
-    #     return chat_history, ""
-    # elif is_bye(user_input):
-    #     response = "you're wlocome"
-    #     chat_history.append((user_input, response))
-    #     return chat_history, ""
     response = chat_engine.chat(user_input)
     references = response.source_nodes
-    ref,pages = [],[]
-    for i in range(len(references)):
-      if references[i].metadata['file_name'] not in ref:
-        ref.append(references[i].metadata['file_name'])
-      # pages.append(references[i].metadata['page_label'])
-    complete_response = str(response) + "\n\n"
-    if ref !=[] or pages!=[]:
-      chat_history.append((user_input, complete_response))
-      ref = []
-    elif ref==[] or pages==[]:
-      chat_history.append((user_input,str(response)))
     return chat_history, ""
 def clear_history():
     return [], ""
 def gradio_chatbot():
     with gr.Blocks() as demo:
-        gr.Markdown("# Chat Interface for LlamaIndex")
-        chatbot = gr.Chatbot(label="LlamaIndex Chatbot")
-        user_input = gr.Textbox(
-            placeholder="Ask a question...", label="Enter your question"
-        )
-        submit_button = gr.Button("Send")
-        btn_clear = gr.Button("Delete Context")
-        chat_history = gr.State([])
-        submit_button.click(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
-        user_input.submit(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
-        btn_clear.click(fn=clear_history, outputs=[chatbot, user_input])
     return demo
-gradio_chatbot().launch(debug=True)

 import os
+import shutil
 from getpass import getpass
+import gradio as gr
 from llama_index.llms.openai import OpenAI
 from llama_index.embeddings.openai import OpenAIEmbedding
+from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex, StorageContext
 from llama_index.vector_stores.qdrant import QdrantVectorStore
+from llama_index.core.memory import ChatMemoryBuffer
 import qdrant_client
+# Fail fast when OPENAI_API_KEY is missing. The value is only read and validated
+# here; it is not passed to the clients below (presumably they read the
+# environment variable themselves -- confirm against the OpenAI integration).
+openai_api_key = os.getenv('OPENAI_API_KEY')
+if not openai_api_key:
+    raise ValueError("Please set your OPENAI_API_KEY environment variable.")
+# System prompt reused by every chat engine built in this module
+SYSTEM_PROMPT = (
+    "You are an AI assistant who answers the user questions, "
+    "use the schema fields to generate appropriate and valid json queries"
 )
+# Configure the LLM and embedding models
+Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0.4)
+Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")
+# Load initial documents from a directory called "new_file" (runs at import time)
+documents = SimpleDirectoryReader("new_file").load_data()
+# Set up the Qdrant vector store (using an in-memory collection for simplicity)
+client = qdrant_client.QdrantClient(location=":memory:")
 vector_store = QdrantVectorStore(
+    collection_name="paper",
     client=client,
     enable_hybrid=True,
     batch_size=20,
 )
 storage_context = StorageContext.from_defaults(vector_store=vector_store)
+# Build the initial index and the chat engine (no separate query engine is created)
+index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
+# A single memory buffer (3000-token limit) is handed to the module-level chat
+# engine, so conversation context is shared by every caller of chat_engine.
+chat_memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
 chat_engine = index.as_chat_engine(
     chat_mode="context",
+    memory=chat_memory,
+    system_prompt=SYSTEM_PROMPT,
 )
+def process_uploaded_file(uploaded_file):
+    """
+    Add an uploaded file to the existing index.
+
+    Steps:
+      1. Copy the upload into the "uploads" folder.
+      2. Load only that file with SimpleDirectoryReader(input_files=[...]).
+      3. Append the new documents to the global documents list and insert them
+         into the existing index, so documents that are already indexed are not
+         embedded and written to the vector store a second time.
+
+    Args:
+        uploaded_file: Value delivered by the gr.File component: a filesystem
+            path, an object exposing the path as ``.name`` (which of the two
+            depends on the Gradio version), or None when nothing was selected.
+
+    Returns:
+        A status message for the "Upload Status" textbox.
+    """
+    if uploaded_file is None:
+        return "No file uploaded."
+    # Accept both a plain path and a tempfile-like wrapper with a .name attribute.
+    source_path = getattr(uploaded_file, "name", uploaded_file)
+    file_name = os.path.basename(source_path)
+    uploads_dir = "uploads"
+    os.makedirs(uploads_dir, exist_ok=True)
+    dest_path = os.path.join(uploads_dir, file_name)
+    shutil.copy(source_path, dest_path)
+    # Read just the new file; this avoids a shared scratch directory that would
+    # have to be emptied between uploads.
+    new_docs = SimpleDirectoryReader(input_files=[dest_path]).load_data()
+    if not new_docs:
+        return f"File '{file_name}' contained no readable content."
+    documents.extend(new_docs)
+    # Insert incrementally. Rebuilding with from_documents(documents, ...) on the
+    # same storage context would re-embed and re-store every earlier document.
+    # The chat engine was built from this index object, so it is left in place.
+    for doc in new_docs:
+        index.insert(doc)
+    return f"File '{file_name}' processed and added to index."
 def chat_with_ai(user_input, chat_history):
+    """
+    Send the user input to the chat engine and update the conversation history.
+
+    Blank input is ignored, so no request is made for an empty message.
+
+    Args:
+        user_input: Text typed by the user.
+        chat_history: List of (user message, reply) tuples; appended to in place.
+
+    Returns:
+        (chat_history, "") -- the updated history for the Chatbot component and
+        an empty string that clears the input textbox.
+    """
+    if not user_input or not user_input.strip():
+        return chat_history, ""
     response = chat_engine.chat(user_input)
     references = response.source_nodes
+    # Distinct source file names in first-seen order (nodes without a
+    # "file_name" metadata entry are skipped).
+    ref = list(dict.fromkeys(
+        node.metadata["file_name"]
+        for node in references
+        if "file_name" in node.metadata
+    ))
+    # Append the references to the reply only when there are any.
+    complete_response = str(response)
+    if ref:
+        complete_response += "\n\nReferences: " + ", ".join(ref)
+    chat_history.append((user_input, complete_response))
     return chat_history, ""
 def clear_history():
+    """
+    Reset the conversation: clear the chat engine's memory and the UI values.
+
+    Without the reset, the "Delete Context" button would only blank the display
+    while the chat engine kept the previous turns in its memory buffer.
+    NOTE: the engine and its memory are module-level objects, so this clears the
+    context for everything using this process, not just one browser session.
+
+    Returns:
+        ([], "") -- an empty history for the Chatbot and an empty textbox value.
+    """
+    chat_engine.reset()
     return [], ""
 def gradio_chatbot():
+    """
+    Create a Gradio interface with two tabs:
+      - "Chat" for interacting with the chat engine.
+      - "Upload" for uploading new files to update the index.
+
+    Returns:
+        The gr.Blocks app; the caller is responsible for launching it.
+    """
     with gr.Blocks() as demo:
+        gr.Markdown("# Chat Interface for LlamaIndex with File Upload")
+        with gr.Tab("Chat"):
+            chatbot = gr.Chatbot(label="LlamaIndex Chatbot")
+            user_input = gr.Textbox(
+                placeholder="Ask a question...", label="Enter your question"
+            )
+            submit_button = gr.Button("Send")
+            btn_clear = gr.Button("Delete Context")
+            chat_history = gr.State([])
+            def _clear_all():
+                # clear_history() supplies the Chatbot and textbox values. The
+                # State list has to be emptied too: otherwise the next message is
+                # appended to the old list and the cleared turns reappear.
+                cleared_chat, cleared_text = clear_history()
+                return cleared_chat, [], cleared_text
+            submit_button.click(chat_with_ai, inputs=[user_input, chat_history],
+                                  outputs=[chatbot, user_input])
+            user_input.submit(chat_with_ai, inputs=[user_input, chat_history],
+                              outputs=[chatbot, user_input])
+            btn_clear.click(fn=_clear_all, outputs=[chatbot, chat_history, user_input])
+        with gr.Tab("Upload"):
+            gr.Markdown("### Upload a file to add its content to the index")
+            file_upload = gr.File(label="Choose a file")
+            upload_button = gr.Button("Upload and Process")
+            upload_status = gr.Textbox(label="Upload Status")
+            upload_button.click(process_uploaded_file, inputs=[file_upload], outputs=[upload_status])
     return demo
+# Build and launch the Gradio app (debug mode) only when run as a script.
+if __name__ == "__main__":
+    gradio_chatbot().launch(debug=True)