import os
from getpass import getpass

# Read the API key from the environment, prompting for it if it is not set
openai_api_key = os.getenv('OPENAI_API_KEY') or getpass("Enter your OpenAI API key: ")
os.environ['OPENAI_API_KEY'] = openai_api_key

from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings

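# Configure the global LLM and embedding model used by all LlamaIndex components below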
Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0.4)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

from llama_index.core import SimpleDirectoryReader

# Load the initial documents from the "new_file" directory (the same folder used for uploads below)
documents = SimpleDirectoryReader("new_file").load_data()

from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
import qdrant_client

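# Start an in-memory Qdrant instance; vectors are not persisted across runs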
client = qdrant_client.QdrantClient(
    location=":memory:",
)

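# Hybrid mode combines dense vectors with sparse (keyword-style) vectors;
# note: enable_hybrid=True relies on the optional sparse-embedding dependency (fastembed) being installed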
vector_store = QdrantVectorStore(
    collection_name="paper",
    client=client,
    enable_hybrid=True,
    batch_size=20,
)

storage_context = StorageContext.from_defaults(vector_store=vector_store)

index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)

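# Standalone query engine over the same index (the chat engine below is what the UI actually uses)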
query_engine = index.as_query_engine(
    vector_store_query_mode="hybrid"
)

from llama_index.core.memory import ChatMemoryBuffer

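# Keep roughly the last 3000 tokens of conversation as chat memory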
memory = ChatMemoryBuffer.from_defaults(token_limit=3000)

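# "context" chat mode retrieves relevant chunks from the index on every turn
# and passes them to the LLM together with the chat history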
chat_engine = index.as_chat_engine(
    chat_mode="context",
    memory=memory,
    system_prompt=(
        """You are an AI assistant who answers the user questions,
           use the schema fields to generate appropriate and valid json queries"""
    ),
)

import gradio as gr

def chat_with_ai(user_input, chat_history):
    """Send the user message to the chat engine and append the response
    (with source file references, when available) to the chat history."""
    response = chat_engine.chat(user_input)

    # Collect the unique source file names of the retrieved chunks
    ref = []
    for node in response.source_nodes:
        file_name = node.metadata.get('file_name')
        if file_name and file_name not in ref:
            ref.append(file_name)

    complete_response = str(response)
    if ref:
        complete_response += "\n\nSources: " + ", ".join(ref)
    chat_history.append((user_input, complete_response))

    return chat_history, ""

def clear_history():
    return [], ""

import os
import PyPDF2
import docx
import pandas as pd

def extract_text_from_file(file_path):
    """
    Extracts text from the file based on its extension.
    Supports: PDF, DOC/DOCX, TXT, XLS/XLSX.
    """
    ext = os.path.splitext(file_path)[1].lower()
    text = ""
    
    if ext == ".pdf":
        try:
            with open(file_path, "rb") as f:
                pdf_reader = PyPDF2.PdfReader(f)
                for page in pdf_reader.pages:
                    page_text = page.extract_text()
                    if page_text:
                        text += page_text + "\n"
        except Exception as e:
            text = f"Error processing PDF: {e}"
    
    elif ext in [".doc", ".docx"]:
        try:
            doc = docx.Document(file_path)
            text = "\n".join([para.text for para in doc.paragraphs])
        except Exception as e:
            text = f"Error processing Word document: {e}"
    
    elif ext == ".txt":
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                text = f.read()
        except Exception as e:
            text = f"Error processing TXT file: {e}"
    
    elif ext in [".xls", ".xlsx"]:
        try:
            # Read the first sheet of the Excel file
            df = pd.read_excel(file_path)
            # Convert the dataframe to CSV format (or any format you prefer)
            text = df.to_csv(index=False)
        except Exception as e:
            text = f"Error processing Excel file: {e}"
    
    else:
        text = "Unsupported file type for text extraction."
    
    return text

def upload_file(file):
    """
    Handles file upload from Gradio.
    Saves the file to the "new_file" directory and extracts text content based on file type.
    Supports file-like objects, dictionaries, or file paths.
    """
    # Check if a file was uploaded
    if file is None:
        return "No file uploaded!"
    
    # If file is a list (multiple files), take the first one
    if isinstance(file, list):
        file = file[0]
    
    # Initialize file_name and file_data based on the type of 'file'
    if hasattr(file, 'read'):
        # file is a file-like object
        file_data = file.read()
        file_name = getattr(file, 'name', "uploaded_file")
    elif isinstance(file, dict):
        # file is a dictionary with "name" and "data" keys
        file_name = file.get("name", "uploaded_file")
        file_data = file.get("data")
    elif isinstance(file, str):
        # file is a string (e.g., a NamedString representing a file path)
        file_name = os.path.basename(file)
        try:
            with open(file, "rb") as f:
                file_data = f.read()
        except Exception as e:
            return f"Error reading file from path: {e}"
    else:
        return "Uploaded file format not recognized."
    
    # Validate that file_data is available
    if file_data is None:
        return "Uploaded file data not found!"
    
    # Ensure the "new_file" directory exists
    if not os.path.exists("new_file"):
        os.makedirs("new_file")
    
    # Save the file to the "new_file" directory
    file_path = os.path.join("new_file", file_name)
    try:
        with open(file_path, "wb") as f:
            f.write(file_data)
    except Exception as e:
        return f"Error saving file: {e}"
    
    # Extract text from the file for further processing
    extracted_text = extract_text_from_file(file_path)
    
    # Create a preview of the extracted text
    preview = extracted_text[:200] + "..." if len(extracted_text) > 200 else extracted_text
    return f"File {file_name} uploaded and processed successfully!\nExtracted text preview:\n{preview}"



def gradio_chatbot():
    with gr.Blocks() as demo:
        gr.Markdown("# Chat Interface for LlamaIndex")

        chatbot = gr.Chatbot(label="LlamaIndex Chatbot")
        user_input = gr.Textbox(
            placeholder="Ask a question...", label="Enter your question"
        )

        submit_button = gr.Button("Send")
        btn_clear = gr.Button("Delete Context")

        # Add a file upload component
        file_upload = gr.File(label="Upload a file")

        # Add a button to handle file upload
        upload_button = gr.Button("Upload File")

        chat_history = gr.State([])

        # Define the file upload action
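        # Note: uploading only saves and previews the file; the vector index built at startup
        # is not refreshed with the newly uploaded document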
        upload_button.click(upload_file, inputs=file_upload, outputs=user_input)

        # Define the chat interaction
        submit_button.click(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])

        user_input.submit(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
        btn_clear.click(fn=clear_history, outputs=[chatbot, user_input])

    return demo

gradio_chatbot().launch(debug=True)