# Spaces: Sarath0x8f / Document-QA-bot (Running) - File size: 11,190 Bytes

from datetime import datetime
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_parse import LlamaParse
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
import os
from dotenv import load_dotenv
import gradio as gr
import base64

# Load environment variables (python-dotenv) before any os.getenv() lookup
# below; this file reads "LLAMA_INDEX_API" and "TOKEN" from the environment.
load_dotenv()

# Model names offered in the "Select Language Model" dropdown; the chosen one
# is passed to HuggingFaceInferenceAPI(model_name=...). The first entry is the
# default selection.
llm_models = [
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.2",
    "tiiuae/falcon-7b-instruct",
]

# Model names offered in the "Select Embedding Model" dropdown; the chosen one
# is passed to HuggingFaceEmbedding(model_name=...). The first entry is the
# default selection. Trailing comments are the sizes noted by the author.
embed_models = [
    "BAAI/bge-small-en-v1.5",  # 33.4M
    "NeuML/pubmedbert-base-embeddings",
    "BAAI/llm-embedder", # 109M
    "BAAI/bge-large-en" # 335M
]

# Module-level state shared between the UI callbacks.
# Currently selected LLM: written by set_llm_model(), read by respond().
selected_llm_model_name = llm_models[0]  # Default to the first model in the list
# NOTE(review): not read anywhere in this file; load_files() receives the
# embedding model name as an argument instead.
selected_embed_model_name = embed_models[0] # Default to the first model in the list
# Index built by load_files(); stays None until a document has been processed.
vector_index = None

# Single LlamaParse client shared by every supported file type. It is asked
# for markdown output and takes its API key from the LLAMA_INDEX_API variable.
parser = LlamaParse(result_type='markdown', api_key=os.getenv("LLAMA_INDEX_API"))

# Map each supported extension to the shared parser; this mapping is handed to
# SimpleDirectoryReader as its file_extractor in load_files().
file_extractor = {
    extension: parser
    for extension in (
        '.pdf',   # PDF documents
        '.docx',  # Microsoft Word documents
        '.doc',   # Older Microsoft Word documents
        '.txt',   # Plain text files
        '.csv',   # Comma-separated values files
        '.xlsx',  # Microsoft Excel files
        '.pptx',  # Microsoft PowerPoint files
        '.html',  # HTML files
        '.jpg',   # JPEG images
        '.jpeg',  # JPEG images
        '.png',   # PNG images
        '.webp',  # WebP images
        '.svg',   # SVG files
    )
}

# Markdown content definitions
# Introductory text rendered with gr.Markdown in the "📖 Introduction" tab.
description = """
## Welcome to DocBot 📄🤖
DocBot is an intelligent document analysis tool that can help you extract insights from various document formats including:
- PDF documents
- Word documents (.docx, .doc)
- Text files
- CSV files
- Excel files
- PowerPoint presentations
- HTML files
- Images with text (JPG, PNG, WebP, SVG)
Simply upload your document, select your preferred embedding model and LLM, then start asking questions!
"""

# Usage instructions rendered with gr.Markdown inside the
# "📋 Quick Start Guide" accordion of the DocBot tab.
guide = """
### How to Use DocBot:
1. **Upload Document**: Choose any supported file format
2. **Select Embedding Model**: Choose from available embedding models (BAAI/bge-small-en-v1.5 is recommended for most cases)
3. **Submit**: Click submit to process your document
4. **Select LLM**: Choose your preferred language model
5. **Ask Questions**: Start chatting with your document!
### Tips:
- Smaller embedding models (like bge-small-en-v1.5) are faster but may be less accurate
- Larger models provide better understanding but take more time
- Be specific in your questions for better results
"""

# HTML template for the page footer. {0}, {1} and {2} are str.format()
# placeholders for the base64-encoded GitHub, LinkedIn and website icons; the
# template is formatted where the footer is rendered with gr.HTML.
# NOTE(review): the link targets look like generic placeholders - confirm the
# intended profile/site URLs.
footer = """
<div style="text-align: center; margin-top: 20px; padding: 20px; border-top: 1px solid #ddd;">
    <p>Built with ❤️ using LlamaIndex and Gradio</p>
    <div style="display: flex; justify-content: center; gap: 20px; margin-top: 10px;">
        <a href="https://github.com" target="_blank">
            <img src="data:image/png;base64,{0}" alt="GitHub" style="width: 24px; height: 24px;">
        </a>
        <a href="https://linkedin.com" target="_blank">
            <img src="data:image/png;base64,{1}" alt="LinkedIn" style="width: 24px; height: 24px;">
        </a>
        <a href="https://your-website.com" target="_blank">
            <img src="data:image/png;base64,{2}" alt="Website" style="width: 24px; height: 24px;">
        </a>
    </div>
</div>
"""

# File processing function
def load_files(file_path: str, embed_model_name: str) -> str:
    """Parse an uploaded file and build the global vector index from it.

    Args:
        file_path: Path of the uploaded file, as supplied by the file input.
        embed_model_name: Model name passed to ``HuggingFaceEmbedding``.

    Returns:
        A status message for the UI: a prompt when an argument is missing, a
        success line naming the file, or an error line. Exceptions raised
        while parsing or indexing are reported in the message, not raised.
    """
    global vector_index

    if not file_path:
        return "Please select a file first."

    if not embed_model_name:
        return "Please select an embedding model."

    try:
        documents = SimpleDirectoryReader(
            input_files=[file_path],
            file_extractor=file_extractor,
        ).load_data()
        if not documents:
            # Without this check an empty parse result would be indexed and
            # reported as ready, and every later question would come back
            # empty. The previous index (if any) is left untouched.
            return "❌ An error occurred: no readable content was extracted from the file."

        embed_model = HuggingFaceEmbedding(model_name=embed_model_name)
        # Assign only after indexing succeeded so a failure never leaves a
        # half-built index behind.
        vector_index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
        print(f"Parsing done for {file_path}")
        filename = os.path.basename(file_path)
        return f"✅ Ready to answer questions about: {filename}"
    except Exception as e:
        return f"❌ An error occurred: {str(e)}"

# Function to handle the selected model from dropdown
def set_llm_model(selected_model):
    """Record the LLM chosen in the dropdown and report the active model.

    Args:
        selected_model: Model name from the dropdown. A falsy value (for
            example ``None`` or ``""``) leaves the current selection as is.

    Returns:
        Status text naming the model that is actually in effect.
    """
    global selected_llm_model_name
    if selected_model:
        selected_llm_model_name = selected_model
    # Report the stored name rather than the argument, so an ignored (empty)
    # selection is not echoed back as if it had been applied.
    return f"LLM set to: {selected_llm_model_name}"

# Respond function that uses the globally set selected model
def respond(message, history):
    """Answer ``message`` from the processed document with the selected LLM.

    ``history`` is accepted to match the chat callback signature but is not
    read. Returns the answer prefixed with the model name in bold, a prompt
    when no document or question is available, or an error line when
    anything raises.
    """
    try:
        if not vector_index:
            return "Please upload and process a document first."

        if not message.strip():
            return "Please enter a question."

        # Options for the inference client, built fresh on every call so a
        # model change in the dropdown takes effect immediately.
        # NOTE(review): the camelCase option names are kept exactly as they
        # were; confirm HuggingFaceInferenceAPI actually recognises them.
        llm_options = {
            "model_name": selected_llm_model_name,
            "contextWindow": 8192,
            "maxTokens": 1024,
            "temperature": 0.3,
            "topP": 0.9,
            "frequencyPenalty": 0.5,
            "presencePenalty": 0.5,
            "token": os.getenv("TOKEN"),
        }
        inference_llm = HuggingFaceInferenceAPI(**llm_options)

        # Query the index through an engine bound to that LLM.
        answer = vector_index.as_query_engine(llm=inference_llm).query(message)

        print(f"\n{datetime.now()}:{selected_llm_model_name}:: {message} --> {str(answer)}\n")
        return f"**{selected_llm_model_name}:**\n\n{str(answer)}"
    except Exception as exc:
        return f"❌ An error occurred: {str(exc)}"

def encode_image_safe(image_path):
    """Return the file at ``image_path`` as base64 text, or "" on any failure.

    A missing path and any exception raised while checking or reading the
    file both produce the empty string, so callers never see an error.
    """
    try:
        if not os.path.exists(image_path):
            return ""
        with open(image_path, "rb") as handle:
            raw_bytes = handle.read()
        return base64.b64encode(raw_bytes).decode('utf-8')
    except Exception:
        # Best effort by design: an unreadable icon is treated as absent.
        return ""

# Clear function for file processing components
def clear_file_components():
    """Return reset values for the file input, embedding dropdown and status box.

    The tuple order matches the outputs wired to the Clear button: no file,
    the first embedding model, and an empty status string.
    """
    cleared_file = None
    default_embed_model = embed_models[0]
    cleared_status = ""
    return cleared_file, default_embed_model, cleared_status

# Encode the footer icons once at import time; encode_image_safe() yields ""
# for any icon that is missing or unreadable.
github_logo_encoded, linkedin_logo_encoded, website_logo_encoded = (
    encode_image_safe(icon_path)
    for icon_path in (
        "Images/github-logo.png",
        "Images/linkedin-logo.png",
        "Images/ai-logo.png",
    )
)

# UI Setup
# Build the Gradio Blocks app: an introduction tab plus the DocBot tab, which
# holds document processing and model selection on the left and the chat on
# the right. Components are created in layout order, so statement order here
# determines what the page looks like.
with gr.Blocks(
    theme=gr.themes.Soft(),
    # Hides elements matching the `footer` selector. The custom footer below
    # is a <div>, so it is not affected.
    # NOTE(review): presumably this targets Gradio's built-in footer - confirm.
    css='footer {visibility: hidden}',
    title="DocBot - Document Analysis Assistant"
) as demo:

    gr.Markdown("# DocBot 📄🤖")
    gr.Markdown("*Intelligent Document Analysis Assistant*")

    with gr.Tabs():
        # Static introduction text.
        with gr.TabItem("📖 Introduction"):
            gr.Markdown(description)

        # Main working tab.
        with gr.TabItem("🤖 DocBot"):
            with gr.Accordion("📋 Quick Start Guide", open=False):
                gr.Markdown(guide)

            with gr.Row():
                # Left column: upload/processing controls and LLM selection.
                with gr.Column(scale=1):
                    with gr.Group():
                        gr.Markdown("### Document Processing")
                        # The accepted extensions repeat the keys of
                        # file_extractor; keep the two lists in sync.
                        file_input = gr.File(
                            file_count="single", 
                            type='filepath', 
                            label="Step 1: Upload Document",
                            file_types=['.pdf', '.docx', '.doc', '.txt', '.csv', '.xlsx', '.pptx', '.html', '.jpg', '.jpeg', '.png', '.webp', '.svg']
                        )

                        embed_model_dropdown = gr.Dropdown(
                            choices=embed_models, 
                            label="Step 2: Select Embedding Model", 
                            interactive=True,
                            value=embed_models[0]
                        )

                        with gr.Row():
                            btn = gr.Button("🚀 Process Document", variant='primary', size="lg")
                            clear_btn = gr.Button("🗑️ Clear", size="lg")

                        # Read-only box that shows the message returned by
                        # load_files().
                        output = gr.Textbox(
                            label='Processing Status', 
                            interactive=False,
                            placeholder="Upload a document and click 'Process Document' to begin..."
                        )

                    with gr.Group():
                        gr.Markdown("### Model Selection")
                        llm_model_dropdown = gr.Dropdown(
                            choices=llm_models, 
                            label="Step 3: Select Language Model", 
                            interactive=True,
                            value=llm_models[0]
                        )
                        # Read-only box that shows the message returned by
                        # set_llm_model(); pre-filled for the default model.
                        llm_status = gr.Textbox(
                            label="Selected Model",
                            interactive=False,
                            value=f"LLM set to: {llm_models[0]}"
                        )

                # Right column: the conversation itself.
                with gr.Column(scale=2):
                    gr.Markdown("### Chat with Your Document")
                    chatbot = gr.Chatbot(
                        height=600,
                        placeholder="Process a document first, then start asking questions!",
                        show_label=False
                    )

                    # NOTE(review): this textbox is created directly in the
                    # column, not in a gr.Row - confirm `scale` has any effect.
                    msg = gr.Textbox(
                        placeholder="Step 4: Ask questions about your document...",
                        container=False,
                        scale=7
                    )

                    with gr.Row():
                        submit_btn = gr.Button("Send", variant="primary")
                        clear_chat_btn = gr.Button("Clear Chat")

    # Add footer if images exist
    # The footer is rendered when at least one icon was encoded; icons that
    # could not be read are substituted as empty strings in the template.
    if any([github_logo_encoded, linkedin_logo_encoded, website_logo_encoded]):
        gr.HTML(footer.format(github_logo_encoded, linkedin_logo_encoded, website_logo_encoded))
    
    # Set up event handlers
    def chat_respond(message, history):
        """Append the bot's reply for ``message`` to ``history``.

        A blank message leaves the history as it is. In both cases the second
        return value is "", which empties the message box.
        """
        if message.strip():
            # respond() is evaluated before the new turn is appended.
            history.append([message, respond(message, history)])
        return history, ""
    
    def clear_chat():
        """Return an empty chat history and an empty message box value."""
        empty_history = []
        return empty_history, ""
    
    # Event bindings
    # Changing the LLM dropdown stores the choice and refreshes the status box.
    llm_model_dropdown.change(fn=set_llm_model, inputs=[llm_model_dropdown], outputs=[llm_status])

    # "Process Document" parses the upload and builds the index.
    btn.click(fn=load_files, inputs=[file_input, embed_model_dropdown], outputs=[output])

    # The Send button and submitting the textbox use the same handler, which
    # updates the chat and empties the textbox.
    submit_btn.click(fn=chat_respond, inputs=[msg, chatbot], outputs=[chatbot, msg])
    msg.submit(fn=chat_respond, inputs=[msg, chatbot], outputs=[chatbot, msg])

    # Reset the document-processing widgets.
    clear_btn.click(fn=clear_file_components, outputs=[file_input, embed_model_dropdown, output])

    # Reset the conversation and the textbox.
    clear_chat_btn.click(fn=clear_chat, outputs=[chatbot, msg])

# Launch the demo
if __name__ == "__main__":
    # Server settings gathered in one place: bind to every interface on port
    # 7860, with sharing and error display switched on.
    launch_options = {
        "share": True,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)