"""DocBot: a Gradio app for asking questions about an uploaded document.

A document is parsed with LlamaParse, embedded with a HuggingFace embedding
model into an in-memory ``VectorStoreIndex``, and then queried through a
HuggingFace Inference API LLM chosen in the UI.

Environment variables read (via ``python-dotenv`` / ``os.getenv``):
    LLAMA_INDEX_API: API key passed to ``LlamaParse``.
    TOKEN: token passed to ``HuggingFaceInferenceAPI``.
"""

import base64
import os
from datetime import datetime

import gradio as gr
from dotenv import load_dotenv
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_parse import LlamaParse

# Load environment variables
load_dotenv()

llm_models = [
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.2",
    "tiiuae/falcon-7b-instruct",
]

embed_models = [
    "BAAI/bge-small-en-v1.5",  # 33.4M
    "NeuML/pubmedbert-base-embeddings",
    "BAAI/llm-embedder",  # 109M
    "BAAI/bge-large-en",  # 335M
]

# Module-level state shared between the Gradio callbacks below.
selected_llm_model_name = llm_models[0]  # Default to the first model in the list
selected_embed_model_name = embed_models[0]  # Default to the first model in the list
vector_index = None  # Set by load_files() once a document has been indexed

# Initialize the parser
parser = LlamaParse(api_key=os.getenv("LLAMA_INDEX_API"), result_type='markdown')

# Single source of truth for the accepted file extensions: used both to build
# the file extractor mapping and to restrict the upload widget.
supported_extensions = (
    '.pdf',   # PDF documents
    '.docx',  # Microsoft Word documents
    '.doc',   # Older Microsoft Word documents
    '.txt',   # Plain text files
    '.csv',   # Comma-separated values files
    '.xlsx',  # Microsoft Excel files
    '.pptx',  # Microsoft PowerPoint files
    '.html',  # HTML files
    '.jpg',   # JPEG images
    '.jpeg',  # JPEG images
    '.png',   # PNG images
    '.webp',  # WebP images
    '.svg',   # SVG files
)

# Every supported extension is routed to the same LlamaParse parser.
file_extractor = {extension: parser for extension in supported_extensions}

# Markdown content definitions
description = """
## Welcome to DocBot 📄🤖

DocBot is an intelligent document analysis tool that can help you extract insights from various document formats including:

- PDF documents
- Word documents (.docx, .doc)
- Text files
- CSV files
- Excel files
- PowerPoint presentations
- HTML files
- Images with text (JPG, PNG, WebP, SVG)

Simply upload your document, select your preferred embedding model and LLM, then start asking questions!
"""

guide = """
### How to Use DocBot:

1. **Upload Document**: Choose any supported file format
2. **Select Embedding Model**: Choose from available embedding models (BAAI/bge-small-en-v1.5 is recommended for most cases)
3. **Submit**: Click submit to process your document
4. **Select LLM**: Choose your preferred language model
5. **Ask Questions**: Start chatting with your document!

### Tips:

- Smaller embedding models (like bge-small-en-v1.5) are faster but may be less accurate
- Larger models provide better understanding but take more time
- Be specific in your questions for better results
"""

# NOTE(review): this template contains no `{}` placeholders as it stands, so
# the `footer.format(...)` call in the UI section returns it unchanged and the
# encoded logos are not inserted. It looks like markup was lost at some point;
# confirm against the intended footer HTML.
footer = """

Built with ❤️ using LlamaIndex and Gradio

GitHub LinkedIn Website
"""


# File processing function
def load_files(file_path: str, embed_model_name: str) -> str:
    """Parse and index one document, storing the result in ``vector_index``.

    Args:
        file_path: Path of the uploaded file, as supplied by the upload widget.
        embed_model_name: HuggingFace embedding model name to index with.

    Returns:
        A status message for the UI: a prompt when an input is missing, a
        success message naming the file, or an error message. Exceptions are
        reported in the message rather than raised, because this runs as a
        Gradio callback.
    """
    global vector_index

    if not file_path:
        return "Please select a file first."
    if not embed_model_name:
        return "Please select an embedding model."

    try:
        documents = SimpleDirectoryReader(
            input_files=[file_path],
            file_extractor=file_extractor,
        ).load_data()
        embed_model = HuggingFaceEmbedding(model_name=embed_model_name)
        # Only replaced on success, so a failed upload keeps the previous index.
        vector_index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
    except Exception as e:
        return f"❌ An error occurred: {str(e)}"

    print(f"Parsing done for {file_path}")
    filename = os.path.basename(file_path)
    return f"✅ Ready to answer questions about: {filename}"


# Function to handle the selected model from dropdown
def set_llm_model(selected_model):
    """Record the LLM chosen in the dropdown and return a status message.

    An empty selection leaves the current model untouched; the message always
    names the model that is actually in effect.
    """
    global selected_llm_model_name

    if selected_model:
        selected_llm_model_name = selected_model
    return f"LLM set to: {selected_llm_model_name}"


# Respond function that uses the globally set selected model
def respond(message, history):
    """Answer ``message`` from the indexed document using the selected LLM.

    Args:
        message: The user's question.
        history: Chat history; accepted for the callback signature, not used.

    Returns:
        The answer prefixed with the model name, a prompt when no document is
        indexed or the question is blank, or an error message on failure.
    """
    # Explicit None check: do not rely on the truthiness of the index object.
    if vector_index is None:
        return "Please upload and process a document first."
    if not message or not message.strip():
        return "Please enter a question."

    try:
        # Initialize the LLM with the selected model.
        # NOTE(review): the camelCase keyword names below are kept exactly as
        # they were; confirm they match the fields HuggingFaceInferenceAPI
        # accepts (Python APIs usually use snake_case) and are not ignored.
        llm = HuggingFaceInferenceAPI(
            model_name=selected_llm_model_name,
            contextWindow=8192,
            maxTokens=1024,
            temperature=0.3,
            topP=0.9,
            frequencyPenalty=0.5,
            presencePenalty=0.5,
            token=os.getenv("TOKEN"),
        )

        # Set up the query engine with the selected LLM
        query_engine = vector_index.as_query_engine(llm=llm)
        bot_message = query_engine.query(message)

        print(f"\n{datetime.now()}:{selected_llm_model_name}:: {message} --> {str(bot_message)}\n")
        return f"**{selected_llm_model_name}:**\n\n{str(bot_message)}"
    except Exception as e:
        return f"❌ An error occurred: {str(e)}"


def encode_image_safe(image_path):
    """Return the file at ``image_path`` base64-encoded as a UTF-8 string.

    Best-effort: returns an empty string when the file is missing or cannot be
    read, so that optional logo images never stop the app from starting.
    """
    try:
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')
    except (OSError, TypeError, ValueError):
        # OSError: missing/unreadable file; TypeError/ValueError: invalid path.
        return ""


def reset_document():
    """Forget the indexed document and blank the document-processing widgets.

    Returns:
        Values for the file input, embedding dropdown and status box.
    """
    global vector_index

    # Drop the index too; otherwise chat keeps answering from a document the
    # UI shows as cleared.
    vector_index = None
    return [None, None, ""]


def chat_respond(message, history):
    """Append one question/answer pair to the chat and clear the input box.

    Args:
        message: Text from the message box.
        history: Current chatbot history as a list of ``[user, bot]`` pairs.

    Returns:
        The updated history and an empty string for the message box.
    """
    if history is None:
        history = []
    if not message or not message.strip():
        return history, ""

    response = respond(message, history)
    history.append([message, response])
    return history, ""


# Encode the images (with fallback for missing images)
github_logo_encoded = encode_image_safe("Images/github-logo.png")
linkedin_logo_encoded = encode_image_safe("Images/linkedin-logo.png")
website_logo_encoded = encode_image_safe("Images/ai-logo.png")

# UI Setup
with gr.Blocks(
    theme=gr.themes.Soft(),
    css='footer {visibility: hidden}',
    title="DocBot - Document Analysis Assistant"
) as demo:
    gr.Markdown("# DocBot 📄🤖")
    gr.Markdown("*Intelligent Document Analysis Assistant*")

    with gr.Tabs():
        with gr.TabItem("📖 Introduction"):
            gr.Markdown(description)

        with gr.TabItem("🤖 DocBot"):
            with gr.Accordion("📋 Quick Start Guide", open=False):
                gr.Markdown(guide)

            with gr.Row():
                with gr.Column(scale=1):
                    with gr.Group():
                        gr.Markdown("### Document Processing")
                        file_input = gr.File(
                            file_count="single",
                            type='filepath',
                            label="Step 1: Upload Document",
                            file_types=list(supported_extensions)
                        )
                        embed_model_dropdown = gr.Dropdown(
                            choices=embed_models,
                            label="Step 2: Select Embedding Model",
                            interactive=True,
                            value=embed_models[0]
                        )
                        with gr.Row():
                            btn = gr.Button("🚀 Process Document", variant='primary', size="lg")
                            clear = gr.ClearButton("🗑️ Clear", size="lg")
                        output = gr.Textbox(
                            label='Processing Status',
                            interactive=False,
                            placeholder="Upload a document and click 'Process Document' to begin..."
                        )

                    with gr.Group():
                        gr.Markdown("### Model Selection")
                        llm_model_dropdown = gr.Dropdown(
                            choices=llm_models,
                            label="Step 3: Select Language Model",
                            interactive=True,
                            value=llm_models[0]
                        )
                        llm_status = gr.Textbox(
                            label="Selected Model",
                            interactive=False,
                            value=f"LLM set to: {llm_models[0]}"
                        )

                with gr.Column(scale=2):
                    gr.Markdown("### Chat with Your Document")
                    chatbot = gr.Chatbot(
                        height=600,
                        placeholder="Process a document first, then start asking questions!",
                        show_label=False
                    )
                    msg = gr.Textbox(
                        placeholder="Step 4: Ask questions about your document...",
                        container=False,
                        scale=7
                    )
                    with gr.Row():
                        submit_btn = gr.Button("Send", variant="primary")
                        clear_chat = gr.ClearButton([msg, chatbot], value="Clear Chat")

    # Add footer if images exist
    if any((github_logo_encoded, linkedin_logo_encoded, website_logo_encoded)):
        gr.HTML(footer.format(github_logo_encoded, linkedin_logo_encoded, website_logo_encoded))

    # Event bindings
    llm_model_dropdown.change(
        fn=set_llm_model,
        inputs=[llm_model_dropdown],
        outputs=[llm_status]
    )

    btn.click(
        fn=load_files,
        inputs=[file_input, embed_model_dropdown],
        outputs=[output]
    )

    submit_btn.click(
        fn=chat_respond,
        inputs=[msg, chatbot],
        outputs=[chatbot, msg]
    )

    msg.submit(
        fn=chat_respond,
        inputs=[msg, chatbot],
        outputs=[chatbot, msg]
    )

    clear.click(
        fn=reset_document,
        outputs=[file_input, embed_model_dropdown, output]
    )

# Launch the demo
if __name__ == "__main__":
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True
    )