Document-QA-bot / app.py
Sarath0x8f's picture
Update app.py
912b2d3 verified
raw
history blame
11.2 kB
from datetime import datetime
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_parse import LlamaParse
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
import os
from dotenv import load_dotenv
import gradio as gr
import base64
# Load environment variables (e.g. from a local .env file) before any of the
# os.getenv() lookups below run; the API keys used by this module
# ("LLAMA_INDEX_API" and "TOKEN") are read from the environment.
load_dotenv()
# Model ids offered in the language-model dropdown.
llm_models = [
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.2",
    "tiiuae/falcon-7b-instruct",
]

# Model ids offered in the embedding-model dropdown.
# Trailing notes are sizes as recorded by the original author.
embed_models = [
    "BAAI/bge-small-en-v1.5",  # 33.4M
    "NeuML/pubmedbert-base-embeddings",
    "BAAI/llm-embedder",  # 109M
    "BAAI/bge-large-en",  # 335M
]

# Module-level state shared by the UI callbacks.
# Both selections start on the first entry of their respective list.
selected_llm_model_name = llm_models[0]
selected_embed_model_name = embed_models[0]
# Holds the index built from the uploaded document; None until one is processed.
vector_index = None
# Single LlamaParse client, configured to return markdown; its API key is read
# from the LLAMA_INDEX_API environment variable.
parser = LlamaParse(api_key=os.getenv("LLAMA_INDEX_API"), result_type='markdown')

# Map every supported extension to that same parser instance.
file_extractor = dict.fromkeys(
    (
        '.pdf',   # PDF documents
        '.docx',  # Microsoft Word documents
        '.doc',   # Older Microsoft Word documents
        '.txt',   # Plain text files
        '.csv',   # Comma-separated values files
        '.xlsx',  # Microsoft Excel files
        '.pptx',  # Microsoft PowerPoint files
        '.html',  # HTML files
        '.jpg',   # JPEG images
        '.jpeg',  # JPEG images
        '.png',   # PNG images
        '.webp',  # WebP images
        '.svg',   # SVG files
    ),
    parser,
)
# Markdown / HTML content shown in the UI.
# The emoji are written as real Unicode characters (they had been mis-decoded
# into sequences such as "πŸ“„"), and blank lines separate paragraphs from
# lists so a paragraph is not rendered as a continuation of the last bullet.

# Landing text for the introduction tab.
description = """
## Welcome to DocBot 📄🤖

DocBot is an intelligent document analysis tool that can help you extract insights from various document formats including:

- PDF documents
- Word documents (.docx, .doc)
- Text files
- CSV files
- Excel files
- PowerPoint presentations
- HTML files
- Images with text (JPG, PNG, WebP, SVG)

Simply upload your document, select your preferred embedding model and LLM, then start asking questions!
"""

# Step-by-step usage guide shown in the collapsible accordion.
guide = """
### How to Use DocBot:

1. **Upload Document**: Choose any supported file format
2. **Select Embedding Model**: Choose from available embedding models (BAAI/bge-small-en-v1.5 is recommended for most cases)
3. **Submit**: Click submit to process your document
4. **Select LLM**: Choose your preferred language model
5. **Ask Questions**: Start chatting with your document!

### Tips:

- Smaller embedding models (like bge-small-en-v1.5) are faster but may be less accurate
- Larger models provide better understanding but take more time
- Be specific in your questions for better results
"""

# HTML footer template. The positional placeholders {0}, {1} and {2} are filled
# via str.format with the base64-encoded GitHub, LinkedIn and website logos.
footer = """
<div style="text-align: center; margin-top: 20px; padding: 20px; border-top: 1px solid #ddd;">
    <p>Built with ❤️ using LlamaIndex and Gradio</p>
    <div style="display: flex; justify-content: center; gap: 20px; margin-top: 10px;">
        <a href="https://github.com" target="_blank">
            <img src="data:image/png;base64,{0}" alt="GitHub" style="width: 24px; height: 24px;">
        </a>
        <a href="https://linkedin.com" target="_blank">
            <img src="data:image/png;base64,{1}" alt="LinkedIn" style="width: 24px; height: 24px;">
        </a>
        <a href="https://your-website.com" target="_blank">
            <img src="data:image/png;base64,{2}" alt="Website" style="width: 24px; height: 24px;">
        </a>
    </div>
</div>
"""
# File processing function
def load_files(file_path: str, embed_model_name: str) -> str:
    """Parse one uploaded file and build the module-level vector index from it.

    Args:
        file_path: Path of the uploaded file; a falsy value means nothing
            was selected.
        embed_model_name: Name of the embedding model used to build the index.

    Returns:
        A status message for the UI: a prompt for missing input, a success
        message naming the file, or an error message. Failures while parsing
        or indexing are reported in the returned text rather than raised.
    """
    global vector_index

    # Input validation cannot fail, so it lives outside the try block.
    if not file_path:
        return "Please select a file first."
    if not embed_model_name:
        return "Please select an embedding model."

    try:
        documents = SimpleDirectoryReader(
            input_files=[file_path],
            file_extractor=file_extractor,
        ).load_data()
        embed_model = HuggingFaceEmbedding(model_name=embed_model_name)
        # Only rebound on success, so a failed upload keeps the previous index.
        vector_index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
    except Exception as e:
        # UI boundary: surface the failure as status text instead of crashing
        # the callback.
        return f"❌ An error occurred: {str(e)}"

    print(f"Parsing done for {file_path}")
    filename = os.path.basename(file_path)
    return f"✅ Ready to answer questions about: {filename}"
# Function to handle the selected model from dropdown
def set_llm_model(selected_model):
    """Record the LLM chosen in the dropdown and report the active model.

    Args:
        selected_model: Model name from the dropdown. A falsy value (for
            example a cleared dropdown) leaves the current selection untouched.

    Returns:
        A status string of the form "LLM set to: <model>" naming the model
        that is in effect after the call.
    """
    global selected_llm_model_name
    if selected_model:
        selected_llm_model_name = selected_model
    # Report the model actually in effect, so an empty selection does not
    # display "LLM set to: None" while the previous model is still being used.
    return f"LLM set to: {selected_llm_model_name}"
# Respond function that uses the globally set selected model
def respond(message, history):
    """Answer *message* from the indexed document using the selected LLM.

    Args:
        message: The user's question.
        history: Chat history passed by the caller; not used here.

    Returns:
        A string for the chat window: the answer prefixed with the model name,
        a prompt to process a document or enter a question, or an error
        message. Exceptions are reported in the returned text, not raised.
    """
    try:
        # Compare against None explicitly so the check does not depend on how
        # the index object defines truthiness.
        if vector_index is None:
            return "Please upload and process a document first."
        # `not message` also covers None, which has no .strip().
        if not message or not message.strip():
            return "Please enter a question."

        # Initialize the LLM with the selected model.
        # NOTE(review): these camelCase keyword names (contextWindow, maxTokens,
        # topP, ...) do not follow Python naming; confirm against the
        # HuggingFaceInferenceAPI reference that they are honoured and not
        # silently ignored. They are kept unchanged here.
        llm = HuggingFaceInferenceAPI(
            model_name=selected_llm_model_name,
            contextWindow=8192,
            maxTokens=1024,
            temperature=0.3,
            topP=0.9,
            frequencyPenalty=0.5,
            presencePenalty=0.5,
            token=os.getenv("TOKEN")
        )

        # Set up the query engine with the selected LLM and run the query.
        query_engine = vector_index.as_query_engine(llm=llm)
        bot_message = query_engine.query(message)

        print(f"\n{datetime.now()}:{selected_llm_model_name}:: {message} --> {str(bot_message)}\n")
        return f"**{selected_llm_model_name}:**\n\n{str(bot_message)}"
    except Exception as e:
        # UI boundary: show the failure in the chat instead of crashing the
        # callback.
        return f"❌ An error occurred: {str(e)}"
def encode_image_safe(image_path):
    """Return the file at *image_path* as base64 text, or "" if it is unreadable.

    Args:
        image_path: Path of the image file to encode.

    Returns:
        The file's bytes base64-encoded and decoded to a UTF-8 ``str``; an
        empty string when the file is missing, is a directory, cannot be
        read, or the path is not a usable value.
    """
    try:
        # Open directly instead of checking for existence first: the file
        # could disappear between the check and the open.
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')
    except (OSError, TypeError, ValueError):
        # Best effort by design: a logo that cannot be read is simply omitted.
        return ""
# Clear function for file processing components
def clear_file_components():
    """Return reset values for the file input, embedding dropdown and status box.

    Returns:
        A 3-tuple: ``None`` (no file), the first entry of ``embed_models``
        (the default embedding model) and an empty status string.
    """
    default_embed_model = embed_models[0]
    return None, default_embed_model, ""
# Encode the footer logos in one pass; encode_image_safe yields "" for any
# image that is missing, so each name is always bound to a string.
github_logo_encoded, linkedin_logo_encoded, website_logo_encoded = map(
    encode_image_safe,
    (
        "Images/github-logo.png",
        "Images/linkedin-logo.png",
        "Images/ai-logo.png",
    ),
)
# UI Setup
# Layout, callbacks and event bindings all live inside the gr.Blocks context so
# that every component and listener is registered on `demo`.
with gr.Blocks(
    theme=gr.themes.Soft(),
    css='footer {visibility: hidden}',
    title="DocBot - Document Analysis Assistant"
) as demo:
    gr.Markdown("# DocBot 📄🤖")
    gr.Markdown("*Intelligent Document Analysis Assistant*")

    with gr.Tabs():
        with gr.TabItem("📖 Introduction"):
            gr.Markdown(description)

        with gr.TabItem("🤖 DocBot"):
            with gr.Accordion("📋 Quick Start Guide", open=False):
                gr.Markdown(guide)

            with gr.Row():
                # Left column: document processing and model selection.
                with gr.Column(scale=1):
                    with gr.Group():
                        gr.Markdown("### Document Processing")
                        file_input = gr.File(
                            file_count="single",
                            type='filepath',
                            label="Step 1: Upload Document",
                            # Derived from the extractor mapping so the upload
                            # filter and the parser configuration cannot drift
                            # apart.
                            file_types=list(file_extractor)
                        )
                        embed_model_dropdown = gr.Dropdown(
                            choices=embed_models,
                            label="Step 2: Select Embedding Model",
                            interactive=True,
                            value=embed_models[0]
                        )
                        with gr.Row():
                            btn = gr.Button("🚀 Process Document", variant='primary', size="lg")
                            clear_btn = gr.Button("🗑️ Clear", size="lg")
                        output = gr.Textbox(
                            label='Processing Status',
                            interactive=False,
                            placeholder="Upload a document and click 'Process Document' to begin..."
                        )

                    with gr.Group():
                        gr.Markdown("### Model Selection")
                        llm_model_dropdown = gr.Dropdown(
                            choices=llm_models,
                            label="Step 3: Select Language Model",
                            interactive=True,
                            value=llm_models[0]
                        )
                        llm_status = gr.Textbox(
                            label="Selected Model",
                            interactive=False,
                            value=f"LLM set to: {llm_models[0]}"
                        )

                # Right column: the chat itself.
                with gr.Column(scale=2):
                    gr.Markdown("### Chat with Your Document")
                    chatbot = gr.Chatbot(
                        height=600,
                        placeholder="Process a document first, then start asking questions!",
                        show_label=False
                    )
                    msg = gr.Textbox(
                        placeholder="Step 4: Ask questions about your document...",
                        container=False,
                        scale=7
                    )
                    with gr.Row():
                        submit_btn = gr.Button("Send", variant="primary")
                        clear_chat_btn = gr.Button("Clear Chat")

    # Add footer if at least one logo image could be encoded
    if github_logo_encoded or linkedin_logo_encoded or website_logo_encoded:
        gr.HTML(footer.format(github_logo_encoded, linkedin_logo_encoded, website_logo_encoded))

    # Set up event handlers
    def chat_respond(message, history):
        """Append the bot's answer to *history* and clear the input box.

        Returns the updated history and an empty string for the textbox.
        Blank or missing messages leave the history unchanged.
        """
        # Guard against a missing history so .append() below cannot fail.
        history = history or []
        # `not message` also covers None, which has no .strip().
        if not message or not message.strip():
            return history, ""
        response = respond(message, history)
        history.append([message, response])
        return history, ""

    def clear_chat():
        """Return an empty history and an empty input box."""
        return [], ""

    # Event bindings
    llm_model_dropdown.change(
        fn=set_llm_model,
        inputs=[llm_model_dropdown],
        outputs=[llm_status]
    )
    btn.click(
        fn=load_files,
        inputs=[file_input, embed_model_dropdown],
        outputs=[output]
    )
    # The Send button and pressing Enter in the textbox trigger the same handler.
    submit_btn.click(
        fn=chat_respond,
        inputs=[msg, chatbot],
        outputs=[chatbot, msg]
    )
    msg.submit(
        fn=chat_respond,
        inputs=[msg, chatbot],
        outputs=[chatbot, msg]
    )
    clear_btn.click(
        fn=clear_file_components,
        outputs=[file_input, embed_model_dropdown, output]
    )
    clear_chat_btn.click(
        fn=clear_chat,
        outputs=[chatbot, msg]
    )
# Launch the demo only when this file is executed as a script
if __name__ == "__main__":
    # Options are collected first so the launch configuration reads as data.
    launch_options = {
        "share": True,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)