Spaces:

Sarath0x8f
/

Document-QA-bot

Running

App Files Files Community

Document-QA-bot / app.py

Sarath0x8f

Update app.py

912b2d3 verified 6 days ago

raw

history blame

11.2 kB

	from datetime import datetime
	from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
	from llama_index.embeddings.huggingface import HuggingFaceEmbedding
	from llama_parse import LlamaParse
	from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
	import os
	from dotenv import load_dotenv
	import gradio as gr
	import base64

	# Load environment variables (python-dotenv) up front so that the
	# os.getenv() lookups made later in this file can see them.
	load_dotenv()

	# Model names offered for the LLM choice; the first entry is the default.
	llm_models = [
	    "mistralai/Mixtral-8x7B-Instruct-v0.1",
	    "meta-llama/Meta-Llama-3-8B-Instruct",
	    "mistralai/Mistral-7B-Instruct-v0.2",
	    "tiiuae/falcon-7b-instruct",
	]

	# Model names offered for the embedding choice; the first entry is the
	# default. Trailing size notes are kept from the original author.
	embed_models = [
	    "BAAI/bge-small-en-v1.5",  # 33.4M
	    "NeuML/pubmedbert-base-embeddings",
	    "BAAI/llm-embedder",  # 109M
	    "BAAI/bge-large-en",  # 335M
	]

	# Module-level state: both selections start at the first entry of their
	# list, and no index exists until a document has been processed.
	selected_llm_model_name, selected_embed_model_name = llm_models[0], embed_models[0]
	vector_index = None

	# Initialize the parser (API key comes from the LLAMA_INDEX_API env var)
	parser = LlamaParse(result_type='markdown', api_key=os.getenv("LLAMA_INDEX_API"))

	# Map every supported extension to the same parser instance:
	# PDF, Word (.docx/.doc), plain text, CSV, Excel, PowerPoint, HTML,
	# and images (JPEG, PNG, WebP, SVG).
	file_extractor = {
	    extension: parser
	    for extension in (
	        '.pdf',
	        '.docx',
	        '.doc',
	        '.txt',
	        '.csv',
	        '.xlsx',
	        '.pptx',
	        '.html',
	        '.jpg',
	        '.jpeg',
	        '.png',
	        '.webp',
	        '.svg',
	    )
	}

	# Markdown content definitions
	# `description` is the Markdown shown on the introduction tab.
	description = """
	## Welcome to DocBot 📄🤖
	DocBot is an intelligent document analysis tool that can help you extract insights from various document formats including:
	- PDF documents
	- Word documents (.docx, .doc)
	- Text files
	- CSV files
	- Excel files
	- PowerPoint presentations
	- HTML files
	- Images with text (JPG, PNG, WebP, SVG)
	Simply upload your document, select your preferred embedding model and LLM, then start asking questions!
	"""

	# `guide` is the Markdown shown inside the quick-start accordion.
	guide = """
	### How to Use DocBot:
	1. Upload Document: Choose any supported file format
	2. Select Embedding Model: Choose from available embedding models (BAAI/bge-small-en-v1.5 is recommended for most cases)
	3. Submit: Click submit to process your document
	4. Select LLM: Choose your preferred language model
	5. Ask Questions: Start chatting with your document!
	### Tips:
	- Smaller embedding models (like bge-small-en-v1.5) are faster but may be less accurate
	- Larger models provide better understanding but take more time
	- Be specific in your questions for better results
	"""

	# `footer` is an HTML template: the {0}, {1} and {2} placeholders receive
	# base64-encoded image data through str.format() when the footer is rendered.
	# NOTE(review): the three link targets look like generic placeholders
	# (github.com, linkedin.com, your-website.com) — confirm the intended URLs.
	footer = """
	<div style="text-align: center; margin-top: 20px; padding: 20px; border-top: 1px solid #ddd;">
	<p>Built with ❤️ using LlamaIndex and Gradio</p>
	<div style="display: flex; justify-content: center; gap: 20px; margin-top: 10px;">
	<a href="https://github.com" target="_blank">
	<img src="data:image/png;base64,{0}" alt="GitHub" style="width: 24px; height: 24px;">
	</a>
	<a href="https://linkedin.com" target="_blank">
	<img src="data:image/png;base64,{1}" alt="LinkedIn" style="width: 24px; height: 24px;">
	</a>
	<a href="https://your-website.com" target="_blank">
	<img src="data:image/png;base64,{2}" alt="Website" style="width: 24px; height: 24px;">
	</a>
	</div>
	</div>
	"""

	# File processing function
	def load_files(file_path: str, embed_model_name: str):
	    """Parse one uploaded file and rebuild the global vector index from it.

	    Args:
	        file_path: Path of the uploaded file; a falsy value means no file
	            was selected.
	        embed_model_name: Model name handed to HuggingFaceEmbedding.

	    Returns:
	        A status string: a prompt when an input is missing, a "ready"
	        message naming the file on success, or an error message when any
	        step raised.
	    """
	    global vector_index
	    try:
	        # Guard clauses: both inputs are required before any work starts.
	        if not file_path:
	            return "Please select a file first."
	        if not embed_model_name:
	            return "Please select an embedding model."

	        reader = SimpleDirectoryReader(input_files=[file_path], file_extractor=file_extractor)
	        parsed_documents = reader.load_data()
	        embedder = HuggingFaceEmbedding(model_name=embed_model_name)
	        vector_index = VectorStoreIndex.from_documents(parsed_documents, embed_model=embedder)
	        print(f"Parsing done for {file_path}")
	        return f"✅ Ready to answer questions about: {os.path.basename(file_path)}"
	    except Exception as exc:
	        # UI boundary: report the failure in the status box instead of raising.
	        return f"❌ An error occurred: {exc!s}"

	# Function to handle the selected model from dropdown
	def set_llm_model(selected_model):
	    """Record the chosen LLM and report which model is now active.

	    Args:
	        selected_model: Model name coming from the dropdown. A falsy value
	            (None or "") leaves the current selection untouched.

	    Returns:
	        "LLM set to: <name>", where <name> is the model actually in effect
	        after the call. Reporting the stored name (rather than echoing the
	        argument) avoids showing "LLM set to: None" when the selection did
	        not change.
	    """
	    global selected_llm_model_name
	    if selected_model:
	        selected_llm_model_name = selected_model
	    return f"LLM set to: {selected_llm_model_name}"

	# Respond function that uses the globally set selected model
	def respond(message, history):
	    """Answer a question about the processed document with the selected LLM.

	    Args:
	        message: The user's question.
	        history: Previous chat turns; accepted for the chat-callback
	            signature but not used by this function.

	    Returns:
	        The answer prefixed with the model name, a prompt string when no
	        document has been processed or the question is blank, or an error
	        message when any step raised.
	    """
	    try:
	        # Explicit None check: the index is a library object, so its
	        # truthiness should not decide whether a document was loaded.
	        if vector_index is None:
	            return "Please upload and process a document first."

	        if not message.strip():
	            return "Please enter a question."

	        # Initialize the LLM with the selected model.
	        # The Python class takes snake_case fields (context_window,
	        # num_output); the camelCase spellings used before do not match
	        # them, so the intended limits were not applied.
	        llm = HuggingFaceInferenceAPI(
	            model_name=selected_llm_model_name,
	            context_window=8192,
	            num_output=1024,
	            temperature=0.3,
	            # NOTE(review): the three keyword names below do not correspond
	            # to any documented field of this class and appear to be
	            # ignored — confirm, then map them to supported options or
	            # remove them.
	            topP=0.9,
	            frequencyPenalty=0.5,
	            presencePenalty=0.5,
	            token=os.getenv("TOKEN"),
	        )

	        # Set up the query engine with the selected LLM
	        query_engine = vector_index.as_query_engine(llm=llm)
	        bot_message = query_engine.query(message)

	        print(f"\n{datetime.now()}:{selected_llm_model_name}:: {message} --> {str(bot_message)}\n")
	        return f"{selected_llm_model_name}:\n\n{str(bot_message)}"
	    except Exception as e:
	        # UI boundary: surface the failure in the chat instead of raising.
	        return f"❌ An error occurred: {str(e)}"

	def encode_image_safe(image_path):
	    """Return the file at *image_path* base64-encoded, or "" when unavailable.

	    Best-effort helper: a missing file or any error while reading yields an
	    empty string instead of an exception.
	    """
	    encoded = ""
	    try:
	        if os.path.exists(image_path):
	            with open(image_path, "rb") as handle:
	                raw_bytes = handle.read()
	            encoded = base64.b64encode(raw_bytes).decode('utf-8')
	    except Exception:
	        # Deliberate fallback: callers treat "" as "no image available".
	        encoded = ""
	    return encoded

	# Clear function for file processing components
	def clear_file_components():
	    """Return reset values: no file, the first embedding model, empty status."""
	    default_embed_model = embed_models[0]
	    return (None, default_embed_model, "")

	# Encode the images (with fallback for missing images): each name receives
	# a base64 string, or "" when its file could not be read.
	github_logo_encoded, linkedin_logo_encoded, website_logo_encoded = map(
	    encode_image_safe,
	    (
	        "Images/github-logo.png",
	        "Images/linkedin-logo.png",
	        "Images/ai-logo.png",
	    ),
	)

	# UI Setup
	# Builds the Gradio interface: an introduction tab plus a "DocBot" tab that
	# holds document processing, model selection and the chat area.
	# NOTE(review): the nesting of the `with` contexts below is not visible in
	# this copy of the file (indentation is flattened) — restore it from the
	# original before running.
	with gr.Blocks(
	theme=gr.themes.Soft(),
	# Hides elements matching the `footer` selector (presumably Gradio's own
	# page footer — confirm).
	css='footer {visibility: hidden}',
	title="DocBot - Document Analysis Assistant"
	) as demo:

	gr.Markdown("# DocBot 📄🤖")
	gr.Markdown("Intelligent Document Analysis Assistant")

	with gr.Tabs():
	with gr.TabItem("📖 Introduction"):
	gr.Markdown(description)

	with gr.TabItem("🤖 DocBot"):
	with gr.Accordion("📋 Quick Start Guide", open=False):
	gr.Markdown(guide)

	with gr.Row():
	with gr.Column(scale=1):
	with gr.Group():
	gr.Markdown("### Document Processing")
	# Single-file upload; the handler receives a filesystem path.
	# The allowed types mirror the keys of `file_extractor`.
	file_input = gr.File(
	file_count="single",
	type='filepath',
	label="Step 1: Upload Document",
	file_types=['.pdf', '.docx', '.doc', '.txt', '.csv', '.xlsx', '.pptx', '.html', '.jpg', '.jpeg', '.png', '.webp', '.svg']
	)

	embed_model_dropdown = gr.Dropdown(
	choices=embed_models,
	label="Step 2: Select Embedding Model",
	interactive=True,
	value=embed_models[0]
	)

	with gr.Row():
	btn = gr.Button("🚀 Process Document", variant='primary', size="lg")
	clear_btn = gr.Button("🗑️ Clear", size="lg")

	# Read-only box that shows the status string returned by processing.
	output = gr.Textbox(
	label='Processing Status',
	interactive=False,
	placeholder="Upload a document and click 'Process Document' to begin..."
	)

	with gr.Group():
	gr.Markdown("### Model Selection")
	llm_model_dropdown = gr.Dropdown(
	choices=llm_models,
	label="Step 3: Select Language Model",
	interactive=True,
	value=llm_models[0]
	)
	# Read-only echo of the active LLM; starts at the default model.
	llm_status = gr.Textbox(
	label="Selected Model",
	interactive=False,
	value=f"LLM set to: {llm_models[0]}"
	)

	with gr.Column(scale=2):
	gr.Markdown("### Chat with Your Document")
	chatbot = gr.Chatbot(
	height=600,
	placeholder="Process a document first, then start asking questions!",
	show_label=False
	)

	msg = gr.Textbox(
	placeholder="Step 4: Ask questions about your document...",
	container=False,
	scale=7
	)

	with gr.Row():
	submit_btn = gr.Button("Send", variant="primary")
	clear_chat_btn = gr.Button("Clear Chat")

	# Add footer if images exist
	# (rendered when at least one logo was encoded; a logo that failed to load
	# contributes an empty base64 payload to the template)
	if any([github_logo_encoded, linkedin_logo_encoded, website_logo_encoded]):
	gr.HTML(footer.format(github_logo_encoded, linkedin_logo_encoded, website_logo_encoded))

	# Set up event handlers
	def chat_respond(message, history):
	    """Append one question/answer turn to the chat and clear the input box.

	    Args:
	        message: Text typed by the user; a blank message is ignored.
	        history: Current chat history, extended in place with
	            [message, response] when the message is not blank.

	    Returns:
	        (history, ""), where the empty string is the new value for the
	        message textbox.
	    """
	    if message.strip():
	        answer = respond(message, history)
	        history.append([message, answer])
	    return history, ""

	def clear_chat():
	    """Return an empty chat history and an empty message-box value."""
	    fresh_history = []
	    return fresh_history, ""

	# Event bindings
	# Changing the LLM dropdown updates the stored model and the status box.
	llm_model_dropdown.change(fn=set_llm_model, inputs=[llm_model_dropdown], outputs=[llm_status])

	# "Process Document" parses the upload and reports the outcome.
	btn.click(fn=load_files, inputs=[file_input, embed_model_dropdown], outputs=[output])

	# The "Send" button and pressing Enter in the textbox share one handler.
	for register_send in (submit_btn.click, msg.submit):
	    register_send(fn=chat_respond, inputs=[msg, chatbot], outputs=[chatbot, msg])

	# Reset the upload, the embedding choice and the status box.
	clear_btn.click(fn=clear_file_components, outputs=[file_input, embed_model_dropdown, output])

	# Reset the chat history and the message box.
	clear_chat_btn.click(fn=clear_chat, outputs=[chatbot, msg])

	# Launch the demo (only when this file is run as a script)
	if __name__ == "__main__":
	    launch_options = {
	        "share": True,
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "show_error": True,
	    }
	    demo.launch(**launch_options)