# src/main.py
from fastapi import FastAPI, UploadFile, File, HTTPException, BackgroundTasks
from fastapi.responses import StreamingResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from typing import List
import uuid
from datetime import datetime
from pathlib import Path
import os
import asyncio
# Import custom modules
from src.agents.rag_agent import RAGAgent
from src.models.document import AllDocumentsResponse, StoredDocument
from src.models.UserContact import UserContactRequest
from src.utils.document_processor import DocumentProcessor
from src.utils.conversation_summarizer import ConversationSummarizer
from src.utils.logger import logger
from src.utils.llm_utils import get_llm_instance, get_vector_store
from src.db.mongodb_store import MongoDBStore
from src.implementations.document_service import DocumentService
from src.models import (
ChatRequest,
ChatResponse,
BatchUploadResponse,
SummarizeRequest,
SummaryResponse,
FeedbackRequest
)
from fastapi import Depends
from fastapi.security import APIKeyHeader
from src.utils.database_cleanup import perform_cleanup
from config.config import settings
app = FastAPI(title="Chatbot API")
app.add_middleware(
CORSMiddleware,
allow_origins=["http://localhost:8080"], # Add your frontend URL
allow_credentials=True,
allow_methods=["*"], # Allows all methods
allow_headers=["*"], # Allows all headers
)
# Initialize MongoDB
mongodb = MongoDBStore(settings.MONGODB_URI)
# Initialize core components
doc_processor = DocumentProcessor()
summarizer = ConversationSummarizer()
document_service = DocumentService(doc_processor, mongodb)
# Create uploads directory if it doesn't exist
UPLOADS_DIR = Path("uploads")
UPLOADS_DIR.mkdir(exist_ok=True)
# Mount the uploads directory for static file serving
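# Note: mounting static files at "/docs" shadows FastAPI's default Swagger UI route;
# use a different mount path if the interactive API docs are needed.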
app.mount("/docs", StaticFiles(directory=str(UPLOADS_DIR)), name="documents")
# Security setup
API_KEY_HEADER = APIKeyHeader(name="ADMIN_API_KEY")
async def verify_api_key(api_key: str = Depends(API_KEY_HEADER)):
"""Verify admin API key"""
if not settings.ADMIN_API_KEY or api_key != settings.ADMIN_API_KEY:
raise HTTPException(
status_code=403,
detail="Invalid or missing API key"
)
return api_key
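# Clients supply the admin key via the "ADMIN_API_KEY" request header, e.g.:
#   curl -X POST http://localhost:8000/admin/cleanup -H "ADMIN_API_KEY: <your-key>"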
@app.get("/documents")
async def get_all_documents():
"""Get all documents from MongoDB"""
try:
documents = await mongodb.get_all_documents()
formatted_documents = []
for doc in documents:
try:
formatted_doc = {
"document_id": doc.get("document_id"),
"filename": doc.get("filename"),
"content_type": doc.get("content_type"),
"file_size": doc.get("file_size"),
"url_path": doc.get("url_path"),
"upload_timestamp": doc.get("upload_timestamp")
}
formatted_documents.append(formatted_doc)
except Exception as e:
logger.error(f"Error formatting document {doc.get('document_id', 'unknown')}: {str(e)}")
continue
return {
"total_documents": len(formatted_documents),
"documents": formatted_documents
}
except Exception as e:
logger.error(f"Error retrieving documents: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))
@app.get("/documents/{document_id}/download")
async def get_document_file(document_id: str):
"""Serve a document file by its ID"""
try:
# Get document info from MongoDB
doc = await mongodb.get_document(document_id)
if not doc:
raise HTTPException(status_code=404, detail="Document not found")
# Extract filename from url_path
filename = doc["url_path"].split("/")[-1]
file_path = UPLOADS_DIR / filename
if not file_path.exists():
raise HTTPException(
status_code=404,
detail=f"File not found on server: {filename}"
)
return FileResponse(
path=str(file_path),
filename=doc["filename"],
media_type=doc["content_type"]
)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error serving document file: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/documents/upload", response_model=BatchUploadResponse)
async def upload_documents(
files: List[UploadFile] = File(...),
background_tasks: BackgroundTasks = BackgroundTasks()
):
"""Upload and process multiple documents"""
try:
vector_store, _ = await get_vector_store()
response = await document_service.process_documents(
files,
vector_store,
background_tasks
)
return response
except Exception as e:
logger.error(f"Error in document upload: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))
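# Example multipart upload (the "files" form field may be repeated for a batch):
#   curl -X POST http://localhost:8000/documents/upload -F "files=@report.xlsx"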
@app.get("/documentchunks/{document_id}")
async def get_document_chunks(document_id: str):
"""Get all chunks for a specific document"""
try:
vector_store, _ = await get_vector_store()
chunks = vector_store.get_document_chunks(document_id)
if not chunks:
raise HTTPException(status_code=404, detail="Document not found")
return {
"document_id": document_id,
"total_chunks": len(chunks),
"chunks": chunks
}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error retrieving document chunks: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
@app.delete("/documents/{document_id}")
async def delete_document(document_id: str):
"""Delete document from MongoDB, ChromaDB, and physical storage"""
try:
# First get document details from MongoDB to get file path
document = await mongodb.get_document(document_id)
if not document:
raise HTTPException(status_code=404, detail="Document not found")
# Get vector store instance
vector_store, _ = await get_vector_store()
# Delete physical file using document service
deletion_success = await document_service.delete_document(document_id)
if not deletion_success:
logger.warning(f"Failed to delete physical file for document {document_id}")
# Delete from vector store
try:
vector_store.delete_document(document_id)
except Exception as e:
logger.error(f"Error deleting document from vector store: {str(e)}")
raise HTTPException(
status_code=500,
detail=f"Failed to delete document from vector store: {str(e)}"
)
# Delete from MongoDB - don't check return value since document might already be deleted
await mongodb.delete_document(document_id)
return {
"status": "success",
"message": f"Document {document_id} successfully deleted from all stores"
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Error in delete_document endpoint: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))
@app.post("/user/contact", response_model=ChatResponse)
async def create_user_contact(
request: UserContactRequest,
background_tasks: BackgroundTasks
):
"""Create or retrieve user conversation based on contact information"""
try:
# Check for existing user
existing_conversation_id = await mongodb.find_existing_user(
email=request.email,
phone_number=request.phone_number
)
if existing_conversation_id:
chat_request = ChatRequest(
                query=f'A returning user with name: "{request.full_name}", email: "{request.email}" and phone number: "{request.phone_number}" wants support again. Create a welcome-back message for them and ask how you can help them today.',
llm_provider="openai",
max_context_docs=3,
temperature=1.0,
stream=False,
conversation_id=existing_conversation_id
)
else:
# Create new conversation with user information
new_conversation_id = str(uuid.uuid4())
await mongodb.create_conversation(
conversation_id=new_conversation_id,
full_name=request.full_name,
email=request.email,
phone_number=request.phone_number
)
chat_request = ChatRequest(
                query=f'A new user with name: "{request.full_name}", email: "{request.email}" and phone number: "{request.phone_number}" wants support. Create a welcome message for them and ask how you can help them today.',
llm_provider="openai",
max_context_docs=3,
temperature=1.0,
stream=False,
conversation_id=new_conversation_id
)
# Call chat_endpoint with the prepared request
return await chat_endpoint(chat_request, background_tasks)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error in create_user_contact: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(
request: ChatRequest,
background_tasks: BackgroundTasks
):
"""Chat endpoint with RAG support and enhanced Excel handling"""
try:
# Initialize core components
logger.info(f"Initializing vector store and embedding: {str(datetime.now())}")
vector_store, embedding_model = await get_vector_store()
logger.info(f"Initializing LLM: {str(datetime.now())}")
llm = get_llm_instance(request.llm_provider)
# Initialize RAG agent
rag_agent = RAGAgent(
llm=llm,
embedding=embedding_model,
vector_store=vector_store,
mongodb=mongodb
)
# Use provided conversation ID or create new one
conversation_id = request.conversation_id or str(uuid.uuid4())
# Process the query
query = request.query
        # Append general response-style instructions to every query
        query += ". The response should be short and to the point. Make sure not to add any irrelevant information. Keep the introduction concise and friendly."
# Generate response
logger.info(f"Generating response: {str(datetime.now())}")
max_retries = 3
retry_count = 0
response = None
last_error = None
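        # Retry response generation up to max_retries times, pausing briefly between attempts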
while retry_count < max_retries and response is None:
try:
response = await rag_agent.generate_response(
query=query,
conversation_id=conversation_id,
temperature=request.temperature,
max_tokens=request.max_tokens if hasattr(request, 'max_tokens') else None
)
break
except Exception as e:
last_error = e
retry_count += 1
logger.warning(f"Attempt {retry_count} failed: {str(e)}")
await asyncio.sleep(1) # Brief pause before retry
if response is None:
raise last_error or Exception("Failed to generate response after retries")
logger.info(f"Response generated: {str(datetime.now())}")
# Prepare response metadata
metadata = {
'llm_provider': request.llm_provider,
'temperature': request.temperature,
'conversation_id': conversation_id
}
# Add Excel-specific metadata if present
has_excel_content = any(
doc and 'Sheet:' in doc
for doc in (response.context_docs or [])
)
if has_excel_content:
try:
metadata['excel_content'] = True
# Extract Excel-specific insights if available
if hasattr(rag_agent, 'get_excel_insights'):
excel_insights = rag_agent.get_excel_insights(
query=query,
context_docs=response.context_docs
)
if excel_insights:
metadata['excel_insights'] = excel_insights
except Exception as e:
logger.warning(f"Error processing Excel metadata: {str(e)}")
# Store message in chat history
await mongodb.store_message(
conversation_id=conversation_id,
query=request.query,
response=response.response,
context=response.context_docs,
sources=response.sources,
llm_provider=request.llm_provider
)
# Prepare and return response
chat_response = ChatResponse(
response=response.response,
context=response.context_docs,
sources=response.sources,
conversation_id=conversation_id,
timestamp=datetime.now(),
relevant_doc_scores=response.scores if hasattr(response, 'scores') else None,
metadata=metadata
)
# Log completion
logger.info(f"Chat response completed: {str(datetime.now())}")
return chat_response
except Exception as e:
logger.error(f"Error in chat endpoint: {str(e)}", exc_info=True)
# Convert known errors to HTTPException with appropriate status codes
if isinstance(e, ValueError):
raise HTTPException(status_code=400, detail=str(e))
elif isinstance(e, (KeyError, AttributeError)):
raise HTTPException(status_code=500, detail="Internal processing error")
else:
raise HTTPException(status_code=500, detail=str(e))
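# Illustrative /chat request body (fields taken from the ChatRequest usage above;
# the exact schema is defined in src.models):
#   {"query": "Summarize the uploaded Excel sheet", "llm_provider": "openai",
#    "temperature": 0.7, "max_context_docs": 3, "stream": false, "conversation_id": null}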
@app.get("/chat/history/{conversation_id}")
async def get_conversation_history(conversation_id: str):
"""Get complete conversation history"""
history = await mongodb.get_conversation_history(conversation_id)
if not history:
raise HTTPException(status_code=404, detail="Conversation not found")
return {
"conversation_id": conversation_id,
"messages": history
}
@app.post("/chat/summarize", response_model=SummaryResponse)
async def summarize_conversation(request: SummarizeRequest):
"""Generate a summary of a conversation"""
try:
messages = await mongodb.get_messages_for_summary(request.conversation_id)
if not messages:
raise HTTPException(status_code=404, detail="Conversation not found")
summary = await summarizer.summarize_conversation(
messages,
include_metadata=request.include_metadata
)
return SummaryResponse(**summary)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error generating summary: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/chat/feedback/{conversation_id}")
async def submit_feedback(
conversation_id: str,
feedback_request: FeedbackRequest
):
"""Submit feedback for a conversation"""
try:
# Validate conversation exists
conversation = await mongodb.get_conversation_metadata(conversation_id)
if not conversation:
raise HTTPException(status_code=404, detail="Conversation not found")
# Update feedback
success = await mongodb.update_feedback(
conversation_id=conversation_id,
feedback=feedback_request.feedback,
rating=feedback_request.rating
)
if not success:
raise HTTPException(
status_code=500,
detail="Failed to update feedback"
)
return {
"status": "success",
"message": "Feedback submitted successfully",
"data": {
"conversation_id": conversation_id,
"feedback": feedback_request.feedback,
"rating": feedback_request.format_rating()
}
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Error submitting feedback: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))
@app.get("/debug/config")
async def debug_config():
"""Debug endpoint to check configuration"""
debug_info = {
"environment_variables": {
"OPENAI_API_KEY": "[SET]" if os.getenv('OPENAI_API_KEY') else "[NOT SET]",
"OPENAI_MODEL": os.getenv('OPENAI_MODEL', '[NOT SET]')
},
"settings": {
"OPENAI_API_KEY": "[SET]" if settings.OPENAI_API_KEY else "[NOT SET]",
"OPENAI_MODEL": settings.OPENAI_MODEL,
},
"files": {
"env_file_exists": Path('.env').exists(),
"openai_config_exists": (Path.home() / '.openai' / 'api_key').exists()
}
}
if settings.OPENAI_API_KEY:
key = settings.OPENAI_API_KEY
debug_info["api_key_info"] = {
"length": len(key),
"preview": f"{key[:4]}...{key[-4:]}" if len(key) > 8 else "[INVALID LENGTH]"
}
return debug_info
@app.post("/admin/cleanup")
async def cleanup_databases(
include_files: bool = True,
api_key: str = Depends(verify_api_key)
):
"""
Clean up all data from ChromaDB and MongoDB
Args:
include_files (bool): Whether to also delete uploaded files
"""
try:
result = await perform_cleanup(mongodb, include_files)
return result
except Exception as e:
logger.error(f"Error in cleanup operation: {str(e)}")
raise HTTPException(
status_code=500,
detail=f"Error during cleanup: {str(e)}"
)
@app.get("/health")
async def health_check():
"""Health check endpoint"""
return {"status": "healthy"}
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8000)