# src/main.py
from fastapi import FastAPI, UploadFile, File, HTTPException, BackgroundTasks
from fastapi.responses import StreamingResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from typing import List
import uuid
from datetime import datetime
from pathlib import Path
import os
import asyncio
# Import custom modules
from src.agents.rag_agent import RAGAgent
from src.models.document import AllDocumentsResponse, StoredDocument
from src.models.UserContact import UserContactRequest
from src.utils.document_processor import DocumentProcessor
from src.utils.conversation_summarizer import ConversationSummarizer
from src.utils.logger import logger
from src.utils.llm_utils import get_llm_instance, get_vector_store
from src.db.mongodb_store import MongoDBStore
from src.implementations.document_service import DocumentService
from src.models import (
ChatRequest,
ChatResponse,
BatchUploadResponse,
SummarizeRequest,
SummaryResponse,
FeedbackRequest
)
from fastapi import Depends
from fastapi.security import APIKeyHeader
from src.utils.database_cleanup import perform_cleanup
from config.config import settings
app = FastAPI(title="Chatbot API")
app.add_middleware(
CORSMiddleware,
allow_origins=["http://localhost:8080"], # Add your frontend URL
allow_credentials=True,
allow_methods=["*"], # Allows all methods
allow_headers=["*"], # Allows all headers
)
# Initialize MongoDB
mongodb = MongoDBStore(settings.MONGODB_URI)
# Initialize core components
doc_processor = DocumentProcessor()
summarizer = ConversationSummarizer()
document_service = DocumentService(doc_processor, mongodb)
# Create uploads directory if it doesn't exist
UPLOADS_DIR = Path("uploads")
UPLOADS_DIR.mkdir(exist_ok=True)
# Mount the uploads directory for static file serving
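# Note: mounting static files at "/docs" shadows FastAPI's default Swagger UI route;
# use a different mount path if the interactive API docs are needed.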
app.mount("/docs", StaticFiles(directory=str(UPLOADS_DIR)), name="documents")
# Security setup
API_KEY_HEADER = APIKeyHeader(name="ADMIN_API_KEY")
async def verify_api_key(api_key: str = Depends(API_KEY_HEADER)):
"""Verify admin API key"""
if not settings.ADMIN_API_KEY or api_key != settings.ADMIN_API_KEY:
raise HTTPException(
status_code=403,
detail="Invalid or missing API key"
)
return api_key
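# Clients supply the admin key via the "ADMIN_API_KEY" request header, e.g.:
#   curl -X POST http://localhost:8000/admin/cleanup -H "ADMIN_API_KEY: <your-key>"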
@app.get("/documents")
async def get_all_documents():
"""Get all documents from MongoDB"""
try:
documents = await mongodb.get_all_documents()
formatted_documents = []
for doc in documents:
try:
formatted_doc = {
"document_id": doc.get("document_id"),
"filename": doc.get("filename"),
"content_type": doc.get("content_type"),
"file_size": doc.get("file_size"),
"url_path": doc.get("url_path"),
"upload_timestamp": doc.get("upload_timestamp")
}
formatted_documents.append(formatted_doc)
except Exception as e:
logger.error(f"Error formatting document {doc.get('document_id', 'unknown')}: {str(e)}")
continue
return {
"total_documents": len(formatted_documents),
"documents": formatted_documents
}
except Exception as e:
logger.error(f"Error retrieving documents: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))
@app.get("/documents/{document_id}/download")
async def get_document_file(document_id: str):
"""Serve a document file by its ID"""
try:
# Get document info from MongoDB
doc = await mongodb.get_document(document_id)
if not doc:
raise HTTPException(status_code=404, detail="Document not found")
# Extract filename from url_path
filename = doc["url_path"].split("/")[-1]
file_path = UPLOADS_DIR / filename
if not file_path.exists():
raise HTTPException(
status_code=404,
detail=f"File not found on server: {filename}"
)
return FileResponse(
path=str(file_path),
filename=doc["filename"],
media_type=doc["content_type"]
)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error serving document file: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/documents/upload", response_model=BatchUploadResponse)
async def upload_documents(
files: List[UploadFile] = File(...),
background_tasks: BackgroundTasks = BackgroundTasks()
):
"""Upload and process multiple documents"""
try:
vector_store, _ = await get_vector_store()
response = await document_service.process_documents(
files,
vector_store,
background_tasks
)
return response
except Exception as e:
logger.error(f"Error in document upload: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))
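# Example multipart upload (the "files" form field may be repeated for a batch):
#   curl -X POST http://localhost:8000/documents/upload -F "files=@report.xlsx"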
@app.get("/documentchunks/{document_id}")
async def get_document_chunks(document_id: str):
"""Get all chunks for a specific document"""
try:
vector_store, _ = await get_vector_store()
chunks = vector_store.get_document_chunks(document_id)
if not chunks:
raise HTTPException(status_code=404, detail="Document not found")
return {
"document_id": document_id,
"total_chunks": len(chunks),
"chunks": chunks
}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error retrieving document chunks: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
@app.delete("/documents/{document_id}")
async def delete_document(document_id: str):
"""Delete document from MongoDB, ChromaDB, and physical storage"""
try:
# First get document details from MongoDB to get file path
document = await mongodb.get_document(document_id)
if not document:
raise HTTPException(status_code=404, detail="Document not found")
# Get vector store instance
vector_store, _ = await get_vector_store()
# Delete physical file using document service
deletion_success = await document_service.delete_document(document_id)
if not deletion_success:
logger.warning(f"Failed to delete physical file for document {document_id}")
# Delete from vector store
try:
vector_store.delete_document(document_id)
except Exception as e:
logger.error(f"Error deleting document from vector store: {str(e)}")
raise HTTPException(
status_code=500,
detail=f"Failed to delete document from vector store: {str(e)}"
)
# Delete from MongoDB - don't check return value since document might already be deleted
await mongodb.delete_document(document_id)
return {
"status": "success",
"message": f"Document {document_id} successfully deleted from all stores"
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Error in delete_document endpoint: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))
@app.post("/user/contact", response_model=ChatResponse)
async def create_user_contact(
request: UserContactRequest,
background_tasks: BackgroundTasks
):
"""Create or retrieve user conversation based on contact information"""
try:
# Check for existing user
existing_conversation_id = await mongodb.find_existing_user(
email=request.email,
phone_number=request.phone_number
)
if existing_conversation_id:
chat_request = ChatRequest(
                query=f'A returning user with name: "{request.full_name}", email: "{request.email}" and phone number: "{request.phone_number}" wants support again. Create a welcome-back message for them and ask how you can help them today.',
llm_provider="openai",
max_context_docs=3,
temperature=1.0,
stream=False,
conversation_id=existing_conversation_id
)
else:
# Create new conversation with user information
new_conversation_id = str(uuid.uuid4())
await mongodb.create_conversation(
conversation_id=new_conversation_id,
full_name=request.full_name,
email=request.email,
phone_number=request.phone_number
)
chat_request = ChatRequest(
                query=f'A new user with name: "{request.full_name}", email: "{request.email}" and phone number: "{request.phone_number}" wants support. Create a welcome message for them and ask how you can help them today.',
llm_provider="openai",
max_context_docs=3,
temperature=1.0,
stream=False,
conversation_id=new_conversation_id
)
# Call chat_endpoint with the prepared request
return await chat_endpoint(chat_request, background_tasks)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error in create_user_contact: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(
request: ChatRequest,
background_tasks: BackgroundTasks
):
"""Chat endpoint with RAG support and enhanced Excel handling"""
try:
# Initialize core components
logger.info(f"Initializing vector store and embedding: {str(datetime.now())}")
vector_store, embedding_model = await get_vector_store()
logger.info(f"Initializing LLM: {str(datetime.now())}")
llm = get_llm_instance(request.llm_provider)
# Initialize RAG agent
rag_agent = RAGAgent(
llm=llm,
embedding=embedding_model,
vector_store=vector_store,
mongodb=mongodb
)
# Use provided conversation ID or create new one
conversation_id = request.conversation_id or str(uuid.uuid4())
# Process the query
query = request.query
        # Append general response-style instructions to every query
        query += ". The response should be short and to the point. Make sure not to add any irrelevant information. Keep the introduction concise and friendly."
# Generate response
logger.info(f"Generating response: {str(datetime.now())}")
max_retries = 3
retry_count = 0
response = None
last_error = None
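        # Retry response generation up to max_retries times, pausing briefly between attempts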
while retry_count < max_retries and response is None:
try:
response = await rag_agent.generate_response(
query=query,
conversation_id=conversation_id,
temperature=request.temperature,
max_tokens=request.max_tokens if hasattr(request, 'max_tokens') else None
)
break
except Exception as e:
last_error = e
retry_count += 1
logger.warning(f"Attempt {retry_count} failed: {str(e)}")
await asyncio.sleep(1) # Brief pause before retry
if response is None:
raise last_error or Exception("Failed to generate response after retries")
logger.info(f"Response generated: {str(datetime.now())}")
# Prepare response metadata
metadata = {
'llm_provider': request.llm_provider,
'temperature': request.temperature,
'conversation_id': conversation_id
}
# Add Excel-specific metadata if present
has_excel_content = any(
doc and 'Sheet:' in doc
for doc in (response.context_docs or [])
)
if has_excel_content:
try:
metadata['excel_content'] = True
# Extract Excel-specific insights if available
if hasattr(rag_agent, 'get_excel_insights'):
excel_insights = rag_agent.get_excel_insights(
query=query,
context_docs=response.context_docs
)
if excel_insights:
metadata['excel_insights'] = excel_insights
except Exception as e:
logger.warning(f"Error processing Excel metadata: {str(e)}")
# Store message in chat history
await mongodb.store_message(
conversation_id=conversation_id,
query=request.query,
response=response.response,
context=response.context_docs,
sources=response.sources,
llm_provider=request.llm_provider
)
# Prepare and return response
chat_response = ChatResponse(
response=response.response,
context=response.context_docs,
sources=response.sources,
conversation_id=conversation_id,
timestamp=datetime.now(),
relevant_doc_scores=response.scores if hasattr(response, 'scores') else None,
metadata=metadata
)
# Log completion
logger.info(f"Chat response completed: {str(datetime.now())}")
return chat_response
except Exception as e:
logger.error(f"Error in chat endpoint: {str(e)}", exc_info=True)
# Convert known errors to HTTPException with appropriate status codes
if isinstance(e, ValueError):
raise HTTPException(status_code=400, detail=str(e))
elif isinstance(e, (KeyError, AttributeError)):
raise HTTPException(status_code=500, detail="Internal processing error")
else:
raise HTTPException(status_code=500, detail=str(e))
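# Illustrative /chat request body (fields taken from the ChatRequest usage above;
# the exact schema is defined in src.models):
#   {"query": "Summarize the uploaded Excel sheet", "llm_provider": "openai",
#    "temperature": 0.7, "max_context_docs": 3, "stream": false, "conversation_id": null}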
@app.get("/chat/history/{conversation_id}")
async def get_conversation_history(conversation_id: str):
"""Get complete conversation history"""
history = await mongodb.get_conversation_history(conversation_id)
if not history:
raise HTTPException(status_code=404, detail="Conversation not found")
return {
"conversation_id": conversation_id,
"messages": history
}
@app.post("/chat/summarize", response_model=SummaryResponse)
async def summarize_conversation(request: SummarizeRequest):
"""Generate a summary of a conversation"""
try:
messages = await mongodb.get_messages_for_summary(request.conversation_id)
if not messages:
raise HTTPException(status_code=404, detail="Conversation not found")
summary = await summarizer.summarize_conversation(
messages,
include_metadata=request.include_metadata
)
return SummaryResponse(**summary)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error generating summary: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/chat/feedback/{conversation_id}")
async def submit_feedback(
conversation_id: str,
feedback_request: FeedbackRequest
):
"""Submit feedback for a conversation"""
try:
# Validate conversation exists
conversation = await mongodb.get_conversation_metadata(conversation_id)
if not conversation:
raise HTTPException(status_code=404, detail="Conversation not found")
# Update feedback
success = await mongodb.update_feedback(
conversation_id=conversation_id,
feedback=feedback_request.feedback,
rating=feedback_request.rating
)
if not success:
raise HTTPException(
status_code=500,
detail="Failed to update feedback"
)
return {
"status": "success",
"message": "Feedback submitted successfully",
"data": {
"conversation_id": conversation_id,
"feedback": feedback_request.feedback,
"rating": feedback_request.format_rating()
}
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Error submitting feedback: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))
@app.get("/debug/config")
async def debug_config():
"""Debug endpoint to check configuration"""
debug_info = {
"environment_variables": {
"OPENAI_API_KEY": "[SET]" if os.getenv('OPENAI_API_KEY') else "[NOT SET]",
"OPENAI_MODEL": os.getenv('OPENAI_MODEL', '[NOT SET]')
},
"settings": {
"OPENAI_API_KEY": "[SET]" if settings.OPENAI_API_KEY else "[NOT SET]",
"OPENAI_MODEL": settings.OPENAI_MODEL,
},
"files": {
"env_file_exists": Path('.env').exists(),
"openai_config_exists": (Path.home() / '.openai' / 'api_key').exists()
}
}
if settings.OPENAI_API_KEY:
key = settings.OPENAI_API_KEY
debug_info["api_key_info"] = {
"length": len(key),
"preview": f"{key[:4]}...{key[-4:]}" if len(key) > 8 else "[INVALID LENGTH]"
}
return debug_info
@app.post("/admin/cleanup")
async def cleanup_databases(
include_files: bool = True,
api_key: str = Depends(verify_api_key)
):
"""
Clean up all data from ChromaDB and MongoDB
Args:
include_files (bool): Whether to also delete uploaded files
"""
try:
result = await perform_cleanup(mongodb, include_files)
return result
except Exception as e:
logger.error(f"Error in cleanup operation: {str(e)}")
raise HTTPException(
status_code=500,
detail=f"Error during cleanup: {str(e)}"
)
@app.get("/health")
async def health_check():
"""Health check endpoint"""
return {"status": "healthy"}
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8000)