SUBHRAJIT MOHANTY committed on
Commit 5dbc569 · 1 Parent(s): 86e4192

app.py updated
Files changed (1)
  1. app.py +486 -299

app.py CHANGED
@@ -1,4 +1,4 @@
- from fastapi import FastAPI, HTTPException, Request
  from fastapi.responses import StreamingResponse
  from pydantic import BaseModel, Field
  from typing import List, Optional, Dict, Any, AsyncGenerator
@@ -8,6 +8,8 @@ import uuid
  from datetime import datetime
  import os
  from contextlib import asynccontextmanager

  # Third-party imports
  from openai import AsyncOpenAI
@@ -17,6 +19,7 @@ from sentence_transformers import SentenceTransformer
  import torch
  import asyncio
  from concurrent.futures import ThreadPoolExecutor

  # Models for OpenAI-compatible API
  class Message(BaseModel):
@@ -46,6 +49,14 @@ class ChatCompletionChunk(BaseModel):
      model: str
      choices: List[Dict[str, Any]]

  # Configuration
  class Config:
      GROQ_API_KEY = os.getenv("GROQ_API_KEY")
@@ -64,127 +75,11 @@ class ApplicationState:
          self.openai_client = None
          self.qdrant_client = None
          self.embedding_service = None

  # Global state instance
  app_state = ApplicationState()

- @asynccontextmanager
- async def lifespan(app: FastAPI):
-     # Startup
-     if not Config.GROQ_API_KEY:
-         raise ValueError("GROQ_API_KEY environment variable is required")
-
-     print("Initializing services...")
-
-     # Initialize OpenAI client with Groq endpoint
-     try:
-         print(f"Configuring OpenAI client with:")
-         print(f"  Base URL: {Config.GROQ_BASE_URL}")
-         print(f"  API Key: {'*' * 10}...{Config.GROQ_API_KEY[-4:] if Config.GROQ_API_KEY else 'None'}")
-
-         app_state.openai_client = AsyncOpenAI(
-             api_key=Config.GROQ_API_KEY,
-             base_url=Config.GROQ_BASE_URL,
-             timeout=60.0  # Add timeout
-         )
-         print("✓ OpenAI client initialized with Groq endpoint")
-
-         # Test the client with a simple request
-         try:
-             test_response = await app_state.openai_client.chat.completions.create(
-                 model="mixtral-8x7b-32768",
-                 messages=[{"role": "user", "content": "Hello"}],
-                 max_tokens=10
-             )
-             print(f"✓ OpenAI client test successful - Response ID: {test_response.id}")
-         except Exception as test_error:
-             print(f"⚠ OpenAI client test failed: {test_error}")
-             print("  This might cause issues with chat completions")
-
-     except Exception as e:
-         print(f"✗ Error initializing OpenAI client: {e}")
-         print(f"  Error type: {type(e)}")
-         raise e
-
-     # Initialize Qdrant client
-     try:
-         app_state.qdrant_client = AsyncQdrantClient(
-             url=Config.QDRANT_URL,
-             api_key=Config.QDRANT_API_KEY
-         )
-         print("✓ Qdrant client initialized")
-     except Exception as e:
-         print(f"✗ Error initializing Qdrant client: {e}")
-         raise e
-
-     # Initialize embedding service
-     try:
-         print("Loading embedding model...")
-         app_state.embedding_service = EmbeddingService()
-         print(f"✓ Embedding model loaded: {Config.EMBEDDING_MODEL}")
-         print(f"✓ Model device: {Config.DEVICE}")
-         print(f"✓ Vector dimension: {app_state.embedding_service.dimension}")
-     except Exception as e:
-         print(f"✗ Error initializing embedding service: {e}")
-         raise e  # Fail fast if embedding service can't be initialized
-
-     # Verify Qdrant connection and auto-create collection
-     try:
-         collections = await app_state.qdrant_client.get_collections()
-         collection_names = [c.name for c in collections.collections]
-         print(f"✓ Connected to Qdrant. Available collections: {collection_names}")
-
-         # Check if our collection exists, if not create it
-         if Config.COLLECTION_NAME not in collection_names:
-             print(f"📁 Collection '{Config.COLLECTION_NAME}' not found. Creating automatically...")
-             try:
-                 from qdrant_client.models import VectorParams, Distance
-
-                 await app_state.qdrant_client.create_collection(
-                     collection_name=Config.COLLECTION_NAME,
-                     vectors_config=VectorParams(
-                         size=app_state.embedding_service.dimension,
-                         distance=Distance.COSINE
-                     )
-                 )
-                 print(f"✓ Collection '{Config.COLLECTION_NAME}' created successfully!")
-                 print(f"✓ Vector dimension: {app_state.embedding_service.dimension}")
-                 print(f"✓ Distance metric: COSINE")
-             except Exception as create_error:
-                 print(f"✗ Failed to create collection: {create_error}")
-                 print("⚠ You may need to create the collection manually")
-         else:
-             print(f"✓ Collection '{Config.COLLECTION_NAME}' already exists")
-
-     except Exception as e:
-         print(f"⚠ Warning: Could not connect to Qdrant: {e}")
-         print("⚠ Collection auto-creation skipped")
-
-     print("🚀 All services initialized successfully!")
-
-     yield
-
-     # Shutdown
-     print("Shutting down services...")
-     if app_state.qdrant_client:
-         await app_state.qdrant_client.close()
-         print("✓ Qdrant client closed")
-     if app_state.openai_client:
-         await app_state.openai_client.close()
-         print("✓ OpenAI client closed")
-     if app_state.embedding_service and hasattr(app_state.embedding_service, 'executor'):
-         app_state.embedding_service.executor.shutdown(wait=True)
-         print("✓ Embedding service executor shutdown")
-     print("✓ Shutdown complete")
-
- # Initialize FastAPI app
- app = FastAPI(
-     title="RAG API with Groq and Qdrant",
-     description="OpenAI-compatible API for RAG using Groq and Qdrant",
-     version="1.0.0",
-     lifespan=lifespan
- )
-
  class EmbeddingService:
      """Service for generating embeddings using sentence-transformers"""
@@ -202,7 +97,6 @@ class EmbeddingService:
      async def get_embedding(self, text: str) -> List[float]:
          """Generate embedding for given text"""
          try:
-             # Run the synchronous model.encode in a thread pool
              loop = asyncio.get_event_loop()
              embedding = await loop.run_in_executor(
                  self.executor,
@@ -247,7 +141,6 @@ class EmbeddingService:
      def health_check(self) -> dict:
          """Check embedding service health"""
          try:
-             # Test encoding
              test_embedding = self.model.encode(["test"])
              return {
                  "status": "healthy",
@@ -263,94 +156,384 @@ class EmbeddingService:
              "error": str(e)
          }

- class RAGService:
-     """Service for retrieval-augmented generation"""
-
-     @staticmethod
-     async def retrieve_relevant_chunks(query: str, top_k: int = Config.TOP_K) -> List[str]:
-         """Retrieve relevant document chunks from Qdrant"""
          try:
-             # Check if embedding service is initialized
-             if app_state.embedding_service is None:
-                 print("Error: Embedding service is not initialized")
-                 return []
-
-             # Auto-create collection if it doesn't exist
-             await RAGService._ensure_collection_exists()
-
-             # Get query embedding - all-MiniLM works well without special prefixes
-             query_embedding = await app_state.embedding_service.get_query_embedding(query)

              # Search in Qdrant
-             search_results = await app_state.qdrant_client.search(
-                 collection_name=Config.COLLECTION_NAME,
                  query_vector=query_embedding,
-                 limit=top_k,
-                 score_threshold=Config.SIMILARITY_THRESHOLD
              )

-             # Extract content from results
-             chunks = []
              for result in search_results:
-                 if hasattr(result, 'payload') and 'content' in result.payload:
-                     chunks.append(result.payload['content'])
-                 elif hasattr(result, 'payload') and 'text' in result.payload:
-                     chunks.append(result.payload['text'])

-             print(f"Retrieved {len(chunks)} relevant chunks for query")
-             return chunks

          except Exception as e:
-             print(f"Error retrieving chunks: {e}")
              return []

      @staticmethod
-     async def _ensure_collection_exists():
-         """Ensure the collection exists, create if it doesn't"""
          try:
-             # Check if collection exists
-             collections = await app_state.qdrant_client.get_collections()
-             collection_names = [c.name for c in collections.collections]

-             if Config.COLLECTION_NAME not in collection_names:
-                 print(f"Creating collection '{Config.COLLECTION_NAME}' on-demand...")
-                 from qdrant_client.models import VectorParams, Distance
-
-                 await app_state.qdrant_client.create_collection(
-                     collection_name=Config.COLLECTION_NAME,
-                     vectors_config=VectorParams(
-                         size=app_state.embedding_service.dimension,
-                         distance=Distance.COSINE
-                     )
-                 )
-                 print(f"✓ Collection '{Config.COLLECTION_NAME}' created successfully!")
-
          except Exception as e:
-             print(f"Warning: Could not ensure collection exists: {e}")
-             # Continue anyway - the operation might still work

      @staticmethod
-     def build_context_prompt(query: str, chunks: List[str]) -> str:
          """Build a context-aware prompt with retrieved chunks"""
-         if not chunks:
              return query

-         context = "\n\n".join([f"Document {i+1}: {chunk}" for i, chunk in enumerate(chunks)])

-         prompt = f"""Based on the following documents, please answer the user's question. If the information is not available in the documents, please say so.

- Context Documents:
- {context}

- User Question: {query}

- Please provide a helpful and accurate response based on the context provided."""

          return prompt

  @app.get("/")
  async def root():
-     return {"message": "RAG API with Groq and Qdrant", "status": "running"}

  @app.get("/health")
  async def health_check():
@@ -379,7 +562,6 @@ async def health_check():
          openai_health = {"status": "not_initialized", "error": "OpenAI client is None"}
      else:
          try:
-             # Quick test of OpenAI client
              test_response = await app_state.openai_client.chat.completions.create(
                  model="mixtral-8x7b-32768",
                  messages=[{"role": "user", "content": "test"}],
@@ -394,6 +576,7 @@ async def health_check():
          "openai_client": openai_health,
          "qdrant": qdrant_status,
          "embedding_service": embedding_health,
          "collection": Config.COLLECTION_NAME,
          "embedding_model": Config.EMBEDDING_MODEL,
          "groq_endpoint": Config.GROQ_BASE_URL
@@ -401,7 +584,7 @@ async def health_check():

  @app.post("/v1/chat/completions")
  async def chat_completions(request: ChatCompletionRequest):
-     """OpenAI-compatible chat completions endpoint with RAG"""

      if not app_state.openai_client:
          raise HTTPException(status_code=500, detail="OpenAI client not initialized")
@@ -415,17 +598,17 @@ async def chat_completions(request: ChatCompletionRequest):
      last_user_message = user_messages[-1].content
      print(f"Processing query: {last_user_message[:100]}...")

-     # Retrieve relevant chunks
      try:
-         relevant_chunks = await RAGService.retrieve_relevant_chunks(last_user_message)
-         print(f"Retrieved {len(relevant_chunks)} chunks")
      except Exception as e:
          print(f"Error in retrieval: {e}")
-         relevant_chunks = []

      # Build context-aware prompt
-     if relevant_chunks:
-         context_prompt = RAGService.build_context_prompt(last_user_message, relevant_chunks)
          enhanced_messages = request.messages[:-1] + [Message(role="user", content=context_prompt)]
          print("Using context-enhanced prompt")
      else:
@@ -448,7 +631,6 @@ async def chat_completions(request: ChatCompletionRequest):
          raise
      except Exception as e:
          print(f"Unexpected error in chat_completions: {e}")
-         print(f"Error type: {type(e)}")
          import traceback
          traceback.print_exc()
          raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
@@ -456,10 +638,6 @@ async def chat_completions(request: ChatCompletionRequest):
  async def create_chat_completion(messages: List[Dict], request: ChatCompletionRequest) -> ChatCompletionResponse:
      """Create a non-streaming chat completion"""
      try:
-         print(f"Calling OpenAI API with model: {request.model}")
-         print(f"Messages count: {len(messages)}")
-         print(f"Max tokens: {request.max_tokens}")
-
          response = await app_state.openai_client.chat.completions.create(
              model=request.model,
              messages=messages,
@@ -469,12 +647,6 @@ async def create_chat_completion(messages: List[Dict], request: ChatCompletionRequest) -> ChatCompletionResponse:
              stream=False
          )

-         print(f"Received response from OpenAI API")
-         print(f"Response ID: {response.id}")
-         print(f"Response model: {response.model}")
-         print(f"Choices count: {len(response.choices)}")
-
-         # Convert response to OpenAI format (already compatible)
          result = ChatCompletionResponse(
              id=response.id,
              created=response.created,
@@ -494,14 +666,10 @@ async def create_chat_completion(messages: List[Dict], request: ChatCompletionRequest) -> ChatCompletionResponse:
              } if response.usage else None
          )

-         print(f"Successfully created response")
          return result

      except Exception as e:
          print(f"Error in create_chat_completion: {e}")
-         print(f"Error type: {type(e)}")
-         import traceback
-         traceback.print_exc()
          raise HTTPException(status_code=500, detail=f"Error calling OpenAI API: {str(e)}")

  async def stream_chat_completion(messages: List[Dict], request: ChatCompletionRequest) -> AsyncGenerator[str, None]:
@@ -536,7 +704,6 @@ async def stream_chat_completion(messages: List[Dict], request: ChatCompletionRequest) -> AsyncGenerator[str, None]:

              yield f"data: {chunk_response.model_dump_json()}\n\n"

-         # Send final chunk
          yield "data: [DONE]\n\n"

      except Exception as e:
@@ -549,132 +716,153 @@ async def stream_chat_completion(messages: List[Dict], request: ChatCompletionRequest) -> AsyncGenerator[str, None]:
          }
          yield f"data: {json.dumps(error_chunk)}\n\n"

- # Additional endpoints for managing the vector database
- @app.post("/v1/embeddings/add")
- async def add_document(content: str, metadata: Optional[Dict] = None):
-     """Add a document to the vector database"""
      try:
-         # Check if embedding service is initialized
-         if app_state.embedding_service is None:
-             raise HTTPException(status_code=500, detail="Embedding service is not initialized")

-         # Auto-create collection if it doesn't exist
-         await RAGService._ensure_collection_exists()

-         # Generate embedding for document
-         embedding = await app_state.embedding_service.get_document_embedding(content)

-         # Create point
-         point = PointStruct(
-             id=str(uuid.uuid4()),
-             vector=embedding,
-             payload={
-                 "content": content,
-                 "metadata": metadata or {},
-                 "timestamp": datetime.now().isoformat()
              }
-         )

-         # Insert into Qdrant
-         await app_state.qdrant_client.upsert(
-             collection_name=Config.COLLECTION_NAME,
-             points=[point]
          )

-         return {"message": "Document added successfully", "id": point.id}

      except Exception as e:
-         raise HTTPException(status_code=500, detail=f"Error adding document: {str(e)}")

- @app.post("/v1/embeddings/batch_add")
- async def batch_add_documents(documents: List[Dict[str, Any]]):
-     """Add multiple documents to the vector database"""
      try:
-         # Check if embedding service is initialized
-         if app_state.embedding_service is None:
-             raise HTTPException(status_code=500, detail="Embedding service is not initialized")
-
-         # Auto-create collection if it doesn't exist
-         await RAGService._ensure_collection_exists()
-
-         # Extract texts and metadata
-         texts = [doc.get("content", "") for doc in documents]
-         metadatas = [doc.get("metadata", {}) for doc in documents]
-
-         # Generate embeddings for all documents
-         embeddings = await app_state.embedding_service.batch_embed(texts)
-
-         # Create points
-         points = []
-         for i, (text, embedding, metadata) in enumerate(zip(texts, embeddings, metadatas)):
-             point = PointStruct(
-                 id=str(uuid.uuid4()),
-                 vector=embedding,
-                 payload={
-                     "content": text,
-                     "metadata": metadata,
-                     "timestamp": datetime.now().isoformat()
-                 }
-             )
-             points.append(point)

-         # Insert all points into Qdrant
-         await app_state.qdrant_client.upsert(
-             collection_name=Config.COLLECTION_NAME,
-             points=points
-         )

          return {
-             "message": f"Successfully added {len(points)} documents",
-             "ids": [point.id for point in points]
          }

      except Exception as e:
-         raise HTTPException(status_code=500, detail=f"Error adding documents: {str(e)}")

- @app.post("/v1/embeddings/create_collection")
- async def create_collection():
-     """Create a new collection in Qdrant with the correct vector size"""
      try:
-         # Check if embedding service is initialized
-         if app_state.embedding_service is None:
-             raise HTTPException(status_code=500, detail="Embedding service is not initialized")

-         from qdrant_client.models import VectorParams, Distance

-         # Check if collection already exists
-         try:
-             collections = await app_state.qdrant_client.get_collections()
-             collection_names = [c.name for c in collections.collections]
-
-             if Config.COLLECTION_NAME in collection_names:
-                 return {
-                     "message": f"Collection '{Config.COLLECTION_NAME}' already exists",
-                     "vector_size": app_state.embedding_service.dimension,
-                     "distance": "cosine",
-                     "status": "exists"
-                 }
-         except Exception as e:
-             print(f"Warning: Could not check existing collections: {e}")

-         # Create the collection
-         await app_state.qdrant_client.create_collection(
              collection_name=Config.COLLECTION_NAME,
-             vectors_config=VectorParams(
-                 size=app_state.embedding_service.dimension,  # 384 for all-MiniLM-L6-v2
-                 distance=Distance.COSINE
-             )
          )

-         return {
-             "message": f"Collection '{Config.COLLECTION_NAME}' created successfully",
-             "vector_size": app_state.embedding_service.dimension,
-             "distance": "cosine",
-             "status": "created"
-         }

      except Exception as e:
-         raise HTTPException(status_code=500, detail=f"Error creating collection: {str(e)}")

  @app.get("/v1/collections/info")
  async def get_collection_info():
@@ -683,8 +871,7 @@ async def get_collection_info():
      if app_state.qdrant_client is None:
          raise HTTPException(status_code=500, detail="Qdrant client is not initialized")

-     # Auto-create collection if it doesn't exist
-     await RAGService._ensure_collection_exists()

      collection_info = await app_state.qdrant_client.get_collection(Config.COLLECTION_NAME)
      return {
 
+ from fastapi import FastAPI, HTTPException, Request, UploadFile, File
  from fastapi.responses import StreamingResponse
  from pydantic import BaseModel, Field
  from typing import List, Optional, Dict, Any, AsyncGenerator

  from datetime import datetime
  import os
  from contextlib import asynccontextmanager
+ import tempfile
+ import shutil

  # Third-party imports
  from openai import AsyncOpenAI

  import torch
  import asyncio
  from concurrent.futures import ThreadPoolExecutor
+ import PyPDF2

  # Models for OpenAI-compatible API
  class Message(BaseModel):

      model: str
      choices: List[Dict[str, Any]]

+ class DocumentUploadRequest(BaseModel):
+     metadata: Optional[Dict[str, Any]] = None
+
+ class DocumentSearchRequest(BaseModel):
+     query: str = Field(..., description="Search query")
+     limit: int = Field(default=5, description="Maximum number of results")
+     min_score: float = Field(default=0.1, description="Minimum similarity score")
+
  # Configuration
  class Config:
      GROQ_API_KEY = os.getenv("GROQ_API_KEY")

          self.openai_client = None
          self.qdrant_client = None
          self.embedding_service = None
+         self.document_manager = None

  # Global state instance
  app_state = ApplicationState()

  class EmbeddingService:
      """Service for generating embeddings using sentence-transformers"""
      async def get_embedding(self, text: str) -> List[float]:
          """Generate embedding for given text"""
          try:
              loop = asyncio.get_event_loop()
              embedding = await loop.run_in_executor(
                  self.executor,

      def health_check(self) -> dict:
          """Check embedding service health"""
          try:
              test_embedding = self.model.encode(["test"])
              return {
                  "status": "healthy",

                  "error": str(e)
              }
+ class DocumentManager:
+     """Enhanced document management with async support"""
+
+     def __init__(self, qdrant_client: AsyncQdrantClient, embedding_service: EmbeddingService):
+         self.qdrant_client = qdrant_client
+         self.embedding_service = embedding_service
+         self.collection_name = Config.COLLECTION_NAME
+         self.vector_size = 384
+         self.executor = ThreadPoolExecutor(max_workers=2)
+
+     async def _read_pdf(self, file_path: str) -> str:
+         """Read text from PDF file asynchronously"""
          try:
+             loop = asyncio.get_event_loop()
+             return await loop.run_in_executor(self.executor, self._sync_read_pdf, file_path)
+         except Exception as e:
+             print(f"Error reading PDF {file_path}: {e}")
+             return ""
+
+     def _sync_read_pdf(self, file_path: str) -> str:
+         """Synchronous PDF reading"""
+         try:
+             with open(file_path, 'rb') as file:
+                 pdf_reader = PyPDF2.PdfReader(file)
+                 text = ""
+                 for page in pdf_reader.pages:
+                     text += page.extract_text() + "\n"
+                 return text
+         except Exception as e:
+             print(f"Error reading PDF {file_path}: {e}")
+             return ""
+
+     def _chunk_text(self, text: str, chunk_size: int = 500, overlap: int = 50) -> List[str]:
+         """Split text into chunks"""
+         if len(text) <= chunk_size:
+             return [text]
+
+         chunks = []
+         start = 0
+
+         while start < len(text):
+             end = start + chunk_size

+             if end < len(text):
+                 sentence_end = text.rfind('.', start, end)
+                 if sentence_end > start:
+                     end = sentence_end + 1
+                 else:
+                     word_end = text.rfind(' ', start, end)
+                     if word_end > start:
+                         end = word_end

+             chunk = text[start:end].strip()
+             if chunk:
+                 chunks.append(chunk)
+
+             # Step back by `overlap`, but always advance at least one character:
+             # if a sentence boundary lands within `overlap` chars of `start`,
+             # `end - overlap` would not move forward and the loop would never end
+             start = max(end - overlap, start + 1)
+
+         return chunks
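The chunker above walks the text in chunk_size windows, preferring to cut at a sentence boundary, then a word boundary, and re-reading the last overlap characters of each chunk. A minimal smoke test (not part of the commit; it assumes this file is importable as app.py and exploits the fact that _chunk_text never touches self):

# Hypothetical sanity check for _chunk_text; run in a REPL next to app.py.
from app import DocumentManager

sample = "This is a sentence. " * 120  # ~2400 characters of toy input
chunks = DocumentManager._chunk_text(None, sample, chunk_size=500, overlap=50)

print(len(chunks))                         # several chunks, not one blob
print(all(len(c) <= 500 for c in chunks))  # True: a chunk never exceeds chunk_size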
+     async def _ensure_collection_exists(self):
+         """Ensure the collection exists, create if it doesn't"""
+         try:
+             collections = await self.qdrant_client.get_collections()
+             collection_names = [c.name for c in collections.collections]
+
+             if self.collection_name not in collection_names:
+                 print(f"Creating collection '{self.collection_name}' on-demand...")
+                 await self.qdrant_client.create_collection(
+                     collection_name=self.collection_name,
+                     vectors_config=VectorParams(
+                         size=self.vector_size,
+                         distance=Distance.COSINE
+                     )
+                 )
+                 print(f"✓ Collection '{self.collection_name}' created successfully!")
+         except Exception as e:
+             print(f"Warning: Could not ensure collection exists: {e}")
+
+     async def add_document(self, file_path: str, metadata: Dict[str, Any] = None) -> str:
+         """Add a PDF document to the collection"""
+         try:
+             await self._ensure_collection_exists()
+
+             # Read PDF
+             text = await self._read_pdf(file_path)
+             if not text:
+                 print(f"Could not extract text from {file_path}")
+                 return ""
+
+             # Create chunks
+             chunks = self._chunk_text(text)
+             if not chunks:
+                 print(f"No chunks created from {file_path}")
+                 return ""
+
+             # Generate document ID
+             document_id = str(uuid.uuid4())
+
+             # Create embeddings for all chunks
+             embeddings = await self.embedding_service.batch_embed(chunks)
+
+             # Create points for each chunk
+             points = []
+             for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
+                 payload = {
+                     "document_id": document_id,
+                     "file_path": file_path,
+                     "chunk_index": i,
+                     "content": chunk,  # Use 'content' as the main field
+                     "chunk_text": chunk,  # Keep for compatibility
+                     "total_chunks": len(chunks),
+                     "timestamp": datetime.now().isoformat()
+                 }
+
+                 if metadata:
+                     payload["metadata"] = metadata
+
+                 point = PointStruct(
+                     id=str(uuid.uuid4()),
+                     vector=embedding,
+                     payload=payload
+                 )
+                 points.append(point)
+
+             # Insert into Qdrant
+             await self.qdrant_client.upsert(collection_name=self.collection_name, points=points)
+
+             print(f"✓ Added document: {file_path}")
+             print(f"  Document ID: {document_id}")
+             print(f"  Chunks: {len(chunks)}")
+
+             return document_id
+
+         except Exception as e:
+             print(f"Error adding document {file_path}: {e}")
+             return ""
+
+     async def search_documents(self, query: str, limit: int = 5, min_score: float = 0.1) -> List[Dict[str, Any]]:
+         """Search for relevant document chunks"""
+         try:
+             await self._ensure_collection_exists()
+
+             # Generate query embedding
+             query_embedding = await self.embedding_service.get_query_embedding(query)

              # Search in Qdrant
+             search_results = await self.qdrant_client.search(
+                 collection_name=self.collection_name,
                  query_vector=query_embedding,
+                 limit=limit,
+                 score_threshold=min_score
              )

+             # Format results
+             results = []
              for result in search_results:
+                 results.append({
+                     "score": result.score,
+                     "text": result.payload.get("content", result.payload.get("chunk_text", "")),
+                     "file_path": result.payload.get("file_path", ""),
+                     "document_id": result.payload.get("document_id", ""),
+                     "chunk_index": result.payload.get("chunk_index", 0)
+                 })

+             print(f"  Found {len(results)} results for query: '{query}'")
+             return results

          except Exception as e:
+             print(f"Error searching: {e}")
+             return []
+
+     async def list_documents(self) -> List[Dict[str, Any]]:
+         """List all documents in the collection"""
+         try:
+             await self._ensure_collection_exists()
+
+             # Get all points
+             points, _ = await self.qdrant_client.scroll(
+                 collection_name=self.collection_name,
+                 limit=10000,
+                 with_payload=True,
+                 with_vectors=False
+             )
+
+             # Group by document_id
+             documents = {}
+             for point in points:
+                 doc_id = point.payload.get("document_id")
+                 if doc_id and doc_id not in documents:
+                     documents[doc_id] = {
+                         "document_id": doc_id,
+                         "file_path": point.payload.get("file_path", ""),
+                         "total_chunks": point.payload.get("total_chunks", 0),
+                         "timestamp": point.payload.get("timestamp", ""),
+                         "metadata": point.payload.get("metadata", {})
+                     }
+
+             doc_list = list(documents.values())
+             print(f"✓ Found {len(doc_list)} documents")
+             return doc_list
+
+         except Exception as e:
+             print(f"Error listing documents: {e}")
              return []

+     async def delete_document(self, document_id: str) -> bool:
+         """Delete a document and all its chunks"""
+         try:
+             await self._ensure_collection_exists()
+
+             # Find all points for this document
+             points, _ = await self.qdrant_client.scroll(
+                 collection_name=self.collection_name,
+                 limit=10000,
+                 with_payload=True,
+                 with_vectors=False
+             )
+
+             # Collect point IDs to delete
+             points_to_delete = []
+             for point in points:
+                 if point.payload.get("document_id") == document_id:
+                     points_to_delete.append(point.id)
+
+             if not points_to_delete:
+                 print(f"No document found with ID: {document_id}")
+                 return False
+
+             # Delete points
+             await self.qdrant_client.delete(
+                 collection_name=self.collection_name,
+                 points_selector=points_to_delete
+             )
+
+             print(f"✓ Deleted document: {document_id} ({len(points_to_delete)} chunks)")
+             return True
+
+         except Exception as e:
+             print(f"Error deleting document: {e}")
+             return False
+
+ class RAGService:
+     """Service for retrieval-augmented generation"""
+
      @staticmethod
+     async def retrieve_relevant_chunks(query: str, top_k: int = Config.TOP_K) -> List[Dict[str, Any]]:
+         """Retrieve relevant document chunks using the document manager"""
          try:
+             if app_state.document_manager is None:
+                 print("Error: Document manager is not initialized")
+                 return []
+
+             # Use the document manager's search functionality
+             results = await app_state.document_manager.search_documents(
+                 query=query,
+                 limit=top_k,
+                 min_score=Config.SIMILARITY_THRESHOLD
+             )
+
+             return results

          except Exception as e:
+             print(f"Error retrieving chunks: {e}")
+             return []

      @staticmethod
+     def build_context_prompt(query: str, results: List[Dict[str, Any]]) -> str:
          """Build a context-aware prompt with retrieved chunks"""
+         if not results:
              return query

+         # Build one context block per retrieved chunk
+         context_parts = []
+         for result in results:
+             context_parts.append(f"Source: {result['file_path']}\n{result['text']}")
+
+         combined_context = "\n\n---\n\n".join(context_parts)

+         prompt = f"""Based on the following context, answer the user's question:

+ Context:
+ {combined_context}

+ Question: {query}

+ Please provide a comprehensive answer based on the context provided."""

          return prompt
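For one hypothetical hit, the template above expands to the following (illustrative values only, not part of the commit):

# Illustrative only; shows the exact shape of the prompt the LLM receives.
from app import RAGService

results = [{"file_path": "report.pdf", "text": "Revenue grew 12% in Q3.",
            "score": 0.42, "document_id": "doc-1", "chunk_index": 0}]
print(RAGService.build_context_prompt("How did revenue change?", results))
# Based on the following context, answer the user's question:
#
# Context:
# Source: report.pdf
# Revenue grew 12% in Q3.
#
# Question: How did revenue change?
#
# Please provide a comprehensive answer based on the context provided.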
 
+ @asynccontextmanager
+ async def lifespan(app: FastAPI):
+     # Startup
+     if not Config.GROQ_API_KEY:
+         raise ValueError("GROQ_API_KEY environment variable is required")
+
+     print("Initializing services...")
+
+     # Initialize OpenAI client with Groq endpoint
+     try:
+         print(f"Configuring OpenAI client with:")
+         print(f"  Base URL: {Config.GROQ_BASE_URL}")
+         print(f"  API Key: {'*' * 10}...{Config.GROQ_API_KEY[-4:] if Config.GROQ_API_KEY else 'None'}")
+
+         app_state.openai_client = AsyncOpenAI(
+             api_key=Config.GROQ_API_KEY,
+             base_url=Config.GROQ_BASE_URL,
+             timeout=60.0
+         )
+         print("✓ OpenAI client initialized with Groq endpoint")
+     except Exception as e:
+         print(f"✗ Error initializing OpenAI client: {e}")
+         raise e
+
+     # Initialize Qdrant client
+     try:
+         app_state.qdrant_client = AsyncQdrantClient(
+             url=Config.QDRANT_URL,
+             api_key=Config.QDRANT_API_KEY
+         )
+         print("✓ Qdrant client initialized")
+     except Exception as e:
+         print(f"✗ Error initializing Qdrant client: {e}")
+         raise e
+
+     # Initialize embedding service
+     try:
+         print("Loading embedding model...")
+         app_state.embedding_service = EmbeddingService()
+         print(f"✓ Embedding model loaded: {Config.EMBEDDING_MODEL}")
+         print(f"✓ Model device: {Config.DEVICE}")
+         print(f"✓ Vector dimension: {app_state.embedding_service.dimension}")
+     except Exception as e:
+         print(f"✗ Error initializing embedding service: {e}")
+         raise e
+
+     # Initialize document manager
+     try:
+         app_state.document_manager = DocumentManager(
+             qdrant_client=app_state.qdrant_client,
+             embedding_service=app_state.embedding_service
+         )
+         print("✓ Document manager initialized")
+     except Exception as e:
+         print(f"✗ Error initializing document manager: {e}")
+         raise e
+
+     print("🚀 All services initialized successfully!")
+
+     yield
+
+     # Shutdown
+     print("Shutting down services...")
+     if app_state.qdrant_client:
+         await app_state.qdrant_client.close()
+         print("✓ Qdrant client closed")
+     if app_state.openai_client:
+         await app_state.openai_client.close()
+         print("✓ OpenAI client closed")
+     if app_state.embedding_service and hasattr(app_state.embedding_service, 'executor'):
+         app_state.embedding_service.executor.shutdown(wait=True)
+         print("✓ Embedding service executor shutdown")
+     if app_state.document_manager and hasattr(app_state.document_manager, 'executor'):
+         app_state.document_manager.executor.shutdown(wait=True)
+         print("✓ Document manager executor shutdown")
+     print("✓ Shutdown complete")
+
+ # Initialize FastAPI app
+ app = FastAPI(
+     title="Enhanced RAG API with Document Management",
+     description="OpenAI-compatible API for RAG with document management using Groq and Qdrant",
+     version="1.0.0",
+     lifespan=lifespan
+ )
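To exercise the new routes locally, a launcher along these lines should work (a sketch, not part of the commit; assumes the module is named app.py and that GROQ_API_KEY, QDRANT_URL, and QDRANT_API_KEY are set in the environment):

# Hypothetical local launcher; equivalent to running `uvicorn app:app` directly.
import uvicorn

if __name__ == "__main__":
    uvicorn.run("app:app", host="0.0.0.0", port=8000)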
  @app.get("/")
  async def root():
+     return {"message": "Enhanced RAG API with Document Management", "status": "running"}

  @app.get("/health")
  async def health_check():
 
          openai_health = {"status": "not_initialized", "error": "OpenAI client is None"}
      else:
          try:
              test_response = await app_state.openai_client.chat.completions.create(
                  model="mixtral-8x7b-32768",
                  messages=[{"role": "user", "content": "test"}],

          "openai_client": openai_health,
          "qdrant": qdrant_status,
          "embedding_service": embedding_health,
+         "document_manager": "initialized" if app_state.document_manager else "not_initialized",
          "collection": Config.COLLECTION_NAME,
          "embedding_model": Config.EMBEDDING_MODEL,
          "groq_endpoint": Config.GROQ_BASE_URL

  @app.post("/v1/chat/completions")
  async def chat_completions(request: ChatCompletionRequest):
+     """OpenAI-compatible chat completions endpoint with enhanced RAG"""

      if not app_state.openai_client:
          raise HTTPException(status_code=500, detail="OpenAI client not initialized")
 
      last_user_message = user_messages[-1].content
      print(f"Processing query: {last_user_message[:100]}...")

+     # Retrieve relevant chunks using enhanced search
      try:
+         relevant_results = await RAGService.retrieve_relevant_chunks(last_user_message)
+         print(f"Retrieved {len(relevant_results)} chunks")
      except Exception as e:
          print(f"Error in retrieval: {e}")
+         relevant_results = []

      # Build context-aware prompt
+     if relevant_results:
+         context_prompt = RAGService.build_context_prompt(last_user_message, relevant_results)
          enhanced_messages = request.messages[:-1] + [Message(role="user", content=context_prompt)]
          print("Using context-enhanced prompt")
      else:

          raise
      except Exception as e:
          print(f"Unexpected error in chat_completions: {e}")
          import traceback
          traceback.print_exc()
          raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
 
  async def create_chat_completion(messages: List[Dict], request: ChatCompletionRequest) -> ChatCompletionResponse:
      """Create a non-streaming chat completion"""
      try:
          response = await app_state.openai_client.chat.completions.create(
              model=request.model,
              messages=messages,

              stream=False
          )

          result = ChatCompletionResponse(
              id=response.id,
              created=response.created,

              } if response.usage else None
          )

          return result

      except Exception as e:
          print(f"Error in create_chat_completion: {e}")
          raise HTTPException(status_code=500, detail=f"Error calling OpenAI API: {str(e)}")

  async def stream_chat_completion(messages: List[Dict], request: ChatCompletionRequest) -> AsyncGenerator[str, None]:

              yield f"data: {chunk_response.model_dump_json()}\n\n"

          yield "data: [DONE]\n\n"

      except Exception as e:

          }
          yield f"data: {json.dumps(error_chunk)}\n\n"
+ # Document management endpoints
+ @app.post("/v1/documents/upload")
+ async def upload_document(file: UploadFile = File(...), metadata: str = None):
+     """Upload a PDF document"""
      try:
+         if not app_state.document_manager:
+             raise HTTPException(status_code=500, detail="Document manager not initialized")

+         # Validate file type
+         if not file.filename.lower().endswith('.pdf'):
+             raise HTTPException(status_code=400, detail="Only PDF files are supported")

+         # Parse metadata if provided
+         parsed_metadata = {}
+         if metadata:
+             try:
+                 parsed_metadata = json.loads(metadata)
+             except json.JSONDecodeError:
+                 raise HTTPException(status_code=400, detail="Invalid metadata JSON")

+         # Save uploaded file temporarily
+         with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
+             shutil.copyfileobj(file.file, tmp_file)
+             tmp_path = tmp_file.name
+
+         try:
+             # Add document to the collection
+             document_id = await app_state.document_manager.add_document(
+                 file_path=tmp_path,
+                 metadata={
+                     **parsed_metadata,
+                     "original_filename": file.filename,
+                     "upload_timestamp": datetime.now().isoformat()
+                 }
+             )
+
+             if not document_id:
+                 raise HTTPException(status_code=500, detail="Failed to add document")
+
+             return {
+                 "message": "Document uploaded successfully",
+                 "document_id": document_id,
+                 "filename": file.filename
              }
+
+         finally:
+             # Clean up temporary file
+             os.unlink(tmp_path)
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         print(f"Error uploading document: {e}")
+         raise HTTPException(status_code=500, detail=f"Error uploading document: {str(e)}")
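A hypothetical client call for this route, using the requests library (assumes the server is on localhost:8000 and sample.pdf exists). Note that because metadata is declared as a bare str rather than Form(...), FastAPI reads it from the query string, not the multipart body:

# Sketch of a client for POST /v1/documents/upload.
import json
import requests

with open("sample.pdf", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/v1/documents/upload",
        params={"metadata": json.dumps({"source": "local-test"})},  # query param, see note above
        files={"file": ("sample.pdf", f, "application/pdf")},
    )
print(resp.json())  # {"message": ..., "document_id": ..., "filename": "sample.pdf"}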
+ @app.post("/v1/documents/search")
+ async def search_documents(request: DocumentSearchRequest):
+     """Search for documents"""
+     try:
+         if not app_state.document_manager:
+             raise HTTPException(status_code=500, detail="Document manager not initialized")

+         results = await app_state.document_manager.search_documents(
+             query=request.query,
+             limit=request.limit,
+             min_score=request.min_score
          )

+         return {
+             "query": request.query,
+             "results": results,
+             "count": len(results)
+         }

      except Exception as e:
+         print(f"Error searching documents: {e}")
+         raise HTTPException(status_code=500, detail=f"Error searching documents: {str(e)}")
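And a matching sketch for the search route (same localhost assumption, not part of the commit):

# Sketch of a client for POST /v1/documents/search.
import requests

resp = requests.post(
    "http://localhost:8000/v1/documents/search",
    json={"query": "quarterly revenue", "limit": 3, "min_score": 0.2},
)
for hit in resp.json()["results"]:
    print(round(hit["score"], 3), hit["file_path"], hit["text"][:80])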
 
+ @app.get("/v1/documents/list")
+ async def list_documents():
+     """List all documents"""
      try:
+         if not app_state.document_manager:
+             raise HTTPException(status_code=500, detail="Document manager not initialized")

+         documents = await app_state.document_manager.list_documents()

          return {
+             "documents": documents,
+             "count": len(documents)
          }

      except Exception as e:
+         print(f"Error listing documents: {e}")
+         raise HTTPException(status_code=500, detail=f"Error listing documents: {str(e)}")

+ @app.delete("/v1/documents/{document_id}")
+ async def delete_document(document_id: str):
+     """Delete a document"""
      try:
+         if not app_state.document_manager:
+             raise HTTPException(status_code=500, detail="Document manager not initialized")

+         success = await app_state.document_manager.delete_document(document_id)

+         if not success:
+             raise HTTPException(status_code=404, detail="Document not found")
+
+         return {"message": "Document deleted successfully", "document_id": document_id}
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         print(f"Error deleting document: {e}")
+         raise HTTPException(status_code=500, detail=f"Error deleting document: {str(e)}")
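The list and delete routes compose naturally; a hypothetical cleanup snippet (same assumptions as the sketches above):

# Sketch: enumerate stored documents, then delete the first one by ID.
import requests

docs = requests.get("http://localhost:8000/v1/documents/list").json()["documents"]
if docs:
    doc_id = docs[0]["document_id"]
    resp = requests.delete(f"http://localhost:8000/v1/documents/{doc_id}")
    print(resp.json())  # {"message": "Document deleted successfully", ...}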
+ # Legacy compatibility endpoints
+ @app.post("/v1/embeddings/add")
+ async def add_document_legacy(content: str, metadata: Optional[Dict] = None):
+     """Legacy endpoint for adding documents (text content)"""
+     try:
+         if not app_state.embedding_service or not app_state.qdrant_client:
+             raise HTTPException(status_code=500, detail="Services not initialized")

+         await app_state.document_manager._ensure_collection_exists()
+
+         embedding = await app_state.embedding_service.get_document_embedding(content)
+
+         point = PointStruct(
+             id=str(uuid.uuid4()),
+             vector=embedding,
+             payload={
+                 "content": content,
+                 "metadata": metadata or {},
+                 "timestamp": datetime.now().isoformat()
+             }
+         )
+
+         await app_state.qdrant_client.upsert(
              collection_name=Config.COLLECTION_NAME,
+             points=[point]
          )

+         return {"message": "Document added successfully", "id": point.id}

      except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error adding document: {str(e)}")

  @app.get("/v1/collections/info")
  async def get_collection_info():

      if app_state.qdrant_client is None:
          raise HTTPException(status_code=500, detail="Qdrant client is not initialized")

+     await app_state.document_manager._ensure_collection_exists()

      collection_info = await app_state.qdrant_client.get_collection(Config.COLLECTION_NAME)
      return {