Spaces:
Running
Running
SUBHRAJIT MOHANTY
committed on
Commit
·
347dbd1
1
Parent(s):
09225f8
Chore: Bug fixing
Browse files
app.py
CHANGED
@@ -57,10 +57,15 @@ class Config:
|
|
57 |
SIMILARITY_THRESHOLD = float(os.getenv("SIMILARITY_THRESHOLD", "0.7"))
|
58 |
DEVICE = os.getenv("DEVICE", "cuda" if torch.cuda.is_available() else "cpu")
|
59 |
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
@asynccontextmanager
|
66 |
async def lifespan(app: FastAPI):
|
@@ -196,15 +201,15 @@ class RAGService:
|
|
196 |
"""Retrieve relevant document chunks from Qdrant"""
|
197 |
try:
|
198 |
# Check if embedding service is initialized
|
199 |
-
if embedding_service is None:
|
200 |
print("Error: Embedding service is not initialized")
|
201 |
return []
|
202 |
|
203 |
# Get query embedding - all-MiniLM works well without special prefixes
|
204 |
-
query_embedding = await embedding_service.get_query_embedding(query)
|
205 |
|
206 |
# Search in Qdrant
|
207 |
-
search_results = await qdrant_client.search(
|
208 |
collection_name=Config.COLLECTION_NAME,
|
209 |
query_vector=query_embedding,
|
210 |
limit=top_k,
|
@@ -254,23 +259,26 @@ async def health_check():
|
|
254 |
"""Health check endpoint"""
|
255 |
try:
|
256 |
# Test Qdrant connection
|
257 |
-
|
258 |
-
|
|
|
|
|
|
|
259 |
except Exception as e:
|
260 |
qdrant_status = f"error: {str(e)}"
|
261 |
|
262 |
# Test embedding service
|
263 |
-
if embedding_service is None:
|
264 |
embedding_health = {"status": "not_initialized", "error": "EmbeddingService is None"}
|
265 |
else:
|
266 |
try:
|
267 |
-
embedding_health = embedding_service.health_check()
|
268 |
except Exception as e:
|
269 |
embedding_health = {"status": "error", "error": str(e)}
|
270 |
|
271 |
return {
|
272 |
-
"status": "healthy" if embedding_service is not None else "unhealthy",
|
273 |
-
"groq": "connected" if groq_client else "not configured",
|
274 |
"qdrant": qdrant_status,
|
275 |
"embedding_service": embedding_health,
|
276 |
"collection": Config.COLLECTION_NAME,
|
@@ -281,7 +289,7 @@ async def health_check():
|
|
281 |
async def chat_completions(request: ChatCompletionRequest):
|
282 |
"""OpenAI-compatible chat completions endpoint with RAG"""
|
283 |
|
284 |
-
if not groq_client:
|
285 |
raise HTTPException(status_code=500, detail="Groq client not initialized")
|
286 |
|
287 |
try:
|
@@ -321,7 +329,7 @@ async def chat_completions(request: ChatCompletionRequest):
|
|
321 |
async def create_chat_completion(messages: List[Dict], request: ChatCompletionRequest) -> ChatCompletionResponse:
|
322 |
"""Create a non-streaming chat completion"""
|
323 |
try:
|
324 |
-
response = await groq_client.chat.completions.create(
|
325 |
model=request.model,
|
326 |
messages=messages,
|
327 |
max_tokens=request.max_tokens,
|
@@ -359,7 +367,7 @@ async def stream_chat_completion(messages: List[Dict], request: ChatCompletionRe
|
|
359 |
completion_id = f"chatcmpl-{uuid.uuid4().hex}"
|
360 |
created = int(datetime.now().timestamp())
|
361 |
|
362 |
-
stream = await groq_client.chat.completions.create(
|
363 |
model=request.model,
|
364 |
messages=messages,
|
365 |
max_tokens=request.max_tokens,
|
@@ -418,11 +426,11 @@ async def add_document(content: str, metadata: Optional[Dict] = None):
|
|
418 |
"""Add a document to the vector database"""
|
419 |
try:
|
420 |
# Check if embedding service is initialized
|
421 |
-
if embedding_service is None:
|
422 |
raise HTTPException(status_code=500, detail="Embedding service is not initialized")
|
423 |
|
424 |
# Generate embedding for document
|
425 |
-
embedding = await embedding_service.get_document_embedding(content)
|
426 |
|
427 |
# Create point
|
428 |
point = PointStruct(
|
@@ -436,7 +444,7 @@ async def add_document(content: str, metadata: Optional[Dict] = None):
|
|
436 |
)
|
437 |
|
438 |
# Insert into Qdrant
|
439 |
-
await qdrant_client.upsert(
|
440 |
collection_name=Config.COLLECTION_NAME,
|
441 |
points=[point]
|
442 |
)
|
@@ -451,7 +459,7 @@ async def batch_add_documents(documents: List[Dict[str, Any]]):
|
|
451 |
"""Add multiple documents to the vector database"""
|
452 |
try:
|
453 |
# Check if embedding service is initialized
|
454 |
-
if embedding_service is None:
|
455 |
raise HTTPException(status_code=500, detail="Embedding service is not initialized")
|
456 |
|
457 |
# Extract texts and metadata
|
@@ -459,7 +467,7 @@ async def batch_add_documents(documents: List[Dict[str, Any]]):
|
|
459 |
metadatas = [doc.get("metadata", {}) for doc in documents]
|
460 |
|
461 |
# Generate embeddings for all documents
|
462 |
-
embeddings = await embedding_service.batch_embed(texts)
|
463 |
|
464 |
# Create points
|
465 |
points = []
|
@@ -476,7 +484,7 @@ async def batch_add_documents(documents: List[Dict[str, Any]]):
|
|
476 |
points.append(point)
|
477 |
|
478 |
# Insert all points into Qdrant
|
479 |
-
await qdrant_client.upsert(
|
480 |
collection_name=Config.COLLECTION_NAME,
|
481 |
points=points
|
482 |
)
|
@@ -494,22 +502,22 @@ async def create_collection():
|
|
494 |
"""Create a new collection in Qdrant with the correct vector size"""
|
495 |
try:
|
496 |
# Check if embedding service is initialized
|
497 |
-
if embedding_service is None:
|
498 |
raise HTTPException(status_code=500, detail="Embedding service is not initialized")
|
499 |
|
500 |
from qdrant_client.models import VectorParams, Distance
|
501 |
|
502 |
-
await qdrant_client.create_collection(
|
503 |
collection_name=Config.COLLECTION_NAME,
|
504 |
vectors_config=VectorParams(
|
505 |
-
size=embedding_service.dimension, # 384 for all-MiniLM-L6-v2
|
506 |
distance=Distance.COSINE
|
507 |
)
|
508 |
)
|
509 |
|
510 |
return {
|
511 |
"message": f"Collection '{Config.COLLECTION_NAME}' created successfully",
|
512 |
-
"vector_size": embedding_service.dimension,
|
513 |
"distance": "cosine"
|
514 |
}
|
515 |
|
@@ -520,7 +528,10 @@ async def create_collection():
|
|
520 |
async def get_collection_info():
|
521 |
"""Get information about the collection"""
|
522 |
try:
|
523 |
-
|
|
|
|
|
|
|
524 |
return {
|
525 |
"name": Config.COLLECTION_NAME,
|
526 |
"vectors_count": collection_info.vectors_count,
|
|
|
57 |
SIMILARITY_THRESHOLD = float(os.getenv("SIMILARITY_THRESHOLD", "0.7"))
|
58 |
DEVICE = os.getenv("DEVICE", "cuda" if torch.cuda.is_available() else "cpu")
|
59 |
|
60 |
+
class ApplicationState:
    """Container for the application's shared service handles.

    Every handle starts out as ``None`` and is presumably populated during
    startup (the FastAPI ``lifespan`` hook shown in the surrounding diff —
    confirm against the full file). A ``None`` value therefore means
    "service not initialized yet", which the request handlers check before use.
    """

    def __init__(self):
        # Initialize each handle to None; the names mirror the services the
        # handlers reach for via the module-level `app_state` below.
        for handle in ("groq_client", "qdrant_client", "embedding_service"):
            setattr(self, handle, None)


# Single shared state instance used by all request handlers.
app_state = ApplicationState()
|
69 |
|
70 |
@asynccontextmanager
|
71 |
async def lifespan(app: FastAPI):
|
|
|
201 |
"""Retrieve relevant document chunks from Qdrant"""
|
202 |
try:
|
203 |
# Check if embedding service is initialized
|
204 |
+
if app_state.embedding_service is None:
|
205 |
print("Error: Embedding service is not initialized")
|
206 |
return []
|
207 |
|
208 |
# Get query embedding - all-MiniLM works well without special prefixes
|
209 |
+
query_embedding = await app_state.embedding_service.get_query_embedding(query)
|
210 |
|
211 |
# Search in Qdrant
|
212 |
+
search_results = await app_state.qdrant_client.search(
|
213 |
collection_name=Config.COLLECTION_NAME,
|
214 |
query_vector=query_embedding,
|
215 |
limit=top_k,
|
|
|
259 |
"""Health check endpoint"""
|
260 |
try:
|
261 |
# Test Qdrant connection
|
262 |
+
if app_state.qdrant_client:
|
263 |
+
collections = await app_state.qdrant_client.get_collections()
|
264 |
+
qdrant_status = "connected"
|
265 |
+
else:
|
266 |
+
qdrant_status = "not_initialized"
|
267 |
except Exception as e:
|
268 |
qdrant_status = f"error: {str(e)}"
|
269 |
|
270 |
# Test embedding service
|
271 |
+
if app_state.embedding_service is None:
|
272 |
embedding_health = {"status": "not_initialized", "error": "EmbeddingService is None"}
|
273 |
else:
|
274 |
try:
|
275 |
+
embedding_health = app_state.embedding_service.health_check()
|
276 |
except Exception as e:
|
277 |
embedding_health = {"status": "error", "error": str(e)}
|
278 |
|
279 |
return {
|
280 |
+
"status": "healthy" if app_state.embedding_service is not None else "unhealthy",
|
281 |
+
"groq": "connected" if app_state.groq_client else "not configured",
|
282 |
"qdrant": qdrant_status,
|
283 |
"embedding_service": embedding_health,
|
284 |
"collection": Config.COLLECTION_NAME,
|
|
|
289 |
async def chat_completions(request: ChatCompletionRequest):
|
290 |
"""OpenAI-compatible chat completions endpoint with RAG"""
|
291 |
|
292 |
+
if not app_state.groq_client:
|
293 |
raise HTTPException(status_code=500, detail="Groq client not initialized")
|
294 |
|
295 |
try:
|
|
|
329 |
async def create_chat_completion(messages: List[Dict], request: ChatCompletionRequest) -> ChatCompletionResponse:
|
330 |
"""Create a non-streaming chat completion"""
|
331 |
try:
|
332 |
+
response = await app_state.groq_client.chat.completions.create(
|
333 |
model=request.model,
|
334 |
messages=messages,
|
335 |
max_tokens=request.max_tokens,
|
|
|
367 |
completion_id = f"chatcmpl-{uuid.uuid4().hex}"
|
368 |
created = int(datetime.now().timestamp())
|
369 |
|
370 |
+
stream = await app_state.groq_client.chat.completions.create(
|
371 |
model=request.model,
|
372 |
messages=messages,
|
373 |
max_tokens=request.max_tokens,
|
|
|
426 |
"""Add a document to the vector database"""
|
427 |
try:
|
428 |
# Check if embedding service is initialized
|
429 |
+
if app_state.embedding_service is None:
|
430 |
raise HTTPException(status_code=500, detail="Embedding service is not initialized")
|
431 |
|
432 |
# Generate embedding for document
|
433 |
+
embedding = await app_state.embedding_service.get_document_embedding(content)
|
434 |
|
435 |
# Create point
|
436 |
point = PointStruct(
|
|
|
444 |
)
|
445 |
|
446 |
# Insert into Qdrant
|
447 |
+
await app_state.qdrant_client.upsert(
|
448 |
collection_name=Config.COLLECTION_NAME,
|
449 |
points=[point]
|
450 |
)
|
|
|
459 |
"""Add multiple documents to the vector database"""
|
460 |
try:
|
461 |
# Check if embedding service is initialized
|
462 |
+
if app_state.embedding_service is None:
|
463 |
raise HTTPException(status_code=500, detail="Embedding service is not initialized")
|
464 |
|
465 |
# Extract texts and metadata
|
|
|
467 |
metadatas = [doc.get("metadata", {}) for doc in documents]
|
468 |
|
469 |
# Generate embeddings for all documents
|
470 |
+
embeddings = await app_state.embedding_service.batch_embed(texts)
|
471 |
|
472 |
# Create points
|
473 |
points = []
|
|
|
484 |
points.append(point)
|
485 |
|
486 |
# Insert all points into Qdrant
|
487 |
+
await app_state.qdrant_client.upsert(
|
488 |
collection_name=Config.COLLECTION_NAME,
|
489 |
points=points
|
490 |
)
|
|
|
502 |
"""Create a new collection in Qdrant with the correct vector size"""
|
503 |
try:
|
504 |
# Check if embedding service is initialized
|
505 |
+
if app_state.embedding_service is None:
|
506 |
raise HTTPException(status_code=500, detail="Embedding service is not initialized")
|
507 |
|
508 |
from qdrant_client.models import VectorParams, Distance
|
509 |
|
510 |
+
await app_state.qdrant_client.create_collection(
|
511 |
collection_name=Config.COLLECTION_NAME,
|
512 |
vectors_config=VectorParams(
|
513 |
+
size=app_state.embedding_service.dimension, # 384 for all-MiniLM-L6-v2
|
514 |
distance=Distance.COSINE
|
515 |
)
|
516 |
)
|
517 |
|
518 |
return {
|
519 |
"message": f"Collection '{Config.COLLECTION_NAME}' created successfully",
|
520 |
+
"vector_size": app_state.embedding_service.dimension,
|
521 |
"distance": "cosine"
|
522 |
}
|
523 |
|
|
|
528 |
async def get_collection_info():
|
529 |
"""Get information about the collection"""
|
530 |
try:
|
531 |
+
if app_state.qdrant_client is None:
|
532 |
+
raise HTTPException(status_code=500, detail="Qdrant client is not initialized")
|
533 |
+
|
534 |
+
collection_info = await app_state.qdrant_client.get_collection(Config.COLLECTION_NAME)
|
535 |
return {
|
536 |
"name": Config.COLLECTION_NAME,
|
537 |
"vectors_count": collection_info.vectors_count,
|