upgraded openai version
Files changed:
- app.py (+88, -52)
- requirements.txt (+1, -1)

app.py (CHANGED)
@@ -26,47 +26,61 @@ hf_home_dir = os.environ["HF_HOME"]
 if not os.path.exists(hf_home_dir):
     os.makedirs(hf_home_dir)

-collection_name = os.getenv(
+collection_name = os.getenv("QDRANT_COLLECTION_NAME")
 logging.info(f"Collection name: {collection_name}")
 # Setup logging using Python's standard logging library
 logging.basicConfig(level=logging.INFO)

 # Load Hugging Face token from environment variable
-huggingface_token = os.getenv(
+huggingface_token = os.getenv("HUGGINGFACE_HUB_TOKEN")
 if huggingface_token:
     try:
         login(token=huggingface_token, add_to_git_credential=True)
         logging.info("Successfully logged into Hugging Face Hub.")
     except Exception as e:
         logging.error(f"Failed to log into Hugging Face Hub: {e}")
-        raise HTTPException(
+        raise HTTPException(
+            status_code=500, detail="Failed to log into Hugging Face Hub."
+        )
 else:
-    raise ValueError(
+    raise ValueError(
+        "Hugging Face token is not set. Please set the HUGGINGFACE_HUB_TOKEN environment variable."
+    )

 # Initialize the Qdrant searcher
-qdrant_url = os.getenv(
-access_token = os.getenv(
+qdrant_url = os.getenv("QDRANT_URL")
+access_token = os.getenv("QDRANT_ACCESS_TOKEN")

 if not qdrant_url or not access_token:
-    raise ValueError(
+    raise ValueError(
+        "Qdrant URL or Access Token is not set. Please set the QDRANT_URL and QDRANT_ACCESS_TOKEN environment variables."
+    )

 # Load the model and tokenizer with trust_remote_code=True
 try:
     cache_folder = os.path.join(hf_home_dir, "transformers_cache")
-
+
     # Load the tokenizer and model with trust_remote_code=True
-    tokenizer = AutoTokenizer.from_pretrained(
-
+    tokenizer = AutoTokenizer.from_pretrained(
+        "nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True
+    )
+    model = AutoModel.from_pretrained(
+        "nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True
+    )

     logging.info("Successfully loaded the model and tokenizer with transformers.")
-
+
     # Initialize the Qdrant searcher after the model is successfully loaded
     global searcher # Ensure searcher is accessible globally if needed
     searcher = QdrantSearcher(qdrant_url=qdrant_url, access_token=access_token)

 except Exception as e:
     logging.error(f"Failed to load the model or initialize searcher: {e}")
-    raise HTTPException(
+    raise HTTPException(
+        status_code=500,
+        detail="Failed to load the custom model or initialize searcher.",
+    )
+

 # Function to embed text using the model
 def embed_text(text):

@@ -75,43 +89,45 @@ def embed_text(text):
     embeddings = outputs.last_hidden_state.mean(dim=1) # Example: mean pooling
     return embeddings.detach().numpy()

+
 # Define the request body models
 class SearchDocumentsRequest(BaseModel):
     query: str
     limit: int = 3
     file_id: str = None

+
 class GenerateRAGRequest(BaseModel):
     search_query: str
     file_id: str = None

+
 class XApiKeyRequest(BaseModel):
     organization_id: str
     user_id: str
-    search_query: str
+    search_query: str
     file_id: str = None

-import os
-
-for name, value in os.environ.items():
-    print("{0}: {1}".format(name, value))
-

 @app.get("/")
 async def root():
-    return {
+    return {
+        "message": "Welcome to the Search and RAG API!, go to relevant address for API request"
+    }
+

 # Define the search documents endpoint
 @app.post("/api/search-documents")
 async def search_documents(
-    body: SearchDocumentsRequest,
-    credentials: tuple = Depends(token_required)
+    body: SearchDocumentsRequest, credentials: tuple = Depends(token_required)
 ):
     customer_id, user_id = credentials
     start_time = time.time()
     if not customer_id or not user_id:
         logging.error("Failed to extract customer_id or user_id from the JWT token.")
-        raise HTTPException(
+        raise HTTPException(
+            status_code=401, detail="Invalid token: missing customer_id or user_id"
+        )

     logging.info("Received request to search documents")
     try:

@@ -120,14 +136,22 @@ async def search_documents(
         # Encode the query using the custom embedding function
         query_embedding = embed_text(body.query)
         print(body.query)
-        #collection_name = "embed" # Use the collection name where the embeddings are stored
+        # collection_name = "embed" # Use the collection name where the embeddings are stored
         logging.info("Performing search using the precomputed embeddings")
         if body.file_id:
-            hits, error = searcher.search_documents(
+            hits, error = searcher.search_documents(
+                collection_name,
+                query_embedding,
+                user_id,
+                body.limit,
+                file_id=body.file_id,
+            )
         else:
             # Perform search using the precomputed embeddings
-            hits, error = searcher.search_documents(
-
+            hits, error = searcher.search_documents(
+                collection_name, query_embedding, user_id, body.limit
+            )
+
         if error:
             logging.error(f"Search documents error: {error}")
             raise HTTPException(status_code=500, detail=error)

@@ -138,33 +162,39 @@ async def search_documents(
         logging.error(f"Unexpected error: {e}")
         raise HTTPException(status_code=500, detail=str(e))

+
 # Define the generate RAG response endpoint
 @app.post("/api/generate-rag-response")
 async def generate_rag_response_api(
-    body: GenerateRAGRequest,
-    credentials: tuple = Depends(token_required)
+    body: GenerateRAGRequest, credentials: tuple = Depends(token_required)
 ):
     customer_id, user_id = credentials
     start_time = time.time()
     if not customer_id or not user_id:
         logging.error("Failed to extract customer_id or user_id from the JWT token.")
-        raise HTTPException(
+        raise HTTPException(
+            status_code=401, detail="Invalid token: missing customer_id or user_id"
+        )

     logging.info("Received request to generate RAG response")
-
+
     try:
         search_time = time.time()
         logging.info("Starting document search")
         # Encode the query using the custom embedding function
         query_embedding = embed_text(body.search_query)
         print(body.search_query)
-        #collection_name = "embed" # Use the collection name where the embeddings are stored
+        # collection_name = "embed" # Use the collection name where the embeddings are stored
         # Perform search using the precomputed embeddings
         if body.file_id:
-            hits, error = searcher.search_documents(
+            hits, error = searcher.search_documents(
+                collection_name, query_embedding, user_id, file_id=body.file_id
+            )
         else:
-            hits, error = searcher.search_documents(
-
+            hits, error = searcher.search_documents(
+                collection_name, query_embedding, user_id
+            )
+
         if error:
             logging.error(f"Search documents error: {error}")
             raise HTTPException(status_code=500, detail=error)

@@ -177,9 +207,11 @@ async def generate_rag_response_api(
         response, error = generate_rag_response(hits, body.search_query)
         rag_end_time = time.time()
         rag_time_taken = rag_end_time - rag_start_time
-        end_time= time.time()
+        end_time = time.time()
         total_time = end_time - start_time
-        logging.info(
+        logging.info(
+            f"Search time: {search_time_taken}, RAG time: {rag_time_taken}, Total time: {total_time}"
+        )
         if error:
             logging.error(f"Generate RAG response error: {error}")
             raise HTTPException(status_code=500, detail=error)

@@ -189,10 +221,10 @@ async def generate_rag_response_api(
         logging.error(f"Unexpected error: {e}")
         raise HTTPException(status_code=500, detail=str(e))

+
 @app.post("/api/search-documents/v1")
 async def search_documents_x_api_key(
-    body: XApiKeyRequest,
-    authorized: bool = Depends(x_api_key_auth)
+    body: XApiKeyRequest, authorized: bool = Depends(x_api_key_auth)
 ):
     if not authorized:
         raise HTTPException(status_code=401, detail="Unauthorized")

@@ -201,7 +233,7 @@ async def search_documents_x_api_key(
     user_id = body.user_id
     file_id = body.file_id

-    logging.info(f
+    logging.info(f"search query {body.search_query}")
     logging.info(f"organization_id: {organization_id}, user_id: {user_id}")
     logging.info("Received request to search documents with x-api-key auth")
     try:

@@ -209,11 +241,13 @@ async def search_documents_x_api_key(

         # Encode the query using the custom embedding function
         query_embedding = embed_text(body.search_query)
-        #collection_name = "embed" # Use the collection name where the embeddings are stored
+        # collection_name = "embed" # Use the collection name where the embeddings are stored

         # Perform search using the precomputed embeddings
-        hits, error = searcher.search_documents(
-
+        hits, error = searcher.search_documents(
+            collection_name, query_embedding, user_id, limit=3, file_id=file_id
+        )
+
         if error:
             logging.error(f"Search documents error: {error}")
             raise HTTPException(status_code=500, detail=error)

@@ -226,10 +260,10 @@ async def search_documents_x_api_key(
         logging.error(f"Unexpected error: {e}")
         raise HTTPException(status_code=500, detail=str(e))

+
 @app.post("/api/generate-rag-response/v1")
 async def generate_rag_response_x_api_key(
-    body: XApiKeyRequest,
-    authorized: bool = Depends(x_api_key_auth)
+    body: XApiKeyRequest, authorized: bool = Depends(x_api_key_auth)
 ):
     # Assuming x_api_key_auth validates the key
     if not authorized:

@@ -239,7 +273,7 @@ async def generate_rag_response_x_api_key(
     user_id = body.user_id
     file_id = body.file_id

-    logging.info(f
+    logging.info(f"search query {body.search_query}")
     logging.info(f"organization_id: {organization_id}, user_id: {user_id}")
     logging.info("Received request to generate RAG response with x-api-key auth")
     try:

@@ -247,11 +281,13 @@ async def generate_rag_response_x_api_key(

         # Encode the query using the custom embedding function
         query_embedding = embed_text(body.search_query)
-        #collection_name = "embed" # Use the collection name where the embeddings are stored
+        # collection_name = "embed" # Use the collection name where the embeddings are stored

         # Perform search using the precomputed embeddings
-        hits, error = searcher.search_documents(
-
+        hits, error = searcher.search_documents(
+            collection_name, query_embedding, user_id, file_id=file_id
+        )
+
         if error:
             logging.error(f"Search documents error: {error}")
             raise HTTPException(status_code=500, detail=error)

@@ -260,7 +296,7 @@ async def generate_rag_response_x_api_key(

         # Generate the RAG response using the retrieved documents
         response, error = generate_rag_response(hits, body.search_query)
-
+
         if error:
             logging.error(f"Generate RAG response error: {error}")
             raise HTTPException(status_code=500, detail=error)

@@ -272,7 +308,7 @@ async def generate_rag_response_x_api_key(
         raise HTTPException(status_code=500, detail=str(e))


-
-if __name__ == '__main__':
+if __name__ == "__main__":
     import uvicorn
-
+
+    uvicorn.run(app, host="0.0.0.0", port=8000)
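For reference, here is a minimal client-side sketch of how the two x-api-key endpoints touched above might be exercised once the app is serving on port 8000 (as configured in uvicorn.run). Only the route paths and the XApiKeyRequest fields come from the diff; the "x-api-key" header name (assumed from the x_api_key_auth dependency), the key value, and the payload values are placeholders, and the requests library is not part of the pinned requirements.

import requests  # assumed HTTP client, not listed in this repository's requirements

payload = {
    "organization_id": "org-123",  # placeholder value
    "user_id": "user-456",         # placeholder value
    "search_query": "What does the contract say about termination?",
}
headers = {"x-api-key": "YOUR_API_KEY"}  # header name assumed from x_api_key_auth

# Search endpoint with x-api-key auth
search = requests.post(
    "http://localhost:8000/api/search-documents/v1", json=payload, headers=headers
)
print(search.status_code, search.json())

# RAG endpoint with x-api-key auth
rag = requests.post(
    "http://localhost:8000/api/generate-rag-response/v1", json=payload, headers=headers
)
print(rag.status_code, rag.json())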
requirements.txt (CHANGED)

@@ -2,7 +2,7 @@ fastapi==0.111.1
 fastapi-cli==0.0.4
 uvicorn==0.17.6
 cryptography>=3.4.7
-openai==1.
+openai==1.75.0
 PyJWT==2.6.0
 nltk==3.6.7
 numpy==1.24.0
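The requirements change pins openai to 1.75.0, a post-1.0 release that exposes the client-object interface. A minimal sketch of that interface follows; whether generate_rag_response in this repository actually calls OpenAI this way is not visible in the diff, and the model name, prompt wiring, and the shape of the retrieved hits are illustrative assumptions.

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment


def answer_from_hits(hits, search_query):
    # Join the retrieved snippets into one context block
    # (the structure of `hits` is assumed for illustration).
    context = "\n\n".join(str(hit) for hit in hits)
    completion = client.chat.completions.create(
        model="gpt-4o-mini",  # hypothetical model choice
        messages=[
            {"role": "system", "content": "Answer using only the provided context."},
            {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {search_query}"},
        ],
    )
    return completion.choices[0].message.content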