Spaces:

ka1kuk
/

LLM-api

Sleeping

App Files Community

ka1kuk committed on Mar 12, 2024

Commit

df26f0d

verified ·

1 Parent(s): df51ba4

Update apis/chat_api.py

Browse files

Files changed (1) hide show

apis/chat_api.py +21 -21

apis/chat_api.py CHANGED Viewed

@@ -175,7 +175,7 @@ class ChatAPIApp:
             data_response = streamer.chat_return_dict(stream_response)
             return data_response
-    def chat_embedding(texts, model_name, api_key):
         api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_name}"
         headers = {"Authorization": f"Bearer {api_key}"}
         response = requests.post(api_url, headers=headers, json={"inputs": texts})
@@ -189,26 +189,26 @@ class ChatAPIApp:
     async def embedding(request: QueryRequest):
-        try:
-            for attempt in range(3):  # Retry logic
-                try:
-                    embeddings = await chat_embedding(request.texts, request.model_name, request.api_key)
-                    data = [
-                        {"object": "embedding", "index": i, "embedding": embedding}
-                        for i, embedding in enumerate(embeddings)
-                    ]
-                    return {
-                        "object": "list",
-                        "data": data,
-                        "model": request.model_name,
-                        "usage": {"prompt_tokens": len(request.texts), "total_tokens": len(request.texts)}
-                    }
-                except RuntimeError as e:
-                    if attempt < 2:  # Don't sleep on the last attempt
-                        await asyncio.sleep(10)  # Delay for the retry
-            raise HTTPException(status_code=503, detail="The model is currently loading, please try again later.")
-        except Exception as e:
-            raise HTTPException(status_code=500, detail=str(e))
     def setup_routes(self):
         for prefix in ["", "/v1", "/api", "/api/v1"]:

             data_response = streamer.chat_return_dict(stream_response)
             return data_response
+    async def chat_embedding(texts, model_name, api_key):
         api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_name}"
         headers = {"Authorization": f"Bearer {api_key}"}
         response = requests.post(api_url, headers=headers, json={"inputs": texts})
     async def embedding(request: QueryRequest):
+            try:
+                for attempt in range(3):  # Retry logic
+                    try:
+                        embeddings = await chat_embedding(request.texts, request.model_name, request.api_key)
+                        data = [
+                            {"object": "embedding", "index": i, "embedding": embedding}
+                            for i, embedding in enumerate(embeddings)
+                        ]
+                        return {
+                            "object": "list",
+                            "data": data,
+                            "model": request.model_name,
+                            "usage": {"prompt_tokens": len(request.texts), "total_tokens": len(request.texts)}
+                        }
+                    except RuntimeError as e:
+                        if attempt < 2:  # Don't sleep on the last attempt
+                            await asyncio.sleep(10)  # Delay for the retry
+                raise HTTPException(status_code=503, detail="The model is currently loading, please try again later.")
+            except Exception as e:
+                raise HTTPException(status_code=500, detail=str(e))
     def setup_routes(self):
         for prefix in ["", "/v1", "/api", "/api/v1"]: