AurelioAguirre committed on
Commit
1f1d641
·
1 Parent(s): 73ca5b8

changed to uvicorn setup for HF v12

Browse files
Files changed (1) hide show
  1. main/api.py +6 -11
main/api.py CHANGED
@@ -30,7 +30,7 @@ class InferenceApi(LitAPI):
30
  async def _get_client(self):
31
  """Get or create HTTP client as needed"""
32
  return httpx.AsyncClient(
33
- base_url=self.llm_config.get('base_url', 'http://localhost:8002'),
34
  timeout=float(self.llm_config.get('timeout', 60.0))
35
  )
36
 
@@ -50,19 +50,14 @@ class InferenceApi(LitAPI):
50
  json: Optional[Dict[str, Any]] = None,
51
  stream: bool = False
52
  ) -> Any:
53
- """Make an authenticated request to the LLM Server.
54
-
55
- Args:
56
- method: HTTP method ('GET' or 'POST')
57
- endpoint: Endpoint name to get from config
58
- params: Query parameters
59
- json: JSON body for POST requests
60
- stream: Whether to return a streaming response
61
- """
62
  access_token = os.environ.get("InferenceAPI")
63
  headers = {"Authorization": f"Bearer {access_token}"} if access_token else {}
 
 
64
 
65
  try:
 
66
  async with await self._get_client() as client:
67
  if stream:
68
  return await client.stream(
@@ -84,7 +79,7 @@ class InferenceApi(LitAPI):
84
  return response
85
 
86
  except Exception as e:
87
- self.logger.error(f"Error in request to {endpoint}: {str(e)}")
88
  raise
89
 
90
  def predict(self, x: str, **kwargs) -> Iterator[str]:
 
30
  async def _get_client(self):
31
  """Get or create HTTP client as needed"""
32
  return httpx.AsyncClient(
33
+ base_url=self.llm_config.get('base_url', 'http://localhost:8001'),
34
  timeout=float(self.llm_config.get('timeout', 60.0))
35
  )
36
 
 
50
  json: Optional[Dict[str, Any]] = None,
51
  stream: bool = False
52
  ) -> Any:
53
+ """Make an authenticated request to the LLM Server."""
 
 
 
 
 
 
 
 
54
  access_token = os.environ.get("InferenceAPI")
55
  headers = {"Authorization": f"Bearer {access_token}"} if access_token else {}
56
+ base_url = self.llm_config.get('base_url', 'http://localhost:8002')
57
+ full_endpoint = f"{base_url.rstrip('/')}/{self._get_endpoint(endpoint).lstrip('/')}"
58
 
59
  try:
60
+ self.logger.info(f"Making {method} request to: {full_endpoint}")
61
  async with await self._get_client() as client:
62
  if stream:
63
  return await client.stream(
 
79
  return response
80
 
81
  except Exception as e:
82
+ self.logger.error(f"Error in request to {full_endpoint}: {str(e)}")
83
  raise
84
 
85
  def predict(self, x: str, **kwargs) -> Iterator[str]: