Commit 1f1d641 (parent: 73ca5b8)
changed to uvicorn setup for HF v12
main/api.py CHANGED (+6 −11)

@@ -30,7 +30,7 @@ class InferenceApi(LitAPI):
     async def _get_client(self):
         """Get or create HTTP client as needed"""
         return httpx.AsyncClient(
-            base_url=self.llm_config.get('base_url', 'http://localhost:
+            base_url=self.llm_config.get('base_url', 'http://localhost:8001'),
             timeout=float(self.llm_config.get('timeout', 60.0))
         )

@@ -50,19 +50,14 @@ class InferenceApi(LitAPI):
         json: Optional[Dict[str, Any]] = None,
         stream: bool = False
     ) -> Any:
-        """Make an authenticated request to the LLM Server.
-
-        Args:
-            method: HTTP method ('GET' or 'POST')
-            endpoint: Endpoint name to get from config
-            params: Query parameters
-            json: JSON body for POST requests
-            stream: Whether to return a streaming response
-        """
+        """Make an authenticated request to the LLM Server."""
         access_token = os.environ.get("InferenceAPI")
         headers = {"Authorization": f"Bearer {access_token}"} if access_token else {}
+        base_url = self.llm_config.get('base_url', 'http://localhost:8002')
+        full_endpoint = f"{base_url.rstrip('/')}/{self._get_endpoint(endpoint).lstrip('/')}"

         try:
+            self.logger.info(f"Making {method} request to: {full_endpoint}")
             async with await self._get_client() as client:
                 if stream:
                     return await client.stream(
@@ -84,7 +79,7 @@ class InferenceApi(LitAPI):
             return response

         except Exception as e:
-            self.logger.error(f"Error in request to {
+            self.logger.error(f"Error in request to {full_endpoint}: {str(e)}")
             raise

     def predict(self, x: str, **kwargs) -> Iterator[str]:
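For reference, a minimal sketch of the URL join that the new full_endpoint line performs. The endpoint path below is a made-up placeholder standing in for whatever self._get_endpoint(endpoint) resolves from the config:

    # Hypothetical values, for illustration only.
    base_url = "http://localhost:8002/"   # llm_config['base_url'] may or may not end in '/'
    endpoint_path = "/v1/completions"     # placeholder for self._get_endpoint(endpoint)

    full_endpoint = f"{base_url.rstrip('/')}/{endpoint_path.lstrip('/')}"
    # -> "http://localhost:8002/v1/completions"
    # Stripping the slashes on both sides guarantees exactly one '/' at the
    # join, regardless of how the config value or endpoint path is written.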
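The stream branch hands back the result of client.stream(). For context, a self-contained sketch of the usual httpx streaming pattern: httpx.AsyncClient.stream() returns an async context manager, so it is entered with async with rather than awaited. The function name, URL, payload, and headers here are placeholders, not part of this commit:

    import httpx

    async def stream_from_server(url: str, payload: dict, headers: dict) -> None:
        # One short-lived client per call, mirroring _get_client above.
        async with httpx.AsyncClient(timeout=60.0) as client:
            # client.stream() exposes the response incrementally instead of
            # buffering the whole body in memory.
            async with client.stream("POST", url, json=payload, headers=headers) as response:
                response.raise_for_status()
                async for chunk in response.aiter_text():
                    print(chunk, end="", flush=True)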