AurelioAguirre committed on
Commit
1f1d641
·
1 Parent(s): 73ca5b8

changed to uvicorn setup for HF v12

Browse files
Files changed (1) hide show
  1. main/api.py +6 -11
main/api.py CHANGED
@@ -30,7 +30,7 @@ class InferenceApi(LitAPI):
30
  async def _get_client(self):
31
  """Get or create HTTP client as needed"""
32
  return httpx.AsyncClient(
33
- base_url=self.llm_config.get('base_url', 'http://localhost:8002'),
34
  timeout=float(self.llm_config.get('timeout', 60.0))
35
  )
36
 
@@ -50,19 +50,14 @@ class InferenceApi(LitAPI):
50
  json: Optional[Dict[str, Any]] = None,
51
  stream: bool = False
52
  ) -> Any:
53
- """Make an authenticated request to the LLM Server.
54
-
55
- Args:
56
- method: HTTP method ('GET' or 'POST')
57
- endpoint: Endpoint name to get from config
58
- params: Query parameters
59
- json: JSON body for POST requests
60
- stream: Whether to return a streaming response
61
- """
62
  access_token = os.environ.get("InferenceAPI")
63
  headers = {"Authorization": f"Bearer {access_token}"} if access_token else {}
 
 
64
 
65
  try:
 
66
  async with await self._get_client() as client:
67
  if stream:
68
  return await client.stream(
@@ -84,7 +79,7 @@ class InferenceApi(LitAPI):
84
  return response
85
 
86
  except Exception as e:
87
- self.logger.error(f"Error in request to {endpoint}: {str(e)}")
88
  raise
89
 
90
  def predict(self, x: str, **kwargs) -> Iterator[str]:
 
30
  async def _get_client(self):
31
  """Get or create HTTP client as needed"""
32
  return httpx.AsyncClient(
33
+ base_url=self.llm_config.get('base_url', 'http://localhost:8001'),
34
  timeout=float(self.llm_config.get('timeout', 60.0))
35
  )
36
 
 
50
  json: Optional[Dict[str, Any]] = None,
51
  stream: bool = False
52
  ) -> Any:
53
+ """Make an authenticated request to the LLM Server."""
 
 
 
 
 
 
 
 
54
  access_token = os.environ.get("InferenceAPI")
55
  headers = {"Authorization": f"Bearer {access_token}"} if access_token else {}
56
+ base_url = self.llm_config.get('base_url', 'http://localhost:8002')
57
+ full_endpoint = f"{base_url.rstrip('/')}/{self._get_endpoint(endpoint).lstrip('/')}"
58
 
59
  try:
60
+ self.logger.info(f"Making {method} request to: {full_endpoint}")
61
  async with await self._get_client() as client:
62
  if stream:
63
  return await client.stream(
 
79
  return response
80
 
81
  except Exception as e:
82
+ self.logger.error(f"Error in request to {full_endpoint}: {str(e)}")
83
  raise
84
 
85
  def predict(self, x: str, **kwargs) -> Iterator[str]: