AurelioAguirre committed
Commit 10d4b3b · 1 Parent(s): 840a4e4

Connection with api. Added test-client

Files changed (3)
  1. main/main.py +30 -25
  2. test-client/__init__.py +0 -0
  3. test-client/client.py +103 -0
main/main.py CHANGED
@@ -1,4 +1,5 @@
 from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from typing import Optional, Union
 import torch
@@ -13,15 +14,21 @@ from huggingface_hub.hf_api import HfApi
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-# Initialize FastAPI with root path for Spaces
+# Initialize FastAPI with simplified configuration
 app = FastAPI(
     title="LLM Engine Service",
-    # This is crucial for Hugging Face Spaces
-    root_path="/",
-    # Add OpenAPI configs
-    openapi_url="/api/openapi.json",
-    docs_url="/api/docs",
-    redoc_url="/api/redoc"
+    docs_url="/docs",
+    redoc_url="/redoc",
+    openapi_url="/openapi.json"
+)
+
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
 )
 
 # Global variable to store the LLM instance
@@ -49,14 +56,13 @@ class GenerateRequest(BaseModel):
 @app.get("/")
 async def root():
     """Root endpoint to verify service is running"""
-    space_url = "https://teamgenki-llm-engine.hf.space"
     return {
         "status": "running",
         "service": "LLM Engine",
         "endpoints": {
-            "initialize": f"{space_url}/initialize",
-            "generate": f"{space_url}/generate",
-            "health": f"{space_url}/health"
+            "initialize": "/initialize",
+            "generate": "/generate",
+            "health": "/health"
         }
     }
 
@@ -132,9 +138,6 @@ async def generate(request: GenerateRequest):
 
     try:
         if request.stream:
-            # For streaming responses, we need to handle differently
-            # This is a placeholder as the actual streaming implementation
-            # would need to use StreamingResponse from FastAPI
             raise HTTPException(
                 status_code=400,
                 detail="Streaming is not currently supported through the API"
@@ -190,23 +193,25 @@ async def health_check():
 def main():
     # Load environment variables or configuration here
     host = os.getenv("LLM_ENGINE_HOST", "0.0.0.0")
-    port = int(os.getenv("LLM_ENGINE_PORT", "7860"))  # Changed to 7860 for Spaces
-
-    # Log the service URLs
-    space_url = "https://teamgenki-llm-engine.hf.space"
-    logger.info(f"Service will be available at: {space_url}")
-    logger.info(f"API endpoints:")
-    logger.info(f"  Initialize: {space_url}/initialize")
-    logger.info(f"  Generate: {space_url}/generate")
-    logger.info(f"  Health: {space_url}/health")
+    port = int(os.getenv("LLM_ENGINE_PORT", "7860"))  # Default to 7860 for Spaces
+
+    # Log startup information
+    logger.info(f"Starting LLM Engine service on {host}:{port}")
+    logger.info("Available endpoints:")
+    logger.info("  - /")
+    logger.info("  - /health")
+    logger.info("  - /initialize")
+    logger.info("  - /generate")
+    logger.info("  - /docs")
+    logger.info("  - /redoc")
+    logger.info("  - /openapi.json")
 
     # Start the server
     uvicorn.run(
         app,
         host=host,
         port=port,
-        log_level="info",
-        reload=False
+        log_level="info"
     )
 
 if __name__ == "__main__":
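Net effect of the main/main.py changes: the hard-coded Space URL and the /api prefix are gone, the OpenAPI docs move to FastAPI's default /docs, /redoc and /openapi.json paths, endpoints are reported as relative paths, and CORS is opened for browser clients. A minimal smoke test of the new paths could look like the sketch below (illustration only; it assumes the service is deployed at the Space URL referenced in test-client/client.py and that requests is installed):

import requests

BASE_URL = "https://teamgenki-llm-engine.hf.space"  # assumed deployment URL, taken from test-client/client.py

# Root endpoint should now list relative paths for /initialize, /generate and /health
print(requests.get(f"{BASE_URL}/", timeout=10).json())

# OpenAPI schema and health check moved off the old /api prefix
print(requests.get(f"{BASE_URL}/openapi.json", timeout=10).status_code)
print(requests.get(f"{BASE_URL}/health", timeout=10).json())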
test-client/__init__.py ADDED
File without changes
test-client/client.py ADDED
@@ -0,0 +1,103 @@
+import logging
+import requests
+from typing import Optional, Dict, Any
+import json
+
+class LLMEngineClient:
+    def __init__(self, base_url: str, timeout: int = 10):
+        # Remove /api suffix and ensure proper formatting
+        self.base_url = base_url.rstrip('/')
+        self.timeout = timeout
+        self.logger = logging.getLogger(__name__)
+
+        # Set up logging
+        logging.basicConfig(
+            level=logging.INFO,
+            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+        )
+
+        self.logger.info(f"Initialized client with base URL: {self.base_url}")
+
+    def _make_request(self, method: str, endpoint: str, data: Optional[Dict] = None) -> Dict[str, Any]:
+        """Make HTTP request with detailed error handling"""
+        url = f"{self.base_url}/{endpoint.lstrip('/')}"
+        self.logger.info(f"Making {method} request to: {url}")
+
+        try:
+            headers = {
+                'Accept': 'application/json',
+                'Content-Type': 'application/json' if data else 'application/json'
+            }
+
+            response = requests.request(
+                method=method,
+                url=url,
+                json=data if data else None,
+                timeout=self.timeout,
+                headers=headers
+            )
+
+            # Log response details for debugging
+            self.logger.debug(f"Response status code: {response.status_code}")
+            self.logger.debug(f"Response headers: {response.headers}")
+            self.logger.debug(f"Response content: {response.text[:500]}")
+
+            # Check if the response is HTML
+            content_type = response.headers.get('content-type', '')
+            if 'text/html' in content_type:
+                self.logger.error(f"Received HTML response. URL might be incorrect or service might be down.")
+                self.logger.error(f"Attempted URL: {url}")
+                raise ValueError(f"Server returned HTML instead of JSON. Please check if the URL {url} is correct.")
+
+            response.raise_for_status()
+
+            return response.json()
+
+        except requests.exceptions.ConnectionError as e:
+            self.logger.error(f"Failed to connect to {url}: {str(e)}")
+            raise ConnectionError(f"Could not connect to LLM Engine at {url}. Is the service running?")
+
+        except requests.exceptions.Timeout as e:
+            self.logger.error(f"Request to {url} timed out after {self.timeout}s")
+            raise TimeoutError(f"Request timed out after {self.timeout} seconds")
+
+        except requests.exceptions.RequestException as e:
+            self.logger.error(f"Request failed: {str(e)}")
+            raise
+
+    def check_health(self) -> Dict[str, Any]:
+        """Check if the service is running and get health status"""
+        return self._make_request('GET', 'health')
+
+    def initialize_model(self, config: Dict[str, Any]) -> Dict[str, Any]:
+        """Initialize the model with given configuration"""
+        return self._make_request('POST', 'initialize', data=config)
+
+    def generate_text(self, request: Dict[str, Any]) -> Dict[str, Any]:
+        """Generate text using the initialized model"""
+        return self._make_request('POST', 'generate', data=request)
+
+def test_connection():
+    """Test the connection to the LLM Engine"""
+    # When running on Spaces, we need to use the gradio-provided URL
+    base_url = "https://teamgenki-llm-engine.hf.space"
+    client = LLMEngineClient(base_url)
+
+    try:
+        # Try each endpoint
+        client.logger.info("Testing root endpoint...")
+        root_response = client._make_request('GET', '')
+        client.logger.info(f"Root endpoint response: {root_response}")
+
+        client.logger.info("Testing health endpoint...")
+        health_status = client.check_health()
+        client.logger.info(f"Health endpoint response: {health_status}")
+
+        return True
+
+    except Exception as e:
+        client.logger.error(f"Connection test failed: {str(e)}")
+        return False
+
+if __name__ == "__main__":
+    test_connection()
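Beyond test_connection(), typical use of the client would go through check_health, initialize_model and generate_text. A sketch is below; note that the exact payload fields for /initialize and /generate are defined by the Pydantic models in main/main.py and are not part of this diff, so the keys used here (model_name, prompt, max_tokens) are hypothetical placeholders; only stream is confirmed by the GenerateRequest usage shown in the main/main.py diff above:

from client import LLMEngineClient  # run from inside the test-client directory

client = LLMEngineClient("https://teamgenki-llm-engine.hf.space", timeout=30)

# /health takes no payload
print(client.check_health())

# Hypothetical payloads: real field names come from the request models in main/main.py
client.initialize_model({"model_name": "placeholder-model"})
print(client.generate_text({"prompt": "Hello", "max_tokens": 32, "stream": False}))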