AurelioAguirre committed
Commit 5de8cee · 1 Parent(s): 10d4b3b

Fixing model download issue v9

Files changed (3):
  1. Dockerfile +12 -18
  2. test-client/__init__.py +0 -0
  3. test-client/client.py +0 -103
Dockerfile CHANGED
@@ -24,38 +24,32 @@ COPY requirements.txt .
 # Install Python dependencies
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Copy the rest of the application
-COPY . .
-
 # Create checkpoints directory with proper permissions
-RUN mkdir -p /app/checkpoints && \
-    chmod 777 /app/checkpoints
+RUN mkdir -p /app/main/checkpoints && \
+    chmod 777 /app/main/checkpoints
 
 # The token will be passed during build time
 ARG HF_TOKEN
 ENV HF_TOKEN=${HF_TOKEN}
 
-# Download both models using litgpt
-# Only proceed if HF_TOKEN is provided
+# Download model using litgpt command line with correct checkpoint path
 RUN if [ -n "$HF_TOKEN" ]; then \
-    python -c "from huggingface_hub import login; from litgpt.cli import download; login('${HF_TOKEN}'); \
-    download('meta-llama/Llama-2-3b-chat-hf', '/app/checkpoints'); \
-    download('mistralai/Mistral-7B-Instruct-v0.3', '/app/checkpoints')"; \
+    litgpt download mistralai/Mistral-7B-Instruct-v0.3 --access_token ${HF_TOKEN} --checkpoint_dir /app/main/checkpoints; \
     else \
     echo "No Hugging Face token provided. Models will need to be downloaded separately."; \
+    exit 1; \
     fi
 
+# Copy the rest of the application
+COPY . .
+
 # Set environment variables
 ENV LLM_ENGINE_HOST=0.0.0.0
-ENV LLM_ENGINE_PORT=8001
-
-# Update MODEL_PATH for the new model
-ENV MODEL_PATH=/app/checkpoints/mistralai/Mistral-7B-Instruct-v0.3
+ENV LLM_ENGINE_PORT=7860
+ENV MODEL_PATH=/app/main/checkpoints/mistralai/Mistral-7B-Instruct-v0.3
 
-# Expose both ports:
-# 8001 for FastAPI
-# 7860 for Hugging Face Spaces
-EXPOSE 8001 7860
+# Expose port 7860 for Hugging Face Spaces
+EXPOSE 7860
 
 # Command to run the application
 CMD ["python", "main/main.py"]
test-client/__init__.py DELETED
File without changes
test-client/client.py DELETED
@@ -1,103 +0,0 @@
-import logging
-import requests
-from typing import Optional, Dict, Any
-import json
-
-class LLMEngineClient:
-    def __init__(self, base_url: str, timeout: int = 10):
-        # Remove /api suffix and ensure proper formatting
-        self.base_url = base_url.rstrip('/')
-        self.timeout = timeout
-        self.logger = logging.getLogger(__name__)
-
-        # Set up logging
-        logging.basicConfig(
-            level=logging.INFO,
-            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-        )
-
-        self.logger.info(f"Initialized client with base URL: {self.base_url}")
-
-    def _make_request(self, method: str, endpoint: str, data: Optional[Dict] = None) -> Dict[str, Any]:
-        """Make HTTP request with detailed error handling"""
-        url = f"{self.base_url}/{endpoint.lstrip('/')}"
-        self.logger.info(f"Making {method} request to: {url}")
-
-        try:
-            headers = {
-                'Accept': 'application/json',
-                'Content-Type': 'application/json' if data else 'application/json'
-            }
-
-            response = requests.request(
-                method=method,
-                url=url,
-                json=data if data else None,
-                timeout=self.timeout,
-                headers=headers
-            )
-
-            # Log response details for debugging
-            self.logger.debug(f"Response status code: {response.status_code}")
-            self.logger.debug(f"Response headers: {response.headers}")
-            self.logger.debug(f"Response content: {response.text[:500]}")
-
-            # Check if the response is HTML
-            content_type = response.headers.get('content-type', '')
-            if 'text/html' in content_type:
-                self.logger.error(f"Received HTML response. URL might be incorrect or service might be down.")
-                self.logger.error(f"Attempted URL: {url}")
-                raise ValueError(f"Server returned HTML instead of JSON. Please check if the URL {url} is correct.")
-
-            response.raise_for_status()
-
-            return response.json()
-
-        except requests.exceptions.ConnectionError as e:
-            self.logger.error(f"Failed to connect to {url}: {str(e)}")
-            raise ConnectionError(f"Could not connect to LLM Engine at {url}. Is the service running?")
-
-        except requests.exceptions.Timeout as e:
-            self.logger.error(f"Request to {url} timed out after {self.timeout}s")
-            raise TimeoutError(f"Request timed out after {self.timeout} seconds")
-
-        except requests.exceptions.RequestException as e:
-            self.logger.error(f"Request failed: {str(e)}")
-            raise
-
-    def check_health(self) -> Dict[str, Any]:
-        """Check if the service is running and get health status"""
-        return self._make_request('GET', 'health')
-
-    def initialize_model(self, config: Dict[str, Any]) -> Dict[str, Any]:
-        """Initialize the model with given configuration"""
-        return self._make_request('POST', 'initialize', data=config)
-
-    def generate_text(self, request: Dict[str, Any]) -> Dict[str, Any]:
-        """Generate text using the initialized model"""
-        return self._make_request('POST', 'generate', data=request)
-
-def test_connection():
-    """Test the connection to the LLM Engine"""
-    # When running on Spaces, we need to use the gradio-provided URL
-    base_url = "https://teamgenki-llm-engine.hf.space"
-    client = LLMEngineClient(base_url)
-
-    try:
-        # Try each endpoint
-        client.logger.info("Testing root endpoint...")
-        root_response = client._make_request('GET', '')
-        client.logger.info(f"Root endpoint response: {root_response}")
-
-        client.logger.info("Testing health endpoint...")
-        health_status = client.check_health()
-        client.logger.info(f"Health endpoint response: {health_status}")
-
-        return True
-
-    except Exception as e:
-        client.logger.error(f"Connection test failed: {str(e)}")
-        return False
-
-if __name__ == "__main__":
-    test_connection()
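With test-client/client.py deleted, the repository no longer ships a way to poke the deployed Space. For a quick manual check, the core of what the removed client did can be reproduced in a few lines; a minimal sketch, reusing the Space URL and the /health endpoint from the deleted file (everything else here is illustrative):

# Quick manual health check, standing in for the deleted test client.
import requests

BASE_URL = "https://teamgenki-llm-engine.hf.space"  # URL from the deleted client

# The old client's check_health() wrapped a GET against /health.
response = requests.get(f"{BASE_URL}/health", timeout=10)
response.raise_for_status()
print(response.json())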