Joash committed on
Commit
1f37a6a
·
1 Parent(s): 93aa8dc

Add detailed logging and improve error handling in model manager

Browse files
Files changed (2) hide show
  1. Dockerfile +4 -2
  2. src/model_manager.py +22 -20
Dockerfile CHANGED
@@ -26,6 +26,8 @@ ENV PORT=7860
26
  ENV PATH="/home/user/.local/bin:${PATH}"
27
  ENV HF_HOME=/home/user/.cache/huggingface
28
  ENV TRANSFORMERS_CACHE=/home/user/.cache/huggingface
 
 
29
 
30
  # Switch to non-root user
31
  USER user
@@ -46,5 +48,5 @@ COPY --chown=user:user . .
46
  # Expose port for Hugging Face Spaces
47
  EXPOSE 7860
48
 
49
- # Run the application
50
- CMD ["python", "-m", "uvicorn", "src.api:app", "--host", "0.0.0.0", "--port", "7860"]
 
26
  ENV PATH="/home/user/.local/bin:${PATH}"
27
  ENV HF_HOME=/home/user/.cache/huggingface
28
  ENV TRANSFORMERS_CACHE=/home/user/.cache/huggingface
29
+ # Set logging to stdout
30
+ ENV LOG_FILE=/dev/stdout
31
 
32
  # Switch to non-root user
33
  USER user
 
48
  # Expose port for Hugging Face Spaces
49
  EXPOSE 7860
50
 
51
+ # Run the application with logging
52
+ CMD ["python", "-u", "-m", "uvicorn", "src.api:app", "--host", "0.0.0.0", "--port", "7860", "--log-level", "debug"]
src/model_manager.py CHANGED
@@ -1,5 +1,5 @@
1
  import logging
2
- from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import torch
4
  from huggingface_hub import login
5
  from .config import Config
@@ -16,7 +16,12 @@ class ModelManager:
16
  # Login to Hugging Face Hub
17
  if Config.HUGGING_FACE_TOKEN:
18
  logger.info("Logging in to Hugging Face Hub")
19
- login(token=Config.HUGGING_FACE_TOKEN)
 
 
 
 
 
20
 
21
  # Initialize tokenizer and model
22
  self._init_tokenizer()
@@ -37,7 +42,8 @@ class ModelManager:
37
  'bos_token': '<s>'
38
  }
39
  self.tokenizer.add_special_tokens(special_tokens)
40
- logger.info("Tokenizer loaded successfully.")
 
41
  except Exception as e:
42
  logger.error(f"Error loading tokenizer: {str(e)}")
43
  raise
@@ -46,6 +52,7 @@ class ModelManager:
46
  """Initialize the model."""
47
  try:
48
  logger.info(f"Loading model: {self.model_name}")
 
49
 
50
  # Load model with CPU configuration
51
  self.model = AutoModelForCausalLM.from_pretrained(
@@ -57,7 +64,8 @@ class ModelManager:
57
  )
58
  # Resize embeddings to match tokenizer
59
  self.model.resize_token_embeddings(len(self.tokenizer))
60
- logger.info(f"Using device: {self.device}")
 
61
  except Exception as e:
62
  logger.error(f"Error loading model: {str(e)}")
63
  raise
@@ -65,11 +73,16 @@ class ModelManager:
65
  def generate_text(self, prompt: str, max_new_tokens: int = 1024) -> str:
66
  """Generate text from prompt."""
67
  try:
 
 
 
68
  # Encode the prompt
69
  inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
70
  inputs = {k: v.to(self.device) for k, v in inputs.items()}
 
71
 
72
  # Generate response
 
73
  with torch.no_grad():
74
  outputs = self.model.generate(
75
  **inputs,
@@ -80,27 +93,16 @@ class ModelManager:
80
  pad_token_id=self.tokenizer.pad_token_id,
81
  eos_token_id=self.tokenizer.eos_token_id,
82
  )
83
-
84
  # Decode and return the generated text
85
  generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
86
- # Extract only the generated part (remove the prompt)
87
  response = generated_text[len(prompt):].strip()
88
 
 
 
89
  return response
90
 
91
  except Exception as e:
92
  logger.error(f"Error generating text: {str(e)}")
93
- return """- Issues:
94
- - Error generating code review
95
- - Model inference failed
96
-
97
- - Improvements:
98
- - Please try again
99
- - Check model configuration
100
-
101
- - Best Practices:
102
- - Ensure proper model setup
103
- - Verify token permissions
104
-
105
- - Security:
106
- - No immediate concerns"""
 
1
  import logging
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
3
  import torch
4
  from huggingface_hub import login
5
  from .config import Config
 
16
  # Login to Hugging Face Hub
17
  if Config.HUGGING_FACE_TOKEN:
18
  logger.info("Logging in to Hugging Face Hub")
19
+ try:
20
+ login(token=Config.HUGGING_FACE_TOKEN)
21
+ logger.info("Successfully logged in to Hugging Face Hub")
22
+ except Exception as e:
23
+ logger.error(f"Failed to login to Hugging Face Hub: {str(e)}")
24
+ raise
25
 
26
  # Initialize tokenizer and model
27
  self._init_tokenizer()
 
42
  'bos_token': '<s>'
43
  }
44
  self.tokenizer.add_special_tokens(special_tokens)
45
+ logger.info("Tokenizer loaded successfully")
46
+ logger.debug(f"Tokenizer vocabulary size: {len(self.tokenizer)}")
47
  except Exception as e:
48
  logger.error(f"Error loading tokenizer: {str(e)}")
49
  raise
 
52
  """Initialize the model."""
53
  try:
54
  logger.info(f"Loading model: {self.model_name}")
55
+ logger.info(f"Using device: {self.device}")
56
 
57
  # Load model with CPU configuration
58
  self.model = AutoModelForCausalLM.from_pretrained(
 
64
  )
65
  # Resize embeddings to match tokenizer
66
  self.model.resize_token_embeddings(len(self.tokenizer))
67
+ logger.info("Model loaded successfully")
68
+ logger.debug(f"Model parameters: {sum(p.numel() for p in self.model.parameters())}")
69
  except Exception as e:
70
  logger.error(f"Error loading model: {str(e)}")
71
  raise
 
73
  def generate_text(self, prompt: str, max_new_tokens: int = 1024) -> str:
74
  """Generate text from prompt."""
75
  try:
76
+ logger.info("Starting text generation")
77
+ logger.debug(f"Prompt length: {len(prompt)}")
78
+
79
  # Encode the prompt
80
  inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
81
  inputs = {k: v.to(self.device) for k, v in inputs.items()}
82
+ logger.debug(f"Input tensor shape: {inputs['input_ids'].shape}")
83
 
84
  # Generate response
85
+ logger.info("Generating response")
86
  with torch.no_grad():
87
  outputs = self.model.generate(
88
  **inputs,
 
93
  pad_token_id=self.tokenizer.pad_token_id,
94
  eos_token_id=self.tokenizer.eos_token_id,
95
  )
96
+
97
  # Decode and return the generated text
98
  generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
 
99
  response = generated_text[len(prompt):].strip()
100
 
101
+ logger.info("Text generation completed")
102
+ logger.debug(f"Response length: {len(response)}")
103
  return response
104
 
105
  except Exception as e:
106
  logger.error(f"Error generating text: {str(e)}")
107
+ logger.error(f"Error details: {type(e).__name__}")
108
+ raise