Joash committed on
Commit
6d7cc48
·
1 Parent(s): ffe79d4

Improve model loading with fallback options and memory settings

Browse files
Files changed (1) hide show
  1. app.py +28 -13
app.py CHANGED
@@ -9,8 +9,8 @@ import json
9
  from typing import List, Dict
10
  import warnings
11
 
12
- # Filter CUDA warnings
13
- warnings.filterwarnings('ignore', category=UserWarning, message='Can\'t initialize NVML')
14
 
15
  # Configure logging
16
  logging.basicConfig(level=logging.INFO)
@@ -24,6 +24,11 @@ MODEL_NAME = os.getenv("MODEL_NAME", "google/gemma-2b-it")
24
  CACHE_DIR = "/home/user/.cache/huggingface"
25
  os.makedirs(CACHE_DIR, exist_ok=True)
26
 
 
 
 
 
 
27
  class Review:
28
  def __init__(self, code: str, language: str, suggestions: str):
29
  self.code = code
@@ -36,7 +41,7 @@ class CodeReviewer:
36
  def __init__(self):
37
  self.model = None
38
  self.tokenizer = None
39
- self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
40
  self.review_history: List[Review] = []
41
  self.metrics = {
42
  'total_reviews': 0,
@@ -62,7 +67,6 @@ class CodeReviewer:
62
  logger.info("Loading model...")
63
  # Initialize model with specific configuration
64
  model_kwargs = {
65
- "device_map": "auto",
66
  "torch_dtype": torch.float16,
67
  "trust_remote_code": True,
68
  "low_cpu_mem_usage": True,
@@ -70,20 +74,31 @@ class CodeReviewer:
70
  "token": HF_TOKEN
71
  }
72
 
73
- # Load model with error handling
74
  try:
 
75
  self.model = AutoModelForCausalLM.from_pretrained(
76
  MODEL_NAME,
 
77
  **model_kwargs
78
  )
79
- except Exception as model_error:
80
- logger.error(f"Error loading model: {model_error}")
81
- # Try loading with safetensors
82
- model_kwargs["use_safetensors"] = True
83
- self.model = AutoModelForCausalLM.from_pretrained(
84
- MODEL_NAME,
85
- **model_kwargs
86
- )
 
 
 
 
 
 
 
 
 
87
 
88
  logger.info(f"Model loaded successfully on {self.device}")
89
  except Exception as e:
 
9
  from typing import List, Dict
10
  import warnings
11
 
12
+ # Filter out CUDA/NVML warnings
13
+ warnings.filterwarnings('ignore', category=UserWarning)
14
 
15
  # Configure logging
16
  logging.basicConfig(level=logging.INFO)
 
24
  CACHE_DIR = "/home/user/.cache/huggingface"
25
  os.makedirs(CACHE_DIR, exist_ok=True)
26
 
27
+ # Set environment variables for GPU
28
+ os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
29
+ os.environ["CUDA_VISIBLE_DEVICES"] = "0"
30
+ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
31
+
32
  class Review:
33
  def __init__(self, code: str, language: str, suggestions: str):
34
  self.code = code
 
41
  def __init__(self):
42
  self.model = None
43
  self.tokenizer = None
44
+ self.device = None
45
  self.review_history: List[Review] = []
46
  self.metrics = {
47
  'total_reviews': 0,
 
67
  logger.info("Loading model...")
68
  # Initialize model with specific configuration
69
  model_kwargs = {
 
70
  "torch_dtype": torch.float16,
71
  "trust_remote_code": True,
72
  "low_cpu_mem_usage": True,
 
74
  "token": HF_TOKEN
75
  }
76
 
77
+ # Try loading with different configurations
78
  try:
79
+ # First try with device_map="auto"
80
  self.model = AutoModelForCausalLM.from_pretrained(
81
  MODEL_NAME,
82
+ device_map="auto",
83
  **model_kwargs
84
  )
85
+ self.device = next(self.model.parameters()).device
86
+ except Exception as e1:
87
+ logger.warning(f"Failed to load with device_map='auto': {e1}")
88
+ try:
89
+ # Try with specific device
90
+ if torch.cuda.is_available():
91
+ self.device = torch.device("cuda:0")
92
+ else:
93
+ self.device = torch.device("cpu")
94
+ model_kwargs["device_map"] = None
95
+ self.model = AutoModelForCausalLM.from_pretrained(
96
+ MODEL_NAME,
97
+ **model_kwargs
98
+ ).to(self.device)
99
+ except Exception as e2:
100
+ logger.error(f"Failed to load model on specific device: {e2}")
101
+ raise
102
 
103
  logger.info(f"Model loaded successfully on {self.device}")
104
  except Exception as e: