Joash committed
Commit 8aef6ee
Parent(s): 573694a

Simplify model loading to use CPU by default

Files changed (1):
  1. app.py +10 -39
app.py CHANGED

@@ -9,8 +9,8 @@ import json
 from typing import List, Dict
 import warnings
 
-# Filter out CUDA/NVML warnings
-warnings.filterwarnings('ignore', category=UserWarning)
+# Filter out warnings
+warnings.filterwarnings('ignore')
 
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -24,11 +24,6 @@ MODEL_NAME = os.getenv("MODEL_NAME", "google/gemma-2b-it")
 CACHE_DIR = "/home/user/.cache/huggingface"
 os.makedirs(CACHE_DIR, exist_ok=True)
 
-# Set environment variables for GPU
-os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
-os.environ["CUDA_VISIBLE_DEVICES"] = "0"
-os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
-
 class Review:
     def __init__(self, code: str, language: str, suggestions: str):
         self.code = code
@@ -41,7 +36,7 @@ class CodeReviewer:
     def __init__(self):
         self.model = None
         self.tokenizer = None
-        self.device = None
+        self.device = "cpu"  # Default to CPU
         self.review_history: List[Review] = []
         self.metrics = {
             'total_reviews': 0,
@@ -67,38 +62,19 @@ class CodeReviewer:
             logger.info("Loading model...")
             # Initialize model with specific configuration
             model_kwargs = {
-                "torch_dtype": torch.float16,
+                "torch_dtype": torch.float32,  # Use float32 for CPU
                 "trust_remote_code": True,
                 "low_cpu_mem_usage": True,
                 "cache_dir": CACHE_DIR,
                 "token": HF_TOKEN
             }
 
-            # Try loading with different configurations
-            try:
-                # First try with device_map="auto"
-                self.model = AutoModelForCausalLM.from_pretrained(
-                    MODEL_NAME,
-                    device_map="auto",
-                    **model_kwargs
-                )
-                self.device = next(self.model.parameters()).device
-            except Exception as e1:
-                logger.warning(f"Failed to load with device_map='auto': {e1}")
-                try:
-                    # Try with specific device
-                    if torch.cuda.is_available():
-                        self.device = torch.device("cuda:0")
-                    else:
-                        self.device = torch.device("cpu")
-                    model_kwargs["device_map"] = None
-                    self.model = AutoModelForCausalLM.from_pretrained(
-                        MODEL_NAME,
-                        **model_kwargs
-                    ).to(self.device)
-                except Exception as e2:
-                    logger.error(f"Failed to load model on specific device: {e2}")
-                    raise
+            # Load model directly to CPU
+            self.model = AutoModelForCausalLM.from_pretrained(
+                MODEL_NAME,
+                device_map=None,  # Don't use device_map
+                **model_kwargs
+            ).to(self.device)
 
             logger.info(f"Model loaded successfully on {self.device}")
         except Exception as e:
@@ -170,11 +146,6 @@ Code:
             # Update metrics
             self.update_metrics(review)
 
-            # Clear GPU memory
-            if torch.cuda.is_available():
-                del inputs, outputs
-                torch.cuda.empty_cache()
-
             return suggestions
 
         except Exception as e:
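
For reference, below is a minimal standalone sketch of the loading path this commit settles on. It is not the application code itself: the MODEL_NAME and HF_TOKEN environment variables mirror app.py, while the tokenizer setup and the smoke-test generation at the end are illustrative additions that do not appear in this diff.

import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = os.getenv("MODEL_NAME", "google/gemma-2b-it")
HF_TOKEN = os.getenv("HF_TOKEN")  # gemma-2b-it is gated; a valid token is required

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)

# Mirrors the simplified path in app.py: no device_map, full precision, CPU only.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,  # float16 ops are slow or missing in many CPU kernels
    low_cpu_mem_usage=True,
    trust_remote_code=True,
    token=HF_TOKEN,
).to("cpu")

# Illustrative smoke test, not part of the diff.
inputs = tokenizer("def add(a, b):", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Passing device_map=None keeps accelerate's dispatch logic out of the picture entirely, and float32 avoids the half-precision kernels that CPUs generally lack or run very slowly, which is why the old try/except fallback chain could be dropped.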