Joash committed
Commit 8aef6ee
Parent(s): 573694a

Simplify model loading to use CPU by default

Files changed (1):
  1. app.py +10 -39
app.py CHANGED

@@ -9,8 +9,8 @@ import json
 from typing import List, Dict
 import warnings
 
-# Filter out CUDA/NVML warnings
-warnings.filterwarnings('ignore', category=UserWarning)
+# Filter out warnings
+warnings.filterwarnings('ignore')
 
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -24,11 +24,6 @@ MODEL_NAME = os.getenv("MODEL_NAME", "google/gemma-2b-it")
 CACHE_DIR = "/home/user/.cache/huggingface"
 os.makedirs(CACHE_DIR, exist_ok=True)
 
-# Set environment variables for GPU
-os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
-os.environ["CUDA_VISIBLE_DEVICES"] = "0"
-os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
-
 class Review:
     def __init__(self, code: str, language: str, suggestions: str):
         self.code = code
@@ -41,7 +36,7 @@ class CodeReviewer:
     def __init__(self):
         self.model = None
         self.tokenizer = None
-        self.device = None
+        self.device = "cpu"  # Default to CPU
         self.review_history: List[Review] = []
         self.metrics = {
             'total_reviews': 0,
@@ -67,38 +62,19 @@ class CodeReviewer:
             logger.info("Loading model...")
             # Initialize model with specific configuration
             model_kwargs = {
-                "torch_dtype": torch.float16,
+                "torch_dtype": torch.float32,  # Use float32 for CPU
                 "trust_remote_code": True,
                 "low_cpu_mem_usage": True,
                 "cache_dir": CACHE_DIR,
                 "token": HF_TOKEN
             }
 
-            # Try loading with different configurations
-            try:
-                # First try with device_map="auto"
-                self.model = AutoModelForCausalLM.from_pretrained(
-                    MODEL_NAME,
-                    device_map="auto",
-                    **model_kwargs
-                )
-                self.device = next(self.model.parameters()).device
-            except Exception as e1:
-                logger.warning(f"Failed to load with device_map='auto': {e1}")
-                try:
-                    # Try with specific device
-                    if torch.cuda.is_available():
-                        self.device = torch.device("cuda:0")
-                    else:
-                        self.device = torch.device("cpu")
-                    model_kwargs["device_map"] = None
-                    self.model = AutoModelForCausalLM.from_pretrained(
-                        MODEL_NAME,
-                        **model_kwargs
-                    ).to(self.device)
-                except Exception as e2:
-                    logger.error(f"Failed to load model on specific device: {e2}")
-                    raise
+            # Load model directly to CPU
+            self.model = AutoModelForCausalLM.from_pretrained(
+                MODEL_NAME,
+                device_map=None,  # Don't use device_map
+                **model_kwargs
+            ).to(self.device)
 
             logger.info(f"Model loaded successfully on {self.device}")
         except Exception as e:
@@ -170,11 +146,6 @@ Code:
             # Update metrics
             self.update_metrics(review)
 
-            # Clear GPU memory
-            if torch.cuda.is_available():
-                del inputs, outputs
-                torch.cuda.empty_cache()
-
             return suggestions
 
         except Exception as e:
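
For reference, below is a minimal standalone sketch of the loading path this commit settles on. It is not the application code itself: the MODEL_NAME and HF_TOKEN environment variables mirror app.py, while the tokenizer setup and the smoke-test generation at the end are illustrative additions that do not appear in this diff.

import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = os.getenv("MODEL_NAME", "google/gemma-2b-it")
HF_TOKEN = os.getenv("HF_TOKEN")  # gemma-2b-it is gated; a valid token is required

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)

# Mirrors the simplified path in app.py: no device_map, full precision, CPU only.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,  # float16 ops are slow or missing in many CPU kernels
    low_cpu_mem_usage=True,
    trust_remote_code=True,
    token=HF_TOKEN,
).to("cpu")

# Illustrative smoke test, not part of the diff.
inputs = tokenizer("def add(a, b):", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Passing device_map=None keeps accelerate's dispatch logic out of the picture entirely, and float32 avoids the half-precision kernels that CPUs generally lack or run very slowly, which is why the old try/except fallback chain could be dropped.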