Commit 61052e7 · Parent(s): 8920961
Fix Gradio version and remove debug prints
Files changed:
- app.py +22 -6
- app_fixed.py +22 -6
app.py
CHANGED
@@ -10,6 +10,7 @@ import psutil
 # Configuration
 BASE_MODEL = "microsoft/phi-2"
 ADAPTER_MODEL = "pradeep6kumar2024/phi2-qlora-assistant"
+DEBUG = False  # Set to True to enable debug prints

 # Memory monitoring
 def get_memory_usage():
@@ -32,7 +33,8 @@ class ModelWrapper:
         # Clear memory
         gc.collect()

-        print(f"Memory before loading: {get_memory_usage():.2f} MB")
+        if DEBUG:
+            print(f"Memory before loading: {get_memory_usage():.2f} MB")

         print("Loading tokenizer...")
         self.tokenizer = AutoTokenizer.from_pretrained(
@@ -42,7 +44,8 @@ class ModelWrapper:
         )
         self.tokenizer.pad_token = self.tokenizer.eos_token

-        print(f"Memory after tokenizer: {get_memory_usage():.2f} MB")
+        if DEBUG:
+            print(f"Memory after tokenizer: {get_memory_usage():.2f} MB")

         print("Loading base model...")
         base_model = AutoModelForCausalLM.from_pretrained(
@@ -55,7 +58,8 @@ class ModelWrapper:
             offload_folder="offload"
         )

-        print(f"Memory after base model: {get_memory_usage():.2f} MB")
+        if DEBUG:
+            print(f"Memory after base model: {get_memory_usage():.2f} MB")

         print("Loading LoRA adapter...")
         self.model = PeftModel.from_pretrained(
@@ -69,7 +73,8 @@ class ModelWrapper:
         del base_model
         gc.collect()

-        print(f"Memory after adapter: {get_memory_usage():.2f} MB")
+        if DEBUG:
+            print(f"Memory after adapter: {get_memory_usage():.2f} MB")

         self.model.eval()
         print("Model loading complete!")
@@ -91,7 +96,8 @@ class ModelWrapper:
         else:
             enhanced_prompt = prompt

-        print(f"Enhanced prompt: {enhanced_prompt}")
+        if DEBUG:
+            print(f"Enhanced prompt: {enhanced_prompt}")

         # Tokenize input with shorter max length
         inputs = self.tokenizer(
@@ -125,12 +131,20 @@ class ModelWrapper:
         # Decode response
         response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

+        if DEBUG:
+            print(f"Raw response: {response}")
+
         # Clean up the response
         if response.startswith(enhanced_prompt):
             response = response[len(enhanced_prompt):].strip()
+            if DEBUG:
+                print(f"After prompt removal: {response}")

         # Basic cleanup only
-        response = response.replace("Human:", "").replace("Assistant:", "")
+        cleaned_response = response.replace("Human:", "").replace("Assistant:", "")
+        if DEBUG and cleaned_response != response:
+            print(f"After conversation removal: {cleaned_response}")
+        response = cleaned_response

         # Ensure code examples are properly formatted
         if "```python" not in response and "def " in response:
@@ -138,6 +152,8 @@ class ModelWrapper:

         # Simple validation
         if len(response.strip()) < 10:
+            if DEBUG:
+                print("Response validation failed - using fallback")
             if "function" in prompt.lower():
                 fallback_response = """```python
 def add_numbers(a, b):
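Note: the hunks above gate every memory print behind the new module-level DEBUG flag instead of deleting the calls outright, so the instrumentation can be re-enabled later. A minimal sketch of that pattern is below; the body of get_memory_usage() is an assumption (the diff only shows its def line and the psutil import), and the log_memory helper is a hypothetical name, not part of app.py.

    import gc
    import psutil

    DEBUG = False  # Set to True to enable debug prints, as in the commit

    def get_memory_usage():
        # Assumed implementation: resident set size of this process, in MB.
        return psutil.Process().memory_info().rss / (1024 * 1024)

    def log_memory(stage: str) -> None:
        # Hypothetical helper: prints only when DEBUG is on, mirroring the guards in the diff.
        if DEBUG:
            print(f"Memory {stage}: {get_memory_usage():.2f} MB")

    gc.collect()
    log_memory("before loading")

Keeping the prints behind one flag silences them by default while leaving the call sites in place for future debugging, which matches the intent of the commit message.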
app_fixed.py
CHANGED
@@ -10,6 +10,7 @@ import psutil
 # Configuration
 BASE_MODEL = "microsoft/phi-2"
 ADAPTER_MODEL = "pradeep6kumar2024/phi2-qlora-assistant"
+DEBUG = False  # Set to True to enable debug prints

 # Memory monitoring
 def get_memory_usage():
@@ -32,7 +33,8 @@ class ModelWrapper:
         # Clear memory
         gc.collect()

-        print(f"Memory before loading: {get_memory_usage():.2f} MB")
+        if DEBUG:
+            print(f"Memory before loading: {get_memory_usage():.2f} MB")

         print("Loading tokenizer...")
         self.tokenizer = AutoTokenizer.from_pretrained(
@@ -42,7 +44,8 @@ class ModelWrapper:
         )
         self.tokenizer.pad_token = self.tokenizer.eos_token

-        print(f"Memory after tokenizer: {get_memory_usage():.2f} MB")
+        if DEBUG:
+            print(f"Memory after tokenizer: {get_memory_usage():.2f} MB")

         print("Loading base model...")
         base_model = AutoModelForCausalLM.from_pretrained(
@@ -55,7 +58,8 @@ class ModelWrapper:
             offload_folder="offload"
         )

-        print(f"Memory after base model: {get_memory_usage():.2f} MB")
+        if DEBUG:
+            print(f"Memory after base model: {get_memory_usage():.2f} MB")

         print("Loading LoRA adapter...")
         self.model = PeftModel.from_pretrained(
@@ -69,7 +73,8 @@ class ModelWrapper:
         del base_model
         gc.collect()

-        print(f"Memory after adapter: {get_memory_usage():.2f} MB")
+        if DEBUG:
+            print(f"Memory after adapter: {get_memory_usage():.2f} MB")

         self.model.eval()
         print("Model loading complete!")
@@ -91,7 +96,8 @@ class ModelWrapper:
         else:
             enhanced_prompt = prompt

-        print(f"Enhanced prompt: {enhanced_prompt}")
+        if DEBUG:
+            print(f"Enhanced prompt: {enhanced_prompt}")

         # Tokenize input with shorter max length
         inputs = self.tokenizer(
@@ -125,12 +131,20 @@ class ModelWrapper:
         # Decode response
         response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

+        if DEBUG:
+            print(f"Raw response: {response}")
+
         # Clean up the response
         if response.startswith(enhanced_prompt):
             response = response[len(enhanced_prompt):].strip()
+            if DEBUG:
+                print(f"After prompt removal: {response}")

         # Basic cleanup only
-        response = response.replace("Human:", "").replace("Assistant:", "")
+        cleaned_response = response.replace("Human:", "").replace("Assistant:", "")
+        if DEBUG and cleaned_response != response:
+            print(f"After conversation removal: {cleaned_response}")
+        response = cleaned_response

         # Ensure code examples are properly formatted
         if "```python" not in response and "def " in response:
@@ -138,6 +152,8 @@ class ModelWrapper:

         # Simple validation
         if len(response.strip()) < 10:
+            if DEBUG:
+                print("Response validation failed - using fallback")
             if "function" in prompt.lower():
                 fallback_response = """```python
 def add_numbers(a, b):
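Note: both files edit the same loading path, which stacks the LoRA adapter on the phi-2 base model. A self-contained sketch of that sequence is below for context; any keyword argument not visible in the diff (torch_dtype, low_cpu_mem_usage) is an assumption rather than a copy of app.py.

    import gc
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from peft import PeftModel

    BASE_MODEL = "microsoft/phi-2"
    ADAPTER_MODEL = "pradeep6kumar2024/phi2-qlora-assistant"

    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    tokenizer.pad_token = tokenizer.eos_token

    print("Loading base model...")
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.float32,  # assumed: CPU Space, full precision
        low_cpu_mem_usage=True,     # assumed: lowers peak RAM during load
        offload_folder="offload",   # shown in the diff context
    )

    print("Loading LoRA adapter...")
    model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)

    # Drop the extra reference and reclaim memory, as the diff context lines do.
    del base_model
    gc.collect()

    model.eval()
    print("Model loading complete!")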