pradeep6kumar2024 committed
Commit 61052e7 · Parent: 8920961

Fix Gradio version and remove debug prints

Files changed (2)
  1. app.py +22 -6
  2. app_fixed.py +22 -6
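
Both files receive the same patch: a module-level DEBUG flag lands next to the model configuration, and each formerly unconditional debug print is wrapped in an `if DEBUG:` guard, so the Space stays quiet unless the flag is flipped. The diff truncates the body of get_memory_usage(); the sketch below assumes a typical psutil implementation just to show the pattern end to end:

import os
import psutil

DEBUG = False  # Set to True to enable debug prints

def get_memory_usage():
    # Assumed implementation: resident set size of this process, in MB
    return psutil.Process(os.getpid()).memory_info().rss / (1024 * 1024)

if DEBUG:
    print(f"Memory before loading: {get_memory_usage():.2f} MB")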
app.py CHANGED
@@ -10,6 +10,7 @@ import psutil
 # Configuration
 BASE_MODEL = "microsoft/phi-2"
 ADAPTER_MODEL = "pradeep6kumar2024/phi2-qlora-assistant"
+DEBUG = False  # Set to True to enable debug prints
 
 # Memory monitoring
 def get_memory_usage():
@@ -32,7 +33,8 @@ class ModelWrapper:
         # Clear memory
         gc.collect()
 
-        print(f"Memory before loading: {get_memory_usage():.2f} MB")
+        if DEBUG:
+            print(f"Memory before loading: {get_memory_usage():.2f} MB")
 
         print("Loading tokenizer...")
         self.tokenizer = AutoTokenizer.from_pretrained(
@@ -42,7 +44,8 @@ class ModelWrapper:
         )
         self.tokenizer.pad_token = self.tokenizer.eos_token
 
-        print(f"Memory after tokenizer: {get_memory_usage():.2f} MB")
+        if DEBUG:
+            print(f"Memory after tokenizer: {get_memory_usage():.2f} MB")
 
         print("Loading base model...")
         base_model = AutoModelForCausalLM.from_pretrained(
@@ -55,7 +58,8 @@ class ModelWrapper:
             offload_folder="offload"
         )
 
-        print(f"Memory after base model: {get_memory_usage():.2f} MB")
+        if DEBUG:
+            print(f"Memory after base model: {get_memory_usage():.2f} MB")
 
         print("Loading LoRA adapter...")
         self.model = PeftModel.from_pretrained(
@@ -69,7 +73,8 @@ class ModelWrapper:
         del base_model
         gc.collect()
 
-        print(f"Memory after adapter: {get_memory_usage():.2f} MB")
+        if DEBUG:
+            print(f"Memory after adapter: {get_memory_usage():.2f} MB")
 
         self.model.eval()
         print("Model loading complete!")
@@ -91,7 +96,8 @@ class ModelWrapper:
         else:
            enhanced_prompt = prompt
 
-        print(f"Enhanced prompt: {enhanced_prompt}")
+        if DEBUG:
+            print(f"Enhanced prompt: {enhanced_prompt}")
 
         # Tokenize input with shorter max length
         inputs = self.tokenizer(
@@ -125,12 +131,20 @@ class ModelWrapper:
         # Decode response
         response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
 
+        if DEBUG:
+            print(f"Raw response: {response}")
+
         # Clean up the response
         if response.startswith(enhanced_prompt):
             response = response[len(enhanced_prompt):].strip()
+            if DEBUG:
+                print(f"After prompt removal: {response}")
 
         # Basic cleanup only
-        response = response.replace("Human:", "").replace("Assistant:", "")
+        cleaned_response = response.replace("Human:", "").replace("Assistant:", "")
+        if DEBUG and cleaned_response != response:
+            print(f"After conversation removal: {cleaned_response}")
+        response = cleaned_response
 
         # Ensure code examples are properly formatted
         if "```python" not in response and "def " in response:
@@ -138,6 +152,8 @@ class ModelWrapper:
 
         # Simple validation
         if len(response.strip()) < 10:
+            if DEBUG:
+                print("Response validation failed - using fallback")
             if "function" in prompt.lower():
                 fallback_response = """```python
 def add_numbers(a, b):
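
Beyond gating prints, the one real behavioral change in generate() is the cleanup step: the patch writes the marker-stripped text into cleaned_response instead of overwriting response in place, so the debug path can report only when a "Human:" or "Assistant:" marker was actually removed. Extracted as a standalone function (the name clean_response is ours, not the repo's), the logic is:

DEBUG = False  # mirrors the module-level flag added by this commit

def clean_response(response: str) -> str:
    # Strip leftover conversation markers from the generated text
    cleaned_response = response.replace("Human:", "").replace("Assistant:", "")
    if DEBUG and cleaned_response != response:
        print(f"After conversation removal: {cleaned_response}")
    return cleaned_response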
app_fixed.py CHANGED
@@ -10,6 +10,7 @@ import psutil
 # Configuration
 BASE_MODEL = "microsoft/phi-2"
 ADAPTER_MODEL = "pradeep6kumar2024/phi2-qlora-assistant"
+DEBUG = False  # Set to True to enable debug prints
 
 # Memory monitoring
 def get_memory_usage():
@@ -32,7 +33,8 @@ class ModelWrapper:
         # Clear memory
         gc.collect()
 
-        print(f"Memory before loading: {get_memory_usage():.2f} MB")
+        if DEBUG:
+            print(f"Memory before loading: {get_memory_usage():.2f} MB")
 
         print("Loading tokenizer...")
         self.tokenizer = AutoTokenizer.from_pretrained(
@@ -42,7 +44,8 @@ class ModelWrapper:
         )
         self.tokenizer.pad_token = self.tokenizer.eos_token
 
-        print(f"Memory after tokenizer: {get_memory_usage():.2f} MB")
+        if DEBUG:
+            print(f"Memory after tokenizer: {get_memory_usage():.2f} MB")
 
         print("Loading base model...")
         base_model = AutoModelForCausalLM.from_pretrained(
@@ -55,7 +58,8 @@ class ModelWrapper:
             offload_folder="offload"
         )
 
-        print(f"Memory after base model: {get_memory_usage():.2f} MB")
+        if DEBUG:
+            print(f"Memory after base model: {get_memory_usage():.2f} MB")
 
         print("Loading LoRA adapter...")
         self.model = PeftModel.from_pretrained(
@@ -69,7 +73,8 @@ class ModelWrapper:
         del base_model
         gc.collect()
 
-        print(f"Memory after adapter: {get_memory_usage():.2f} MB")
+        if DEBUG:
+            print(f"Memory after adapter: {get_memory_usage():.2f} MB")
 
         self.model.eval()
         print("Model loading complete!")
@@ -91,7 +96,8 @@ class ModelWrapper:
         else:
            enhanced_prompt = prompt
 
-        print(f"Enhanced prompt: {enhanced_prompt}")
+        if DEBUG:
+            print(f"Enhanced prompt: {enhanced_prompt}")
 
         # Tokenize input with shorter max length
         inputs = self.tokenizer(
@@ -125,12 +131,20 @@ class ModelWrapper:
         # Decode response
         response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
 
+        if DEBUG:
+            print(f"Raw response: {response}")
+
         # Clean up the response
         if response.startswith(enhanced_prompt):
             response = response[len(enhanced_prompt):].strip()
+            if DEBUG:
+                print(f"After prompt removal: {response}")
 
         # Basic cleanup only
-        response = response.replace("Human:", "").replace("Assistant:", "")
+        cleaned_response = response.replace("Human:", "").replace("Assistant:", "")
+        if DEBUG and cleaned_response != response:
+            print(f"After conversation removal: {cleaned_response}")
+        response = cleaned_response
 
         # Ensure code examples are properly formatted
         if "```python" not in response and "def " in response:
@@ -138,6 +152,8 @@ class ModelWrapper:
 
         # Simple validation
         if len(response.strip()) < 10:
+            if DEBUG:
+                print("Response validation failed - using fallback")
             if "function" in prompt.lower():
                 fallback_response = """```python
 def add_numbers(a, b):
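
A possible follow-up, not part of this commit: read the flag from the environment so debug prints can be toggled on a running Space without editing either file. A minimal sketch (the DEBUG name is the repo's; the environment-variable convention is our assumption):

import os

# Hypothetical: set DEBUG=1 in the Space's environment to enable the debug prints
DEBUG = os.getenv("DEBUG", "0") == "1"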