Syncbuz120 committed on
Commit 1b892e4 · 1 Parent(s): 494bf87
Files changed (1):
  1. model/generate.py +66 -61
model/generate.py CHANGED
@@ -15,31 +15,26 @@ MEMORY_OPTIMIZED_MODELS = [
     "gpt2",  # ~500MB
     "distilgpt2",  # ~250MB
     "microsoft/DialoGPT-small",  # ~250MB
-    "huggingface/CodeBERTa-small-v1",  # Code tasks
 ]
 
-# Singleton state
 _generator_instance = None
 
 def get_optimal_model_for_memory():
-    """Select the best model based on available memory."""
     available_memory = psutil.virtual_memory().available / (1024 * 1024)  # MB
     logger.info(f"Available memory: {available_memory:.1f}MB")
 
     if available_memory < 300:
-        return None  # Use template fallback
+        return None
     elif available_memory < 600:
         return "microsoft/DialoGPT-small"
     else:
         return "distilgpt2"
 
 def load_model_with_memory_optimization(model_name):
-    """Load model with low memory settings."""
     try:
         logger.info(f"Loading {model_name} with memory optimizations...")
 
         tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side='left', use_fast=True)
-
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
 
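Reviewer note: the selection thresholds above are easy to sanity-check in isolation. A minimal sketch (illustrative only, not part of this commit) that mirrors get_optimal_model_for_memory() with the psutil lookup factored out:

    def pick_model(available_mb):
        # Same thresholds as get_optimal_model_for_memory().
        if available_mb < 300:
            return None  # caller falls back to template generation
        elif available_mb < 600:
            return "microsoft/DialoGPT-small"
        return "distilgpt2"

    for mb in (250, 450, 900):
        print(mb, "->", pick_model(mb))
    # 250 -> None, 450 -> microsoft/DialoGPT-small, 900 -> distilgpt2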
@@ -72,104 +67,119 @@ def extract_keywords(text):
 def generate_template_based_test_cases(srs_text):
     keywords = extract_keywords(srs_text)
     test_cases = []
+    counter = 1
 
     if any(word in keywords for word in ['login', 'authentication', 'user', 'password']):
         test_cases.extend([
             {
-                "id": "TC_001",
+                "id": f"TC_{counter:03d}",
                 "title": "Valid Login Test",
                 "description": "Test login with valid credentials",
                 "steps": ["Enter valid username", "Enter valid password", "Click login"],
                 "expected": "User should be logged in successfully"
             },
             {
-                "id": "TC_002",
+                "id": f"TC_{counter+1:03d}",
                 "title": "Invalid Login Test",
                 "description": "Test login with invalid credentials",
                 "steps": ["Enter invalid username", "Enter invalid password", "Click login"],
                 "expected": "Error message should be displayed"
             }
         ])
+        counter += 2
 
     if any(word in keywords for word in ['database', 'data', 'store', 'save']):
         test_cases.append({
-            "id": "TC_003",
+            "id": f"TC_{counter:03d}",
             "title": "Data Storage Test",
             "description": "Test data storage functionality",
             "steps": ["Enter data", "Save data", "Verify storage"],
             "expected": "Data should be stored correctly"
         })
+        counter += 1
+
+    if any(word in keywords for word in ['validation', 'error']):
+        test_cases.append({
+            "id": f"TC_{counter:03d}",
+            "title": "Input Validation Test",
+            "description": "Test system input validation",
+            "steps": ["Enter invalid input", "Submit form"],
+            "expected": "System should prevent submission and show error"
+        })
 
     if not test_cases:
-        test_cases = [
-            {
-                "id": "TC_001",
-                "title": "Basic Functionality Test",
-                "description": "Test basic system functionality",
-                "steps": ["Access the system", "Perform basic operations", "Verify results"],
-                "expected": "System should work as expected"
-            }
-        ]
+        test_cases = [{
+            "id": "TC_001",
+            "title": "Generic Functional Test",
+            "description": "Test basic system functionality",
+            "steps": ["Access system", "Perform operations"],
+            "expected": "System works correctly"
+        }]
 
     return test_cases
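Reviewer note: with the new counter, IDs stay sequential across whichever keyword groups fire. An illustrative call (assuming extract_keywords(), which is outside this hunk, surfaces 'password' and 'database' from this sentence):

    srs = "Users must enter a password to log in; records go to the database."
    cases = generate_template_based_test_cases(srs)
    print([c["id"] for c in cases])
    # Both groups matched: ['TC_001', 'TC_002', 'TC_003']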
 
-def parse_generated_test_cases(generated_text):
-    lines = generated_text.split('\n')
+def parse_generated_test_cases(text):
+    lines = text.split('\n')
     test_cases = []
-    current_case = {}
+    current = {}
+    steps = []
     case_counter = 1
 
     for line in lines:
         line = line.strip()
-        if line.startswith(('1.', '2.', '3.', 'TC', 'Test')):
-            if current_case:
-                test_cases.append(current_case)
-            current_case = {
+        if re.match(r'^\d+\.', line) or line.lower().startswith("test case"):
+            if current:
+                current["steps"] = steps or ["Execute the test"]
+                current["expected"] = "Test should pass"
+                test_cases.append(current)
+            current = {
                 "id": f"TC_{case_counter:03d}",
                 "title": line,
-                "description": line,
-                "steps": ["Execute the test"],
-                "expected": "Test should pass"
+                "description": line
             }
+            steps = []
             case_counter += 1
+        elif line.lower().startswith("step") or line.startswith("-"):
+            steps.append(line.lstrip('- ').strip())
 
-    if current_case:
-        test_cases.append(current_case)
+    if current:
+        current["steps"] = steps or ["Execute the test"]
+        current["expected"] = "Test should pass"
+        test_cases.append(current)
 
     if not test_cases:
         return [{
             "id": "TC_001",
             "title": "Generated Test Case",
-            "description": "Auto-generated test case based on requirements",
-            "steps": ["Review requirements", "Execute test", "Verify results"],
-            "expected": "Requirements should be met"
+            "description": "Auto-generated based on SRS",
+            "steps": ["Review requirements", "Execute test"],
+            "expected": "Requirements met"
        }]
 
     return test_cases
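Reviewer note: the new re.match() branch needs `re` imported in generate.py; the import block sits above the first hunk, so worth confirming it is already there. Expected parse behavior, as a minimal sketch (the parser strips each line, so the indentation here is harmless):

    sample = """1. Verify valid login
    - Enter username
    - Click submit
    2. Verify logout"""
    cases = parse_generated_test_cases(sample)
    print(cases[0]["id"], cases[0]["steps"])
    # TC_001 ['Enter username', 'Click submit']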
 def generate_with_ai_model(srs_text, tokenizer, model):
-    max_input_length = 200
-    if len(srs_text) > max_input_length:
-        srs_text = srs_text[:max_input_length]
-
-    prompt = f"""Generate test cases for this software requirement:
+    prompt = f"""Generate detailed and numbered test cases for the following software requirement:
 {srs_text}
 
 Test Cases:
 1."""
 
+    input_length = len(srs_text.split())
+    max_new_tokens = min(max(100, input_length * 2), 600)
+
     try:
         inputs = tokenizer.encode(
             prompt,
             return_tensors="pt",
-            max_length=150,
-            truncation=True
+            truncation=True,
+            max_length=512
         )
 
         with torch.no_grad():
             outputs = model.generate(
                 inputs,
-                max_new_tokens=100,
+                max_new_tokens=max_new_tokens,
                 num_return_sequences=1,
                 temperature=0.7,
                 do_sample=True,
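Reviewer note: the replacement token budget scales the output with input size, roughly two new tokens per input word, clamped to [100, 600]:

    for words in (30, 150, 500):
        print(words, "->", min(max(100, words * 2), 600))
    # 30 -> 100, 150 -> 300, 500 -> 600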
@@ -203,32 +213,38 @@ def generate_with_fallback(srs_text):
     test_cases = generate_template_based_test_cases(srs_text)
     return test_cases, "Template-Based Generator", "rule-based", "Low memory - fallback to rule-based generation"
 
-# ✅ Function exposed to app.py
 def generate_test_cases(srs_text):
     return generate_with_fallback(srs_text)[0]
 
+def generate_test_cases_and_info(input_text):
+    test_cases, model_name, algorithm_used, reason = generate_with_fallback(input_text)
+    return {
+        "model": model_name,
+        "algorithm": algorithm_used,
+        "reason": reason,
+        "test_cases": test_cases
+    }
+
 def get_generator():
     global _generator_instance
     if _generator_instance is None:
         class Generator:
             def __init__(self):
                 self.model_name = get_optimal_model_for_memory()
-                self.tokenizer = None
-                self.model = None
+                self.tokenizer, self.model = None, None
                 if self.model_name:
                     self.tokenizer, self.model = load_model_with_memory_optimization(self.model_name)
 
             def get_model_info(self):
                 mem = psutil.Process().memory_info().rss / 1024 / 1024
                 return {
-                    "model_name": self.model_name if self.model_name else "Template-Based Generator",
+                    "model_name": self.model_name or "Template-Based Generator",
                     "status": "loaded" if self.model else "template_mode",
                     "memory_usage": f"{mem:.1f}MB",
                     "optimization": "low_memory"
                 }
 
         _generator_instance = Generator()
-
     return _generator_instance
 
 def monitor_memory():
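Reviewer note: generate_test_cases_and_info() now sits next to generate_test_cases() (it was previously defined after monitor_memory(); see the deletions in the next hunk). Illustrative usage; which model and reason are reported depends on available memory at call time:

    result = generate_test_cases_and_info("The system shall allow users to log in.")
    print(result["model"], result["algorithm"], result["reason"])
    print(len(result["test_cases"]), "test cases")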
@@ -238,25 +254,14 @@ def monitor_memory():
     gc.collect()
     logger.info("Memory cleanup triggered")
 
-# ✅ NEW FUNCTION for enhanced output: test cases + model info + reason
-def generate_test_cases_and_info(input_text):
-    test_cases, model_name, algorithm_used, reason = generate_with_fallback(input_text)
-    return {
-        "model": model_name,
-        "algorithm": algorithm_used,
-        "reason": reason,
-        "test_cases": test_cases
-    }
-
-# ✅ Explain why each algorithm is selected
 def get_algorithm_reason(model_name):
     if model_name == "microsoft/DialoGPT-small":
         return "Selected due to low memory availability; DialoGPT-small provides conversational understanding in limited memory environments."
     elif model_name == "distilgpt2":
-        return "Selected for its balance between performance and low memory usage. Ideal for small environments needing causal language modeling."
+        return "Selected for its balance between performance and low memory usage."
     elif model_name == "gpt2":
-        return "Chosen for general-purpose text generation with moderate memory headroom."
+        return "Chosen for general-purpose generation with moderate memory headroom."
     elif model_name is None:
-        return "No model used due to insufficient memory. Rule-based template generation chosen instead."
+        return "Rule-based fallback due to memory constraints."
     else:
-        return "Model selected based on best tradeoff between memory usage and language generation capability."
+        return "Chosen based on available memory and task compatibility."
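Reviewer note: the shortened reason strings pair naturally with get_model_info(). A quick end-to-end check, using only functions from this file (output depends on the machine's free memory):

    gen = get_generator()
    print(gen.get_model_info())
    print(get_algorithm_reason(gen.model_name))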
 