Joash committed
Commit defa041
1 Parent(s): 4a6c42f

Add history and metrics persistence with file storage

Files changed (1)
  1. app.py +49 -17
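
For context before the diff: the new persistence path serializes each review with Review.to_dict() and writes the whole state to review_history.json via CodeReviewer.save_history(). A rough sketch of that on-disk structure, with keys taken from the diff below and purely illustrative values (the name persisted_state is only for illustration):

    # Approximate shape of review_history.json -- keys mirror Review.to_dict()
    # and CodeReviewer.metrics; every value below is made up for illustration.
    persisted_state = {
        "history": [
            {
                "timestamp": "2025-01-01T12:00:00",  # datetime.now().isoformat()
                "language": "python",
                "code": "def add_numbers(a, b):\n    return a + b",
                "suggestions": "Consider adding type hints and a docstring.",
                "response_time": 3.21,               # seconds, set in review_code()
            }
        ],
        "metrics": {
            "total_reviews": 1,
            "avg_response_time": 3.21,
            "reviews_today": 1,
        },
    }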
app.py CHANGED
@@ -25,6 +25,9 @@ MODEL_NAME = os.getenv("MODEL_NAME", "google/gemma-2b-it")
 CACHE_DIR = "/home/user/.cache/huggingface"
 os.makedirs(CACHE_DIR, exist_ok=True)
 
+# History file
+HISTORY_FILE = "review_history.json"
+
 class Review:
     def __init__(self, code: str, language: str, suggestions: str):
         self.code = code
@@ -32,6 +35,22 @@ class Review:
         self.suggestions = suggestions
         self.timestamp = datetime.now().isoformat()
         self.response_time = 0.0
+
+    def to_dict(self):
+        return {
+            'timestamp': self.timestamp,
+            'language': self.language,
+            'code': self.code,
+            'suggestions': self.suggestions,
+            'response_time': self.response_time
+        }
+
+    @classmethod
+    def from_dict(cls, data):
+        review = cls(data['code'], data['language'], data['suggestions'])
+        review.timestamp = data['timestamp']
+        review.response_time = data['response_time']
+        return review
 
 class CodeReviewer:
     def __init__(self):
@@ -45,6 +64,32 @@ class CodeReviewer:
             'reviews_today': 0
         }
         self._initialized = False
+        self.load_history()
+
+    def load_history(self):
+        """Load review history from file."""
+        try:
+            if os.path.exists(HISTORY_FILE):
+                with open(HISTORY_FILE, 'r') as f:
+                    data = json.load(f)
+                    self.review_history = [Review.from_dict(r) for r in data['history']]
+                    self.metrics = data['metrics']
+                logger.info(f"Loaded {len(self.review_history)} reviews from history")
+        except Exception as e:
+            logger.error(f"Error loading history: {e}")
+
+    def save_history(self):
+        """Save review history to file."""
+        try:
+            data = {
+                'history': [r.to_dict() for r in self.review_history],
+                'metrics': self.metrics
+            }
+            with open(HISTORY_FILE, 'w') as f:
+                json.dump(data, f)
+            logger.info("Saved review history")
+        except Exception as e:
+            logger.error(f"Error saving history: {e}")
 
     @spaces.GPU
     def ensure_initialized(self):
@@ -60,14 +105,12 @@ class CodeReviewer:
             login(token=HF_TOKEN, add_to_git_credential=False)
 
             logger.info("Loading tokenizer...")
-            # Initialize tokenizer with special tokens
             self.tokenizer = AutoTokenizer.from_pretrained(
                 MODEL_NAME,
                 token=HF_TOKEN,
                 trust_remote_code=True,
                 cache_dir=CACHE_DIR
             )
-            # Ensure special tokens are set
             special_tokens = {
                 'pad_token': '[PAD]',
                 'eos_token': '</s>',
@@ -87,13 +130,13 @@ class CodeReviewer:
                 cache_dir=CACHE_DIR,
                 token=HF_TOKEN
             )
-            # Resize embeddings for special tokens if needed
             if num_added > 0:
                 logger.info("Resizing model embeddings for special tokens")
                 self.model.resize_token_embeddings(len(self.tokenizer))
 
             self.device = next(self.model.parameters()).device
             logger.info(f"Model loaded successfully on {self.device}")
+            self._initialized = True
             return True
         except Exception as e:
             logger.error(f"Error initializing model: {e}")
@@ -117,14 +160,12 @@ Code:
     def review_code(self, code: str, language: str) -> str:
         """Perform code review using the model."""
         try:
-            # Ensure model is initialized
             if not self._initialized and not self.initialize_model():
                 return "Error: Model initialization failed. Please try again later."
 
             start_time = datetime.now()
             prompt = self.create_review_prompt(code, language)
 
-            # Tokenize with error handling
             try:
                 inputs = self.tokenizer(
                     prompt,
@@ -140,7 +181,6 @@ Code:
                 logger.error(f"Tokenization error: {token_error}")
                 return "Error: Failed to process input code. Please try again."
 
-            # Generate with error handling
             try:
                 with torch.no_grad():
                     outputs = self.model.generate(
@@ -158,7 +198,6 @@ Code:
                 logger.error(f"Generation error: {gen_error}")
                 return "Error: Failed to generate review. Please try again."
 
-            # Decode with error handling
             try:
                 response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
                 suggestions = response[len(prompt):].strip()
@@ -166,16 +205,14 @@ Code:
                 logger.error(f"Decoding error: {decode_error}")
                 return "Error: Failed to decode model output. Please try again."
 
-            # Create review and update metrics
             end_time = datetime.now()
             review = Review(code, language, suggestions)
             review.response_time = (end_time - start_time).total_seconds()
             self.review_history.append(review)
 
-            # Update metrics
             self.update_metrics(review)
+            self.save_history()  # Save after each review
 
-            # Clear GPU memory
             if self.device and self.device.type == "cuda":
                 del inputs, outputs
                 torch.cuda.empty_cache()
@@ -190,12 +227,10 @@ Code:
         """Update metrics with new review."""
        self.metrics['total_reviews'] += 1
 
-        # Update average response time
         total_time = self.metrics['avg_response_time'] * (self.metrics['total_reviews'] - 1)
         total_time += review.response_time
         self.metrics['avg_response_time'] = total_time / self.metrics['total_reviews']
 
-        # Update reviews today
         today = datetime.now().date()
         self.metrics['reviews_today'] = sum(
             1 for r in self.review_history
@@ -212,7 +247,7 @@ Code:
                 'suggestions': r.suggestions,
                 'response_time': f"{r.response_time:.2f}s"
             }
-            for r in reversed(self.review_history[-10:])  # Last 10 reviews
+            for r in reversed(self.review_history[-10:])
         ]
 
     def get_metrics(self) -> Dict:
@@ -266,13 +301,12 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
         label="Performance Metrics"
     )
 
-    # Set up event handlers
    @spaces.GPU
    def review_code_interface(code: str, language: str) -> str:
        if not code.strip():
            return "Please enter some code to review."
        try:
-            reviewer.ensure_initialized()  # Ensure model is initialized
+            reviewer.ensure_initialized()
            return reviewer.review_code(code, language)
        except Exception as e:
            logger.error(f"Interface error: {e}")
@@ -317,7 +351,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
        outputs=metrics_output
    )
 
-    # Add example inputs
    gr.Examples(
        examples=[
            ["""def add_numbers(a, b):
@@ -333,7 +366,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
        inputs=[code_input, language_input]
    )
 
-# Launch the app
 if __name__ == "__main__":
     iface.launch(
         server_name="0.0.0.0",
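
After the Space has handled a few reviews, the persisted state can be inspected by reading the file back directly; a minimal sketch, assuming review_history.json sits in the app's working directory:

    import json

    # Read the state written by CodeReviewer.save_history().
    with open("review_history.json") as f:
        data = json.load(f)

    print(f"{len(data['history'])} saved reviews")
    print(data["metrics"])  # total_reviews, avg_response_time, reviews_today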