Joash committed on
Commit 6d59d74 · 1 Parent(s): 54e3d2b

Fix model name to google/gemma-2-2b-it

Files changed (1)
  1. app.py +9 -405
app.py CHANGED
@@ -9,10 +9,6 @@ import json
  from typing import List, Dict
  import warnings
  import spaces
- from dotenv import load_dotenv
-
- # Load environment variables from .env file
- load_dotenv()
 
  # Filter out warnings
  warnings.filterwarnings('ignore')
@@ -23,7 +19,15 @@ logger = logging.getLogger(__name__)
 
  # Environment variables
  HF_TOKEN = os.getenv("HUGGING_FACE_TOKEN")
- MODEL_NAME = os.getenv("MODEL_NAME", "google/gemma-2b-it")
+ MODEL_NAME = os.getenv("MODEL_NAME", "google/gemma-2-2b-it") # Fixed model name
+
+ # Login to Hugging Face with git credential
+ if HF_TOKEN:
+     try:
+         login(token=HF_TOKEN, add_to_git_credential=True)
+         logger.info("Successfully logged in to Hugging Face")
+     except Exception as e:
+         logger.error(f"Error logging in to Hugging Face: {e}")
 
  # Create data directory for persistence
  DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
@@ -31,403 +35,3 @@ os.makedirs(DATA_DIR, exist_ok=True)
 
  # History file
  HISTORY_FILE = os.path.join(DATA_DIR, "review_history.json")
-
- class Review:
-     def __init__(self, code: str, language: str, suggestions: str):
-         self.code = code
-         self.language = language
-         self.suggestions = suggestions
-         self.timestamp = datetime.now().isoformat()
-         self.response_time = 0.0
-
-     def to_dict(self):
-         return {
-             'timestamp': self.timestamp,
-             'language': self.language,
-             'code': self.code,
-             'suggestions': self.suggestions,
-             'response_time': self.response_time
-         }
-
-     @classmethod
-     def from_dict(cls, data):
-         review = cls(data['code'], data['language'], data['suggestions'])
-         review.timestamp = data['timestamp']
-         review.response_time = data['response_time']
-         return review
-
- class CodeReviewer:
-     def __init__(self):
-         self.model = None
-         self.tokenizer = None
-         self.device = None
-         self.review_history: List[Review] = []
-         self.metrics = {
-             'total_reviews': 0,
-             'avg_response_time': 0.0,
-             'reviews_today': 0
-         }
-         self._initialized = False
-         self.load_history()
-
-     def load_history(self):
-         """Load review history from file."""
-         try:
-             if os.path.exists(HISTORY_FILE):
-                 with open(HISTORY_FILE, 'r') as f:
-                     data = json.load(f)
-                     self.review_history = [Review.from_dict(r) for r in data['history']]
-                     self.metrics = data['metrics']
-                 logger.info(f"Loaded {len(self.review_history)} reviews from history")
-         except Exception as e:
-             logger.error(f"Error loading history: {e}")
-             # Initialize empty history if file doesn't exist or is corrupted
-             self.review_history = []
-             self.metrics = {
-                 'total_reviews': 0,
-                 'avg_response_time': 0.0,
-                 'reviews_today': 0
-             }
-
-     def save_history(self):
-         """Save review history to file."""
-         try:
-             data = {
-                 'history': [r.to_dict() for r in self.review_history],
-                 'metrics': self.metrics
-             }
-             # Ensure the directory exists
-             os.makedirs(os.path.dirname(HISTORY_FILE), exist_ok=True)
-             with open(HISTORY_FILE, 'w') as f:
-                 json.dump(data, f)
-             logger.info("Saved review history")
-         except Exception as e:
-             logger.error(f"Error saving history: {e}")
-
-     @spaces.GPU
-     def ensure_initialized(self):
-         """Ensure model is initialized."""
-         if not self._initialized:
-             self.initialize_model()
-             self._initialized = True
-
-     def initialize_model(self):
-         """Initialize the model and tokenizer."""
-         try:
-             # Login to Hugging Face with git credential
-             if HF_TOKEN:
-                 try:
-                     login(token=HF_TOKEN, add_to_git_credential=True)
-                     logger.info("Successfully logged in to Hugging Face")
-                 except Exception as e:
-                     logger.error(f"Error logging in to Hugging Face: {e}")
-                     return False
-
-             logger.info("Loading tokenizer...")
-             self.tokenizer = AutoTokenizer.from_pretrained(
-                 MODEL_NAME,
-                 token=HF_TOKEN,
-                 trust_remote_code=True
-             )
-             special_tokens = {
-                 'pad_token': '[PAD]',
-                 'eos_token': '</s>',
-                 'bos_token': '<s>'
-             }
-             num_added = self.tokenizer.add_special_tokens(special_tokens)
-             logger.info(f"Added {num_added} special tokens")
-             logger.info("Tokenizer loaded successfully")
-
-             logger.info("Loading model...")
-             self.model = AutoModelForCausalLM.from_pretrained(
-                 MODEL_NAME,
-                 device_map="auto",
-                 torch_dtype=torch.float16,
-                 trust_remote_code=True,
-                 low_cpu_mem_usage=True,
-                 token=HF_TOKEN
-             )
-             if num_added > 0:
-                 logger.info("Resizing model embeddings for special tokens")
-                 self.model.resize_token_embeddings(len(self.tokenizer))
-
-             self.device = next(self.model.parameters()).device
-             logger.info(f"Model loaded successfully on {self.device}")
-             self._initialized = True
-             return True
-         except Exception as e:
-             logger.error(f"Error initializing model: {e}")
-             self._initialized = False
-             return False
-
-     def create_review_prompt(self, code: str, language: str) -> str:
-         """Create a structured prompt for code review."""
-         return f"""Review this {language} code. List specific points in these sections:
- Issues:
- Improvements:
- Best Practices:
- Security:
-
- Code:
- ```{language}
- {code}
- ```"""
-
-     @spaces.GPU
-     def review_code(self, code: str, language: str) -> str:
-         """Perform code review using the model."""
-         try:
-             if not self._initialized and not self.initialize_model():
-                 return "Error: Model initialization failed. Please try again later."
-
-             start_time = datetime.now()
-             prompt = self.create_review_prompt(code, language)
-
-             try:
-                 inputs = self.tokenizer(
-                     prompt,
-                     return_tensors="pt",
-                     truncation=True,
-                     max_length=512,
-                     padding=True
-                 )
-                 if inputs is None:
-                     raise ValueError("Failed to tokenize input")
-                 inputs = inputs.to(self.device)
-             except Exception as token_error:
-                 logger.error(f"Tokenization error: {token_error}")
-                 return "Error: Failed to process input code. Please try again."
-
-             try:
-                 with torch.no_grad():
-                     outputs = self.model.generate(
-                         **inputs,
-                         max_new_tokens=512,
-                         do_sample=True,
-                         temperature=0.7,
-                         top_p=0.95,
-                         num_beams=1,
-                         early_stopping=True,
-                         pad_token_id=self.tokenizer.pad_token_id,
-                         eos_token_id=self.tokenizer.eos_token_id
-                     )
-             except Exception as gen_error:
-                 logger.error(f"Generation error: {gen_error}")
-                 return "Error: Failed to generate review. Please try again."
-
-             try:
-                 response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-                 suggestions = response[len(prompt):].strip()
-             except Exception as decode_error:
-                 logger.error(f"Decoding error: {decode_error}")
-                 return "Error: Failed to decode model output. Please try again."
-
-             # Create and save review
-             end_time = datetime.now()
-             review = Review(code, language, suggestions)
-             review.response_time = (end_time - start_time).total_seconds()
-
-             # Update metrics first
-             self.metrics['total_reviews'] += 1
-             total_time = self.metrics['avg_response_time'] * (self.metrics['total_reviews'] - 1)
-             total_time += review.response_time
-             self.metrics['avg_response_time'] = total_time / self.metrics['total_reviews']
-
-             today = datetime.now().date()
-
-             # Add review to history
-             self.review_history.append(review)
-
-             # Update today's reviews count
-             self.metrics['reviews_today'] = sum(
-                 1 for r in self.review_history
-                 if datetime.fromisoformat(r.timestamp).date() == today
-             )
-
-             # Save to file
-             self.save_history()
-
-             if self.device and self.device.type == "cuda":
-                 del inputs, outputs
-                 torch.cuda.empty_cache()
-
-             return suggestions
-
-         except Exception as e:
-             logger.error(f"Error during code review: {e}")
-             return f"Error performing code review: {str(e)}"
-
-     def update_metrics(self, review: Review):
-         """Update metrics with new review."""
-         self.metrics['total_reviews'] += 1
-
-         total_time = self.metrics['avg_response_time'] * (self.metrics['total_reviews'] - 1)
-         total_time += review.response_time
-         self.metrics['avg_response_time'] = total_time / self.metrics['total_reviews']
-
-         today = datetime.now().date()
-         self.metrics['reviews_today'] = sum(
-             1 for r in self.review_history
-             if datetime.fromisoformat(r.timestamp).date() == today
-         )
-
-     def get_history(self) -> List[Dict]:
-         """Get formatted review history."""
-         return [
-             {
-                 'timestamp': r.timestamp,
-                 'language': r.language,
-                 'code': r.code,
-                 'suggestions': r.suggestions,
-                 'response_time': f"{r.response_time:.2f}s"
-             }
-             for r in reversed(self.review_history[-10:])
-         ]
-
-     def get_metrics(self) -> Dict:
-         """Get current metrics."""
-         return {
-             'Total Reviews': self.metrics['total_reviews'],
-             'Average Response Time': f"{self.metrics['avg_response_time']:.2f}s",
-             'Reviews Today': self.metrics['reviews_today'],
-             'Device': str(self.device) if self.device else "Not initialized"
-         }
-
- # Initialize reviewer
- reviewer = CodeReviewer()
-
- # Create Gradio interface
- with gr.Blocks(theme=gr.themes.Soft()) as iface:
-     gr.Markdown("# Code Review Assistant")
-     gr.Markdown("An automated code review system powered by Gemma-2b")
-
-     with gr.Tabs():
-         with gr.Tab("Review Code"):
-             with gr.Row():
-                 with gr.Column():
-                     code_input = gr.Textbox(
-                         lines=10,
-                         placeholder="Enter your code here...",
-                         label="Code"
-                     )
-                     language_input = gr.Dropdown(
-                         choices=["python", "javascript", "java", "cpp", "typescript", "go", "rust"],
-                         value="python",
-                         label="Language"
-                     )
-                     submit_btn = gr.Button("Submit for Review", variant="primary")
-                 with gr.Column():
-                     output = gr.Textbox(
-                         label="Review Results",
-                         lines=10
-                     )
-
-         with gr.Tab("History"):
-             with gr.Row():
-                 refresh_history = gr.Button("Refresh History", variant="secondary")
-             history_output = gr.Textbox(
-                 label="Review History",
-                 lines=20,
-                 value="Click 'Refresh History' to view review history"
-             )
-
-         with gr.Tab("Metrics"):
-             with gr.Row():
-                 refresh_metrics = gr.Button("Refresh Metrics", variant="secondary")
-             metrics_output = gr.JSON(
-                 label="Performance Metrics"
-             )
-
-     @spaces.GPU
-     def review_code_interface(code: str, language: str) -> str:
-         if not code.strip():
-             return "Please enter some code to review."
-         try:
-             reviewer.ensure_initialized()
-             result = reviewer.review_code(code, language)
-             return result
-         except Exception as e:
-             logger.error(f"Interface error: {e}")
-             return f"Error: {str(e)}"
-
-     def get_history_interface() -> str:
-         try:
-             history = reviewer.get_history()
-             if not history:
-                 return "No reviews yet."
-             result = ""
-             for review in history:
-                 result += f"Time: {review['timestamp']}\n"
-                 result += f"Language: {review['language']}\n"
-                 result += f"Response Time: {review['response_time']}\n"
-                 result += "Code:\n```\n" + review['code'] + "\n```\n"
-                 result += "Suggestions:\n" + review['suggestions'] + "\n"
-                 result += "-" * 80 + "\n\n"
-             return result
-         except Exception as e:
-             logger.error(f"History error: {e}")
-             return "Error retrieving history"
-
-     def get_metrics_interface() -> Dict:
-         try:
-             metrics = reviewer.get_metrics()
-             if not metrics:
-                 return {
-                     'Total Reviews': 0,
-                     'Average Response Time': '0.00s',
-                     'Reviews Today': 0,
-                     'Device': str(reviewer.device) if reviewer.device else "Not initialized"
-                 }
-             return metrics
-         except Exception as e:
-             logger.error(f"Metrics error: {e}")
-             return {"error": str(e)}
-
-     def update_all_outputs(code: str, language: str) -> tuple:
-         """Update all outputs after code review."""
-         result = review_code_interface(code, language)
-         history = get_history_interface()
-         metrics = get_metrics_interface()
-         return result, history, metrics
-
-     # Connect the interface
-     submit_btn.click(
-         update_all_outputs,
-         inputs=[code_input, language_input],
-         outputs=[output, history_output, metrics_output]
-     )
-
-     refresh_history.click(
-         get_history_interface,
-         outputs=history_output
-     )
-
-     refresh_metrics.click(
-         get_metrics_interface,
-         outputs=metrics_output
-     )
-
-     # Add example inputs
-     gr.Examples(
-         examples=[
-             ["""def add_numbers(a, b):
-     return a + b""", "python"],
-             ["""function calculateSum(numbers) {
-     let sum = 0;
-     for(let i = 0; i < numbers.length; i++) {
-         sum += numbers[i];
-     }
-     return sum;
- }""", "javascript"]
-         ],
-         inputs=[code_input, language_input]
-     )
-
- # Launch the app
- if __name__ == "__main__":
-     iface.launch(
-         server_name="0.0.0.0",
-         server_port=7860,
-         show_error=True,
-         quiet=False
-     )
 