Joash committed on
Commit
d1dd13c
·
0 Parent(s):

Initial commit with working code review assistant

Browse files
Files changed (4) hide show
  1. .gitattributes +35 -0
  2. README.md +62 -0
  3. app.py +432 -0
  4. requirements.txt +6 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Code Review Assistant v2
3
+ emoji: 🔍
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 5.9.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ # Code Review Assistant v2
13
+
14
+ An enhanced version of the Code Review Assistant powered by Google's Gemma-2-2b-it model, now with improved metrics tracking and history management.
15
+
16
+ ## 🚀 Features
17
+
18
+ ### Core Functionality
19
+ - Automated code review and analysis
20
+ - Support for multiple programming languages
21
+ - Detailed suggestions for improvements
22
+ - Security and best practices recommendations
23
+
24
+ ### Improvements in v2
25
+ - Robust metrics tracking with persistent storage
26
+ - Enhanced review history with Markdown formatting
27
+ - Improved error handling and recovery
28
+ - Efficient storage management
29
+ - Real-time performance monitoring
30
+
31
+ ### Supported Languages
32
+ - Python
33
+ - JavaScript
34
+ - Java
35
+ - C++
36
+ - TypeScript
37
+ - Go
38
+ - Rust
39
+
40
+ ## 📊 Metrics & History
41
+ - Tracks total reviews performed
42
+ - Measures average response time
43
+ - Counts daily reviews
44
+ - Maintains detailed review history
45
+ - Real-time performance monitoring
46
+
47
+ ## 🛠️ Technical Details
48
+ - Model: Google's Gemma-2-2b-it
49
+ - Framework: Gradio 4.19.0
50
+ - Backend: Python with PyTorch
51
+ - Storage: Persistent file-based storage with atomic writes
52
+ - Deployment: Hugging Face Spaces with GPU acceleration
53
+
54
+ ## 📝 Usage
55
+ 1. Select your programming language
56
+ 2. Paste your code in the input box
57
+ 3. Click "Submit for Review"
58
+ 4. View the detailed review results
59
+ 5. Check metrics and history in their respective tabs
60
+
61
+ ## 🔄 Updates
62
+ This v2 version includes significant improvements in reliability and functionality, particularly in the metrics tracking and history management systems.
app.py ADDED
@@ -0,0 +1,432 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from huggingface_hub import login  # NOTE(review): imported but never called in this file — confirm whether explicit login is needed
import os
import logging
from datetime import datetime
import json
from typing import List, Dict
import warnings
import spaces

# Filter out warnings
# (blanket suppression; model/tokenizer loading is noisy on Spaces)
warnings.filterwarnings('ignore')

# Configure logging with more detail
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Environment variables with default model
MODEL_NAME = "google/gemma-2-2b-it"
# NOTE(review): HF_TOKEN is read here but not used anywhere in this file;
# presumably transformers picks it up via env — verify gated-model access works.
HF_TOKEN = os.getenv("HUGGING_FACE_TOKEN")

# Hugging Face Spaces persistent storage directory
# (created eagerly at import time so later writes cannot fail on a missing dir)
STORAGE_DIR = os.path.join(os.getcwd(), "storage")
os.makedirs(STORAGE_DIR, exist_ok=True)

# History file in persistent storage
HISTORY_FILE = os.path.join(STORAGE_DIR, "review_history.json")
class Review:
    """A single code-review record: the reviewed code, its language,
    the model's suggestions, and timing metadata."""

    def __init__(self, code: str, language: str, suggestions: str):
        self.code = code
        self.language = language
        self.suggestions = suggestions
        # Creation time in ISO-8601; overwritten when rehydrating from storage.
        self.timestamp = datetime.now().isoformat()
        # Seconds spent producing the review; filled in by the caller.
        self.response_time = 0.0

    def to_dict(self):
        """Serialize for JSON storage; the stored code is truncated."""
        return dict(
            timestamp=self.timestamp,
            language=self.language,
            code=code_snippet(self.code, max_length=200),
            suggestions=self.suggestions,
            response_time=self.response_time,
        )

    @classmethod
    def from_dict(cls, data):
        """Rebuild a Review from its stored dict form."""
        restored = cls(data['code'], data['language'], data['suggestions'])
        restored.timestamp = data['timestamp']
        restored.response_time = data.get('response_time', 0.0)
        return restored
def code_snippet(code: str, max_length: int = 200) -> str:
    """Return *code* unchanged if it fits within *max_length* characters,
    otherwise its first *max_length* characters plus an ellipsis marker."""
    if len(code) > max_length:
        return code[:max_length] + "..."
    return code
class CodeReviewer:
    """Code-review service backed by a causal LM.

    The model is loaded lazily on the first review request.  Review
    history (last 100 entries) and aggregate metrics are persisted to
    HISTORY_FILE with atomic writes so restarts keep prior data.
    """

    def __init__(self):
        self.model = None
        self.tokenizer = None
        self.device = None
        self.review_history: List[Review] = []
        self.metrics = {
            'total_reviews': 0,
            'avg_response_time': 0.0,
            'reviews_today': 0
        }
        self._initialized = False
        self.load_history()

    def load_history(self):
        """Load review history and metrics from HISTORY_FILE.

        Any failure (corrupt JSON, missing keys, I/O error) resets to an
        empty history and zeroed metrics instead of crashing startup.
        """
        try:
            if os.path.exists(HISTORY_FILE):
                with open(HISTORY_FILE, 'r') as f:
                    data = json.load(f)
                self.review_history = [Review.from_dict(r) for r in data.get('history', [])]
                self.metrics = data.get('metrics', {
                    'total_reviews': 0,
                    'avg_response_time': 0.0,
                    'reviews_today': 0
                })
                logger.info(f"Loaded {len(self.review_history)} reviews from history")
            else:
                logger.info("No history file found, starting fresh")
                self.save_history()
        except Exception as e:
            logger.error(f"Error loading history: {e}")
            self.review_history = []
            self.metrics = {
                'total_reviews': 0,
                'avg_response_time': 0.0,
                'reviews_today': 0
            }

    def save_history(self):
        """Atomically persist the last 100 reviews plus metrics.

        Writes to a temp file then os.replace()s it over the real file so
        a crash mid-write never leaves a truncated history file.
        """
        # Fix: assign temp_file before the try block; previously it was
        # assigned inside, so the except-path cleanup could raise NameError
        # if the failure happened before the assignment.
        temp_file = HISTORY_FILE + '.tmp'
        try:
            os.makedirs(os.path.dirname(HISTORY_FILE), exist_ok=True)

            data = {
                'history': [r.to_dict() for r in self.review_history[-100:]],
                'metrics': self.metrics
            }

            with open(temp_file, 'w') as f:
                json.dump(data, f, indent=2)
            os.replace(temp_file, HISTORY_FILE)

            logger.info("Saved review history successfully")
        except Exception as e:
            logger.error(f"Error saving history: {e}")
            if os.path.exists(temp_file):
                try:
                    os.remove(temp_file)
                except OSError:
                    # Best-effort cleanup; a leftover .tmp file is harmless.
                    pass

    def initialize_model(self):
        """Load tokenizer and model; return True on success, False on failure.

        Failures are logged and leave the reviewer uninitialized so the
        next request retries.
        """
        try:
            logger.info(f"Initializing model {MODEL_NAME}")

            # Initialize tokenizer
            logger.info("Loading tokenizer...")
            self.tokenizer = AutoTokenizer.from_pretrained(
                MODEL_NAME,
                trust_remote_code=True
            )

            # Fix: only add a pad token when the tokenizer lacks one.
            # The previous code unconditionally overwrote the model's native
            # BOS/EOS tokens with '<s>'/'</s>', which Gemma-2 does not use
            # and which can degrade generation quality.
            num_added = 0
            if self.tokenizer.pad_token is None:
                num_added = self.tokenizer.add_special_tokens({'pad_token': '[PAD]'})
            logger.info(f"Added {num_added} special tokens")

            # Initialize model
            logger.info("Loading model...")
            self.model = AutoModelForCausalLM.from_pretrained(
                MODEL_NAME,
                device_map="auto",
                torch_dtype=torch.float16,
                trust_remote_code=True,
                low_cpu_mem_usage=True
            )

            # Resize embeddings only if new tokens were actually added.
            if num_added > 0:
                self.model.resize_token_embeddings(len(self.tokenizer))

            self.device = next(self.model.parameters()).device
            logger.info(f"Model loaded successfully on {self.device}")
            self._initialized = True
            return True
        except Exception as e:
            logger.error(f"Error initializing model: {str(e)}")
            self._initialized = False
            return False

    def create_review_prompt(self, code: str, language: str) -> str:
        """Create a structured prompt for code review."""
        return f"""Review this {language} code. List specific points in these sections:
Issues:
Improvements:
Best Practices:
Security:

Code:
```{language}
{code}
```"""

    @spaces.GPU
    def review_code(self, code: str, language: str) -> str:
        """Run a code review and return the model's suggestions.

        On any failure an error string is returned instead of raising.
        Side effects: appends the review to history, updates metrics, and
        persists both to disk.
        """
        try:
            if not self._initialized:
                logger.info("Model not initialized, attempting initialization...")
                if not self.initialize_model():
                    return "Error: Model initialization failed. Please check logs for details."

            start_time = datetime.now()
            prompt = self.create_review_prompt(code, language)

            try:
                inputs = self.tokenizer(
                    prompt,
                    return_tensors="pt",
                    truncation=True,
                    max_length=512,
                    padding=True
                ).to(self.device)
            except Exception as token_error:
                logger.error(f"Tokenization error: {str(token_error)}")
                return f"Error during tokenization: {str(token_error)}"

            try:
                with torch.no_grad():
                    # Fix: dropped early_stopping=True — it only applies to
                    # beam search and triggers a warning with num_beams=1.
                    outputs = self.model.generate(
                        **inputs,
                        max_new_tokens=512,
                        do_sample=True,
                        temperature=0.7,
                        top_p=0.95,
                        num_beams=1,
                        pad_token_id=self.tokenizer.pad_token_id,
                        eos_token_id=self.tokenizer.eos_token_id
                    )
            except Exception as gen_error:
                logger.error(f"Generation error: {str(gen_error)}")
                return f"Error during generation: {str(gen_error)}"

            try:
                response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
                # Strip the echoed prompt; assumes decode reproduces the
                # prompt verbatim at the start of the output — TODO confirm
                # for tokenizers that normalize whitespace.
                suggestions = response[len(prompt):].strip()
            except Exception as decode_error:
                logger.error(f"Decoding error: {str(decode_error)}")
                return f"Error decoding response: {str(decode_error)}"

            # Create and save review
            end_time = datetime.now()
            review = Review(code, language, suggestions)
            review.response_time = (end_time - start_time).total_seconds()

            # Fix: append to history BEFORE updating metrics so the
            # 'reviews_today' count includes this review (it was previously
            # computed one review behind).
            self.review_history.append(review)
            self.update_metrics(review)
            self.save_history()

            # Free GPU memory held by this request's tensors.
            if self.device.type == "cuda":
                del inputs, outputs
                torch.cuda.empty_cache()

            return suggestions

        except Exception as e:
            logger.error(f"Error during code review: {str(e)}")
            return f"Error performing code review: {str(e)}"

    def update_metrics(self, review: Review):
        """Fold a finished review into the running metrics."""
        try:
            self.metrics['total_reviews'] += 1

            # Incremental running mean of response time.
            total_time = self.metrics['avg_response_time'] * (self.metrics['total_reviews'] - 1)
            total_time += review.response_time
            self.metrics['avg_response_time'] = total_time / self.metrics['total_reviews']

            # Recount today's reviews from the in-memory history.
            today = datetime.now().date()
            self.metrics['reviews_today'] = sum(
                1 for r in self.review_history
                if datetime.fromisoformat(r.timestamp).date() == today
            )
        except Exception as e:
            logger.error(f"Error updating metrics: {e}")

    def get_history(self) -> List[Dict]:
        """Return up to 10 most recent reviews, newest first, display-formatted."""
        try:
            return [
                {
                    'timestamp': r.timestamp,
                    'language': r.language,
                    'code': code_snippet(r.code),
                    'suggestions': r.suggestions,
                    'response_time': f"{r.response_time:.2f}s"
                }
                for r in reversed(self.review_history[-10:])
            ]
        except Exception as e:
            logger.error(f"Error getting history: {e}")
            return []

    def get_metrics(self) -> Dict:
        """Return current metrics formatted for display; never raises."""
        try:
            return {
                'Total Reviews': self.metrics['total_reviews'],
                'Average Response Time': f"{self.metrics['avg_response_time']:.2f}s",
                'Reviews Today': self.metrics['reviews_today'],
                'Device': str(self.device) if self.device else "Not initialized"
            }
        except Exception as e:
            logger.error(f"Error getting metrics: {e}")
            return {
                'Total Reviews': 0,
                'Average Response Time': '0.00s',
                'Reviews Today': 0,
                'Device': 'Error'
            }
# Initialize reviewer
# Module-level singleton; constructing it loads history from disk but
# defers model loading until the first review request.
reviewer = CodeReviewer()

# Create Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    gr.Markdown("# Code Review Assistant v2")
    gr.Markdown("An automated code review system powered by Gemma-2-2b-it")

    with gr.Tabs():
        # Tab 1: code entry on the left, review output on the right.
        with gr.Tab("Review Code"):
            with gr.Row():
                with gr.Column():
                    code_input = gr.Textbox(
                        lines=10,
                        placeholder="Enter your code here...",
                        label="Code"
                    )
                    language_input = gr.Dropdown(
                        choices=["python", "javascript", "java", "cpp", "typescript", "go", "rust"],
                        value="python",
                        label="Language"
                    )
                    submit_btn = gr.Button("Submit for Review", variant="primary")
                with gr.Column():
                    output = gr.Textbox(
                        label="Review Results",
                        lines=10
                    )

        # Tab 2: manually refreshed Markdown view of recent reviews.
        with gr.Tab("History"):
            with gr.Row():
                refresh_history = gr.Button("Refresh History", variant="secondary")
                history_output = gr.Markdown(
                    value="Click 'Refresh History' to view review history"
                )

        # Tab 3: manually refreshed JSON view of aggregate metrics.
        with gr.Tab("Metrics"):
            with gr.Row():
                refresh_metrics = gr.Button("Refresh Metrics", variant="secondary")
                metrics_output = gr.JSON(
                    label="Performance Metrics"
                )

    # NOTE(review): review_code() is already decorated with @spaces.GPU;
    # decorating this wrapper too looks redundant — confirm intended.
    @spaces.GPU
    def review_code_interface(code: str, language: str) -> str:
        """Validate the input and delegate to the reviewer; never raises."""
        if not code.strip():
            return "Please enter some code to review."
        try:
            result = reviewer.review_code(code, language)
            return result
        except Exception as e:
            logger.error(f"Interface error: {str(e)}")
            return f"Error: {str(e)}"

    def get_history_interface() -> str:
        """Render the recent review history as a Markdown document."""
        try:
            history = reviewer.get_history()
            if not history:
                return "No reviews yet."

            result = ""
            for review in history:
                result += f"### Review from {review['timestamp']}\n\n"
                result += f"**Language:** {review['language']}\n\n"
                result += f"**Response Time:** {review['response_time']}\n\n"
                result += "**Code:**\n```\n" + review['code'] + "\n```\n\n"
                result += "**Suggestions:**\n" + review['suggestions'] + "\n\n"
                result += "---\n\n"
            return result
        except Exception as e:
            logger.error(f"History error: {str(e)}")
            return f"Error retrieving history: {str(e)}"

    def get_metrics_interface() -> Dict:
        """Return current metrics for the JSON widget; errors become a dict."""
        try:
            return reviewer.get_metrics()
        except Exception as e:
            logger.error(f"Metrics error: {str(e)}")
            return {"error": str(e)}

    def update_all_outputs(code: str, language: str) -> tuple:
        """Update all outputs after code review."""
        result = review_code_interface(code, language)
        history = get_history_interface()
        metrics = get_metrics_interface()
        return result, history, metrics

    # Connect the interface
    # Submitting a review also refreshes the history and metrics panes.
    submit_btn.click(
        update_all_outputs,
        inputs=[code_input, language_input],
        outputs=[output, history_output, metrics_output]
    )

    refresh_history.click(
        get_history_interface,
        outputs=history_output
    )

    refresh_metrics.click(
        get_metrics_interface,
        outputs=metrics_output
    )

    # Add example inputs
    gr.Examples(
        examples=[
            ["""def add_numbers(a, b):
    return a + b""", "python"],
            ["""function calculateSum(numbers) {
    let sum = 0;
    for(let i = 0; i < numbers.length; i++) {
        sum += numbers[i];
    }
    return sum;
}""", "javascript"]
        ],
        inputs=[code_input, language_input]
    )

# Launch the app
if __name__ == "__main__":
    iface.launch(
        server_name="0.0.0.0",   # listen on all interfaces (required on Spaces)
        server_port=7860,
        show_error=True
    )
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ transformers>=4.37.0
2
+ torch>=2.1.0
3
+ gradio>=4.19.0
4
+ huggingface-hub>=0.20.0
5
+ accelerate>=0.27.0
6
+ bitsandbytes>=0.41.0