# app.py
import json
import os

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the CodeGen-2B-mono model and tokenizer from Hugging Face.
# The "mono" checkpoint is specialized for Python; at 2B parameters it still
# fits in CPU memory, which matters on the Hugging Face Spaces free tier.
model_name = "Salesforce/codegen-2B-mono"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Run on CPU (no GPU on the Spaces free tier) and switch to inference mode
device = torch.device("cpu")
model.to(device)
model.eval()

# Simple file-backed cache of recent prompt/response pairs
CACHE_FILE = "cache.json"
cache = {}
if os.path.exists(CACHE_FILE):
    with open(CACHE_FILE, "r") as f:
        cache = json.load(f)


def code_assistant(prompt, language):
    # Input validation
    if not prompt.strip():
        return "Error: The input prompt cannot be empty. Please provide a coding question or code snippet."
    if len(prompt) > 256:
        return "Error: The input prompt is too long. Please limit it to 256 characters."

    # Serve from cache if we have seen this (prompt, language) pair before.
    # JSON object keys must be strings, so the tuple is stringified.
    cache_key = str((prompt, language))
    if cache_key in cache:
        return cache[cache_key]

    # Prefix the prompt with the language to give the model context
    if language:
        prompt = f"[{language}] {prompt}"

    # Tokenize the input
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Generate with parameters tuned for faster CPU response.
    # max_new_tokens (rather than max_length) caps only the generated text,
    # so a long prompt cannot swallow the whole token budget.
    outputs = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,  # pass the mask explicitly to avoid warnings
        max_new_tokens=128,   # short generations for quicker response
        temperature=0.1,      # low temperature for focused output
        top_p=0.8,            # slightly reduced top_p for quicker sampling
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,  # CodeGen has no pad token; reuse EOS
    )

    # Decode the generated output
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Store the response, evicting the oldest entry once the cache holds 10
    # items (dicts preserve insertion order, so the first key is the oldest)
    if len(cache) >= 10:
        cache.pop(next(iter(cache)))
    cache[cache_key] = generated_text

    # Write the updated cache to file
    with open(CACHE_FILE, "w") as f:
        json.dump(cache, f)

    return generated_text


# Set up the Gradio interface with a dropdown for programming language selection
iface = gr.Interface(
    fn=code_assistant,
    inputs=[
        gr.Textbox(lines=5, placeholder="Ask a coding question or paste your code here..."),
        gr.Dropdown(
            choices=["Python", "JavaScript", "Java", "C++", "HTML", "CSS", "SQL", "Other"],
            label="Programming Language",
        ),
    ],
    outputs="text",
    title="CodeBand",
    description=(
        "An AI code assistant to help you with coding queries, debugging, and "
        "code generation. Specify the programming language for more accurate responses."
    ),
)

# Launch the Gradio app
iface.launch()
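
# To run locally: install the dependencies, then start the script.
# Gradio serves the UI on http://127.0.0.1:7860 by default.
#
#   pip install gradio torch transformers
#   python app.py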