import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import gradio as gr
import spaces

# Locate the base model from the PEFT adapter's config
peft_model_id = "rootxhacker/CodeAstra-7B"
config = PeftConfig.from_pretrained(peft_model_id)

# 4-bit quantization config (the bare load_in_4bit kwarg is deprecated)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the base model; device_map="auto" places the quantized weights on the GPU
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Attach the LoRA adapter. Do NOT call .to(device) or move parameters by hand:
# 4-bit bitsandbytes models cannot be moved after loading, and device_map="auto"
# has already handled placement.
model = PeftModel.from_pretrained(model, peft_model_id)


def get_completion(query, model, tokenizer):
    try:
        # model.device points at wherever device_map placed the weights
        inputs = tokenizer(query, return_tensors="pt").to(model.device)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=512,
                do_sample=True,
                temperature=0.7,
            )
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        return f"An error occurred: {str(e)}"


# Only the function Gradio invokes needs the ZeroGPU decorator;
# decorating the inner helper as well is redundant.
@spaces.GPU(duration=200)
def code_review(code_to_analyze):
    query = (
        "As a code review expert, examine the following code for potential "
        "security flaws and provide guidance on secure coding practices:\n"
        f"{code_to_analyze}"
    )
    return get_completion(query, model, tokenizer)


# Create the Gradio interface
iface = gr.Interface(
    fn=code_review,
    inputs=gr.Textbox(lines=10, label="Enter code to analyze"),
    outputs=gr.Textbox(label="Code Review Result"),
    title="Code Review Expert",
    description="This tool analyzes code for potential security flaws and provides guidance on secure coding practices.",
)

# Launch the Gradio app
iface.launch()