import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import gc
import os
# Set environment variables for cache management
os.environ['TRANSFORMERS_CACHE'] = '/tmp/.cache/huggingface'
os.environ['HF_HOME'] = '/tmp/.cache/huggingface'
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'
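# max_split_size_mb tells the CUDA caching allocator not to split blocks larger
# than this size, which can reduce fragmentation-related OOMs on small GPUs;
# it has no effect on CPU-only hosts.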
def cleanup_memory():
    """Release cached CUDA memory and run Python garbage collection."""
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
    gc.collect()
def load_model():
    """Load the Qwen tokenizer and model with memory-saving options."""
    cleanup_memory()
    model_name = "Qwen/Qwen1.5-0.5B"  # small checkpoint to fit Space memory limits
    try:
        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True,
            use_fast=False,
            cache_dir="/tmp/.cache/huggingface"
        )
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            trust_remote_code=True,
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
            offload_folder="/tmp/offload",
            offload_state_dict=True,
            cache_dir="/tmp/.cache/huggingface"
        )
        return model, tokenizer
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        return None, None
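# Hypothetical local smoke test (not part of the app): confirm the checkpoint
# downloads and the tokenizer carries a chat template before deploying, e.g.
#   m, t = load_model()
#   assert t is None or t.chat_template is not None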
# Initialize model and tokenizer at import time
print("Loading model...")
model, tokenizer = load_model()
if model is not None:
    print("Model loaded successfully!")
else:
    print("Model failed to load; generation requests will return an error.")
def generate_code(prompt):
    """Generate Python code for the given requirement and extract the code block."""
    try:
        if model is None or tokenizer is None:
            raise RuntimeError("Model is not loaded")
        cleanup_memory()
        # Prepare the chat-formatted prompt
        messages = [
            {"role": "system", "content": "You are an expert Python developer. Generate clean, efficient, and well-commented code based on the user's requirements. Only provide the code without any explanations."},
            {"role": "user", "content": f"Create a Python tool for the following requirement: {prompt}"}
        ]
        # Encode via the tokenizer's chat template rather than str(messages),
        # which would feed the raw Python repr of the list to the model.
        # Assumes the tokenizer ships a chat template (Qwen1.5 repos include one).
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            truncation=True,
            max_length=512,
            return_tensors="pt"
        )
        inputs = inputs.to(model.device)
        outputs = model.generate(
            inputs,
            max_new_tokens=512,  # cap generated tokens rather than total length
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            num_return_sequences=1
        )
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Extract the fenced code block from the response
        fence = "```python"
        code_start = response.find(fence)
        code_end = response.find("```", code_start + len(fence)) if code_start != -1 else -1
        if code_start != -1 and code_end != -1:
            code = response[code_start + len(fence):code_end].strip()
        else:
            # If no code block is found, keep any lines that look like code
            code = "\n".join([line for line in response.split("\n")
                              if line.strip() and not line.startswith(("#", "//", "/*"))])
        cleanup_memory()
        return code
    except Exception as e:
        cleanup_memory()
        raise gr.Error(f"Code generation failed: {str(e)}")
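# Hypothetical usage outside Gradio (for quick manual testing):
#   print(generate_code("Create a CSV-to-JSON converter"))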
# Create the Gradio interface
with gr.Blocks(theme=gr.themes.Soft(primary_hue="purple")) as demo:
    gr.Markdown(
        """
        # 🛠️ AI Tool Builder by Syncmerce
        Generate Python code for your tools using AI. Simply describe what you want to build!
        """
    )
    with gr.Row():
        with gr.Column(scale=1):
            prompt_input = gr.Textbox(
                label="Tool Requirements",
                placeholder="Describe the tool you want to build... Be specific about features and functionality.",
                lines=4
            )
            with gr.Row():
                generate_btn = gr.Button("🔨 Generate Tool", variant="primary")
                clear_btn = gr.Button("🗑️ Clear")
        with gr.Column(scale=1):
            code_output = gr.Code(
                label="Generated Code",
                language="python",
                lines=20,
                show_label=True
            )
    # Add examples (keeping them simple to save memory)
    gr.Examples(
        examples=[
            ["Create a simple PDF text extractor"],
            ["Build a basic web scraper for product prices"],
            ["Create an image resizing tool"],
        ],
        inputs=prompt_input,
        outputs=code_output,
        fn=generate_code,
        cache_examples=True,
    )
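    # Note: cache_examples=True runs generate_code once per example at startup,
    # so the first boot is slower but example clicks return cached results.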
    # Add event handlers
    generate_btn.click(
        fn=generate_code,
        inputs=prompt_input,
        outputs=code_output,
        api_name="generate"
    )
    clear_btn.click(
        fn=lambda: (None, None),
        inputs=None,
        outputs=[prompt_input, code_output],
        api_name="clear"
    )
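    # api_name makes these handlers callable by name outside the UI, e.g. via
    # gradio_client with client.predict(..., api_name="/generate").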
    gr.Markdown(
        """
        ### Tips for better results:
        - Keep your requirements clear and concise
        - Specify input/output formats if needed
        - Mention any specific libraries you want to use
        """
    )
# Queue requests to handle concurrent users, then launch the app.
# (enable_queue and cache_examples are not launch() arguments: queueing is
# enabled via demo.queue(), and example caching is set on gr.Examples above.)
demo.queue()
demo.launch(
    share=True,
    show_error=True,
    server_name="0.0.0.0",
    server_port=7860,
    max_threads=4
)
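# On Hugging Face Spaces, server_name="0.0.0.0" and port 7860 match what the
# platform expects; share=True is redundant there and mainly matters locally.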