File size: 1,647 Bytes
2532836
 
 
928e881
 
c8c940e
928e881
 
 
2532836
c8c940e
837c657
2532836
837c657
 
2532836
928e881
837c657
2532836
 
 
928e881
 
837c657
c8c940e
 
 
 
 
 
 
 
 
 
837c657
c8c940e
 
 
 
837c657
 
 
c8c940e
2532836
928e881
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

@st.cache_resource
def load_model():
    """Load the CodeT5 tokenizer and seq2seq model.

    Decorated with ``st.cache_resource`` so the loaded objects can be
    reused instead of being re-created on every call.

    Returns:
        A ``(tokenizer, model)`` tuple, both loaded from the
        ``Salesforce/codet5-base`` checkpoint.
    """
    checkpoint = "Salesforce/codet5-base"
    # Tokenizer is loaded first, then the model; both come from the same
    # checkpoint so their vocabularies match.
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
    )

# Load the tokenizer and model up front; every widget below depends on them.
tokenizer, model = load_model()

st.title("Code Generator")
st.write("Generate code snippets from natural language prompts using CodeT5!")

prompt = st.text_area("Enter your coding task:", placeholder="Write a Python function to calculate factorial.")
# Slider arguments are (min, max, default): 20..300, starting at 100.
max_length = st.slider("Maximum length of generated code:", 20, 300, 100)

if st.button("Generate Code"):
    # Reject empty / whitespace-only prompts before doing any model work.
    if prompt.strip():
        with st.spinner("Generating code..."):
            inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)

            # Sampling-based generation (temperature + nucleus sampling).
            # The attention mask produced by the tokenizer is forwarded
            # explicitly so generate() does not have to infer it from pad
            # tokens in input_ids.
            outputs = model.generate(
                inputs.input_ids,
                attention_mask=inputs.attention_mask,
                max_length=max_length,
                temperature=0.7,
                top_p=0.95,
                do_sample=True,
            )

            # Debugging aid: show the raw generated token ids.
            st.write("### Debugging: Raw Model Output")
            st.json(outputs.tolist())

            # Only the first returned sequence is decoded; special tokens
            # are dropped from the displayed text.
            generated_code = tokenizer.decode(outputs[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)

            st.write("### Generated Code:")
            st.code(generated_code, language="python")

    else:
        st.warning("Please enter a prompt!")