import spaces import gradio as gr from transformers import AutoModelForCausalLM, AutoTokenizer # Load model and tokenizer model_name = "infly/OpenCoder-8B-Instruct" tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True) @spaces.GPU # Define the text generation function def generate_text(prompt): inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True) outputs = model.generate( inputs["input_ids"], #attention_mask=inputs["attention_mask"], # Add attention mask num_return_sequences=1 ) return tokenizer.decode(outputs[0], skip_special_tokens=True) # Create the Gradio interface iface = gr.ChatInterface( fn=generate_text, inputs=gr.Textbox(label="Enter your prompt", placeholder="Start typing...", lines=5), outputs="text", title="OpenCoder 8B Instruct", description="Generate text using the OpenCoder model. Input a prompt to generate responses.", ) # Launch the Gradio app iface.launch()