"""BudtenderPro demo app.

Loads google/flan-t5-base through a Hugging Face text2text-generation
pipeline and exposes a single-question Gradio interface that answers
cannabis-related questions in a professional, bilingual budtender persona.
Note: the model and tokenizer are loaded at import time (typical for
Gradio demo scripts), so importing this module downloads/loads weights.
"""

import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

# Use a modern, instruction-tuned model. FLAN-T5 Base is lightweight
# enough for free CPU usage.
MODEL_NAME = "google/flan-t5-base"

# Load the tokenizer and model once at startup.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

# Set up the text-to-text generation pipeline.
llm_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer)


def budtender_response(user_input: str) -> str:
    """Generate a BudtenderPro answer for a user's question.

    Wraps the user's question in a system-style persona prompt (bilingual,
    professional budtender for adult and medical cannabis customers in
    New Mexico) and runs it through the FLAN-T5 pipeline.

    Args:
        user_input: The raw question typed by the user.

    Returns:
        The model's generated answer, stripped of surrounding whitespace,
        or a short prompt to enter a question when the input is empty.
    """
    # Guard against empty/whitespace-only input so we don't invoke the
    # model on an effectively blank question.
    if not user_input or not user_input.strip():
        return "Please enter a question so BudtenderPro can help you."

    # Construct a detailed system prompt for context.
    prompt = (
        "You are BudtenderPro, a friendly, knowledgeable, and culturally sensitive budtender for "
        "both adult and medical cannabis customers in New Mexico. You are fluent in both English and Spanish. "
        "You provide professional advice on strains, dosages, and product recommendations, keeping in mind legal regulations and community values. "
        "Answer the user's question with clear, respectful, and useful guidance. \n\n"
        f"User Question: {user_input}"
    )

    # Generate the response. `max_new_tokens` (rather than the deprecated,
    # ambiguous `max_length`) bounds only the generated continuation;
    # sampling with a moderate temperature keeps answers varied but focused.
    outputs = llm_pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7)
    answer = outputs[0]["generated_text"]
    return answer.strip()


# Set up the Gradio interface.
iface = gr.Interface(
    fn=budtender_response,
    inputs=gr.Textbox(lines=3, placeholder="Ask your cannabis-related question here..."),
    outputs=gr.Textbox(label="BudtenderPro Response"),
    title="BudtenderPro LLM Demo",
    description=(
        "A modern, bilingual budtender assistant for adult and medical cannabis questions in New Mexico. "
        "Powered by Google FLAN-T5 Base, this demo leverages an up-to-date Hugging Face workflow to provide professional guidance."
    ),
)

if __name__ == "__main__":
    iface.launch()