import streamlit as st


@st.cache_resource(show_spinner="Loading Model & Tokenizer")
def load_model():
    # Cached by Streamlit, so the model is loaded once per process
    # rather than on every rerun of the script.
    import torch
    from peft import PeftModel
    from transformers import AutoModelForCausalLM, AutoTokenizer

    base_model = AutoModelForCausalLM.from_pretrained(
        "unsloth/Qwen2.5-0.5B",
        device_map="cpu",
        torch_dtype=torch.bfloat16,
    )
    # Attach the LoRA adapters, then merge them into the base weights
    # so inference runs on a single plain model.
    lora_model = PeftModel.from_pretrained(
        base_model,
        "mosama/Qwen2.5-0.5B-Pretraining-ar-eng-urd-LoRA-Adapters",
    )
    merged_model = lora_model.merge_and_unload()
    tokenizer = AutoTokenizer.from_pretrained(
        "mosama/Qwen2.5-0.5B-Pretrained-ar-end-urd-500"
    )
    st.success("Model & Tokenizer Loaded Successfully!", icon="✅")
    return merged_model, tokenizer


st.title("Qwen2.5-0.5B Arabic, English & Urdu Continuous Pretrained")

model, tokenizer = load_model()

# Initialize the chat history and the flag that disables the input box
# while a response is being generated.
if "messages" not in st.session_state:
    st.session_state.messages = []
if "state_chat_input" not in st.session_state:
    st.session_state.state_chat_input = False

# Replay the conversation so far on every rerun.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Greet the user on the first run only.
if not st.session_state.messages:
    greeting = "Hello 👋 I am an AI bot powered by the Qwen 2.5 0.5B model."
    with st.chat_message("assistant"):
        st.write(greeting)
    st.session_state.messages.append({"role": "assistant", "content": greeting})

if prompt := st.chat_input(
    "Say Something", key="input_1", disabled=st.session_state.state_chat_input
):
    # Display the user message and add it to the chat history.
    with st.chat_message("user"):
        st.markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

# Two-pass flow: the first rerun after a prompt only disables the input
# box; the second rerun (with the box disabled) generates the response.
if prompt or st.session_state.state_chat_input:
    if st.session_state.state_chat_input:
        with st.spinner(text="Generating response..."):
            model_inputs = tokenizer(
                st.session_state.messages[-1]["content"], return_tensors="pt"
            ).to(model.device)
            generated_ids = model.generate(
                **model_inputs,
                max_new_tokens=50,
                repetition_penalty=1.2,
                temperature=0.5,
                do_sample=True,
                top_p=0.9,
                top_k=20,
            )
            # Strip the prompt tokens so only the newly generated
            # continuation is shown as the assistant's reply.
            new_tokens = generated_ids[0][model_inputs["input_ids"].shape[1]:]
            response = tokenizer.decode(new_tokens, skip_special_tokens=True)
        # Display the assistant response and add it to the chat history.
        with st.chat_message("assistant"):
            st.markdown(response)
        st.session_state.messages.append({"role": "assistant", "content": response})
        st.session_state.state_chat_input = False
        st.rerun()
    else:
        # First pass: disable the input box, then rerun to generate.
        st.session_state.state_chat_input = True
        st.rerun()
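

# ---------------------------------------------------------------------------
# Optional: a streaming variant of the generation step (a minimal sketch, not
# wired into the app above). It assumes transformers' TextIteratorStreamer and
# Streamlit's st.write_stream (Streamlit >= 1.31); the function name and its
# placement here are illustrative. Generation runs in a background thread
# while tokens are rendered as they arrive.
# ---------------------------------------------------------------------------
def generate_streaming(model, tokenizer, prompt: str) -> str:
    from threading import Thread

    from transformers import TextIteratorStreamer

    model_inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # skip_prompt=True drops the echoed input tokens from the stream.
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    thread = Thread(
        target=model.generate,
        kwargs=dict(
            **model_inputs,
            streamer=streamer,
            max_new_tokens=50,
            repetition_penalty=1.2,
            temperature=0.5,
            do_sample=True,
            top_p=0.9,
            top_k=20,
        ),
    )
    thread.start()
    # st.write_stream consumes the token iterator and returns the full text,
    # which could then be appended to st.session_state.messages as above.
    with st.chat_message("assistant"):
        response = st.write_stream(streamer)
    thread.join()
    return response


# Launch the app with:  streamlit run app.py  (filename is illustrative).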