# speedy-llm/app.py
import numpy as np
import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Random dog images shown alongside error messages
random_dog = [
    "0f476473-2d8b-415e-b944-483768418a95.jpg",
    "1bd75c81-f1d7-4e55-9310-a27595fa8762.jpg",
    # Add more images as needed
]
# Function to reset conversation
def reset_conversation():
    '''Resets conversation'''
    st.session_state.conversation = []
    st.session_state.messages = []
    return None
# Sidebar controls
# temperature must be strictly positive when do_sample=True, so the slider starts at 0.1
temp_values = st.sidebar.slider('Select a temperature value', 0.1, 1.0, 0.5)
max_token_value = st.sidebar.slider('Select a max_token value', 1000, 9000, 5000)
st.sidebar.button('Reset Chat', on_click=reset_conversation)
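# Note: on_click registers reset_conversation as a callback; Streamlit runs the
# callback before the script reruns, so the rerun renders the cleared history.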
# Initialize chat history
if "messages" not in st.session_state:
st.session_state.messages = []
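# Streamlit reruns this script top-to-bottom on every interaction; session_state
# is the only place the chat history survives between reruns.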
# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
# Set cache directory path to /data
cache_dir = "/data"  # designated storage path in the Hugging Face Space
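# On Hugging Face Spaces, /data is the mount point for persistent storage (when
# it is enabled for the Space), so cached model weights can survive restarts.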
# Load model and tokenizer on-demand to save memory
if prompt := st.chat_input("Hello, I'm Speedy. How can I help you?"):
    with st.chat_message("user"):
        st.markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})
    # Load model only when the user submits a prompt
    try:
        # Load the tokenizer and model, caching downloads in the specified directory
        tokenizer = AutoTokenizer.from_pretrained("joermd/llma-speedy", cache_dir=cache_dir)
        model = AutoModelForCausalLM.from_pretrained("joermd/llma-speedy", cache_dir=cache_dir)
        # Generate a response; passing **inputs supplies the attention mask
        # along with the input ids, avoiding a transformers warning
        inputs = tokenizer(prompt, return_tensors="pt")
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_token_value,
            temperature=temp_values,
            do_sample=True,
        )
        # Decode only the newly generated tokens (generate returns prompt + completion)
        assistant_response = tokenizer.decode(
            outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
        )
        # Delete the model first, then clear the CUDA cache so its memory is actually freed
        del model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    except Exception as e:
        assistant_response = "😵‍💫 Connection issue! Try again later. Here's a 🐶:"
        st.image(f'https://random.dog/{random_dog[np.random.randint(len(random_dog))]}')
        st.write("Error message:")
        st.write(e)
    # Display assistant response
    with st.chat_message("assistant"):
        st.markdown(assistant_response)
    st.session_state.messages.append({"role": "assistant", "content": assistant_response})
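# --- Optional alternative (a sketch, not used by the app above): cache the model
# across reruns with st.cache_resource, so the weights are loaded from disk only
# once per process instead of on every submitted prompt. Trade-off: the model then
# stays resident in RAM, which conflicts with the "load on demand, then free"
# strategy in the handler above. The helper name load_speedy is illustrative.
@st.cache_resource
def load_speedy(cache_dir: str = "/data"):
    """Load and cache the joermd/llma-speedy tokenizer/model pair."""
    tok = AutoTokenizer.from_pretrained("joermd/llma-speedy", cache_dir=cache_dir)
    mdl = AutoModelForCausalLM.from_pretrained("joermd/llma-speedy", cache_dir=cache_dir)
    return tok, mdl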