import numpy as np
import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import os
# Random dog images for error messages
random_dog = [
"0f476473-2d8b-415e-b944-483768418a95.jpg",
"1bd75c81-f1d7-4e55-9310-a27595fa8762.jpg",
# Add more images as needed
]
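# These filenames are appended to https://random.dog/ when an error image is shown below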
# Function to reset conversation
def reset_conversation():
    '''Resets conversation'''
    st.session_state.conversation = []
    st.session_state.messages = []
    return None
# Sidebar controls
temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, 0.5)
max_token_value = st.sidebar.slider('Select a max_token value', 1000, 9000, 5000)
st.sidebar.button('Reset Chat', on_click=reset_conversation)
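# Note (assumption): these slider values feed generate() below; transformers typically
# rejects temperature=0.0 when do_sample=True, so values at the very bottom of the range may error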
# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []
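    # st.session_state persists across Streamlit reruns, so the history is only initialized when the key is missing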
# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
# Set cache directory path to /data
cache_dir = "/data" # المسار المحدد للتخزين في مساحة Hugging Face
# Load model and tokenizer on-demand to save memory
if prompt := st.chat_input("مرحبا انا سبيدي , كيف استطيع مساعدتك ؟"):  # "Hello, I'm Speedy, how can I help you?"
    with st.chat_message("user"):
        st.markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})
    # Load model only when user submits a prompt
    try:
        # Load the tokenizer and model with caching in the specified directory
        tokenizer = AutoTokenizer.from_pretrained("joermd/speedy-llama2", cache_dir=cache_dir)
        model = AutoModelForCausalLM.from_pretrained(
            "joermd/speedy-llama2",
            cache_dir=cache_dir,
            torch_dtype=torch.bfloat16,
            device_map="auto"
        )
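        # Note: device_map="auto" requires the `accelerate` package and places weights on the
        # available GPU/CPU automatically; bfloat16 roughly halves memory versus float32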
        # Prepare the system message and conversation
        system_message = {
            "role": "system",
            "content": "You are a friendly chatbot who answers questions in Arabic."
        }
        messages = [system_message, {"role": "user", "content": prompt}]
        # Create conversation prompt using chat template
        conversation = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
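        # With tokenize=False the call above returns the formatted prompt as a plain string;
        # add_generation_prompt=True appends the assistant-turn marker so the model continues as the assistant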
        # Generate response
        inputs = tokenizer(conversation, return_tensors="pt").to(model.device)  # keep inputs on the model's device
        outputs = model.generate(
            inputs.input_ids,
            max_new_tokens=max_token_value,
            temperature=temp_values,
            do_sample=True,
            top_k=50,
            top_p=0.95
        )
        # Decode only the newly generated tokens so the echoed prompt is not shown
        assistant_response = tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True)
        # Delete the model, then clear the CUDA cache, to free up RAM between turns
        del model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
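        # Trade-off: dropping the model each turn saves memory but forces a reload from cache_dir on the next prompt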
    except Exception as e:
        # "Sorry, a connection error occurred! Try again later. Here's a dog picture:"
        assistant_response = "😵💫 عذراً، حدث خطأ في الاتصال! حاول مرة أخرى لاحقاً. إليك صورة كلب 🐶:"
        st.image(f'https://random.dog/{random_dog[np.random.randint(len(random_dog))]}')
        st.write("رسالة الخطأ:")  # "Error message:"
        st.write(e)
    # Display assistant response
    with st.chat_message("assistant"):
        st.markdown(assistant_response)
    st.session_state.messages.append({"role": "assistant", "content": assistant_response})