joermd committed · verified
Commit 4b8d1fe · 1 Parent(s): f4d2f94

Update app.py

Files changed (1): app.py (+32 -6)
app.py CHANGED
@@ -3,49 +3,74 @@ import streamlit as st
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import os
+
 # Random dog images for error messages
 random_dog = [
     "0f476473-2d8b-415e-b944-483768418a95.jpg",
     "1bd75c81-f1d7-4e55-9310-a27595fa8762.jpg",
     # Add more images as needed
 ]
+
 # Function to reset conversation
 def reset_conversation():
     '''Resets conversation'''
     st.session_state.conversation = []
     st.session_state.messages = []
     return None
+
 # Sidebar controls
 temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, 0.5)
 max_token_value = st.sidebar.slider('Select a max_token value', 1000, 9000, 5000)
 st.sidebar.button('Reset Chat', on_click=reset_conversation)
+
 # Initialize chat history
 if "messages" not in st.session_state:
     st.session_state.messages = []
+
 # Display chat messages from history on app rerun
 for message in st.session_state.messages:
     with st.chat_message(message["role"]):
         st.markdown(message["content"])
+
 # Set cache directory path to /data
 cache_dir = "/data"  # the storage path allotted to the Hugging Face Space
+
 # Load model and tokenizer on-demand to save memory
 if prompt := st.chat_input(f"مرحبا انا سبيدي , كيف استطيع مساعدتك ؟"):  # "Hello, I'm Speedy, how can I help you?"
     with st.chat_message("user"):
         st.markdown(prompt)
     st.session_state.messages.append({"role": "user", "content": prompt})
+
     # Load model only when user submits a prompt
     try:
         # Load the tokenizer and model with caching in the specified directory
-        tokenizer = AutoTokenizer.from_pretrained("sambanovasystems/SambaLingo-Arabic-Chat", cache_dir=cache_dir)
-        model = AutoModelForCausalLM.from_pretrained("sambanovasystems/SambaLingo-Arabic-Chat", cache_dir=cache_dir)
+        tokenizer = AutoTokenizer.from_pretrained("joermd/speedy-llama2", cache_dir=cache_dir)
+        model = AutoModelForCausalLM.from_pretrained(
+            "joermd/speedy-llama2",
+            cache_dir=cache_dir,
+            torch_dtype=torch.bfloat16,
+            device_map="auto"
+        )
+
+        # Prepare the system message and conversation
+        system_message = {
+            "role": "system",
+            "content": "You are a friendly chatbot who answers questions in Arabic."
+        }
+        messages = [system_message, {"role": "user", "content": prompt}]
+
+        # Create conversation prompt using chat template
+        conversation = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
         # Generate response
-        inputs = tokenizer(prompt, return_tensors="pt")
+        inputs = tokenizer(conversation, return_tensors="pt")
         outputs = model.generate(
             inputs.input_ids,
             max_new_tokens=max_token_value,
             temperature=temp_values,
-            do_sample=True
+            do_sample=True,
+            top_k=50,
+            top_p=0.95
         )
         assistant_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
@@ -55,10 +80,11 @@ if prompt := st.chat_input(f"مرحبا انا سبيدي , كيف استطيع
         del model
 
     except Exception as e:
-        assistant_response = "😵‍💫 Connection issue! Try again later. Here's a 🐶:"
+        assistant_response = "😵‍💫 عذراً، حدث خطأ في الاتصال! حاول مرة أخرى لاحقاً. إليك صورة كلب 🐶:"  # "Sorry, a connection error occurred! Try again later. Here's a dog picture 🐶:"
         st.image(f'https://random.dog/{random_dog[np.random.randint(len(random_dog))]}')
-        st.write("Error message:")
+        st.write("رسالة الخطأ:")  # "Error message:"
         st.write(e)
+
     # Display assistant response
     with st.chat_message("assistant"):
         st.markdown(assistant_response)
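
The substantive change in this commit is the prompt construction: instead of tokenizing the raw user prompt, the new code builds a system-plus-user message list and formats it with the tokenizer's chat template before generation. Since the call uses tokenize=False, it returns the formatted string rather than token IDs, so the exact text the model will see can be inspected directly. A minimal check, assuming joermd/speedy-llama2 ships a chat template in its tokenizer config:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("joermd/speedy-llama2", cache_dir="/data")

messages = [
    {"role": "system", "content": "You are a friendly chatbot who answers questions in Arabic."},
    {"role": "user", "content": "مرحبا"},  # "Hello"
]

# Returns the formatted prompt string (no tokenization), with the
# generation prompt appended so the model continues as the assistant.
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(text)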
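
Two details of the committed code are worth flagging for a follow-up. First, the except branch picks a dog image with np.random.randint, but app.py never imports numpy, so the error handler itself would raise a NameError. Second, with device_map="auto" the model weights may be dispatched to a GPU while the tokenized inputs stay on the CPU. A sketch of the generation section with both points addressed; prompt, temp_values, and max_token_value come from the app above, and the token slicing at the end is an optional extra so the reply does not echo the formatted prompt:

import numpy as np  # missing from the commit; needed for np.random.randint
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("joermd/speedy-llama2", cache_dir="/data")
model = AutoModelForCausalLM.from_pretrained(
    "joermd/speedy-llama2",
    cache_dir="/data",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

conversation = tokenizer.apply_chat_template(
    [{"role": "system", "content": "You are a friendly chatbot who answers questions in Arabic."},
     {"role": "user", "content": prompt}],
    tokenize=False,
    add_generation_prompt=True,
)

# Move the encoded inputs to whichever device the model was dispatched to
inputs = tokenizer(conversation, return_tensors="pt").to(model.device)
outputs = model.generate(
    inputs.input_ids,
    max_new_tokens=max_token_value,
    temperature=temp_values,
    do_sample=True,
    top_k=50,
    top_p=0.95,
)

# Decode only the newly generated tokens; outputs[0] also contains the prompt
assistant_response = tokenizer.decode(
    outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True
)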