joermd committed
Commit 9ccef71 · verified · 1 parent: 74011b4

Update app.py

Files changed (1):
  1. app.py +8 -24
app.py CHANGED
@@ -3,56 +3,41 @@ import streamlit as st
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import os
-
 # Random dog images for error messages
 random_dog = [
     "0f476473-2d8b-415e-b944-483768418a95.jpg",
     "1bd75c81-f1d7-4e55-9310-a27595fa8762.jpg",
+    # Add more images as needed
 ]
-
+# Function to reset conversation
 def reset_conversation():
     '''Resets conversation'''
     st.session_state.conversation = []
     st.session_state.messages = []
     return None
-
 # Sidebar controls
 temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, 0.5)
 max_token_value = st.sidebar.slider('Select a max_token value', 1000, 9000, 5000)
 st.sidebar.button('Reset Chat', on_click=reset_conversation)
-
 # Initialize chat history
 if "messages" not in st.session_state:
     st.session_state.messages = []
-
 # Display chat messages from history on app rerun
 for message in st.session_state.messages:
     with st.chat_message(message["role"]):
         st.markdown(message["content"])
-
 # Set cache directory path to /data
-cache_dir = "/data"
-
+cache_dir = "/data"  # the designated storage path in the Hugging Face Space
 # Load model and tokenizer on-demand to save memory
 if prompt := st.chat_input(f"مرحبا انا سبيدي , كيف استطيع مساعدتك ؟"):
     with st.chat_message("user"):
         st.markdown(prompt)
     st.session_state.messages.append({"role": "user", "content": prompt})
-
+    # Load model only when user submits a prompt
     try:
-        # Load the tokenizer and model with specific configuration
-        tokenizer = AutoTokenizer.from_pretrained(
-            "joermd/llma-speedy",
-            cache_dir=cache_dir,
-            local_files_only=False  # allow loading only the available files
-        )
-
-        model = AutoModelForCausalLM.from_pretrained(
-            "joermd/llma-speedy",
-            cache_dir=cache_dir,
-            local_files_only=False,  # allow loading only the available files
-            ignore_missing_weights=True  # ignore the missing weights
-        )
+        # Load the tokenizer and model with caching in the specified directory
+        tokenizer = AutoTokenizer.from_pretrained("joermd/llma-speedy", cache_dir=cache_dir)
+        model = AutoModelForCausalLM.from_pretrained("joermd/llma-speedy", cache_dir=cache_dir)
 
         # Generate response
         inputs = tokenizer(prompt, return_tensors="pt")
@@ -64,7 +49,7 @@ if prompt := st.chat_input(f"مرحبا انا سبيدي , كيف استطيع مساعدتك ؟"):
         )
         assistant_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-        # Clear memory
+        # Clear memory (for CUDA) and delete the model to free up RAM
        if torch.cuda.is_available():
             torch.cuda.empty_cache()
         del model
@@ -74,7 +59,6 @@ if prompt := st.chat_input(f"مرحبا انا سبيدي , كيف استطيع مساعدتك ؟"):
         st.image(f'https://random.dog/{random_dog[np.random.randint(len(random_dog))]}')
         st.write("Error message:")
         st.write(e)
-
 # Display assistant response
 with st.chat_message("assistant"):
     st.markdown(assistant_response)
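Reviewer note: dropping ignore_missing_weights=True is more than a simplification. As far as I can tell, that keyword is not part of the transformers from_pretrained API, so the old call was passing an unrecognized argument; local_files_only=False is also the library default, so omitting it changes nothing. The removed inline comment ("allow loading only the available files") also contradicted local_files_only=False, which explicitly permits downloading from the Hub.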
 
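A pre-existing bug this commit leaves untouched: the except branch calls np.random.randint, but none of the visible imports bring in numpy. A minimal fix, assuming numpy is not needed elsewhere in the file, is the standard library:

import random  # stdlib replacement for the missing "import numpy as np"

        # in the except branch, pick the dog image with random.choice instead:
        st.image(f'https://random.dog/{random.choice(random_dog)}')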
 
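Also untouched: the closing display block runs even when the try body fails, in which case assistant_response was never assigned and st.markdown(assistant_response) raises a NameError. A sketch of a guard for app.py, not part of this commit:

    assistant_response = None  # define the name up front so it always exists
    try:
        ...  # load the model, generate, and decode as in the diff above
    except Exception as e:
        st.write("Error message:")
        st.write(e)

    # Display the assistant response only when generation succeeded
    if assistant_response is not None:
        with st.chat_message("assistant"):
            st.markdown(assistant_response)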