joermd committed · Commit 2be8ca9 · verified · 1 Parent(s): 1d026dc

Update app.py

Files changed (1)
  1. app.py +34 -27
app.py CHANGED
@@ -4,11 +4,6 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import os
 
-# Load the model and tokenizer
-model_name = "joermd/llma-speedy"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name)
-
 # Random dog images for error messages
 random_dog = [
     "0f476473-2d8b-415e-b944-483768418a95.jpg",
@@ -16,21 +11,18 @@ random_dog = [
     # Add more images as needed
 ]
 
+# Function to reset conversation
 def reset_conversation():
     '''Resets conversation'''
     st.session_state.conversation = []
     st.session_state.messages = []
     return None
 
-# Create sidebar controls
+# Sidebar controls
 temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, 0.5)
 max_token_value = st.sidebar.slider('Select a max_token value', 1000, 9000, 5000)
 st.sidebar.button('Reset Chat', on_click=reset_conversation)
 
-# Set the model and display its name
-st.sidebar.write(f"You're now chatting with **{model_name}**")
-st.sidebar.markdown("*Generated content may be inaccurate or false.*")
-
 # Initialize chat history
 if "messages" not in st.session_state:
     st.session_state.messages = []
@@ -40,28 +32,43 @@ for message in st.session_state.messages:
     with st.chat_message(message["role"]):
         st.markdown(message["content"])
 
-# Accept user input
+# Set cache directory path
+cache_dir = "speedy-llm/"  # Replace this with the storage path allocated in the Hugging Face Space
+
+# Load model and tokenizer on-demand to save memory
 if prompt := st.chat_input(f"مرحبا انا سبيدي , كيف استطيع مساعدتك ؟"):
     with st.chat_message("user"):
         st.markdown(prompt)
     st.session_state.messages.append({"role": "user", "content": prompt})
 
+    # Load model only when user submits a prompt
+    try:
+        # Load the tokenizer and model with caching in the specified directory
+        tokenizer = AutoTokenizer.from_pretrained("joermd/llma-speedy", cache_dir=cache_dir)
+        model = AutoModelForCausalLM.from_pretrained("joermd/llma-speedy", cache_dir=cache_dir)
+
+        # Generate response
+        inputs = tokenizer(prompt, return_tensors="pt")
+        outputs = model.generate(
+            inputs.input_ids,
+            max_new_tokens=max_token_value,
+            temperature=temp_values,
+            do_sample=True
+        )
+        assistant_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+        # Clear memory (for CUDA) and delete the model to free up RAM
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        del model
+
+    except Exception as e:
+        assistant_response = "😵‍💫 Connection issue! Try again later. Here's a 🐶:"
+        st.image(f'https://random.dog/{random_dog[np.random.randint(len(random_dog))]}')
+        st.write("Error message:")
+        st.write(e)
+
     # Display assistant response
     with st.chat_message("assistant"):
-        try:
-            inputs = tokenizer(prompt, return_tensors="pt")
-            outputs = model.generate(
-                inputs.input_ids,
-                max_new_tokens=max_token_value,
-                temperature=temp_values,
-                do_sample=True
-            )
-            assistant_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        except Exception as e:
-            assistant_response = "😵‍💫 Connection issue! Try again later. Here's a 🐶:"
-            st.image(f'https://random.dog/{random_dog[np.random.randint(len(random_dog))]}')
-            st.write("Error message:")
-            st.write(e)
-
-        st.markdown(assistant_response)
+        st.markdown(assistant_response)
     st.session_state.messages.append({"role": "assistant", "content": assistant_response})
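
A note on the lazy-loading pattern this commit adopts: from_pretrained now runs inside the chat handler, so every prompt pays the full checkpoint-load cost, and del model only drops the local Python reference. A minimal alternative sketch, assuming Streamlit's st.cache_resource (streamlit >= 1.18) and the same joermd/llma-speedy checkpoint, that loads the weights once per process and reuses them across reruns:

import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

@st.cache_resource  # runs once per server process; later reruns reuse the returned objects
def load_speedy(cache_dir="speedy-llm/"):  # same cache path as in this commit
    tokenizer = AutoTokenizer.from_pretrained("joermd/llma-speedy", cache_dir=cache_dir)
    model = AutoModelForCausalLM.from_pretrained("joermd/llma-speedy", cache_dir=cache_dir)
    model.eval()  # inference only
    return tokenizer, model

tokenizer, model = load_speedy()

def generate_reply(prompt, temperature, max_new_tokens):
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():  # no gradients needed during generation
        outputs = model.generate(
            inputs.input_ids,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            do_sample=True,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

The trade-off is the inverse of the commit's: the model stays resident for the life of the process, but each prompt skips the reload, and a bad checkpoint path fails once at startup rather than on every message. load_speedy and generate_reply are illustrative names, not part of the committed code.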