DexterSptizu committed on
Commit
3b1bd72
1 Parent(s): 113d7bb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -43
app.py CHANGED
@@ -3,9 +3,11 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import torch
4
  from datetime import datetime
5
 
6
- # Initialize session state for chat history
7
  if 'messages' not in st.session_state:
8
  st.session_state.messages = []
 
 
9
 
10
  @st.cache_resource
11
  def load_model():
@@ -46,7 +48,11 @@ def generate_response(prompt, model, tokenizer, history):
46
  return response
47
 
48
  def main():
49
- st.set_page_config(page_title="AMD-OLMo Chatbot", layout="wide")
 
 
 
 
50
 
51
  # Custom CSS
52
  st.markdown("""
@@ -58,17 +64,31 @@ def main():
58
  background-color: #f0f2f6;
59
  padding: 20px;
60
  border-radius: 10px;
 
61
  }
62
  .chat-message {
63
- padding: 10px;
64
  border-radius: 10px;
65
- margin: 5px 0;
66
  }
67
  .user-message {
68
  background-color: #e6f3ff;
 
69
  }
70
  .assistant-message {
71
  background-color: #f0f2f6;
 
 
 
 
 
 
 
 
 
 
 
 
72
  }
73
  </style>
74
  """, unsafe_allow_html=True)
@@ -79,35 +99,46 @@ def main():
79
  with tab1:
80
  st.title("AMD-OLMo-1B-SFT Model Information")
81
 
82
- st.markdown("""
83
- ## Model Overview
84
- AMD-OLMo-1B-SFT is a state-of-the-art language model developed by AMD[1][2]. Key features include:
85
-
86
- ### Architecture
87
- - **Base Model**: 1.2B parameters
88
- - **Layers**: 16
89
- - **Attention Heads**: 16
90
- - **Hidden Size**: 2048
91
- - **Context Length**: 2048
92
- - **Vocabulary Size**: 50,280
93
-
94
- ### Training Details
95
- - Pre-trained on 1.3 trillion tokens from Dolma v1.7
96
- - Supervised fine-tuned (SFT) in two phases:
97
- 1. Tulu V2 dataset
98
- 2. OpenHermes-2.5, WebInstructSub, and Code-Feedback datasets
99
-
100
- ### Capabilities
101
- - General text generation
102
- - Question answering
103
- - Code understanding
104
- - Reasoning tasks
105
- - Instruction following
106
-
107
- ### Hardware Requirements
108
- - Optimized for AMD Instinct™ MI250 GPUs
109
- - Training performed on 16 nodes with 4 GPUs each
110
- """)
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  with tab2:
113
  st.title("Chat with AMD-OLMo")
@@ -127,37 +158,54 @@ def main():
127
  with chat_container:
128
  for message in st.session_state.messages:
129
  div_class = "user-message" if message["role"] == "user" else "assistant-message"
 
130
  st.markdown(f"""
131
  <div class="chat-message {div_class}">
132
  <b>{message["role"].title()}:</b> {message["content"]}
 
133
  </div>
134
  """, unsafe_allow_html=True)
135
 
136
- # User input
137
  with st.container():
138
- user_input = st.text_area("Your message:", key="user_input", height=100)
 
 
 
 
 
 
139
  col1, col2, col3 = st.columns([1, 1, 4])
140
 
141
  with col1:
142
- if st.button("Send"):
143
  if user_input.strip():
144
- # Add user message to history
145
- st.session_state.messages.append({"role": "user", "content": user_input})
 
 
 
 
146
 
147
  # Generate response
148
- with st.spinner("Thinking..."):
149
  response = generate_response(user_input, model, tokenizer, st.session_state.messages)
150
 
151
- # Add assistant response to history
152
- st.session_state.messages.append({"role": "assistant", "content": response})
 
 
 
 
153
 
154
  # Clear input
155
- st.session_state.user_input = ""
156
  st.experimental_rerun()
157
 
158
  with col2:
159
- if st.button("Clear History"):
160
  st.session_state.messages = []
 
161
  st.experimental_rerun()
162
 
163
  if __name__ == "__main__":
 
3
  import torch
4
  from datetime import datetime
5
 
6
+ # Initialize session state variables
7
  if 'messages' not in st.session_state:
8
  st.session_state.messages = []
9
+ if "user_input_widget" not in st.session_state:
10
+ st.session_state.user_input_widget = ""
11
 
12
  @st.cache_resource
13
  def load_model():
 
48
  return response
49
 
50
  def main():
51
+ st.set_page_config(
52
+ page_title="AMD-OLMo Chatbot",
53
+ layout="wide",
54
+ initial_sidebar_state="expanded"
55
+ )
56
 
57
  # Custom CSS
58
  st.markdown("""
 
64
  background-color: #f0f2f6;
65
  padding: 20px;
66
  border-radius: 10px;
67
+ margin: 10px 0;
68
  }
69
  .chat-message {
70
+ padding: 15px;
71
  border-radius: 10px;
72
+ margin: 10px 0;
73
  }
74
  .user-message {
75
  background-color: #e6f3ff;
76
+ border-left: 5px solid #2e6da4;
77
  }
78
  .assistant-message {
79
  background-color: #f0f2f6;
80
+ border-left: 5px solid #5cb85c;
81
+ }
82
+ .stTextArea textarea {
83
+ font-size: 16px;
84
+ }
85
+ .timestamp {
86
+ font-size: 12px;
87
+ color: #666;
88
+ margin-top: 5px;
89
+ }
90
+ .st-emotion-cache-1v0mbdj.e115fcil1 {
91
+ margin-top: 20px;
92
  }
93
  </style>
94
  """, unsafe_allow_html=True)
 
99
  with tab1:
100
  st.title("AMD-OLMo-1B-SFT Model Information")
101
 
102
+ with st.container():
103
+ st.markdown("""
104
+ <div class="model-info">
105
+ <h2>Model Overview</h2>
106
+ AMD-OLMo-1B-SFT is a state-of-the-art language model developed by AMD. This model represents a significant advancement in AMD's AI capabilities.
107
+
108
+ <h3>Architecture Specifications</h3>
109
+
110
+ | Component | Specification |
111
+ |-----------|---------------|
112
+ | Parameters | 1.2B |
113
+ | Layers | 16 |
114
+ | Attention Heads | 16 |
115
+ | Hidden Size | 2048 |
116
+ | Context Length | 2048 |
117
+ | Vocabulary Size | 50,280 |
118
+
119
+ <h3>Training Details</h3>
120
+
121
+ - Pre-trained on 1.3 trillion tokens from Dolma v1.7
122
+ - Two-phase supervised fine-tuning (SFT):
123
+ 1. Tulu V2 dataset
124
+ 2. OpenHermes-2.5, WebInstructSub, and Code-Feedback datasets
125
+
126
+ <h3>Key Capabilities</h3>
127
+
128
+ - Natural language understanding and generation
129
+ - Context-aware responses
130
+ - Code understanding and generation
131
+ - Complex reasoning tasks
132
+ - Instruction following
133
+ - Multi-turn conversations
134
+
135
+ <h3>Hardware Optimization</h3>
136
+
137
+ - Optimized for AMD Instinct™ MI250 GPUs
138
+ - Distributed training across 16 nodes with 4 GPUs each
139
+ - Efficient inference on consumer hardware
140
+ </div>
141
+ """, unsafe_allow_html=True)
142
 
143
  with tab2:
144
  st.title("Chat with AMD-OLMo")
 
158
  with chat_container:
159
  for message in st.session_state.messages:
160
  div_class = "user-message" if message["role"] == "user" else "assistant-message"
161
+ timestamp = message.get("timestamp", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
162
  st.markdown(f"""
163
  <div class="chat-message {div_class}">
164
  <b>{message["role"].title()}:</b> {message["content"]}
165
+ <div class="timestamp">{timestamp}</div>
166
  </div>
167
  """, unsafe_allow_html=True)
168
 
169
+ # User input section
170
  with st.container():
171
+ user_input = st.text_area(
172
+ "Your message:",
173
+ key="user_input_widget",
174
+ height=100,
175
+ placeholder="Type your message here..."
176
+ )
177
+
178
  col1, col2, col3 = st.columns([1, 1, 4])
179
 
180
  with col1:
181
+ if st.button("Send", use_container_width=True):
182
  if user_input.strip():
183
+ # Add user message to history with timestamp
184
+ st.session_state.messages.append({
185
+ "role": "user",
186
+ "content": user_input,
187
+ "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
188
+ })
189
 
190
  # Generate response
191
+ with st.spinner("Generating response..."):
192
  response = generate_response(user_input, model, tokenizer, st.session_state.messages)
193
 
194
+ # Add assistant response to history with timestamp
195
+ st.session_state.messages.append({
196
+ "role": "assistant",
197
+ "content": response,
198
+ "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
199
+ })
200
 
201
  # Clear input
202
+ st.session_state.user_input_widget = ""
203
  st.experimental_rerun()
204
 
205
  with col2:
206
+ if st.button("Clear History", use_container_width=True):
207
  st.session_state.messages = []
208
+ st.session_state.user_input_widget = ""
209
  st.experimental_rerun()
210
 
211
  if __name__ == "__main__":