Wedyan2023 commited on
Commit
0d30c90
·
verified ·
1 Parent(s): 75803b0

Create app101.py

Browse files
Files changed (1) hide show
  1. app101.py +1287 -0
app101.py ADDED
@@ -0,0 +1,1287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import os
4
+ import json
5
+ import base64
6
+ import random
7
+ from streamlit_pdf_viewer import pdf_viewer
8
+ from langchain.prompts import PromptTemplate
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+ from openai import OpenAI
12
+ from dotenv import load_dotenv
13
+ import warnings
14
+
15
+ warnings.filterwarnings('ignore')
16
+
17
+ os.getenv("OAUTH_CLIENT_ID")
18
+
19
+
20
+ # Load environment variables and initialize the OpenAI client to use Hugging Face Inference API.
21
+ load_dotenv()
22
+ client = OpenAI(
23
+ base_url="https://api-inference.huggingface.co/v1",
24
+ api_key=os.environ.get('GP_WED2') # Hugging Face API token
25
+ )
26
+
27
# Create necessary directories.
# exist_ok=True makes this idempotent across Streamlit reruns and avoids the
# check-then-create race of testing os.path.exists() first.
for dir_name in ['data', 'feedback']:
    os.makedirs(dir_name, exist_ok=True)
31
+
32
+ # Custom CSS
33
+ st.markdown("""
34
+ <style>
35
+ .stButton > button {
36
+ width: 100%;
37
+ margin-bottom: 10px;
38
+ background-color: #4CAF50;
39
+ color: white;
40
+ border: none;
41
+ padding: 10px;
42
+ border-radius: 5px;
43
+ }
44
+ .task-button {
45
+ background-color: #2196F3 !important;
46
+ }
47
+ .stSelectbox {
48
+ margin-bottom: 20px;
49
+ }
50
+ .output-container {
51
+ padding: 20px;
52
+ border-radius: 5px;
53
+ border: 1px solid #ddd;
54
+ margin: 10px 0;
55
+ }
56
+ .status-container {
57
+ padding: 10px;
58
+ border-radius: 5px;
59
+ margin: 10px 0;
60
+ }
61
+ .sidebar-info {
62
+ padding: 10px;
63
+ background-color: #f0f2f6;
64
+ border-radius: 5px;
65
+ margin: 10px 0;
66
+ }
67
+ .feedback-button {
68
+ background-color: #ff9800 !important;
69
+ }
70
+ .feedback-container {
71
+ padding: 15px;
72
+ background-color: #f5f5f5;
73
+ border-radius: 5px;
74
+ margin: 15px 0;
75
+ }
76
+ </style>
77
+ """, unsafe_allow_html=True)
78
+
79
+ # Helper functions
80
def read_csv_with_encoding(file):
    """Read a CSV into a DataFrame, trying several text encodings in turn.

    Args:
        file: A filesystem path or a file-like object accepted by
            ``pandas.read_csv``.

    Returns:
        The DataFrame produced by the first encoding that decodes the data.

    Raises:
        UnicodeDecodeError: If none of the candidate encodings can decode
            the data.
    """
    encodings = ['utf-8', 'latin1', 'iso-8859-1', 'cp1252']

    # A failed attempt may already have consumed part of a file-like object;
    # remember where it started so each retry reads the same bytes again.
    # Plain path strings have no seek()/tell() and need no rewinding.
    rewindable = hasattr(file, 'seek') and hasattr(file, 'tell')
    start = file.tell() if rewindable else None

    for encoding in encodings:
        if rewindable:
            file.seek(start)
        try:
            return pd.read_csv(file, encoding=encoding)
        except UnicodeDecodeError:
            continue

    # UnicodeDecodeError requires (encoding, object, start, end, reason);
    # calling it with a single message argument raises TypeError instead.
    raise UnicodeDecodeError(
        'unknown', b'', 0, 1,
        "Failed to read file with any supported encoding",
    )
88
+
89
+ #def save_feedback(feedback_data):
90
+ #feedback_file = 'feedback/user_feedback.csv'
91
+ #feedback_df = pd.DataFrame([feedback_data])
92
+
93
+ #if os.path.exists(feedback_file):
94
+ #feedback_df.to_csv(feedback_file, mode='a', header=False, index=False)
95
+ #else:
96
+ #feedback_df.to_csv(feedback_file, index=False)
97
+
98
def reset_conversation():
    """Clear the chat history and return the app to the task-selection screen.

    Empties the ``conversation`` and ``messages`` lists in the Streamlit
    session state and removes ``task_choice`` when it is present.

    Returns:
        None.
    """
    state = st.session_state
    state.conversation = []
    state.messages = []
    # Dropping the task choice is what makes the two task buttons reappear.
    if 'task_choice' in state:
        del state.task_choice
    return None
104
+ #new 24 March
105
+ #user_input = st.text_input("Enter your prompt:")
106
+ ###########33
107
+
108
# Initialize session state variables.
# Each key is seeded only when absent, so values survive script reruns.
for state_key, initial_value in (
    ("messages", []),
    ("examples_to_classify", []),
    ("system_role", ""),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value
115
+
116
+
117
+
118
+ # Main app title
119
+ st.title("🤖🦙 Text Data Labeling and Generation App")
120
+ # def embed_pdf_sidebar(pdf_path):
121
+ # with open(pdf_path, "rb") as f:
122
+ # base64_pdf = base64.b64encode(f.read()).decode('utf-8')
123
+ # pdf_display = f"""
124
+ # <iframe src="data:application/pdf;base64,{base64_pdf}"
125
+ # width="100%" height="400" type="application/pdf"></iframe>
126
+ # """
127
+ # st.markdown(pdf_display, unsafe_allow_html=True)
128
+ #
129
+
130
+
131
+ # Sidebar settings
132
+ with st.sidebar:
133
+ st.title("⚙️ Settings")
134
+ # Add PDF upload section
135
+ #
136
+ # if st.button("📘 Show Instructions"):
137
+ # # This should be a path to a local file
138
+ # pdf_path = os.path.join("Streamlit.pdf")
139
+ # pdf_viewer(
140
+ # pdf_path,
141
+ # width="100%",
142
+ # height=300,
143
+ # render_text=True
144
+ # )
145
+ # with st.sidebar:
146
+ # with st.expander("📘 View Instructions"):
147
+ # pdf_viewer("Streamlit.pdf", width="100%", height=300, render_text=True)
148
+
149
+ #
150
+ ###4
151
+ # with st.sidebar:
152
+ # st.markdown("### 📘 Instructions")
153
+ # st.markdown("[📄 Open Instructions PDF](/file/instructions.pdf)")
154
+
155
+
156
+
157
+
158
+ #
159
+ ####2
160
+ # #with st.sidebar:
161
+ # st.markdown("### 📘 Instructions")
162
+
163
+ # # PDF served from Space's file system
164
+ # pdf_url = "/file/instructions.pdf"
165
+
166
+ # st.markdown(f"""
167
+ # <a href="{pdf_url}" target="_blank">
168
+ # <button style='padding:10px;width:100%;font-size:16px;'>📄 Open Instructions PDF</button>
169
+ # </a>
170
+ # """, unsafe_allow_html=True)
171
+ # ###3 working code
172
+ # with st.sidebar:
173
+ # with open("instructions.pdf", "rb") as f:
174
+ # st.sidebar.download_button(
175
+ # label="📄 Download Instructions PDF",
176
+ # data=f,
177
+ # file_name="instructions.pdf",
178
+ # mime="application/pdf"
179
+ # )
180
+
181
+ ###6
182
+ #this last code works
183
with st.sidebar:
    st.markdown("### 📘Data Generation and Labeling Instructions")
    instructions_pdf = "User instructions.pdf"
    # Guard the open(): a missing PDF should not take the whole app down.
    if os.path.isfile(instructions_pdf):
        with open(instructions_pdf, "rb") as f:
            st.download_button(
                label="📄 Download Instructions PDF",
                data=f,
                file_name=instructions_pdf,
                mime="application/pdf"
            )
    else:
        st.info("Instructions PDF is not available.")
194
+
195
+
196
+ #works with blu color text
197
+ # with st.sidebar:
198
+ # # Stylish "Instructions" label
199
+ # st.markdown("<h4 style='color: #4A90E2;'>📘 Instructions</h4>", unsafe_allow_html=True)
200
+
201
+ # # PDF download button
202
+ # with open("instructions.pdf", "rb") as f:
203
+ # st.download_button(
204
+ # label="📄 Download Instructions PDF",
205
+ # data=f,
206
+ # file_name="instructions.pdf",
207
+ # mime="application/pdf"
208
+ # )
209
+
210
+ ###5
211
+
212
+ #with st.sidebar:
213
+ # st.markdown("### 📘 Instructions")
214
+
215
+ # # PDF served from Space's file system
216
+ # pdf_url = "/file/instructions.pdf"
217
+
218
+ # st.markdown(f"""
219
+ # <a href="{pdf_url}" target="_blank">
220
+ # <button style='padding:15px;width:100%;font-size:16px;'> 📄 Open Instructions PDF</button>
221
+ # </a>
222
+ # """, unsafe_allow_html=True)
223
+
224
+
225
+
226
+ selected_model = st.selectbox(
227
+ "Select Model",
228
+ ["meta-llama/Llama-3.3-70B-Instruct", "meta-llama/Llama-3.2-3B-Instruct","meta-llama/Llama-4-Scout-17B-16E-Instruct", "meta-llama/Meta-Llama-3-8B-Instruct",
229
+ "meta-llama/Llama-3.1-70B-Instruct"],
230
+ key='model_select'
231
+ )
232
+
233
+ temperature = st.slider(
234
+ "Temperature",
235
+ 0.0, 1.0, 0.7,
236
+ help="Controls randomness in generation"
237
+ )
238
+
239
+ st.button("🔄 New Conversation", on_click=reset_conversation)
240
+ # st.markdown("### 📘 Instructions")
241
+ # embed_pdf_sidebar("Streamlit.pdf")
242
+ #Add PDF Instructions
243
+ # with st.expander("📚 Instructions"):
244
+ # st.write("View or download instruction guides:")
245
+
246
+ # # Option 1: Using st.download_button for PDFs stored in your app
247
+ # with open("file:///C:/Users/hp/Downloads/Streamlit.pdf", "rb") as file:
248
+ # first_pdf = file.read()
249
+ # st.download_button(
250
+ # label="Download Guide 1",
251
+ # data=first_pdf,
252
+ # file_name="user_guide.pdf",
253
+ # mime="application/pdf"
254
+ # )
255
+
256
+ # #with open("https://huggingface.co/spaces/Wedyan2023/COPY/blob/main/Streamlit.pdf", "rb") as file:
257
+ # with open("file:///C:/Users/hp/Downloads/Streamlit.pdf", "rb") as file:
258
+ # second_pdf = file.read()
259
+ # st.download_button(
260
+ # label="Download Guide 2",
261
+ # data=second_pdf,
262
+ # file_name="technical_guide.pdf",
263
+ # mime="application/pdf"
264
+ # )
265
+
266
+
267
+
268
+ with st.container():
269
+ st.markdown(f"""
270
+ <div class="sidebar-info">
271
+ <h4>Current Model: {selected_model}</h4>
272
+ <p><em>Note: Generated content may be inaccurate or false. Check important info.</em></p>
273
+ </div>
274
+ """, unsafe_allow_html=True)
275
+
276
+ # with st.sidebar:
277
+ # st.markdown("### 📘 Instructions")
278
+ # if pdf_file := st.file_uploader("Upload Instruction PDF", type="pdf"):
279
+ # embed_pdf(pdf_file)
280
+
281
+
282
+ feedback_url = "https://docs.google.com/forms/d/e/1FAIpQLSdZ_5mwW-pjqXHgxR0xriyVeRhqdQKgb5c-foXlYAV55Rilsg/viewform?usp=header"
283
+ st.sidebar.markdown(
284
+ f'<a href="{feedback_url}" target="_blank"><button style="width: 100%;">Feedback Form</button></a>',
285
+ unsafe_allow_html=True
286
+ )
287
+
288
+ # Display conversation
289
+ for message in st.session_state.messages:
290
+ with st.chat_message(message["role"]):
291
+ st.markdown(message["content"])
292
+
293
+ # Main content
294
+ if 'task_choice' not in st.session_state:
295
+ col1, col2 = st.columns(2)
296
+ with col1:
297
+ if st.button("📝 Data Generation", key="gen_button", help="Generate new data"):
298
+ st.session_state.task_choice = "Data Generation"
299
+ with col2:
300
+ if st.button("🏷️ Data Labeling", key="label_button", help="Label existing data"):
301
+ st.session_state.task_choice = "Data Labeling"
302
+
303
+ if "task_choice" in st.session_state:
304
+ if st.session_state.task_choice == "Data Generation":
305
+ st.header("📝 Data Generation")
306
+
307
+ # 1. Domain selection
308
+ domain_selection = st.selectbox("Domain", [
309
+ "Restaurant reviews", "E-Commerce reviews", "News", "AG News", "Tourism", "Custom"
310
+ ])
311
+
312
+ # 2. Handle custom domain input
313
+ custom_domain_valid = True # Assume valid until proven otherwise
314
+
315
+ if domain_selection == "Custom":
316
+ domain = st.text_input("Specify custom domain")
317
+ if not domain.strip():
318
+ st.error("Please specify a domain name.")
319
+ custom_domain_valid = False
320
+ else:
321
+ domain = domain_selection
322
+
323
+
324
+
325
+
326
+ # Classification type selection
327
+ classification_type = st.selectbox(
328
+ "Classification Type",
329
+ ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"]
330
+ )
331
+
332
+
333
+
334
+
335
+
336
+ #system role before
337
+
338
+ ####
339
+ # Labels setup based on classification type
340
+ #labels = []
341
+ labels = []
342
+ labels_valid = False
343
+ errors = []
344
+
345
def validate_binary_labels(labels):
    """Validate the two class names entered for binary classification.

    Args:
        labels: Sequence holding the first and second class name as typed
            by the user.

    Returns:
        A list of error messages. It is empty when both names are non-blank
        and differ after trimming whitespace and lower-casing.
    """
    problems = []
    cleaned = [name.strip().lower() for name in labels]

    required_messages = (
        "First class name is required.",
        "Second class name is required.",
    )
    for position, message in enumerate(required_messages):
        if not labels[position].strip():
            problems.append(message)

    # Only report a clash when every name is filled in; blank names are
    # already covered by the "required" messages above.
    if all(cleaned) and cleaned[0] == cleaned[1]:
        problems.append("Class names must be different.")
    return problems
356
+
357
+ if classification_type == "Sentiment Analysis":
358
+ st.write("### Sentiment Analysis Labels (Fixed)")
359
+ col1, col2, col3 = st.columns(3)
360
+ with col1:
361
+ st.text_input("First class", "Positive", disabled=True)
362
+ with col2:
363
+ st.text_input("Second class", "Negative", disabled=True)
364
+ with col3:
365
+ st.text_input("Third class", "Neutral", disabled=True)
366
+ labels = ["Positive", "Negative", "Neutral"]
367
+
368
+ elif classification_type == "Binary Classification":
369
+ st.write("### Binary Classification Labels")
370
+ col1, col2 = st.columns(2)
371
+ with col1:
372
+ label_1 = st.text_input("First class", "Positive")
373
+ with col2:
374
+ label_2 = st.text_input("Second class", "Negative")
375
+
376
+ labels = [label_1, label_2]
377
+ errors = validate_binary_labels(labels)
378
+
379
+ if errors:
380
+ st.error("\n".join(errors))
381
+ else:
382
+ st.success("Binary class names are valid and unique!")
383
+
384
+
385
+ # if classification_type == "Sentiment Analysis":
386
+ # st.write("### Sentiment Analysis Labels (Fixed)")
387
+ # col1, col2, col3 = st.columns(3)
388
+ # with col1:
389
+ # label_1 = st.text_input("First class", "Positive", disabled=True)
390
+ # with col2:
391
+ # label_2 = st.text_input("Second class", "Negative", disabled=True)
392
+ # with col3:
393
+ # label_3 = st.text_input("Third class", "Neutral", disabled=True)
394
+ # labels = ["Positive", "Negative", "Neutral"]
395
+
396
+
397
+ # elif classification_type == "Binary Classification":
398
+ # st.write("### Binary Classification Labels")
399
+ # col1, col2 = st.columns(2)
400
+
401
+ # with col1:
402
+ # label_1 = st.text_input("First class", "Positive")
403
+ # with col2:
404
+ # label_2 = st.text_input("Second class", "Negative")
405
+
406
+ # errors = []
407
+ # labels = [label_1.strip(), label_2.strip()]
408
+
409
+ # # Check for empty class names
410
+ # if not labels[0]:
411
+ # errors.append("First class name is required.")
412
+ # if not labels[1]:
413
+ # errors.append("Second class name is required.")
414
+
415
+ # # Check for duplicates
416
+ # if labels[0].lower() == labels[1].lower():
417
+ # errors.append("Class names must be different.")
418
+
419
+ # # Show errors or success
420
+ # if errors:
421
+ # for error in errors:
422
+ # st.error(error)
423
+ # else:
424
+ # st.success("Binary class names are valid and unique!")
425
+
426
+ #########
427
+
428
+ elif classification_type == "Multi-Class Classification":
429
+ st.write("### Multi-Class Classification Labels")
430
+
431
+ default_labels_by_domain = {
432
+ "News": ["Political", "Sports", "Entertainment", "Technology", "Business"],
433
+ "AG News": ["World", "Sports", "Business", "Sci/Tech"],
434
+ "Tourism": ["Accommodation", "Transportation", "Tourist Attractions",
435
+ "Food & Dining", "Local Experience", "Adventure Activities",
436
+ "Wellness & Spa", "Eco-Friendly Practices", "Family-Friendly",
437
+ "Luxury Tourism"],
438
+ "Restaurant reviews": ["Italian", "French", "American"],
439
+ "E-Commerce reviews": ["Mobile Phones & Accessories", "Laptops & Computers","Kitchen & Dining",
440
+ "Beauty & Personal Care", "Home & Furniture", "Clothing & Fashion",
441
+ "Shoes & Handbags", "Health & Wellness", "Electronics & Gadgets",
442
+ "Books & Stationery","Toys & Games", "Sports & Fitness",
443
+ "Grocery & Gourmet Food","Watches & Accessories", "Baby Products"]
444
+ }
445
+
446
+ num_classes = st.slider("Number of classes", 3, 15, 3)
447
+
448
+ # Get defaults for selected domain, or empty list
449
+ defaults = default_labels_by_domain.get(domain, [])
450
+
451
+ labels = []
452
+ errors = []
453
+ cols = st.columns(3)
454
+
455
+ for i in range(num_classes):
456
+ with cols[i % 3]:
457
+ default_value = defaults[i] if i < len(defaults) else ""
458
+ label_input = st.text_input(f"Class {i+1}", default_value)
459
+ normalized_label = label_input.strip().title()
460
+
461
+ if not normalized_label:
462
+ errors.append(f"Class {i+1} name is required.")
463
+ else:
464
+ labels.append(normalized_label)
465
+
466
+ # Check for duplicates (case-insensitive)
467
+ if len(labels) != len(set(labels)):
468
+ errors.append("Labels names must be unique (case-insensitive, normalized to Title Case).")
469
+
470
+ # Show validation results
471
+ if errors:
472
+ for error in errors:
473
+ st.error(error)
474
+ else:
475
+ st.success("All Labels names are valid and unique!")
476
+ labels_valid = not errors # Will be True only if there are no label errors
477
+
478
+
479
+
480
+
481
+ ##############
482
+
483
# Generation parameters: word-count range for each generated example.
col1, col2 = st.columns(2)
with col1:
    min_words = st.number_input("Min words", 1, 100, 20)
with col2:
    # The lower bound of "Max words" follows min_words, so the default must
    # follow it too: a fixed default of 50 is below min_value whenever
    # min_words > 50, and Streamlit rejects that with an exception.
    max_words = st.number_input("Max words", min_words, 100, max(min_words, 50))
489
+
490
+ # Few-shot examples
491
+ use_few_shot = st.toggle("Use few-shot examples")
492
+ few_shot_examples = []
493
+ if use_few_shot:
494
+ num_examples = st.slider("Number of few-shot examples", 1, 10, 1)
495
+ for i in range(num_examples):
496
+ with st.expander(f"Example {i+1}"):
497
+ content = st.text_area(f"Content", key=f"few_shot_content_{i}")
498
+ label = st.selectbox(f"Label", labels, key=f"few_shot_label_{i}")
499
+ if content and label:
500
+ few_shot_examples.append({"content": content, "label": label})
501
+
502
+ num_to_generate = st.number_input("Number of examples", 1, 200, 10)
503
+
504
+ # System role customization
505
+ default_system_role = (
506
+ f"You are a seasoned expert in {classification_type}, specializing in the {domain} domain. "
507
+ f"Your primary responsibility is to generate high-quality, diverse, and unique text examples "
508
+ f"tailored to this domain. Please ensure that each example adheres to the specified length "
509
+ f"requirements, ranging from {min_words} to {max_words} words, and avoid any repetition in the generated content."
510
+ )
511
+
512
+ # Allow user to modify the system role
513
+ system_role = st.text_area("Modify System Role (optional)",
514
+ value=default_system_role,
515
+ key="system_role_input")
516
+
517
+ # Store the system role in session state
518
+ st.session_state['system_role'] = system_role if system_role else default_system_role
519
+
520
+
521
+ # # System role customization
522
+ # default_system_role = f"You are a professional {classification_type} expert, your role is to generate text examples"
523
+ # f"for {domain} domain. Always generate unique diverse examples and do not repeat the generated data."
524
+ # f"The generated text should be between {min_words} to {max_words} words long."
525
+
526
+ # # Allow user to modify the system role
527
+ # system_role = st.text_area("Modify System Role (optional)",
528
+ # value=default_system_role,
529
+ # key="system_role_input")
530
+
531
+ # # Store the system role in session state
532
+ # st.session_state['system_role'] = system_role if system_role else default_system_role
533
+
534
+ user_prompt = st.text_area("User Prompt (optional)")
535
+
536
+ # Data Generation system prompt template including system role
537
+
538
+ prompt_template = PromptTemplate(
539
+ input_variables=["system_role", "classification_type", "domain", "num_examples",
540
+ "min_words", "max_words", "labels", "user_prompt", "few_shot_examples"],
541
+ template=(
542
+ "{system_role}\n"
543
+ "- Use the following parameters:\n"
544
+ "- Generate {num_examples} examples\n"
545
+ "- Each example should be between {min_words} to {max_words} words long\n"
546
+ #"- Word range: {min_words} - {max_words} words\n "
547
+ "- Use these labels: {labels}.\n"
548
+ "- Generate the examples in this format: 'Example text. Label: label'\n"
549
+ "- Do not include word counts or any additional information\n"
550
+ "- Always use your creativity and intelligence to generate unique and diverse text data\n"
551
+ "- Write unique examples every time.\n"
552
+ "- DO NOT REPEAT your gnerated text. \n"
553
+ "- For each Output, describe it once and move to the next.\n"
554
+ "- List each Output only once, and avoid repeating details.\n"
555
+ "- Additional instructions: {user_prompt}\n\n"
556
+ "- Use the following examples as a reference in the generation process\n\n {few_shot_examples}. \n"
557
+ "- Think step by step, generate numbered examples, and check each newly generated example to ensure it has not been generated before. If it has, modify it"
558
+
559
+ )
560
+ )
561
+
562
+
563
+
564
+ # Generate system prompt
565
+ system_prompt = prompt_template.format(
566
+ system_role=st.session_state['system_role'],
567
+ classification_type=classification_type,
568
+ domain=domain,
569
+ num_examples=num_to_generate,
570
+ min_words=min_words,
571
+ max_words=max_words,
572
+ labels=", ".join(labels),
573
+ user_prompt=user_prompt,
574
+ few_shot_examples="\n".join([f"{ex['content']}\nLabel: {ex['label']}" for ex in few_shot_examples]) if few_shot_examples else ""
575
+ )
576
+
577
+ # Store system prompt in session state
578
+ st.session_state['system_prompt'] = system_prompt
579
+
580
+ # Display system prompt
581
+ st.write("System Prompt:")
582
+ st.text_area("Current System Prompt", value=st.session_state['system_prompt'],
583
+ height=400, disabled=True)
584
+
585
+
586
if st.button("🎯 Generate Examples"):
    # Validate all inputs first; generation must only run on valid input.
    # `errors` holds the label problems collected while the label widgets
    # were rendered earlier in this branch.
    problems = []
    if domain_selection == "Custom" and not domain.strip():
        problems.append("Custom domain name is required.")

    cleaned_labels = [lbl.strip().lower() for lbl in labels]
    if errors:
        problems.append("Please fix the label errors before generating examples.")
    elif any(not lbl for lbl in cleaned_labels):
        problems.append("All class labels must be filled in.")
    elif len(cleaned_labels) != len(set(cleaned_labels)):
        problems.append("Class names must be unique.")

    if problems:
        for problem in problems:
            st.warning(problem)
    else:
        with st.spinner("Generating examples..."):
            try:
                stream = client.chat.completions.create(
                    model=selected_model,
                    messages=[{"role": "system", "content": st.session_state['system_prompt']}],
                    temperature=temperature,
                    stream=True,
                    max_tokens=80000,
                    top_p=0.9,
                )

                # Record the exchange so it is re-rendered on later reruns.
                st.session_state.messages.append({"role": "user", "content": system_prompt})
                response = st.write_stream(stream)
                st.session_state.messages.append({"role": "assistant", "content": response})

                # Always keep the latest response; writing it only when the
                # key was missing left a stale value after the first run.
                st.session_state.response = response

                # Make sure the export slots exist even when nothing parses.
                if 'generated_examples' not in st.session_state:
                    st.session_state.generated_examples = []
                if 'generated_examples_csv' not in st.session_state:
                    st.session_state.generated_examples_csv = None
                if 'generated_examples_json' not in st.session_state:
                    st.session_state.generated_examples_json = None

                # Parse lines of the form "<text> Label: <label>"; the split
                # is on the LAST "Label:" so the text may contain the word.
                examples_list = []
                for line in response.split('\n'):
                    text, separator, label = line.rpartition('Label:')
                    text = text.strip()
                    label = label.strip()
                    if separator and text and label:
                        examples_list.append({
                            'text': text,
                            'label': label,
                            'system_prompt': st.session_state.system_prompt,
                            'system_role': st.session_state.system_role,
                            'task_type': 'Data Generation',
                            'Use few-shot example?': 'Yes' if use_few_shot else 'No',
                        })

                if examples_list:
                    # Update session state with the new data
                    st.session_state.generated_examples = examples_list

                    # Generate CSV and JSON payloads for download
                    df = pd.DataFrame(examples_list)
                    st.session_state.generated_examples_csv = df.to_csv(index=False).encode('utf-8')
                    st.session_state.generated_examples_json = json.dumps(examples_list, indent=2).encode('utf-8')

                    st.download_button(
                        "📥 Download Generated Examples (CSV)",
                        st.session_state.generated_examples_csv,
                        "generated_examples.csv",
                        "text/csv",
                        key='download-csv-persistent'
                    )

                    # Separator shown between the two download buttons
                    st.markdown("""
                    <div style='text-align: left; margin:15px 0; font-weight: 600; color: #666;'>. . . . . . or</div>
                    """, unsafe_allow_html=True)

                    st.download_button(
                        "📥 Download Generated Examples (JSON)",
                        st.session_state.generated_examples_json,
                        "generated_examples.json",
                        "application/json",
                        key='download-json-persistent'
                    )

                    # The previous "Continue" button read an undefined
                    # `follow_up` name and called the removed
                    # st.experimental_rerun(). It was also nested inside this
                    # button's branch, so its click could never be seen on
                    # the rerun. An on_click callback runs before the rerun
                    # and therefore works from here.
                    def _switch_to_labeling():
                        st.session_state.task_choice = "Data Labeling"

                    st.button("Continue to Data Labeling", on_click=_switch_to_labeling)
                else:
                    st.warning("No examples in the expected 'Example text. Label: label' format were found in the response.")

            except Exception as e:
                # UI boundary: surface the failure to the user instead of crashing the app.
                st.error("An error occurred during generation.")
                st.error(f"Details: {e}")
703
+
704
+
705
+ # Labeling Process
706
+ elif st.session_state.task_choice == "Data Labeling":
707
+ st.header("🏷️ Data Labeling")
708
+ #new new new
709
+ # 1. Domain selection
710
+ # 1. Domain selection
711
+
712
+
713
+ domain_selection = st.selectbox("Domain", ["Restaurant reviews", "E-Commerce reviews", "News", "AG News", "Tourism", "Custom"])
714
+ # 2. Handle custom domain input
715
+ custom_domain_valid = True # Assume valid until proven otherwise
716
+
717
+ if domain_selection == "Custom":
718
+ domain = st.text_input("Specify custom domain")
719
+ if not domain.strip():
720
+ st.error("Please specify a domain name.")
721
+ custom_domain_valid = False
722
+ else:
723
+ domain = domain_selection
724
+
725
+
726
+ # # Classification type selection
727
+ # classification_type = st.selectbox(
728
+ # "Classification Type",
729
+ # ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"]
730
+ # )
731
+ #NNew edit
732
+ # classification_type = st.selectbox(
733
+ # "Classification Type",
734
+ # #["Sentiment Analysis", "Binary Classification", "Multi-Class Classification", "Named Entity Recognition (NER)"],
735
+ # ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"],
736
+ # key="label_class_type"
737
+ # )
738
+
739
+ # Classification type selection
740
+ classification_type = st.selectbox(
741
+ "Classification Type",
742
+ ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification", "Named Entity Recognition (NER)"]
743
+ )
744
+ #NNew edit
745
+ # Labels setup based on classification type
746
+ labels = []
747
+ labels_valid = False
748
+ errors = []
749
+
750
+ if classification_type == "Sentiment Analysis":
751
+ st.write("### Sentiment Analysis Labels (Fixed)")
752
+ col1, col2, col3 = st.columns(3)
753
+ with col1:
754
+ label_1 = st.text_input("First class", "Positive", disabled=True)
755
+ with col2:
756
+ label_2 = st.text_input("Second class", "Negative", disabled=True)
757
+ with col3:
758
+ label_3 = st.text_input("Third class", "Neutral", disabled=True)
759
+ labels = ["Positive", "Negative", "Neutral"]
760
+
761
+
762
+ elif classification_type == "Binary Classification":
763
+ st.write("### Binary Classification Labels")
764
+ col1, col2 = st.columns(2)
765
+
766
+ with col1:
767
+ label_1 = st.text_input("First class", "Positive")
768
+ with col2:
769
+ label_2 = st.text_input("Second class", "Negative")
770
+
771
+ errors = []
772
+ labels = [label_1.strip(), label_2.strip()]
773
+
774
+
775
+ # Strip and lower-case labels for validation
776
+ label_1 = labels[0].strip()
777
+ label_2 = labels[1].strip()
778
+
779
+ # Check for empty class names
780
+ if not label_1:
781
+ errors.append("First class name is required.")
782
+ if not label_2:
783
+ errors.append("Second class name is required.")
784
+
785
+ # Check for duplicates (case insensitive)
786
+ if label_1.lower() == label_2.lower() and label_1 and label_2:
787
+ errors.append("Class names must be different.")
788
+
789
+ # Show errors or success
790
+ if errors:
791
+ for error in errors:
792
+ st.error(error)
793
+ else:
794
+ st.success("Binary class names are valid and unique!")
795
+
796
+
797
+ elif classification_type == "Multi-Class Classification":
798
+ st.write("### Multi-Class Classification Labels")
799
+
800
+ default_labels_by_domain = {
801
+ "News": ["Political", "Sports", "Entertainment", "Technology", "Business"],
802
+ "AG News": ["World", "Sports", "Business", "Sci/Tech"],
803
+ "Tourism": ["Accommodation", "Transportation", "Tourist Attractions",
804
+ "Food & Dining", "Local Experience", "Adventure Activities",
805
+ "Wellness & Spa", "Eco-Friendly Practices", "Family-Friendly",
806
+ "Luxury Tourism"],
807
+ "Restaurant reviews": ["Italian", "French", "American"],
808
+ "E-Commerce reviews": ["Mobile Phones & Accessories", "Laptops & Computers","Kitchen & Dining",
809
+ "Beauty & Personal Care", "Home & Furniture", "Clothing & Fashion",
810
+ "Shoes & Handbags", "Health & Wellness", "Electronics & Gadgets",
811
+ "Books & Stationery","Toys & Games", "Sports & Fitness",
812
+ "Grocery & Gourmet Food","Watches & Accessories", "Baby Products"]
813
+ }
814
+
815
+
816
+
817
+ # Ask user how many classes they want to define
818
+ num_classes = st.slider("Select the number of classes (labels)", min_value=3, max_value=10, value=3)
819
+
820
+ # Use default labels based on selected domain, if available
821
+ defaults = default_labels_by_domain.get(domain, [])
822
+
823
+ labels = []
824
+ errors = []
825
+ cols = st.columns(3) # For nicely arranged label inputs
826
+
827
+ for i in range(num_classes):
828
+ with cols[i % 3]: # Distribute inputs across columns
829
+ default_value = defaults[i] if i < len(defaults) else ""
830
+ label_input = st.text_input(f"Label {i + 1}", default_value)
831
+ normalized_label = label_input.strip().title()
832
+
833
+ if not normalized_label:
834
+ errors.append(f"Label {i + 1} is required.")
835
+ else:
836
+ labels.append(normalized_label)
837
+
838
+ # Check for duplicates (case-insensitive)
839
+ normalized_set = {label.lower() for label in labels}
840
+ if len(labels) != len(normalized_set):
841
+ errors.append("Label names must be unique (case-insensitive).")
842
+
843
+ # Show validation results
844
+ if errors:
845
+ for error in errors:
846
+ st.error(error)
847
+ else:
848
+ st.success("All label names are valid and unique!")
849
+
850
+ labels_valid = not errors # True if no validation errors
851
+
852
+ elif classification_type == "Named Entity Recognition (NER)":
853
+ # NER entity options
854
+ ner_entities = [
855
+ "PERSON - Names of people, fictional characters, historical figures",
856
+ "ORG - Companies, institutions, agencies, teams",
857
+ "LOC - Physical locations (mountains, oceans, etc.)",
858
+ "GPE - Countries, cities, states, political regions",
859
+ "DATE - Calendar dates, years, centuries",
860
+ "TIME - Times, durations",
861
+ "MONEY - Monetary values with currency"
862
+ ]
863
+ selected_entities = st.multiselect(
864
+ "Select entities to recognize",
865
+ ner_entities,
866
+ default=["PERSON - Names of people, fictional characters, historical figures",
867
+ "ORG - Companies, institutions, agencies, teams",
868
+ "LOC - Physical locations (mountains, oceans, etc.)",
869
+ "GPE - Countries, cities, states, political regions",
870
+ "DATE - Calendar dates, years, centuries",
871
+ "TIME - Times, durations",
872
+ "MONEY - Monetary values with currency"],
873
+ key="ner_entity_selection"
874
+ )
875
+
876
+ # Extract just the entity type (before the dash)
877
+ labels = [entity.split(" - ")[0] for entity in selected_entities]
878
+
879
+ if not labels:
880
+ st.warning("Please select at least one entity type")
881
+ labels = ["PERSON"] # Default if nothing selected
882
+
883
+
884
+
885
+
886
+
887
+ # New edit: the lines below are a commented-out variant of the multi-class label branch (not executed).
888
+ # elif classification_type == "Multi-Class Classification":
889
+ # st.write("### Multi-Class Classification Labels")
890
+
891
+ # default_labels_by_domain = {
892
+ # "News": ["Political", "Sports", "Entertainment", "Technology", "Business"],
893
+ # "AG News": ["World", "Sports", "Business", "Sci/Tech"],
894
+ # "Tourism": ["Accommodation", "Transportation", "Tourist Attractions",
895
+ # "Food & Dining", "Local Experience", "Adventure Activities",
896
+ # "Wellness & Spa", "Eco-Friendly Practices", "Family-Friendly",
897
+ # "Luxury Tourism"],
898
+ # "Restaurant reviews": ["Italian", "French", "American"]
899
+ # }
900
+ # num_classes = st.slider("Number of classes", 3, 10, 3)
901
+
902
+ # # Get defaults for selected domain, or empty list
903
+ # defaults = default_labels_by_domain.get(domain, [])
904
+
905
+ # labels = []
906
+ # errors = []
907
+ # cols = st.columns(3)
908
+
909
+ # for i in range(num_classes):
910
+ # with cols[i % 3]:
911
+ # default_value = defaults[i] if i < len(defaults) else ""
912
+ # label_input = st.text_input(f"Class {i+1}", default_value)
913
+ # normalized_label = label_input.strip().title()
914
+
915
+ # if not normalized_label:
916
+ # errors.append(f"Class {i+1} name is required.")
917
+ # else:
918
+ # labels.append(normalized_label)
919
+
920
+ # # Check for duplicates (case-insensitive)
921
+ # if len(labels) != len(set(labels)):
922
+ # errors.append("Labels names must be unique (case-insensitive, normalized to Title Case).")
923
+
924
+ # # Show validation results
925
+ # if errors:
926
+ # for error in errors:
927
+ # st.error(error)
928
+ # else:
929
+ # st.success("All Labels names are valid and unique!")
930
+ # labels_valid = not errors # Will be True only if there are no label errors
931
+
932
+
933
+
934
+
935
+ # else:
936
+ # num_classes = st.slider("Number of classes", 3, 23, 3, key="label_num_classes")
937
+ # labels = []
938
+ # cols = st.columns(3)
939
+ # for i in range(num_classes):
940
+ # with cols[i % 3]:
941
+ # label = st.text_input(f"Class {i+1}", f"Class_{i+1}", key=f"label_class_{i}")
942
+ # labels.append(label)
943
+
944
# Optional few-shot examples: each one is a text plus one of the current labels.
use_few_shot = st.toggle("Use few-shot examples for labeling")
few_shot_examples = []
if use_few_shot:
    num_few_shot = st.slider("Number of few-shot examples", 1, 10, 1)
    for idx in range(num_few_shot):
        with st.expander(f"Few-shot Example {idx+1}"):
            shot_text = st.text_area("Content", key=f"label_few_shot_content_{idx}")
            shot_label = st.selectbox("Label", labels, key=f"label_few_shot_label_{idx}")
        # Only examples with both a text and a label are kept.
        if not (shot_text and shot_label):
            continue
        few_shot_examples.append(f"{shot_text}\nLabel: {shot_label}")
954
+
955
num_examples = st.number_input("Number of examples to classify", 1, 100, 1)

if num_examples > 20:
    # Above 20 examples a single box is used, one example per line.
    examples_text = st.text_area(
        "Enter examples (one per line)",
        height=300,
        help="Enter each example on a new line"
    )
    raw_rows = examples_text.split('\n') if examples_text else []
    trimmed_rows = (row.strip() for row in raw_rows)
    # Blank rows are skipped and anything beyond the requested count is dropped.
    examples_to_classify = [row for row in trimmed_rows if row][:num_examples]
else:
    # Up to 20 examples get one text box each; empty boxes are ignored.
    typed_examples = [
        st.text_area(f"Example {i+1}", key=f"example_{i}")
        for i in range(num_examples)
    ]
    examples_to_classify = [text for text in typed_examples if text]
973
+
974
# System role customization.
# The default role is built from three sentences joined with single spaces.
# Adjacent string literals concatenate with no separator, which previously
# produced "expert.Your task ... domain.Ensure ...".
default_system_role = " ".join([
    f"You are a highly skilled {classification_type} expert.",
    f"Your task is to accurately classify the provided text examples within the {domain} domain.",
    "Ensure that all classifications are precise, context-aware, and aligned with domain-specific standards and best practices.",
])

# Allow the user to modify the system role.
system_role = st.text_area("Modify System Role (optional)",
                           value=default_system_role,
                           key="system_role_input")

# Store the effective role in session state, falling back to the default
# when the text box has been cleared.
st.session_state['system_role'] = system_role if system_role else default_system_role
987
+
988
+
989
+ # #New Wedyan
990
+ # default_system_role = f"You are a professional {classification_type} expert, your role is to classify the provided text examples for {domain} domain."
991
+ # system_role = st.text_area("Modify System Role (optional)",
992
+ # value=default_system_role,
993
+ # key="system_role_input")
994
+ # st.session_state['system_role'] = system_role if system_role else default_system_role
995
+ # # Labels initialization
996
+ # #labels = []
997
+ # ####
998
+
999
user_prompt = st.text_area("User prompt (optional)", key="label_instructions")

# Few-shot examples are separated by a blank line; an empty list yields "".
few_shot_text = "\n\n".join(few_shot_examples)

# Examples are numbered from 1, one per line.
examples_text = "\n".join(
    f"{position}. {text}"
    for position, text in enumerate(examples_to_classify, start=1)
)
1003
+
1004
# Pick the prompt wording for the selected task, then build a single template.
is_ner_task = classification_type == "Named Entity Recognition (NER)"

if is_ner_task:
    template_variables = ["system_role", "labels", "few_shot_examples", "examples", "domain", "user_prompt"]
    template_parts = [
        "{system_role}\n",
        "- For each text example provided, identify all entities of the requested types.\n",
        "- Use the following entities: {labels}.\n",
        "- Return each example followed by the entities you found in this format: 'Example text.\n Entities: [ENTITY_TYPE: entity text\n, ENTITY_TYPE: entity text\n, ...] or [No entities found]'\n",
        "- If no entities of the requested types are found, indicate 'No entities found' in this text.\n",
        "- Be precise about entity boundaries - don't include unnecessary words.\n",
        "- Do not provide any additional information or explanations.\n",
        "- Additional instructions:\n {user_prompt}\n\n",
        "- Use user few-shot examples as guidance if provided:\n{few_shot_examples}\n\n",
        "- Examples to analyze:\n{examples}\n\n",
        "Output:\n",
    ]
else:
    # Data labeling (classification) wording.
    template_variables = ["system_role", "classification_type", "labels", "few_shot_examples", "examples","domain", "user_prompt"]
    template_parts = [
        "{system_role}\n",
        "- Use the following instructions:\n",
        "- Use the following labels: {labels}.\n",
        "- Return the classified text followed by the label in this format: 'text. Label: [label]'\n",
        "- Do not provide any additional information or explanations\n",
        "- User prompt:\n {user_prompt}\n\n",
        "- Use user provided examples as guidence in the classification process:\n\n {few_shot_examples}\n",
        "- Examples to classify:\n{examples}\n\n",
        "- Think step by step then classify the examples",
    ]

label_prompt_template = PromptTemplate(
    input_variables=template_variables,
    template="".join(template_parts),
)
1042
+
1043
+
1044
+
1045
# Build the prompt preview from the same pieces the "Label Data" handler
# sends: the numbered `examples_text` and the blank-line separated
# `few_shot_text`. Previously the preview used unnumbered examples and
# single-newline few-shots, so the prompt shown here (and stored in
# session state) differed from the one the model received.
system_prompt = label_prompt_template.format(
    system_role=st.session_state['system_role'],
    classification_type=classification_type,
    domain=domain,
    examples=examples_text,
    labels=", ".join(labels),
    user_prompt=user_prompt,
    few_shot_examples=few_shot_text,
)

# Keep the prompt in session state; the labeling step copies it into every
# exported row.
st.session_state['system_prompt'] = system_prompt

# Read-only preview of the prompt.
st.write("System Prompt:")
st.text_area("System Prompt", value=st.session_state['system_prompt'], height=300, max_chars=None, key=None, help=None, disabled=True)
1075
+
1076
+
1077
+
1078
def _split_labeled_lines(response_text, marker, text_on_previous_line=False):
    """Pair each example text with the value that follows *marker* in a model reply.

    Args:
        response_text: Full text returned by the model.
        marker: Separator between an example and its annotation, e.g.
            ``'Label:'`` or ``'Entities:'``. The last occurrence on a line
            is the one that splits it.
        text_on_previous_line: When true, a line that starts with *marker*
            takes its example text from the closest preceding non-empty line
            that did not contain the marker. The NER prompt asks for exactly
            that layout ('Example text.' then 'Entities: ...' on a new line).

    Returns:
        A list of ``(text, value)`` tuples; pairs with an empty side are dropped.
    """
    pairs = []
    previous_text = ""
    for raw_line in response_text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        head, found, tail = line.rpartition(marker)
        if not found:
            # Remember the line: it may be the text for a marker line that follows.
            previous_text = line
            continue
        text = head.strip()
        if not text and text_on_previous_line:
            text = previous_text
        previous_text = ""
        value = tail.strip()
        if text and value:
            pairs.append((text, value))
    return pairs


if st.button("🏷️ Label Data"):
    if examples_to_classify:
        with st.spinner("Labeling data..."):
            is_ner_task = classification_type == "Named Entity Recognition (NER)"

            # Build the prompt that is sent to the model. As before, the NER
            # template is formatted without `classification_type`.
            format_kwargs = {
                "system_role": st.session_state['system_role'],
                "labels": ", ".join(labels),
                "domain": domain,
                "few_shot_examples": few_shot_text,
                "examples": examples_text,
                "user_prompt": user_prompt,
            }
            if not is_ner_task:
                format_kwargs["classification_type"] = classification_type
            system_prompt = label_prompt_template.format(**format_kwargs)

            try:
                stream = client.chat.completions.create(
                    model=selected_model,
                    messages=[{"role": "system", "content": system_prompt}],
                    temperature=temperature,
                    stream=True,
                    max_tokens=20000,
                    top_p=0.9,
                )

                # Log the exchange in the chat history while streaming the reply.
                st.session_state.messages.append({"role": "user", "content": system_prompt})
                response = st.write_stream(stream)
                st.session_state.messages.append({"role": "assistant", "content": response})

                # Always record the prompt and reply of THIS run. The previous
                # "only if missing" guards kept the values of the first run
                # for the rest of the session.
                st.session_state.system_prompt = system_prompt
                st.session_state.response = response

                # Initialize the remaining session state variables if they don't exist.
                if 'generated_examples' not in st.session_state:
                    st.session_state.generated_examples = []
                if 'generated_examples_csv' not in st.session_state:
                    st.session_state.generated_examples_csv = None
                if 'generated_examples_json' not in st.session_state:
                    st.session_state.generated_examples_json = None

                # Turn the reply into one row per labeled example.
                if is_ner_task:
                    marker, value_key, task_type = "Entities:", "entities", "Named Entity Recognition (NER)"
                else:
                    marker, value_key, task_type = "Label:", "label", "Data Labeling"

                labeled_examples = [
                    {
                        'text': text,
                        value_key: value,
                        'system_prompt': st.session_state.system_prompt,
                        'system_role': st.session_state.system_role,
                        'task_type': task_type,
                        'Use few-shot example?': 'Yes' if use_few_shot else 'No',
                    }
                    for text, value in _split_labeled_lines(
                        response, marker, text_on_previous_line=is_ner_task
                    )
                ]

                # Save and provide download options.
                if labeled_examples:
                    st.session_state.labeled_examples = labeled_examples

                    # Convert to CSV and JSON.
                    df = pd.DataFrame(labeled_examples)
                    st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')
                    st.session_state.labeled_examples_json = json.dumps(labeled_examples, indent=2).encode('utf-8')

                    st.download_button(
                        "📥 Download Labeled Examples (CSV)",
                        st.session_state.labeled_examples_csv,
                        "labeled_examples.csv",
                        "text/csv",
                        key='download-labeled-csv'
                    )

                    st.markdown("""
<div style='text-align: left; margin:15px 0; font-weight: 600; color: #666;'>. . . . . . or</div>
""", unsafe_allow_html=True)

                    st.download_button(
                        "📥 Download Labeled Examples (JSON)",
                        st.session_state.labeled_examples_json,
                        "labeled_examples.json",
                        "application/json",
                        key='download-labeled-json'
                    )

                    # Display the labeled examples.
                    st.markdown("##### 📋 Labeled Examples Preview")
                    st.dataframe(df, use_container_width=True)

                # NOTE(review): this button is nested inside the "Label Data"
                # button branch. A click reruns the script with the outer
                # button unset, so this handler presumably never executes;
                # consider moving it out of the branch.
                if st.button("Continue"):
                    # The radio that used to define `follow_up` is disabled, so
                    # read its keyed value defensively instead of hitting a
                    # NameError on the bare name.
                    follow_up = st.session_state.get("labeling_follow_up")
                    # `st.experimental_rerun` was replaced by `st.rerun`;
                    # prefer the new name and fall back on older Streamlit versions.
                    rerun = getattr(st, "rerun", None) or st.experimental_rerun
                    if follow_up == "Label more data":
                        st.session_state.examples_to_classify = []
                        rerun()
                    elif follow_up == "Data Generation":
                        # Was "Data Labeling", which left the user on this page.
                        # NOTE(review): confirm this matches the task selector's value.
                        st.session_state.task_choice = "Data Generation"
                        rerun()

            except Exception as e:
                st.error("An error occurred during labeling.")
                st.error(f"Details: {e}")
    else:
        st.warning("Please enter at least one example to classify.")
1272
+
1273
+ #st.session_state.messages.append({"role": "assistant", "content": response})
1274
+
1275
+
1276
+
1277
+
1278
# Footer: a horizontal rule followed by a centered credit line.
footer_html = """
<div style='text-align: center'>
<p>Made with ❤️ by Wedyan AlSakran 2025</p>
</div>
"""
st.markdown("---")
st.markdown(footer_html, unsafe_allow_html=True)