Wedyan2023 commited on
Commit
2336e88
·
verified ·
1 Parent(s): fb21128

Create app100.py

Browse files
Files changed (1) hide show
  1. app100.py +1247 -0
app100.py ADDED
@@ -0,0 +1,1247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import os
4
+ import json
5
+ import base64
6
+ import random
7
+ from streamlit_pdf_viewer import pdf_viewer
8
+ from langchain.prompts import PromptTemplate
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+ from openai import OpenAI
12
+ from dotenv import load_dotenv
13
+ import warnings
14
+
15
+ warnings.filterwarnings('ignore')
16
+
17
# Load variables from a local .env file (if present) before the environment
# is read. The previous bare `os.getenv("OAUTH_CLIENT_ID")` call discarded its
# result and ran before load_dotenv(), so it had no effect and was removed.
load_dotenv()

# OpenAI-compatible client pointed at the Hugging Face Inference API.
# The Hugging Face API token is read from the `RAM` environment variable.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1",
    api_key=os.environ.get("RAM"),
)
26
+
27
# Create the working directories the app expects. exist_ok=True replaces the
# separate existence check, so a rerun (or a concurrent session creating the
# same directory) is a no-op instead of an error.
for dir_name in ("data", "feedback"):
    os.makedirs(dir_name, exist_ok=True)
31
+
32
+ # Custom CSS
33
+ st.markdown("""
34
+ <style>
35
+ .stButton > button {
36
+ width: 100%;
37
+ margin-bottom: 10px;
38
+ background-color: #4CAF50;
39
+ color: white;
40
+ border: none;
41
+ padding: 10px;
42
+ border-radius: 5px;
43
+ }
44
+ .task-button {
45
+ background-color: #2196F3 !important;
46
+ }
47
+ .stSelectbox {
48
+ margin-bottom: 20px;
49
+ }
50
+ .output-container {
51
+ padding: 20px;
52
+ border-radius: 5px;
53
+ border: 1px solid #ddd;
54
+ margin: 10px 0;
55
+ }
56
+ .status-container {
57
+ padding: 10px;
58
+ border-radius: 5px;
59
+ margin: 10px 0;
60
+ }
61
+ .sidebar-info {
62
+ padding: 10px;
63
+ background-color: #f0f2f6;
64
+ border-radius: 5px;
65
+ margin: 10px 0;
66
+ }
67
+ .feedback-button {
68
+ background-color: #ff9800 !important;
69
+ }
70
+ .feedback-container {
71
+ padding: 15px;
72
+ background-color: #f5f5f5;
73
+ border-radius: 5px;
74
+ margin: 15px 0;
75
+ }
76
+ </style>
77
+ """, unsafe_allow_html=True)
78
+
79
+ # Helper functions
80
def read_csv_with_encoding(file):
    """Read a CSV into a DataFrame, trying several text encodings in turn.

    Args:
        file: A filesystem path or a file-like object accepted by
            ``pandas.read_csv``.

    Returns:
        The ``pandas.DataFrame`` parsed with the first encoding that decodes.

    Raises:
        UnicodeDecodeError: If none of the candidate encodings can decode
            the input.
    """
    encodings = ("utf-8", "latin1", "iso-8859-1", "cp1252")
    # Remember where a file-like input starts: a failed attempt may leave the
    # stream partly or fully consumed, so each retry must rewind first.
    # Plain paths have no tell()/seek() and are re-opened by pandas anyway.
    rewindable = hasattr(file, "seek") and hasattr(file, "tell")
    start = file.tell() if rewindable else None
    for encoding in encodings:
        if rewindable:
            file.seek(start)
        try:
            return pd.read_csv(file, encoding=encoding)
        except UnicodeDecodeError:
            continue
    # UnicodeDecodeError takes (encoding, object, start, end, reason); the
    # single-argument form raises TypeError instead of the intended error.
    raise UnicodeDecodeError(
        ", ".join(encodings),
        b"",
        0,
        1,
        "Failed to read file with any supported encoding",
    )
88
+
89
+ #def save_feedback(feedback_data):
90
+ #feedback_file = 'feedback/user_feedback.csv'
91
+ #feedback_df = pd.DataFrame([feedback_data])
92
+
93
+ #if os.path.exists(feedback_file):
94
+ #feedback_df.to_csv(feedback_file, mode='a', header=False, index=False)
95
+ #else:
96
+ #feedback_df.to_csv(feedback_file, index=False)
97
+
98
def reset_conversation():
    """Clear both stored histories and forget the currently selected task.

    Used as the ``on_click`` callback of the "New Conversation" button.
    Always returns ``None``.
    """
    state = st.session_state
    state.conversation = []
    state.messages = []
    # Removing the task choice sends the user back to the task-selection buttons.
    if 'task_choice' in state:
        del state.task_choice
104
+ #new 24 March
105
+ #user_input = st.text_input("Enter your prompt:")
106
+ ###########33
107
+
108
# Seed the session-state entries the rest of the script reads, leaving any
# value that already exists from a previous rerun untouched.
for state_key, initial_value in (
    ("messages", []),
    ("examples_to_classify", []),
    ("system_role", ""),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value
115
+
116
+
117
+
118
+ # Main app title
119
+ st.title("🤖🦙 Text Data Labeling and Generation App")
120
+ # def embed_pdf_sidebar(pdf_path):
121
+ # with open(pdf_path, "rb") as f:
122
+ # base64_pdf = base64.b64encode(f.read()).decode('utf-8')
123
+ # pdf_display = f"""
124
+ # <iframe src="data:application/pdf;base64,{base64_pdf}"
125
+ # width="100%" height="400" type="application/pdf"></iframe>
126
+ # """
127
+ # st.markdown(pdf_display, unsafe_allow_html=True)
128
+ #
129
+
130
+
131
+ # Sidebar settings
132
+ with st.sidebar:
133
+ st.title("⚙️ Settings")
134
+ # Add PDF upload section
135
+ #
136
+ # if st.button("📘 Show Instructions"):
137
+ # # This should be a path to a local file
138
+ # pdf_path = os.path.join("Streamlit.pdf")
139
+ # pdf_viewer(
140
+ # pdf_path,
141
+ # width="100%",
142
+ # height=300,
143
+ # render_text=True
144
+ # )
145
+ # with st.sidebar:
146
+ # with st.expander("📘 View Instructions"):
147
+ # pdf_viewer("Streamlit.pdf", width="100%", height=300, render_text=True)
148
+
149
+ #
150
+ ###4
151
+ # with st.sidebar:
152
+ # st.markdown("### 📘 Instructions")
153
+ # st.markdown("[📄 Open Instructions PDF](/file/instructions.pdf)")
154
+
155
+
156
+
157
+
158
+ #
159
+ ####2
160
+ # #with st.sidebar:
161
+ # st.markdown("### 📘 Instructions")
162
+
163
+ # # PDF served from Space's file system
164
+ # pdf_url = "/file/instructions.pdf"
165
+
166
+ # st.markdown(f"""
167
+ # <a href="{pdf_url}" target="_blank">
168
+ # <button style='padding:10px;width:100%;font-size:16px;'>📄 Open Instructions PDF</button>
169
+ # </a>
170
+ # """, unsafe_allow_html=True)
171
+ # ###3 working code
172
+ # with st.sidebar:
173
+ # with open("instructions.pdf", "rb") as f:
174
+ # st.sidebar.download_button(
175
+ # label="📄 Download Instructions PDF",
176
+ # data=f,
177
+ # file_name="instructions.pdf",
178
+ # mime="application/pdf"
179
+ # )
180
+
181
+ ###6
182
+ #this last code works
183
with st.sidebar:
    st.markdown("### 📘Data Generation and Labeling Instructions")
    # Offer the instructions PDF for download, but do not take the whole app
    # down when the file is missing from the working directory.
    instructions_pdf = Path("User instructions.pdf")
    if instructions_pdf.is_file():
        st.download_button(
            label="📄 Download Instructions PDF",
            data=instructions_pdf.read_bytes(),
            file_name="User instructions.pdf",
            mime="application/pdf",
        )
    else:
        st.warning("Instructions PDF is not available.")
194
+
195
+
196
+ #works with blu color text
197
+ # with st.sidebar:
198
+ # # Stylish "Instructions" label
199
+ # st.markdown("<h4 style='color: #4A90E2;'>📘 Instructions</h4>", unsafe_allow_html=True)
200
+
201
+ # # PDF download button
202
+ # with open("instructions.pdf", "rb") as f:
203
+ # st.download_button(
204
+ # label="📄 Download Instructions PDF",
205
+ # data=f,
206
+ # file_name="instructions.pdf",
207
+ # mime="application/pdf"
208
+ # )
209
+
210
+ ###5
211
+
212
+ #with st.sidebar:
213
+ # st.markdown("### 📘 Instructions")
214
+
215
+ # # PDF served from Space's file system
216
+ # pdf_url = "/file/instructions.pdf"
217
+
218
+ # st.markdown(f"""
219
+ # <a href="{pdf_url}" target="_blank">
220
+ # <button style='padding:15px;width:100%;font-size:16px;'> 📄 Open Instructions PDF</button>
221
+ # </a>
222
+ # """, unsafe_allow_html=True)
223
+
224
+
225
+
226
+ selected_model = st.selectbox(
227
+ "Select Model",
228
+ ["meta-llama/Llama-3.3-70B-Instruct", "meta-llama/Llama-3.2-3B-Instruct","meta-llama/Llama-4-Scout-17B-16E-Instruct", "meta-llama/Meta-Llama-3-8B-Instruct",
229
+ "meta-llama/Llama-3.1-70B-Instruct"],
230
+ key='model_select'
231
+ )
232
+
233
+ temperature = st.slider(
234
+ "Temperature",
235
+ 0.0, 1.0, 0.7,
236
+ help="Controls randomness in generation"
237
+ )
238
+
239
+ st.button("🔄 New Conversation", on_click=reset_conversation)
240
+ # st.markdown("### 📘 Instructions")
241
+ # embed_pdf_sidebar("Streamlit.pdf")
242
+ #Add PDF Instructions
243
+ # with st.expander("📚 Instructions"):
244
+ # st.write("View or download instruction guides:")
245
+
246
+ # # Option 1: Using st.download_button for PDFs stored in your app
247
+ # with open("file:///C:/Users/hp/Downloads/Streamlit.pdf", "rb") as file:
248
+ # first_pdf = file.read()
249
+ # st.download_button(
250
+ # label="Download Guide 1",
251
+ # data=first_pdf,
252
+ # file_name="user_guide.pdf",
253
+ # mime="application/pdf"
254
+ # )
255
+
256
+ # #with open("https://huggingface.co/spaces/Wedyan2023/COPY/blob/main/Streamlit.pdf", "rb") as file:
257
+ # with open("file:///C:/Users/hp/Downloads/Streamlit.pdf", "rb") as file:
258
+ # second_pdf = file.read()
259
+ # st.download_button(
260
+ # label="Download Guide 2",
261
+ # data=second_pdf,
262
+ # file_name="technical_guide.pdf",
263
+ # mime="application/pdf"
264
+ # )
265
+
266
+
267
+
268
+ with st.container():
269
+ st.markdown(f"""
270
+ <div class="sidebar-info">
271
+ <h4>Current Model: {selected_model}</h4>
272
+ <p><em>Note: Generated content may be inaccurate or false. Check important info.</em></p>
273
+ </div>
274
+ """, unsafe_allow_html=True)
275
+
276
+ # with st.sidebar:
277
+ # st.markdown("### 📘 Instructions")
278
+ # if pdf_file := st.file_uploader("Upload Instruction PDF", type="pdf"):
279
+ # embed_pdf(pdf_file)
280
+
281
+
282
+ feedback_url = "https://docs.google.com/forms/d/e/1FAIpQLSdZ_5mwW-pjqXHgxR0xriyVeRhqdQKgb5c-foXlYAV55Rilsg/viewform?usp=header"
283
+ st.sidebar.markdown(
284
+ f'<a href="{feedback_url}" target="_blank"><button style="width: 100%;">Feedback Form</button></a>',
285
+ unsafe_allow_html=True
286
+ )
287
+
288
+ # Display conversation
289
+ for message in st.session_state.messages:
290
+ with st.chat_message(message["role"]):
291
+ st.markdown(message["content"])
292
+
293
+ # Main content
294
+ if 'task_choice' not in st.session_state:
295
+ col1, col2 = st.columns(2)
296
+ with col1:
297
+ if st.button("📝 Data Generation", key="gen_button", help="Generate new data"):
298
+ st.session_state.task_choice = "Data Generation"
299
+ with col2:
300
+ if st.button("🏷️ Data Labeling", key="label_button", help="Label existing data"):
301
+ st.session_state.task_choice = "Data Labeling"
302
+
303
+ if "task_choice" in st.session_state:
304
+ if st.session_state.task_choice == "Data Generation":
305
+ st.header("📝 Data Generation")
306
+
307
+ # 1. Domain selection
308
+ domain_selection = st.selectbox("Domain", [
309
+ "Restaurant reviews", "E-Commerce reviews", "News", "AG News", "Tourism", "Custom"
310
+ ])
311
+
312
+ # 2. Handle custom domain input
313
+ custom_domain_valid = True # Assume valid until proven otherwise
314
+
315
+ if domain_selection == "Custom":
316
+ domain = st.text_input("Specify custom domain")
317
+ if not domain.strip():
318
+ st.error("Please specify a domain name.")
319
+ custom_domain_valid = False
320
+ else:
321
+ domain = domain_selection
322
+
323
+
324
+
325
+
326
+ # Classification type selection
327
+ classification_type = st.selectbox(
328
+ "Classification Type",
329
+ ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"]
330
+ )
331
+
332
+
333
+
334
+
335
+
336
+ #system role before
337
+
338
+ ####
339
+ # Labels setup based on classification type
340
+ #labels = []
341
+ labels = []
342
+ labels_valid = False
343
+ errors = []
344
+
345
def validate_binary_labels(labels):
    """Check the two class names entered for binary classification.

    Args:
        labels: Sequence whose first two items are the class-name strings.

    Returns:
        A list of human-readable error messages; empty when both names are
        non-blank and differ (ignoring case and surrounding whitespace).
    """
    cleaned = [name.strip() for name in labels]
    problems = []

    required = (
        (0, "First class name is required."),
        (1, "Second class name is required."),
    )
    for position, message in required:
        if not cleaned[position]:
            problems.append(message)

    # Only report a clash when no entry is blank, so an empty pair is
    # reported as "required" rather than as a duplicate.
    if all(cleaned) and cleaned[0].lower() == cleaned[1].lower():
        problems.append("Class names must be different.")
    return problems
356
+
357
+ if classification_type == "Sentiment Analysis":
358
+ st.write("### Sentiment Analysis Labels (Fixed)")
359
+ col1, col2, col3 = st.columns(3)
360
+ with col1:
361
+ st.text_input("First class", "Positive", disabled=True)
362
+ with col2:
363
+ st.text_input("Second class", "Negative", disabled=True)
364
+ with col3:
365
+ st.text_input("Third class", "Neutral", disabled=True)
366
+ labels = ["Positive", "Negative", "Neutral"]
367
+
368
+ elif classification_type == "Binary Classification":
369
+ st.write("### Binary Classification Labels")
370
+ col1, col2 = st.columns(2)
371
+ with col1:
372
+ label_1 = st.text_input("First class", "Positive")
373
+ with col2:
374
+ label_2 = st.text_input("Second class", "Negative")
375
+
376
+ labels = [label_1, label_2]
377
+ errors = validate_binary_labels(labels)
378
+
379
+ if errors:
380
+ st.error("\n".join(errors))
381
+ else:
382
+ st.success("Binary class names are valid and unique!")
383
+
384
+
385
+ # if classification_type == "Sentiment Analysis":
386
+ # st.write("### Sentiment Analysis Labels (Fixed)")
387
+ # col1, col2, col3 = st.columns(3)
388
+ # with col1:
389
+ # label_1 = st.text_input("First class", "Positive", disabled=True)
390
+ # with col2:
391
+ # label_2 = st.text_input("Second class", "Negative", disabled=True)
392
+ # with col3:
393
+ # label_3 = st.text_input("Third class", "Neutral", disabled=True)
394
+ # labels = ["Positive", "Negative", "Neutral"]
395
+
396
+
397
+ # elif classification_type == "Binary Classification":
398
+ # st.write("### Binary Classification Labels")
399
+ # col1, col2 = st.columns(2)
400
+
401
+ # with col1:
402
+ # label_1 = st.text_input("First class", "Positive")
403
+ # with col2:
404
+ # label_2 = st.text_input("Second class", "Negative")
405
+
406
+ # errors = []
407
+ # labels = [label_1.strip(), label_2.strip()]
408
+
409
+ # # Check for empty class names
410
+ # if not labels[0]:
411
+ # errors.append("First class name is required.")
412
+ # if not labels[1]:
413
+ # errors.append("Second class name is required.")
414
+
415
+ # # Check for duplicates
416
+ # if labels[0].lower() == labels[1].lower():
417
+ # errors.append("Class names must be different.")
418
+
419
+ # # Show errors or success
420
+ # if errors:
421
+ # for error in errors:
422
+ # st.error(error)
423
+ # else:
424
+ # st.success("Binary class names are valid and unique!")
425
+
426
+ #########
427
+
428
+ elif classification_type == "Multi-Class Classification":
429
+ st.write("### Multi-Class Classification Labels")
430
+
431
+ default_labels_by_domain = {
432
+ "News": ["Political", "Sports", "Entertainment", "Technology", "Business"],
433
+ "AG News": ["World", "Sports", "Business", "Sci/Tech"],
434
+ "Tourism": ["Accommodation", "Transportation", "Tourist Attractions",
435
+ "Food & Dining", "Local Experience", "Adventure Activities",
436
+ "Wellness & Spa", "Eco-Friendly Practices", "Family-Friendly",
437
+ "Luxury Tourism"],
438
+ "Restaurant reviews": ["Italian", "French", "American"],
439
+ "E-Commerce reviews": ["Mobile Phones & Accessories", "Laptops & Computers","Kitchen & Dining",
440
+ "Beauty & Personal Care", "Home & Furniture", "Clothing & Fashion",
441
+ "Shoes & Handbags", "Health & Wellness", "Electronics & Gadgets",
442
+ "Books & Stationery","Toys & Games", "Sports & Fitness",
443
+ "Grocery & Gourmet Food","Watches & Accessories", "Baby Products"]
444
+ }
445
+
446
+ num_classes = st.slider("Number of classes", 3, 15, 3)
447
+
448
+ # Get defaults for selected domain, or empty list
449
+ defaults = default_labels_by_domain.get(domain, [])
450
+
451
+ labels = []
452
+ errors = []
453
+ cols = st.columns(3)
454
+
455
+ for i in range(num_classes):
456
+ with cols[i % 3]:
457
+ default_value = defaults[i] if i < len(defaults) else ""
458
+ label_input = st.text_input(f"Class {i+1}", default_value)
459
+ normalized_label = label_input.strip().title()
460
+
461
+ if not normalized_label:
462
+ errors.append(f"Class {i+1} name is required.")
463
+ else:
464
+ labels.append(normalized_label)
465
+
466
+ # Check for duplicates (case-insensitive)
467
+ if len(labels) != len(set(labels)):
468
+ errors.append("Labels names must be unique (case-insensitive, normalized to Title Case).")
469
+
470
+ # Show validation results
471
+ if errors:
472
+ for error in errors:
473
+ st.error(error)
474
+ else:
475
+ st.success("All Labels names are valid and unique!")
476
+ labels_valid = not errors # Will be True only if there are no label errors
477
+
478
+
479
+
480
+
481
+ ##############
482
+
483
# Generation parameters: word-count bounds for each generated example.
col1, col2 = st.columns(2)
with col1:
    min_words = st.number_input("Min words", 1, 100, 20)
with col2:
    # "Max words" may not go below "Min words". Its default must follow the
    # lower bound: a fixed default of 50 falls below min_value as soon as the
    # user raises "Min words" above 50, and Streamlit rejects the widget.
    max_words = st.number_input("Max words", min_words, 100, max(50, min_words))
489
+
490
+ # Few-shot examples
491
+ use_few_shot = st.toggle("Use few-shot examples")
492
+ few_shot_examples = []
493
+ if use_few_shot:
494
+ num_examples = st.slider("Number of few-shot examples", 1, 10, 1)
495
+ for i in range(num_examples):
496
+ with st.expander(f"Example {i+1}"):
497
+ content = st.text_area(f"Content", key=f"few_shot_content_{i}")
498
+ label = st.selectbox(f"Label", labels, key=f"few_shot_label_{i}")
499
+ if content and label:
500
+ few_shot_examples.append({"content": content, "label": label})
501
+
502
+ num_to_generate = st.number_input("Number of examples", 1, 200, 10)
503
+ #sytem role after
504
+ # System role customization
505
+ default_system_role = f"You are a professional {classification_type} expert, your role is to generate text examples for {domain} domain. Always generate unique diverse examples and do not repeat the generated data. The generated text should be between {min_words} to {max_words} words long."
506
+ system_role = st.text_area("Modify System Role (optional)",
507
+ value=default_system_role,
508
+ key="system_role_input")
509
+ st.session_state['system_role'] = system_role if system_role else default_system_role
510
+ # Labels initialization
511
+ #labels = []
512
+
513
+
514
+ user_prompt = st.text_area("User Prompt (optional)")
515
+
516
# Prompt template: the (possibly user-edited) system role followed by the
# generation parameters and output-format rules.
prompt_template = PromptTemplate(
    input_variables=["system_role", "classification_type", "domain", "num_examples",
                     "min_words", "max_words", "labels", "user_prompt", "few_shot_examples"],
    template=(
        "{system_role}\n"
        "- Use the following parameters:\n"
        "- Generate {num_examples} examples\n"
        "- Each example should be between {min_words} to {max_words} words long\n"
        "- Use these labels: {labels}.\n"
        "- Generate the examples in this format: 'Example text. Label: label'\n"
        "- Do not include word counts or any additional information\n"
        "- Always use your creativity and intelligence to generate unique and diverse text data\n"
        "- Write unique examples every time.\n"
        "- DO NOT REPEAT your generated text. \n"
        "- For each Output, describe it once and move to the next.\n"
        "- List each Output only once, and avoid repeating details.\n"
        "- Additional instructions: {user_prompt}\n\n"
        # The whole few-shot instruction is injected through this placeholder
        # so it disappears when the user supplied no examples.
        "{few_shot_examples}"
        "- Think step by step, generate numbered examples, and check each newly generated example to ensure it has not been generated before. If it has, modify it"
    ),
)

# Render the few-shot section only when examples exist; otherwise the prompt
# would tell the model to use reference examples that are not there.
few_shot_text = "\n".join(
    f"{ex['content']}\nLabel: {ex['label']}" for ex in few_shot_examples
)
few_shot_section = (
    "- Use the following examples as a reference in the generation process"
    f"\n\n {few_shot_text}. \n"
    if few_shot_examples
    else ""
)

# Generate system prompt
system_prompt = prompt_template.format(
    system_role=st.session_state['system_role'],
    classification_type=classification_type,
    domain=domain,
    num_examples=num_to_generate,
    min_words=min_words,
    max_words=max_words,
    labels=", ".join(labels),
    user_prompt=user_prompt,
    few_shot_examples=few_shot_section,
)

# Store system prompt in session state so the generation step can read it.
st.session_state['system_prompt'] = system_prompt
557
+
558
+ # Display system prompt
559
+ st.write("System Prompt:")
560
+ st.text_area("Current System Prompt", value=st.session_state['system_prompt'],
561
+ height=400, disabled=True)
562
+
563
+
564
if st.button("🎯 Generate Examples"):
    # Validate first. The model is called only from the final `else`, so a
    # failed check now blocks generation instead of merely showing a warning.
    if domain_selection == "Custom" and not domain.strip():
        st.warning("Custom domain name is required.")
    elif len(labels) != len(set(labels)):
        st.warning("Class names must be unique.")
    elif any(not lbl.strip() for lbl in labels):
        st.warning("All class labels must be filled in.")
    elif errors:
        # `errors` holds the messages collected while the label inputs were
        # rendered (e.g. a multi-class label left blank).
        st.warning("Please fix the label errors before generating examples.")
    else:
        with st.spinner("Generating examples..."):
            try:
                stream = client.chat.completions.create(
                    model=selected_model,
                    messages=[{"role": "system", "content": st.session_state['system_prompt']}],
                    temperature=temperature,
                    stream=True,
                    max_tokens=80000,
                    top_p=0.9,
                )
                # Record the exchange so it is replayed in the conversation view.
                st.session_state.messages.append({"role": "user", "content": system_prompt})
                response = st.write_stream(stream)
                st.session_state.messages.append({"role": "assistant", "content": response})

                # Always keep the latest response (previously only the first
                # response of a session was ever stored).
                st.session_state.response = response

                # Make sure the result slots exist before they are read or replaced.
                for state_key, initial_value in (
                    ("generated_examples", []),
                    ("generated_examples_csv", None),
                    ("generated_examples_json", None),
                ):
                    if state_key not in st.session_state:
                        st.session_state[state_key] = initial_value

                # Parse "<text> Label: <label>" lines; the split is taken at the
                # last "Label:" so the marker may also appear inside the text.
                examples_list = []
                for line in response.split('\n'):
                    text, marker, label = line.rpartition('Label:')
                    text, label = text.strip(), label.strip()
                    if marker and text and label:
                        examples_list.append({
                            'text': text,
                            'label': label,
                            'system_prompt': st.session_state.system_prompt,
                            'system_role': st.session_state.system_role,
                            'task_type': 'Data Generation',
                            'Use few-shot example?': 'Yes' if use_few_shot else 'No',
                        })

                if examples_list:
                    # Update session state with new data
                    st.session_state.generated_examples = examples_list

                    # Generate CSV and JSON data
                    df = pd.DataFrame(examples_list)
                    st.session_state.generated_examples_csv = df.to_csv(index=False).encode('utf-8')
                    st.session_state.generated_examples_json = json.dumps(examples_list, indent=2).encode('utf-8')

                    st.download_button(
                        "📥 Download Generated Examples (CSV)",
                        st.session_state.generated_examples_csv,
                        "generated_examples.csv",
                        "text/csv",
                        key='download-csv-persistent'
                    )

                    # Separator between the two download options.
                    st.markdown(
                        "\n<div style='text-align: left; margin:15px 0; font-weight: 600; color: #666;'>. . . . . . or</div>\n",
                        unsafe_allow_html=True,
                    )

                    st.download_button(
                        "📥 Download Generated Examples (JSON)",
                        st.session_state.generated_examples_json,
                        "generated_examples.json",
                        "application/json",
                        key='download-json-persistent'
                    )

                # NOTE(review): `follow_up` is not assigned anywhere in the visible
                # code, so the original handler would raise NameError. It is read
                # from session state here — confirm which widget should set it.
                follow_up = st.session_state.get("follow_up")
                if st.button("Continue"):
                    # st.rerun() replaces the deprecated st.experimental_rerun().
                    if follow_up == "Generate more examples":
                        st.rerun()
                    elif follow_up == "Data Labeling":
                        st.session_state.task_choice = "Data Labeling"
                        st.rerun()

            except Exception as e:
                # UI boundary: report the failure to the user instead of crashing the app.
                st.error("An error occurred during generation.")
                st.error(f"Details: {e}")
681
+
682
+
683
+ # Labeling process
684
+ elif st.session_state.task_choice == "Data Labeling":
685
+ st.header("🏷️ Data Labeling")
686
+ #new new new
687
+ # 1. Domain selection
688
+ # 1. Domain selection
689
+
690
+
691
+ domain_selection = st.selectbox("Domain", ["Restaurant reviews", "E-Commerce reviews", "News", "AG News", "Tourism", "Custom"])
692
+ # 2. Handle custom domain input
693
+ custom_domain_valid = True # Assume valid until proven otherwise
694
+
695
+ if domain_selection == "Custom":
696
+ domain = st.text_input("Specify custom domain")
697
+ if not domain.strip():
698
+ st.error("Please specify a domain name.")
699
+ custom_domain_valid = False
700
+ else:
701
+ domain = domain_selection
702
+
703
+
704
+ # # Classification type selection
705
+ # classification_type = st.selectbox(
706
+ # "Classification Type",
707
+ # ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"]
708
+ # )
709
+ #NNew edit
710
+ # classification_type = st.selectbox(
711
+ # "Classification Type",
712
+ # #["Sentiment Analysis", "Binary Classification", "Multi-Class Classification", "Named Entity Recognition (NER)"],
713
+ # ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"],
714
+ # key="label_class_type"
715
+ # )
716
+
717
+ # Classification type selection
718
+ classification_type = st.selectbox(
719
+ "Classification Type",
720
+ ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification", "Named Entity Recognition (NER)"]
721
+ )
722
+ #NNew edit
723
+ # Labels setup based on classification type
724
+ labels = []
725
+ labels_valid = False
726
+ errors = []
727
+
728
+ if classification_type == "Sentiment Analysis":
729
+ st.write("### Sentiment Analysis Labels (Fixed)")
730
+ col1, col2, col3 = st.columns(3)
731
+ with col1:
732
+ label_1 = st.text_input("First class", "Positive", disabled=True)
733
+ with col2:
734
+ label_2 = st.text_input("Second class", "Negative", disabled=True)
735
+ with col3:
736
+ label_3 = st.text_input("Third class", "Neutral", disabled=True)
737
+ labels = ["Positive", "Negative", "Neutral"]
738
+
739
+
740
+ elif classification_type == "Binary Classification":
741
+ st.write("### Binary Classification Labels")
742
+ col1, col2 = st.columns(2)
743
+
744
+ with col1:
745
+ label_1 = st.text_input("First class", "Positive")
746
+ with col2:
747
+ label_2 = st.text_input("Second class", "Negative")
748
+
749
+ errors = []
750
+ labels = [label_1.strip(), label_2.strip()]
751
+
752
+
753
+ # Strip and lower-case labels for validation
754
+ label_1 = labels[0].strip()
755
+ label_2 = labels[1].strip()
756
+
757
+ # Check for empty class names
758
+ if not label_1:
759
+ errors.append("First class name is required.")
760
+ if not label_2:
761
+ errors.append("Second class name is required.")
762
+
763
+ # Check for duplicates (case insensitive)
764
+ if label_1.lower() == label_2.lower() and label_1 and label_2:
765
+ errors.append("Class names must be different.")
766
+
767
+ # Show errors or success
768
+ if errors:
769
+ for error in errors:
770
+ st.error(error)
771
+ else:
772
+ st.success("Binary class names are valid and unique!")
773
+
774
+
775
+ elif classification_type == "Multi-Class Classification":
776
+ st.write("### Multi-Class Classification Labels")
777
+
778
+ default_labels_by_domain = {
779
+ "News": ["Political", "Sports", "Entertainment", "Technology", "Business"],
780
+ "AG News": ["World", "Sports", "Business", "Sci/Tech"],
781
+ "Tourism": ["Accommodation", "Transportation", "Tourist Attractions",
782
+ "Food & Dining", "Local Experience", "Adventure Activities",
783
+ "Wellness & Spa", "Eco-Friendly Practices", "Family-Friendly",
784
+ "Luxury Tourism"],
785
+ "Restaurant reviews": ["Italian", "French", "American"],
786
+ "E-Commerce reviews": ["Mobile Phones & Accessories", "Laptops & Computers","Kitchen & Dining",
787
+ "Beauty & Personal Care", "Home & Furniture", "Clothing & Fashion",
788
+ "Shoes & Handbags", "Health & Wellness", "Electronics & Gadgets",
789
+ "Books & Stationery","Toys & Games", "Sports & Fitness",
790
+ "Grocery & Gourmet Food","Watches & Accessories", "Baby Products"]
791
+ }
792
+
793
+
794
+
795
+ # Ask user how many classes they want to define
796
+ num_classes = st.slider("Select the number of classes (labels)", min_value=3, max_value=10, value=3)
797
+
798
+ # Use default labels based on selected domain, if available
799
+ defaults = default_labels_by_domain.get(domain, [])
800
+
801
+ labels = []
802
+ errors = []
803
+ cols = st.columns(3) # For nicely arranged label inputs
804
+
805
+ for i in range(num_classes):
806
+ with cols[i % 3]: # Distribute inputs across columns
807
+ default_value = defaults[i] if i < len(defaults) else ""
808
+ label_input = st.text_input(f"Label {i + 1}", default_value)
809
+ normalized_label = label_input.strip().title()
810
+
811
+ if not normalized_label:
812
+ errors.append(f"Label {i + 1} is required.")
813
+ else:
814
+ labels.append(normalized_label)
815
+
816
+ # Check for duplicates (case-insensitive)
817
+ normalized_set = {label.lower() for label in labels}
818
+ if len(labels) != len(normalized_set):
819
+ errors.append("Label names must be unique (case-insensitive).")
820
+
821
+ # Show validation results
822
+ if errors:
823
+ for error in errors:
824
+ st.error(error)
825
+ else:
826
+ st.success("All label names are valid and unique!")
827
+
828
+ labels_valid = not errors # True if no validation errors
829
+
830
+ elif classification_type == "Named Entity Recognition (NER)":
831
# Entity types offered for NER, each written as "<TAG> - <description>".
ner_entities = [
    "PERSON - Names of people, fictional characters, historical figures",
    "ORG - Companies, institutions, agencies, teams",
    "LOC - Physical locations (mountains, oceans, etc.)",
    "GPE - Countries, cities, states, political regions",
    "DATE - Calendar dates, years, centuries",
    "TIME - Times, durations",
    "MONEY - Monetary values with currency",
]
# Every entity type starts out selected.
selected_entities = st.multiselect(
    "Select entities to recognize",
    ner_entities,
    default=list(ner_entities),
    key="ner_entity_selection",
)

# Keep only the tag that precedes the " - " separator.
labels = [entry.partition(" - ")[0] for entry in selected_entities]

if not labels:
    st.warning("Please select at least one entity type")
    # Fall back to a single tag so later code always has a label to work with.
    labels = ["PERSON"]
860
+
861
+
862
+
863
+
864
+
865
+ # New edit
866
+ # elif classification_type == "Multi-Class Classification":
867
+ # st.write("### Multi-Class Classification Labels")
868
+
869
+ # default_labels_by_domain = {
870
+ # "News": ["Political", "Sports", "Entertainment", "Technology", "Business"],
871
+ # "AG News": ["World", "Sports", "Business", "Sci/Tech"],
872
+ # "Tourism": ["Accommodation", "Transportation", "Tourist Attractions",
873
+ # "Food & Dining", "Local Experience", "Adventure Activities",
874
+ # "Wellness & Spa", "Eco-Friendly Practices", "Family-Friendly",
875
+ # "Luxury Tourism"],
876
+ # "Restaurant reviews": ["Italian", "French", "American"]
877
+ # }
878
+ # num_classes = st.slider("Number of classes", 3, 10, 3)
879
+
880
+ # # Get defaults for selected domain, or empty list
881
+ # defaults = default_labels_by_domain.get(domain, [])
882
+
883
+ # labels = []
884
+ # errors = []
885
+ # cols = st.columns(3)
886
+
887
+ # for i in range(num_classes):
888
+ # with cols[i % 3]:
889
+ # default_value = defaults[i] if i < len(defaults) else ""
890
+ # label_input = st.text_input(f"Class {i+1}", default_value)
891
+ # normalized_label = label_input.strip().title()
892
+
893
+ # if not normalized_label:
894
+ # errors.append(f"Class {i+1} name is required.")
895
+ # else:
896
+ # labels.append(normalized_label)
897
+
898
+ # # Check for duplicates (case-insensitive)
899
+ # if len(labels) != len(set(labels)):
900
+ # errors.append("Labels names must be unique (case-insensitive, normalized to Title Case).")
901
+
902
+ # # Show validation results
903
+ # if errors:
904
+ # for error in errors:
905
+ # st.error(error)
906
+ # else:
907
+ # st.success("All Labels names are valid and unique!")
908
+ # labels_valid = not errors # Will be True only if there are no label errors
909
+
910
+
911
+
912
+
913
+ # else:
914
+ # num_classes = st.slider("Number of classes", 3, 23, 3, key="label_num_classes")
915
+ # labels = []
916
+ # cols = st.columns(3)
917
+ # for i in range(num_classes):
918
+ # with cols[i % 3]:
919
+ # label = st.text_input(f"Class {i+1}", f"Class_{i+1}", key=f"label_class_{i}")
920
+ # labels.append(label)
921
+
922
# --- Optional few-shot examples: each one is a text plus one of the labels ---
use_few_shot = st.toggle("Use few-shot examples for labeling")
few_shot_examples = []
if use_few_shot:
    num_few_shot = st.slider("Number of few-shot examples", 1, 10, 1)
    for slot in range(num_few_shot):
        with st.expander(f"Few-shot Example {slot+1}"):
            shot_text = st.text_area("Content", key=f"label_few_shot_content_{slot}")
            shot_label = st.selectbox("Label", labels, key=f"label_few_shot_label_{slot}")
        # Only completed examples (non-empty text and a chosen label) are kept.
        if not (shot_text and shot_label):
            continue
        few_shot_examples.append(f"{shot_text}\nLabel: {shot_label}")
932
+
933
# --- Texts to label: one box per example up to 20, a single bulk box above ---
num_examples = st.number_input("Number of examples to classify", 1, 100, 1)

examples_to_classify = []
if num_examples > 20:
    bulk_text = st.text_area(
        "Enter examples (one per line)",
        height=300,
        help="Enter each example on a new line",
    )
    if bulk_text:
        # Blank lines are ignored; anything beyond the requested count is cut.
        stripped_lines = (line.strip() for line in bulk_text.split('\n'))
        examples_to_classify = [line for line in stripped_lines if line][:num_examples]
else:
    for slot in range(num_examples):
        entry = st.text_area(f"Example {slot+1}", key=f"example_{slot}")
        if entry:
            examples_to_classify.append(entry)
951
+
952
# --- System role, free-form user prompt and the text blocks fed to the template ---
# Default persona offered to the user; it names the task type and the domain.
default_system_role = f"You are a professional {classification_type} expert, your role is to classify the provided text examples for {domain} domain."
system_role = st.text_area(
    "Modify System Role (optional)",
    value=default_system_role,
    key="system_role_input",
)
# An emptied text box falls back to the default persona.
st.session_state['system_role'] = system_role or default_system_role

user_prompt = st.text_area("User prompt (optional)", key="label_instructions")

# Few-shot examples are separated by a blank line; the examples to label are
# numbered starting from 1, one per line.
few_shot_text = "\n\n".join(few_shot_examples)
examples_text = "\n".join(
    f"{number}. {text}" for number, text in enumerate(examples_to_classify, start=1)
)
966
+
967
# Pick the prompt wording and its declared variables for the selected task,
# then build a single PromptTemplate from them.
if classification_type != "Named Entity Recognition (NER)":
    # Classification tasks: one label per example.
    prompt_variables = ["system_role", "classification_type", "labels", "few_shot_examples", "examples", "domain", "user_prompt"]
    prompt_body = (
        "{system_role}\n"
        "- Use the following instructions:\n"
        "- Use the following labels: {labels}.\n"
        "- Return the classified text followed by the label in this format: 'text. Label: [label]'\n"
        "- Do not provide any additional information or explanations\n"
        "- User prompt:\n {user_prompt}\n\n"
        "- Use user provided examples as guidence in the classification process:\n\n {few_shot_examples}\n"
        "- Examples to classify:\n{examples}\n\n"
        "- Think step by step then classify the examples"
    )
else:
    # NER: list every entity of the requested types found in each example.
    prompt_variables = ["system_role", "labels", "few_shot_examples", "examples", "domain", "user_prompt"]
    prompt_body = (
        "{system_role}\n"
        "- For each text example provided, identify all entities of the requested types.\n"
        "- Use the following entities: {labels}.\n"
        "- Return each example followed by the entities you found in this format: 'Example text.\n Entities: [ENTITY_TYPE: entity text\n, ENTITY_TYPE: entity text\n, ...] or [No entities found]'\n"
        "- If no entities of the requested types are found, indicate 'No entities found' in this text.\n"
        "- Be precise about entity boundaries - don't include unnecessary words.\n"
        "- Do not provide any additional information or explanations.\n"
        "- Additional instructions:\n {user_prompt}\n\n"
        "- Use user few-shot examples as guidance if provided:\n{few_shot_examples}\n\n"
        "- Examples to analyze:\n{examples}\n\n"
        "Output:\n"
    )

label_prompt_template = PromptTemplate(
    input_variables=prompt_variables,
    template=prompt_body,
)
1004
+
1005
# --- Prompt preview ---
# Render the prompt from the same inputs the "Label Data" request uses: the
# numbered examples (examples_text) and the blank-line separated few-shot
# block (few_shot_text).  The preview used to be built from un-numbered
# examples and newline-joined few-shots, so the text shown here -- and stored
# in st.session_state['system_prompt'], which ends up in the exported rows --
# did not match the prompt that was actually sent to the model.
system_prompt = label_prompt_template.format(
    system_role=st.session_state['system_role'],
    classification_type=classification_type,
    domain=domain,
    examples=examples_text,
    labels=", ".join(labels),
    user_prompt=user_prompt,
    few_shot_examples=few_shot_text,
)

# Keep the rendered prompt available to the rest of the session.
st.session_state['system_prompt'] = system_prompt

# Read-only preview of the prompt.
st.write("System Prompt:")
st.text_area("System Prompt", value=st.session_state['system_prompt'], height=300, max_chars=None, key=None, help=None, disabled=True)
1035
+
1036
+
1037
+
1038
def _parse_labeled_response(response_text, marker):
    """Split a model response into ``(text, annotation)`` pairs.

    Every non-blank line is split at the LAST occurrence of *marker*
    ("Label:" or "Entities:").  When the marker opens the line -- the NER
    prompt asks for the entities on the line after the example text -- the
    closest preceding marker-free line is used as the text instead of
    dropping the annotation.  Pairs with an empty text or an empty
    annotation are skipped.
    """
    pairs = []
    pending_text = ""  # last line seen that carried no marker
    for raw_line in response_text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        head, found, tail = line.rpartition(marker)
        if not found:
            pending_text = line
            continue
        text = head.strip() or pending_text
        annotation = tail.strip()
        pending_text = ""
        if text and annotation:
            pairs.append((text, annotation))
    return pairs


# --- Send the examples to the model, then parse, export and preview the result ---
if st.button("🏷️ Label Data"):
    if examples_to_classify:
        with st.spinner("Labeling data..."):
            is_ner_task = classification_type == "Named Entity Recognition (NER)"

            # One call serves both templates: the preview above already
            # formats either template with this full set of keyword arguments.
            system_prompt = label_prompt_template.format(
                system_role=st.session_state['system_role'],
                classification_type=classification_type,
                domain=domain,
                labels=", ".join(labels),
                few_shot_examples=few_shot_text,
                examples=examples_text,
                user_prompt=user_prompt,
            )
            try:
                stream = client.chat.completions.create(
                    model=selected_model,
                    messages=[{"role": "system", "content": system_prompt}],
                    temperature=temperature,
                    stream=True,
                    max_tokens=20000,
                    top_p=0.9,
                )
                # Log the exchange in the chat history and stream the answer
                # to the page while it is being generated.
                st.session_state.messages.append({"role": "user", "content": system_prompt})
                response = st.write_stream(stream)
                st.session_state.messages.append({"role": "assistant", "content": response})

                # Always record the prompt that was really sent and the latest
                # response; storing them only when the key was missing left
                # stale values behind from the second run on.
                st.session_state.system_prompt = system_prompt
                st.session_state.response = response

                # Initialize session state variables if they don't exist
                if 'generated_examples' not in st.session_state:
                    st.session_state.generated_examples = []
                if 'generated_examples_csv' not in st.session_state:
                    st.session_state.generated_examples_csv = None
                if 'generated_examples_json' not in st.session_state:
                    st.session_state.generated_examples_json = None

                # Turn the response into one row per labeled example.
                if is_ner_task:
                    marker, value_key, task_type = 'Entities:', 'entities', 'Named Entity Recognition (NER)'
                else:
                    marker, value_key, task_type = 'Label:', 'label', 'Data Labeling'
                few_shot_flag = 'Yes' if use_few_shot else 'No'
                labeled_examples = [
                    {
                        'text': text,
                        value_key: annotation,
                        'system_prompt': system_prompt,
                        'system_role': st.session_state.system_role,
                        'task_type': task_type,
                        'Use few-shot example?': few_shot_flag,
                    }
                    for text, annotation in _parse_labeled_response(response, marker)
                ]

                # Save and provide download options
                if labeled_examples:
                    st.session_state.labeled_examples = labeled_examples

                    # Convert to CSV and JSON
                    df = pd.DataFrame(labeled_examples)
                    st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')
                    st.session_state.labeled_examples_json = json.dumps(labeled_examples, indent=2).encode('utf-8')

                    st.download_button(
                        "📥 Download Labeled Examples (CSV)",
                        st.session_state.labeled_examples_csv,
                        "labeled_examples.csv",
                        "text/csv",
                        key='download-labeled-csv'
                    )

                    st.markdown("""
<div style='text-align: left; margin:15px 0; font-weight: 600; color: #666;'>. . . . . . or</div>
""", unsafe_allow_html=True)

                    st.download_button(
                        "📥 Download Labeled Examples (JSON)",
                        st.session_state.labeled_examples_json,
                        "labeled_examples.json",
                        "application/json",
                        key='download-labeled-json'
                    )

                    # Display the labeled examples
                    st.markdown("##### 📋 Labeled Examples Preview")
                    st.dataframe(df, use_container_width=True)
            except Exception as e:
                # UI boundary: report the failure instead of crashing the app.
                st.error("An error occurred during labeling.")
                st.error(f"Details: {e}")
            else:
                # The follow-up radio that used to define `follow_up` is
                # disabled, so there is no choice left to branch on (reading
                # it raised NameError).  Clicking the button reruns the
                # script, which clears this handler's output; st.rerun()
                # replaces the removed st.experimental_rerun().
                if st.button("Continue"):
                    st.rerun()
    else:
        st.warning("Please enter at least one example to classify.")
1232
+
1233
+ #st.session_state.messages.append({"role": "assistant", "content": response})
1234
+
1235
+
1236
+
1237
+
1238
# Footer: a horizontal rule followed by a centered credit line (raw HTML).
footer_html = """
<div style='text-align: center'>
<p>Made with ❤️ by Wedyan AlSakran 2025</p>
</div>
"""
st.markdown("---")
st.markdown(footer_html, unsafe_allow_html=True)