Wedyan2023 commited on
Commit
294f405
·
verified ·
1 Parent(s): a352396

Create app110.py

Browse files
Files changed (1) hide show
  1. app110.py +1595 -0
app110.py ADDED
@@ -0,0 +1,1595 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import os
4
+ import json
5
+ import base64
6
+ import random
7
+ from streamlit_pdf_viewer import pdf_viewer
8
+ from langchain.prompts import PromptTemplate
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+ from openai import OpenAI
12
+ from dotenv import load_dotenv
13
+ import warnings
14
+
15
+ from transformers import AutoModelForCausalLM, AutoTokenizer
16
+ import torch
17
+
18
+ warnings.filterwarnings('ignore')
19
+
20
+ os.getenv("OAUTH_CLIENT_ID")
21
+
22
+
23
+ # # Load environment variables and initialize the OpenAI client to use Hugging Face Inference API.
24
+ # load_dotenv()
25
+ # client = OpenAI(
26
+ # base_url="https://api-inference.huggingface.co/v1",
27
+ # api_key=os.environ.get('TOKEN2') # Hugging Face API token
28
+ # )
29
+ #####
30
+ from openai import OpenAI
31
+
32
+ client = OpenAI(
33
+ base_url="https://router.huggingface.co/together/v1",
34
+ #api_key="hf_XXXXX",
35
+ api_key=os.environ.get('LLM'), # Hugging Face API token
36
+ )
37
+
38
+ completion = client.chat.completions.create(
39
+ model="meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
40
+ messages=[
41
+ {
42
+ "role": "user",
43
+ "content": "What is the capital of France?"
44
+ }
45
+ ],
46
+ )
47
+
48
+ print(completion.choices[0].message)
49
+ #####
50
+ ##########################################################3
51
+ # import streamlit as st
52
+ # from transformers import AutoModelForCausalLM, AutoTokenizer
53
+ # import torch
54
+
55
+ # # Model selection dropdown
56
+ # selected_model = st.selectbox(
57
+ # "Select Model",
58
+ # ["meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
59
+ # "meta-llama/Llama-3.3-70B-Instruct",
60
+ # "meta-llama/Llama-3.2-3B-Instruct",
61
+ # "meta-llama/Llama-4-Scout-17B-16E-Instruct",
62
+ # "meta-llama/Meta-Llama-3-8B-Instruct",
63
+ # "meta-llama/Llama-3.1-70B-Instruct"],
64
+ # key='model_select'
65
+ # )
66
+
67
+ # @st.cache_resource # Cache the model to prevent reloading
68
+ # def load_model(model_name):
69
+ # try:
70
+ # # Optimized model loading configuration
71
+ # model = AutoModelForCausalLM.from_pretrained(
72
+ # model_name,
73
+ # torch_dtype=torch.float16, # Use half precision
74
+ # device_map="auto", # Automatic device mapping
75
+ # load_in_8bit=True, # Enable 8-bit quantization
76
+ # low_cpu_mem_usage=True, # Optimize CPU memory usage
77
+ # max_memory={0: "10GB"} # Limit GPU memory usage
78
+ # )
79
+
80
+ # tokenizer = AutoTokenizer.from_pretrained(
81
+ # model_name,
82
+ # padding_side="left",
83
+ # truncation_side="left"
84
+ # )
85
+
86
+ # return model, tokenizer
87
+
88
+ # except Exception as e:
89
+ # st.error(f"Error loading model: {str(e)}")
90
+ # return None, None
91
+
92
+ # # Load the selected model with optimizations
93
+ # if selected_model:
94
+ # model, tokenizer = load_model(selected_model)
95
+
96
+ # # Check if model loaded successfully
97
+ # if model is not None:
98
+ # st.success(f"Successfully loaded {selected_model}")
99
+ # else:
100
+ # st.warning("Please select a different model or check your hardware capabilities")
101
+
102
+ # # Function to generate text
103
+ # def generate_response(prompt, model, tokenizer):
104
+ # try:
105
+ # inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
106
+
107
+ # with torch.no_grad():
108
+ # outputs = model.generate(
109
+ # inputs["input_ids"],
110
+ # max_length=256,
111
+ # num_return_sequences=1,
112
+ # temperature=0.7,
113
+ # do_sample=True,
114
+ # pad_token_id=tokenizer.pad_token_id
115
+ # )
116
+
117
+ # response = tokenizer.decode(outputs[0], skip_special_tokens=True)
118
+ # return response
119
+
120
+ # except Exception as e:
121
+ # return f"Error generating response: {str(e)}"
122
+ ############################################################
123
+
124
+ ####new
125
+ # from openai import OpenAI
126
+
127
+ # client = OpenAI(
128
+ # base_url="https://router.huggingface.co/together/v1",
129
+ # api_key=os.environ.get('TOKEN2'),
130
+ # )
131
+
132
+ # completion = client.chat.completions.create(
133
+ # model="meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
134
+ # messages=[
135
+ # {
136
+ # "role": "user",
137
+ # "content": "What is the capital of France?"
138
+ # }
139
+ # ],
140
+ # max_tokens=512,
141
+ # )
142
+
143
+ # print(completion.choices[0].message)
144
+ #####
145
+
146
+ # Create necessary directories
147
# exist_ok=True makes creation idempotent and removes the check-then-create
# race that os.path.exists() + os.makedirs() has when two runs start together.
for dir_name in ['data', 'feedback']:
    os.makedirs(dir_name, exist_ok=True)
150
+
151
+ # Custom CSS
152
+ st.markdown("""
153
+ <style>
154
+ .stButton > button {
155
+ width: 100%;
156
+ margin-bottom: 10px;
157
+ background-color: #4CAF50;
158
+ color: white;
159
+ border: none;
160
+ padding: 10px;
161
+ border-radius: 5px;
162
+ }
163
+ .task-button {
164
+ background-color: #2196F3 !important;
165
+ }
166
+ .stSelectbox {
167
+ margin-bottom: 20px;
168
+ }
169
+ .output-container {
170
+ padding: 20px;
171
+ border-radius: 5px;
172
+ border: 1px solid #ddd;
173
+ margin: 10px 0;
174
+ }
175
+ .status-container {
176
+ padding: 10px;
177
+ border-radius: 5px;
178
+ margin: 10px 0;
179
+ }
180
+ .sidebar-info {
181
+ padding: 10px;
182
+ background-color: #f0f2f6;
183
+ border-radius: 5px;
184
+ margin: 10px 0;
185
+ }
186
+ .feedback-button {
187
+ background-color: #ff9800 !important;
188
+ }
189
+ .feedback-container {
190
+ padding: 15px;
191
+ background-color: #f5f5f5;
192
+ border-radius: 5px;
193
+ margin: 15px 0;
194
+ }
195
+ </style>
196
+ """, unsafe_allow_html=True)
197
+
198
+ # Helper functions
199
def read_csv_with_encoding(file):
    """Read a CSV into a DataFrame, trying several text encodings in turn.

    Args:
        file: A path or file-like object that ``pd.read_csv`` accepts.

    Returns:
        The DataFrame produced by the first encoding that decodes the data.

    Raises:
        UnicodeDecodeError: If every candidate encoding fails to decode.
    """
    encodings = ('utf-8', 'latin1', 'iso-8859-1', 'cp1252')

    # Remember where a file-like object started so each retry re-reads the
    # same bytes; a failed attempt leaves the stream partly or fully consumed.
    can_rewind = hasattr(file, "seek") and hasattr(file, "tell")
    start_pos = file.tell() if can_rewind else None

    last_error = None
    for encoding in encodings:
        if can_rewind:
            file.seek(start_pos)
        try:
            return pd.read_csv(file, encoding=encoding)
        except UnicodeDecodeError as exc:
            last_error = exc

    # UnicodeDecodeError takes five constructor arguments; reuse the details
    # of the last failure and attach the summary message as the reason.
    raise UnicodeDecodeError(
        last_error.encoding,
        last_error.object,
        last_error.start,
        last_error.end,
        "Failed to read file with any supported encoding",
    ) from last_error
207
+
208
+ #def save_feedback(feedback_data):
209
+ #feedback_file = 'feedback/user_feedback.csv'
210
+ #feedback_df = pd.DataFrame([feedback_data])
211
+
212
+ #if os.path.exists(feedback_file):
213
+ #feedback_df.to_csv(feedback_file, mode='a', header=False, index=False)
214
+ #else:
215
+ #feedback_df.to_csv(feedback_file, index=False)
216
+
217
def reset_conversation():
    """Empty the stored conversation and messages and forget the chosen task.

    Returns:
        None.
    """
    state = st.session_state
    state["conversation"] = []
    state["messages"] = []
    # Only remove the task selection when one has actually been made.
    if 'task_choice' in state:
        del state["task_choice"]
    return None
223
+ #new 24 March
224
+ #user_input = st.text_input("Enter your prompt:")
225
+ ###########33
226
+
227
+ # Initialize session state variables
228
# Seed each session-state key with its starting value the first time it is seen.
for state_key, initial_value in (
    ("messages", []),
    ("examples_to_classify", []),
    ("system_role", ""),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value
234
+
235
+
236
+
237
+ # Main app title
238
+ st.title("🤖🦙 Text Data Labeling and Generation App")
239
+ # def embed_pdf_sidebar(pdf_path):
240
+ # with open(pdf_path, "rb") as f:
241
+ # base64_pdf = base64.b64encode(f.read()).decode('utf-8')
242
+ # pdf_display = f"""
243
+ # <iframe src="data:application/pdf;base64,{base64_pdf}"
244
+ # width="100%" height="400" type="application/pdf"></iframe>
245
+ # """
246
+ # st.markdown(pdf_display, unsafe_allow_html=True)
247
+ #
248
+
249
+
250
+ # Sidebar settings
251
+ with st.sidebar:
252
+ st.title("⚙️ Settings")
253
+
254
+
255
+ #this last code works
256
+ with st.sidebar:
257
+ st.markdown("### 📘Data Generation and Labeling Instructions")
258
+ #st.markdown("<h4 style='color: #4A90E2;'>📘 Instructions</h4>", unsafe_allow_html=True)
259
+ with open("User instructions.pdf", "rb") as f:
260
+ st.download_button(
261
+ label="📄 Download Instructions PDF",
262
+ data=f,
263
+ #file_name="instructions.pdf",
264
+ file_name="User instructions.pdf",
265
+ mime="application/pdf"
266
+ )
267
+
268
+ selected_model = st.selectbox(
269
+ "Select Model",
270
+ ["meta-llama/Llama-3.2-11B-Vision-Instruct","meta-llama/Meta-Llama-3-8B-Instruct-Turbo", "meta-llama/Llama-3.3-70B-Instruct", "meta-llama/Llama-3.2-3B-Instruct","meta-llama/Llama-4-Scout-17B-16E-Instruct", "meta-llama/Meta-Llama-3-8B-Instruct",
271
+ "meta-llama/Llama-3.1-70B-Instruct"],
272
+ key='model_select'
273
+ )
274
+
275
+ #################new oooo
276
+
277
+ # # Model selection dropdown
278
+ # selected_model = st.selectbox(
279
+ # "Select Model",
280
+ # [#"meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
281
+ # "meta-llama/Llama-3.2-3B-Instruct",
282
+ # "meta-llama/Llama-3.3-70B-Instruct",
283
+ # "meta-llama/Llama-3.2-3B-Instruct",
284
+ # "meta-llama/Llama-4-Scout-17B-16E-Instruct",
285
+ # "meta-llama/Meta-Llama-3-8B-Instruct",
286
+ # "meta-llama/Llama-3.1-70B-Instruct"],
287
+ # key='model_select'
288
+ # )
289
+
290
+ # @st.cache_resource # Cache the model to prevent reloading
291
+ # def load_model(model_name):
292
+ # try:
293
+ # # Optimized model loading configuration
294
+ # model = AutoModelForCausalLM.from_pretrained(
295
+ # model_name,
296
+ # torch_dtype=torch.float16, # Use half precision
297
+ # device_map="auto", # Automatic device mapping
298
+ # load_in_8bit=True, # Enable 8-bit quantization
299
+ # low_cpu_mem_usage=True, # Optimize CPU memory usage
300
+ # max_memory={0: "10GB"} # Limit GPU memory usage
301
+ # )
302
+
303
+ # tokenizer = AutoTokenizer.from_pretrained(
304
+ # model_name,
305
+ # padding_side="left",
306
+ # truncation_side="left"
307
+ # )
308
+
309
+ # return model, tokenizer
310
+
311
+ # except Exception as e:
312
+ # st.error(f"Error loading model: {str(e)}")
313
+ # return None, None
314
+
315
+ # # Load the selected model with optimizations
316
+ # if selected_model:
317
+ # model, tokenizer = load_model(selected_model)
318
+
319
+ # # Check if model loaded successfully
320
+ # if model is not None:
321
+ # st.success(f"Successfully loaded {selected_model}")
322
+ # else:
323
+ # st.warning("Please select a different model or check your hardware capabilities")
324
+
325
+ # # Function to generate text
326
+ # def generate_response(prompt, model, tokenizer):
327
+ # try:
328
+ # inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
329
+
330
+ # with torch.no_grad():
331
+ # outputs = model.generate(
332
+ # inputs["input_ids"],
333
+ # max_length=256,
334
+ # num_return_sequences=1,
335
+ # temperature=0.7,
336
+ # do_sample=True,
337
+ # pad_token_id=tokenizer.pad_token_id
338
+ # )
339
+
340
+ # response = tokenizer.decode(outputs[0], skip_special_tokens=True)
341
+ # return response
342
+
343
+ # except Exception as e:
344
+ # return f"Error generating response: {str(e)}"
345
+ # ################
346
+
347
+ # model = AutoModelForCausalLM.from_pretrained(
348
+ # "meta-llama/Meta-Llama-3-8B-Instruct",
349
+ # torch_dtype=torch.float16, # Use half precision
350
+ # device_map="auto", # Automatic device mapping
351
+ # load_in_8bit=True # Load in 8-bit precision
352
+ # )
353
+ temperature = st.slider(
354
+ "Temperature",
355
+ 0.0, 1.0, 0.7,
356
+ help="Controls randomness in generation"
357
+ )
358
+
359
+ st.button("🔄 New Conversation", on_click=reset_conversation)
360
+ with st.container():
361
+ st.markdown(f"""
362
+ <div class="sidebar-info">
363
+ <h4>Current Model: {selected_model}</h4>
364
+ <p><em>Note: Generated content may be inaccurate or false. Check important info.</em></p>
365
+ </div>
366
+ """, unsafe_allow_html=True)
367
+
368
+ feedback_url = "https://docs.google.com/forms/d/e/1FAIpQLSdZ_5mwW-pjqXHgxR0xriyVeRhqdQKgb5c-foXlYAV55Rilsg/viewform?usp=header"
369
+ st.sidebar.markdown(
370
+ f'<a href="{feedback_url}" target="_blank"><button style="width: 100%;">Feedback Form</button></a>',
371
+ unsafe_allow_html=True
372
+ )
373
+
374
+ # Display conversation
375
+ for message in st.session_state.messages:
376
+ with st.chat_message(message["role"]):
377
+ st.markdown(message["content"])
378
+
379
+ # Main content
380
+ if 'task_choice' not in st.session_state:
381
+ col1, col2 = st.columns(2)
382
+ with col1:
383
+ if st.button("📝 Data Generation", key="gen_button", help="Generate new data"):
384
+ st.session_state.task_choice = "Data Generation"
385
+ with col2:
386
+ if st.button("🏷️ Data Labeling", key="label_button", help="Label existing data"):
387
+ st.session_state.task_choice = "Data Labeling"
388
+
389
+ if "task_choice" in st.session_state:
390
+ if st.session_state.task_choice == "Data Generation":
391
+ st.header("📝 Data Generation")
392
+
393
+ # 1. Domain selection
394
+ domain_selection = st.selectbox("Domain", [
395
+ "Restaurant reviews", "E-Commerce reviews", "News", "AG News", "Tourism", "Custom"
396
+ ])
397
+
398
+ # 2. Handle custom domain input
399
+ custom_domain_valid = True # Assume valid until proven otherwise
400
+
401
+ if domain_selection == "Custom":
402
+ domain = st.text_input("Specify custom domain")
403
+ if not domain.strip():
404
+ st.error("Please specify a domain name.")
405
+ custom_domain_valid = False
406
+ else:
407
+ domain = domain_selection
408
+
409
+ # Classification type selection
410
+ classification_type = st.selectbox(
411
+ "Classification Type",
412
+ ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"]
413
+ )
414
+ # Labels setup based on classification type
415
+ #labels = []
416
+ labels = []
417
+ labels_valid = False
418
+ errors = []
419
+
420
def validate_binary_labels(labels):
    """Collect the validation problems for a pair of binary class names.

    Args:
        labels: Sequence of class-name strings; the first two are checked.

    Returns:
        A list of error messages, empty when both names are present and
        differ after trimming whitespace and lower-casing.
    """
    cleaned = [name.strip().lower() for name in labels]
    required_messages = (
        "First class name is required.",
        "Second class name is required.",
    )

    problems = []
    for position, message in enumerate(required_messages):
        if labels[position].strip() == "":
            problems.append(message)

    # The duplicate check only applies when no name is blank.
    if all(cleaned) and cleaned[0] == cleaned[1]:
        problems.append("Class names must be different.")
    return problems
431
+
432
+ if classification_type == "Sentiment Analysis":
433
+ st.write("### Sentiment Analysis Labels (Fixed)")
434
+ col1, col2, col3 = st.columns(3)
435
+ with col1:
436
+ st.text_input("First class", "Positive", disabled=True)
437
+ with col2:
438
+ st.text_input("Second class", "Negative", disabled=True)
439
+ with col3:
440
+ st.text_input("Third class", "Neutral", disabled=True)
441
+ labels = ["Positive", "Negative", "Neutral"]
442
+
443
+ elif classification_type == "Binary Classification":
444
+ st.write("### Binary Classification Labels")
445
+ col1, col2 = st.columns(2)
446
+ with col1:
447
+ label_1 = st.text_input("First class", "Positive")
448
+ with col2:
449
+ label_2 = st.text_input("Second class", "Negative")
450
+
451
+ labels = [label_1, label_2]
452
+ errors = validate_binary_labels(labels)
453
+
454
+ if errors:
455
+ st.error("\n".join(errors))
456
+ else:
457
+ st.success("Binary class names are valid and unique!")
458
+
459
+
460
+ elif classification_type == "Multi-Class Classification":
461
+ st.write("### Multi-Class Classification Labels")
462
+
463
+ default_labels_by_domain = {
464
+ "News": ["Political", "Sports", "Entertainment", "Technology", "Business"],
465
+ "AG News": ["World", "Sports", "Business", "Sci/Tech"],
466
+ "Tourism": ["Accommodation", "Transportation", "Tourist Attractions",
467
+ "Food & Dining", "Local Experience", "Adventure Activities",
468
+ "Wellness & Spa", "Eco-Friendly Practices", "Family-Friendly",
469
+ "Luxury Tourism"],
470
+ "Restaurant reviews": ["Italian", "French", "American"],
471
+ "E-Commerce reviews": ["Mobile Phones & Accessories", "Laptops & Computers","Kitchen & Dining",
472
+ "Beauty & Personal Care", "Home & Furniture", "Clothing & Fashion",
473
+ "Shoes & Handbags", "Health & Wellness", "Electronics & Gadgets",
474
+ "Books & Stationery","Toys & Games", "Sports & Fitness",
475
+ "Grocery & Gourmet Food","Watches & Accessories", "Baby Products"]
476
+ }
477
+
478
+ num_classes = st.slider("Number of classes", 3, 15, 3)
479
+
480
+ # Get defaults for selected domain, or empty list
481
+ defaults = default_labels_by_domain.get(domain, [])
482
+
483
+ labels = []
484
+ errors = []
485
+ cols = st.columns(3)
486
+
487
+ for i in range(num_classes):
488
+ with cols[i % 3]:
489
+ default_value = defaults[i] if i < len(defaults) else ""
490
+ label_input = st.text_input(f"Class {i+1}", default_value)
491
+ normalized_label = label_input.strip().title()
492
+
493
+ if not normalized_label:
494
+ errors.append(f"Class {i+1} name is required.")
495
+ else:
496
+ labels.append(normalized_label)
497
+
498
+ # Check for duplicates (case-insensitive)
499
+ if len(labels) != len(set(labels)):
500
+ errors.append("Labels names must be unique (case-insensitive, normalized to Title Case).")
501
+
502
+ # Show validation results
503
+ if errors:
504
+ for error in errors:
505
+ st.error(error)
506
+ else:
507
+ st.success("All Labels names are valid and unique!")
508
+ labels_valid = not errors # Will be True only if there are no label errors
509
+
510
+ ##############
511
+ #new 22/4/2025
512
+ # add additional attributes
513
+ add_attributes = st.checkbox("Add additional attributes (optional)")
514
+ additional_attributes = []
515
+
516
+ if add_attributes:
517
+ num_attributes = st.slider("Number of attributes to add", 1, 5, 1)
518
+ for i in range(num_attributes):
519
+ st.markdown(f"#### Attribute {i+1}")
520
+ attr_name = st.text_input(f"Name of attribute {i+1}", key=f"attr_name_{i}")
521
+ attr_topics = st.text_input(f"Topics (comma-separated) for {attr_name}", key=f"attr_topics_{i}")
522
+ if attr_name and attr_topics:
523
+ topics_list = [topic.strip() for topic in attr_topics.split(",") if topic.strip()]
524
+ additional_attributes.append({"attribute": attr_name, "topics": topics_list})
525
+
526
+ ################
527
+
528
+ # Generation parameters
529
+ col1, col2 = st.columns(2)
530
+ with col1:
531
+ min_words = st.number_input("Min words", 1, 100, 20)
532
+ with col2:
533
+ max_words = st.number_input("Max words", min_words, 100, 50)
534
+
535
+ # Few-shot examples
536
+ use_few_shot = st.toggle("Use few-shot examples")
537
+ few_shot_examples = []
538
+ if use_few_shot:
539
+ num_examples = st.slider("Number of few-shot examples", 1, 10, 1)
540
+ for i in range(num_examples):
541
+ with st.expander(f"Example {i+1}"):
542
+ content = st.text_area(f"Content", key=f"few_shot_content_{i}")
543
+ label = st.selectbox(f"Label", labels, key=f"few_shot_label_{i}")
544
+ if content and label:
545
+ few_shot_examples.append({"content": content, "label": label})
546
+
547
+ num_to_generate = st.number_input("Number of examples", 1, 100, 10)
548
+ #sytem role after
549
+ # System role customization
550
+ #default_system_role = f"You are a professional {classification_type} expert, your role is to generate text examples for {domain} domain. Always generate unique diverse examples and do not repeat the generated data. The generated text should be between {min_words} to {max_words} words long."
551
+ # System role customization
552
+ default_system_role = (
553
+ f"You are a seasoned expert in {classification_type}, specializing in the {domain} domain. "
554
+ f" Your primary responsibility is to generate high-quality, diverse, and unique text examples "
555
+ f"tailored to this domain. Please ensure that each example adheres to the specified length "
556
+ f"requirements, ranging from {min_words} to {max_words} words, and avoid any repetition in the generated content."
557
+ )
558
+ system_role = st.text_area("Modify System Role (optional)",
559
+ value=default_system_role,
560
+ key="system_role_input")
561
+ st.session_state['system_role'] = system_role if system_role else default_system_role
562
+ # Labels initialization
563
+ #labels = []
564
+
565
+
566
+ user_prompt = st.text_area("User Prompt (optional)")
567
+
568
+ # Updated prompt template including system role
569
+ prompt_template = PromptTemplate(
570
+ input_variables=["system_role", "classification_type", "domain", "num_examples",
571
+ "min_words", "max_words", "labels", "user_prompt", "few_shot_examples", "additional_attributes"],
572
+ template=(
573
+ "{system_role}\n"
574
+ "- Use the following parameters:\n"
575
+ "- Generate {num_examples} examples\n"
576
+ "- Each example should be between {min_words} to {max_words} words long\n"
577
+ "- Use these labels: {labels}.\n"
578
+ "- Use the following additional attributes:\n"
579
+ "- {additional_attributes}\n"
580
+ "- Generate the examples in this format: 'Example text. Label: label'\n"
581
+ "- Do not include word counts or any additional information\n"
582
+ "- Always use your creativity and intelligence to generate unique and diverse text data\n"
583
+ "- In sentiment analysis, ensure that the sentiment classification is clearly identified as Positive, Negative, or Neutral. Do not leave the sentiment ambiguous.\n"
584
+ "- In binary sentiment analysis, classify text strictly as either Positive or Negative. Do not include or imply Neutral as an option.\n"
585
+ "- Write unique examples every time.\n"
586
+ "- DO NOT REPEAT your gnerated text. \n"
587
+ "- For each Output, describe it once and move to the next.\n"
588
+ "- List each Output only once, and avoid repeating details.\n"
589
+ "- Additional instructions: {user_prompt}\n\n"
590
+ "- Use the following examples as a reference in the generation process\n\n {few_shot_examples}. \n"
591
+ "- Think step by step, generate numbered examples, and check each newly generated example to ensure it has not been generated before. If it has, modify it"
592
+
593
+ )
594
+ )
595
+ # template=(
596
+ # "{system_role}\n"
597
+ # "- Use the following parameters:\n"
598
+ # "- Generate {num_examples} examples\n"
599
+ # "- Each example should be between {min_words} to {max_words} words long\n"
600
+ # "- Use these labels: {labels}.\n"
601
+ # "- Use the following additional attributes:\n"
602
+ # "{additional_attributes}\n"
603
+ # #"- Format each example like this: 'Example text. Label: [label]. Attribute1: [topic1]. Attribute2: [topic2]'\n"
604
+ # "- Generate the examples in this format: 'Example text. Label: label'\n"
605
+ # "- Additional instructions: {user_prompt}\n"
606
+ # "- Use these few-shot examples if provided:\n{few_shot_examples}\n"
607
+ # "- Think step by step and ensure examples are unique and not repeated."
608
+ # )
609
+ # )
610
+ ##########new 22/4/2025
611
+ formatted_attributes = "\n".join([
612
+ f"- {attr['attribute']}: {', '.join(attr['topics'])}" for attr in additional_attributes
613
+ ])
614
+ #######################
615
+
616
+ # Generate system prompt
617
+ system_prompt = prompt_template.format(
618
+ system_role=st.session_state['system_role'],
619
+ classification_type=classification_type,
620
+ domain=domain,
621
+ num_examples=num_to_generate,
622
+ min_words=min_words,
623
+ max_words=max_words,
624
+ labels=", ".join(labels),
625
+ user_prompt=user_prompt,
626
+ few_shot_examples="\n".join([f"{ex['content']}\nLabel: {ex['label']}" for ex in few_shot_examples]) if few_shot_examples else "",
627
+ additional_attributes=formatted_attributes
628
+ )
629
+
630
+
631
+ # Store system prompt in session state
632
+ st.session_state['system_prompt'] = system_prompt
633
+
634
+ # Display system prompt
635
+ st.write("System Prompt:")
636
+ st.text_area("Current System Prompt", value=st.session_state['system_prompt'],
637
+ height=400, disabled=True)
638
+
639
+
640
+ if st.button("🎯 Generate Examples"):
641
+ #
642
+ errors = []
643
+ if domain_selection == "Custom" and not domain.strip():
644
+ st.warning("Custom domain name is required.")
645
+ elif len(labels) != len(set(labels)):
646
+ st.warning("Class names must be unique.")
647
+ elif any(not lbl.strip() for lbl in labels):
648
+ st.warning("All class labels must be filled in.")
649
+ #else:
650
+ #st.success("Generating examples for domain: {domain}")
651
+
652
+ #if not custom_domain_valid:
653
+ #st.warning("Custom domain name is required.")
654
+ #elif not labels_valid:
655
+ #st.warning("Please fix the label errors before generating examples.")
656
+ #else:
657
+ # Proceed to generate examples
658
+ #st.success(f"Generating examples for domain: {domain}")
659
+
660
+ with st.spinner("Generating examples..."):
661
+ try:
662
+ stream = client.chat.completions.create(
663
+ model=selected_model,
664
+ messages=[{"role": "system", "content": st.session_state['system_prompt']}],
665
+ temperature=temperature,
666
+ stream=True,
667
+ #max_tokens=80000,
668
+ max_tokens=4000,
669
+ top_p=0.9,
670
+ # repetition_penalty=1.2,
671
+ #frequency_penalty=0.5, # Discourages frequent words
672
+ #presence_penalty=0.6,
673
+ )
674
+ #st.session_state['system_prompt'] = system_prompt
675
+ #new 24 march
676
+ st.session_state.messages.append({"role": "user", "content": system_prompt})
677
+ # # ####################
678
+ response = st.write_stream(stream)
679
+ st.session_state.messages.append({"role": "assistant", "content": response})
680
+ # Initialize session state variables if they don't exist
681
+ if 'system_prompt' not in st.session_state:
682
+ st.session_state.system_prompt = system_prompt
683
+
684
+ if 'response' not in st.session_state:
685
+ st.session_state.response = response
686
+
687
+ if 'generated_examples' not in st.session_state:
688
+ st.session_state.generated_examples = []
689
+
690
+ if 'generated_examples_csv' not in st.session_state:
691
+ st.session_state.generated_examples_csv = None
692
+
693
+ if 'generated_examples_json' not in st.session_state:
694
+ st.session_state.generated_examples_json = None
695
+
696
+ # Parse response and generate examples list
697
+ examples_list = []
698
+ for line in response.split('\n'):
699
+ if line.strip():
700
+ parts = line.rsplit('Label:', 1)
701
+ if len(parts) == 2:
702
+ text = parts[0].strip()
703
+ label = parts[1].strip()
704
+ if text and label:
705
+ examples_list.append({
706
+ 'text': text,
707
+ 'label': label,
708
+ 'system_prompt': st.session_state.system_prompt,
709
+ 'system_role': st.session_state.system_role,
710
+ 'task_type': 'Data Generation',
711
+ 'Use few-shot example?': 'Yes' if use_few_shot else 'No',
712
+ })
713
+
714
+ # example_dict = {
715
+ # 'text': text,
716
+ # 'label': label,
717
+ # 'system_prompt': st.session_state.system_prompt,
718
+ # 'system_role': st.session_state.system_role,
719
+ # 'task_type': 'Data Generation',
720
+ # 'Use few-shot example?': 'Yes' if use_few_shot else 'No',
721
+ # }
722
+ # for attr in additional_attributes:
723
+ # example_dict[attr['attribute']] = random.choice(attr['topics'])
724
+
725
+ # examples_list.append(example_dict)
726
+
727
+
728
+ if examples_list:
729
+ # Update session state with new data
730
+ st.session_state.generated_examples = examples_list
731
+
732
+ # Generate CSV and JSON data
733
+ df = pd.DataFrame(examples_list)
734
+ st.session_state.generated_examples_csv = df.to_csv(index=False).encode('utf-8')
735
+ st.session_state.generated_examples_json = json.dumps(examples_list, indent=2).encode('utf-8')
736
+
737
+ # Vertical layout with centered "or" between buttons
738
+ st.download_button(
739
+ "📥 Download Generated Examples (CSV)",
740
+ st.session_state.generated_examples_csv,
741
+ "generated_examples.csv",
742
+ "text/csv",
743
+ key='download-csv-persistent'
744
+ )
745
+
746
+ # Add space and center the "or"
747
+ st.markdown("""
748
+ <div style='text-align: left; margin:15px 0; font-weight: 600; color: #666;'>. . . . . . or</div>
749
+ """, unsafe_allow_html=True)
750
+
751
+ st.download_button(
752
+ "📥 Download Generated Examples (JSON)",
753
+ st.session_state.generated_examples_json,
754
+ "generated_examples.json",
755
+ "application/json",
756
+ key='download-json-persistent'
757
+ )
758
+ # # Display the labeled examples
759
+ # st.markdown("##### 📋 Labeled Examples Preview")
760
+ # st.dataframe(df, use_container_width=True)
761
+
762
+ if st.button("Continue"):
763
+ if follow_up == "Generate more examples":
764
+ st.experimental_rerun()
765
+ elif follow_up == "Data Labeling":
766
+ st.session_state.task_choice = "Data Labeling"
767
+ st.experimental_rerun()
768
+
769
+ except Exception as e:
770
+ st.error("An error occurred during generation.")
771
+ st.error(f"Details: {e}")
772
+
773
+
774
+ # Labeling process
775
+ elif st.session_state.task_choice == "Data Labeling":
776
+ st.header("🏷️ Data Labeling")
777
+
778
# Domain picker: a fixed list of domains plus a free-text "Custom" option.
domain_selection = st.selectbox(
    "Domain",
    ["Restaurant reviews", "E-Commerce reviews", "News", "AG News", "Tourism", "Custom"],
)

# A custom domain is typed by the user; any other choice is used as-is.
is_custom_domain = domain_selection == "Custom"
domain = st.text_input("Specify custom domain") if is_custom_domain else domain_selection

# Only a custom domain can be invalid, and only when it is blank.
custom_domain_valid = not (is_custom_domain and not domain.strip())
if not custom_domain_valid:
    st.error("Please specify a domain name.")
789
+
790
+
791
+ # Classification type selection
792
# Task picker: each classification type below gets its own label-definition UI.
classification_type = st.selectbox(
    "Classification Type",
    ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification", "Named Entity Recognition (NER)"]
)

# `labels` ends up holding the label names passed to the prompt; `errors`
# collects validation messages for the user-editable label inputs.
labels = []
errors = []

if classification_type == "Sentiment Analysis":
    # Fixed label set: the inputs are disabled and shown only for reference.
    st.write("### Sentiment Analysis Labels (Fixed)")
    col1, col2, col3 = st.columns(3)
    with col1:
        label_1 = st.text_input("First class", "Positive", disabled=True)
    with col2:
        label_2 = st.text_input("Second class", "Negative", disabled=True)
    with col3:
        label_3 = st.text_input("Third class", "Neutral", disabled=True)
    labels = ["Positive", "Negative", "Neutral"]

elif classification_type == "Binary Classification":
    st.write("### Binary Classification Labels")
    col1, col2 = st.columns(2)

    # Strip once, at input time, so validation and the prompt see the same text.
    with col1:
        label_1 = st.text_input("First class", "Positive").strip()
    with col2:
        label_2 = st.text_input("Second class", "Negative").strip()
    labels = [label_1, label_2]

    # Both names are required ...
    if not label_1:
        errors.append("First class name is required.")
    if not label_2:
        errors.append("Second class name is required.")

    # ... and must differ, ignoring case.
    if label_1 and label_2 and label_1.lower() == label_2.lower():
        errors.append("Class names must be different.")

    if errors:
        for error in errors:
            st.error(error)
    else:
        st.success("Binary class names are valid and unique!")

elif classification_type == "Multi-Class Classification":
    st.write("### Multi-Class Classification Labels")

    # Suggested labels per built-in domain, used to pre-fill the inputs.
    default_labels_by_domain = {
        "News": ["Political", "Sports", "Entertainment", "Technology", "Business"],
        "AG News": ["World", "Sports", "Business", "Sci/Tech"],
        "Tourism": ["Accommodation", "Transportation", "Tourist Attractions",
                    "Food & Dining", "Local Experience", "Adventure Activities",
                    "Wellness & Spa", "Eco-Friendly Practices", "Family-Friendly",
                    "Luxury Tourism"],
        "Restaurant reviews": ["Italian", "French", "American"],
        "E-Commerce reviews": ["Mobile Phones & Accessories", "Laptops & Computers", "Kitchen & Dining",
                               "Beauty & Personal Care", "Home & Furniture", "Clothing & Fashion",
                               "Shoes & Handbags", "Health & Wellness", "Electronics & Gadgets",
                               "Books & Stationery", "Toys & Games", "Sports & Fitness",
                               "Grocery & Gourmet Food", "Watches & Accessories", "Baby Products"]
    }

    num_classes = st.slider("Select the number of classes (labels)", min_value=3, max_value=10, value=3)

    # Custom or unknown domains have no suggestions and start with blank inputs.
    defaults = default_labels_by_domain.get(domain, [])

    cols = st.columns(3)  # label inputs are laid out three per row

    for i in range(num_classes):
        with cols[i % 3]:
            default_value = defaults[i] if i < len(defaults) else ""
            label_input = st.text_input(f"Label {i + 1}", default_value)
            # Labels are normalized to Title Case before use.
            normalized_label = label_input.strip().title()

            if not normalized_label:
                errors.append(f"Label {i + 1} is required.")
            else:
                labels.append(normalized_label)

    # Case-insensitive duplicate check over the labels that were filled in.
    normalized_set = {label.lower() for label in labels}
    if len(labels) != len(normalized_set):
        errors.append("Label names must be unique (case-insensitive).")

    if errors:
        for error in errors:
            st.error(error)
    else:
        st.success("All label names are valid and unique!")

elif classification_type == "Named Entity Recognition (NER)":
    # Built-in few-shot examples for NER, stored as content/label dicts.
    use_few_shot = True
    few_shot_examples = [
        {"content": "Mount Everest is the tallest mountain in the world.", "label": "LOC: Mount Everest"},
        {"content": "The President of the United States visited Paris last summer.", "label": "GPE: United States, GPE: Paris"},
        {"content": "Amazon is expanding its offices in Berlin.", "label": "ORG: Amazon, GPE: Berlin"},
        {"content": "J.K. Rowling wrote the Harry Potter books.", "label": "PERSON: J.K. Rowling"},
        {"content": "Apple was founded in California in 1976.", "label": "ORG: Apple, GPE: California, DATE: 1976"},
        {"content": "The Nile is the longest river in Africa.", "label": "LOC: Nile, GPE: Africa"},
        {"content": "He arrived at 3 PM for the meeting.", "label": "TIME: 3 PM"},
        {"content": "She bought the dress for $200.", "label": "MONEY: $200"},
        {"content": "The event is scheduled for July 4th.", "label": "DATE: July 4th"},
        {"content": "The World Health Organization is headquartered in Geneva.", "label": "ORG: World Health Organization, GPE: Geneva"}
    ]

    st.write("### Named Entity Recognition (NER) Entities")

    # Standard entity types, written as "TYPE - description".
    ner_entities = [
        "PERSON - Names of people, fictional characters, historical figures",
        "ORG - Companies, institutions, agencies, teams",
        "LOC - Physical locations (mountains, oceans, etc.)",
        "GPE - Countries, cities, states, political regions",
        "DATE - Calendar dates, years, centuries",
        "TIME - Times, durations",
        "MONEY - Monetary values with currency"
    ]

    # Optional user-defined entity types, rendered in the same format.
    custom_ner_entities = []
    if st.checkbox("Add custom NER entities?"):
        num_custom_ner = st.slider("Number of custom NER entities", 1, 10, 1)
        for i in range(num_custom_ner):
            st.markdown(f"#### Custom Entity {i+1}")
            custom_type = st.text_input(f"Entity type {i+1}", key=f"custom_ner_type_{i}")
            custom_description = st.text_input(f"Description for {custom_type}", key=f"custom_ner_desc_{i}")
            # An entity is only offered once both fields are filled in.
            if custom_type and custom_description:
                custom_ner_entities.append(f"{custom_type.upper()} - {custom_description}")

    all_ner_options = ner_entities + custom_ner_entities

    # All standard types are pre-selected; custom ones must be ticked by the user.
    selected_entities = st.multiselect(
        "Select entities to recognize",
        all_ner_options,
        default=ner_entities
    )

    # Keep only the type name, i.e. the part before " - ".
    labels = [entity.split(" - ")[0].strip() for entity in selected_entities]

    if not labels:
        st.warning("Please select at least one entity type.")
        labels = ["PERSON"]  # fall back to a single default type

# The flag now reflects the outcome for every task type, not only multi-class:
# there must be at least one label and no validation error.
labels_valid = bool(labels) and not errors
984
+
985
+ ##########
986
+
987
+ # # Extract just the entity type (before the dash)
988
+ # labels = [entity.split(" - ")[0] for entity in selected_entities]
989
+
990
+ # if not labels:
991
+ # st.warning("Please select at least one entity type")
992
+ # labels = ["PERSON"] # Default if nothing selected
993
+
994
+
995
+
996
+
997
+
998
+ #NNew edit
999
+ # elif classification_type == "Multi-Class Classification":
1000
+ # st.write("### Multi-Class Classification Labels")
1001
+
1002
+ # default_labels_by_domain = {
1003
+ # "News": ["Political", "Sports", "Entertainment", "Technology", "Business"],
1004
+ # "AG News": ["World", "Sports", "Business", "Sci/Tech"],
1005
+ # "Tourism": ["Accommodation", "Transportation", "Tourist Attractions",
1006
+ # "Food & Dining", "Local Experience", "Adventure Activities",
1007
+ # "Wellness & Spa", "Eco-Friendly Practices", "Family-Friendly",
1008
+ # "Luxury Tourism"],
1009
+ # "Restaurant reviews": ["Italian", "French", "American"]
1010
+ # }
1011
+ # num_classes = st.slider("Number of classes", 3, 10, 3)
1012
+
1013
+ # # Get defaults for selected domain, or empty list
1014
+ # defaults = default_labels_by_domain.get(domain, [])
1015
+
1016
+ # labels = []
1017
+ # errors = []
1018
+ # cols = st.columns(3)
1019
+
1020
+ # for i in range(num_classes):
1021
+ # with cols[i % 3]:
1022
+ # default_value = defaults[i] if i < len(defaults) else ""
1023
+ # label_input = st.text_input(f"Class {i+1}", default_value)
1024
+ # normalized_label = label_input.strip().title()
1025
+
1026
+ # if not normalized_label:
1027
+ # errors.append(f"Class {i+1} name is required.")
1028
+ # else:
1029
+ # labels.append(normalized_label)
1030
+
1031
+ # # Check for duplicates (case-insensitive)
1032
+ # if len(labels) != len(set(labels)):
1033
+ # errors.append("Labels names must be unique (case-insensitive, normalized to Title Case).")
1034
+
1035
+ # # Show validation results
1036
+ # if errors:
1037
+ # for error in errors:
1038
+ # st.error(error)
1039
+ # else:
1040
+ # st.success("All Labels names are valid and unique!")
1041
+ # labels_valid = not errors # Will be True only if there are no label errors
1042
+
1043
+
1044
+
1045
+
1046
+ # else:
1047
+ # num_classes = st.slider("Number of classes", 3, 23, 3, key="label_num_classes")
1048
+ # labels = []
1049
+ # cols = st.columns(3)
1050
+ # for i in range(num_classes):
1051
+ # with cols[i % 3]:
1052
+ # label = st.text_input(f"Class {i+1}", f"Class_{i+1}", key=f"label_class_{i}")
1053
+ # labels.append(label)
1054
+
1055
# Optional few-shot examples entered by the user. Each completed example is
# stored as one "content\nLabel: label" string.
use_few_shot = st.toggle("Use few-shot examples for labeling")
few_shot_examples = []

if use_few_shot:
    num_few_shot = st.slider("Number of few-shot examples", 1, 10, 1)
    for shot_no in range(1, num_few_shot + 1):
        slot = shot_no - 1  # widget keys are zero-based
        with st.expander(f"Few-shot Example {shot_no}"):
            content = st.text_area("Content", key=f"label_few_shot_content_{slot}")
            label = st.selectbox("Label", labels, key=f"label_few_shot_label_{slot}")
            # Skip slots the user has not completed.
            if not (content and label):
                continue
            few_shot_examples.append(f"{content}\nLabel: {label}")
1065
+
1066
# Collect the texts to label: one text area per example for small batches,
# a single one-example-per-line box for larger ones.
num_examples = st.number_input("Number of examples to classify", 1, 100, 1)

examples_to_classify = []
if num_examples > 10:
    examples_text = st.text_area(
        "Enter examples (one per line)",
        height=300,
        help="Enter each example on a new line"
    )
    if examples_text:
        stripped_lines = (line.strip() for line in examples_text.split('\n'))
        # Drop blank lines and ignore anything beyond the requested count.
        examples_to_classify = [line for line in stripped_lines if line][:num_examples]
else:
    for slot in range(num_examples):
        entry = st.text_area(f"Example {slot+1}", key=f"example_{slot}")
        if entry:
            examples_to_classify.append(entry)
1084
+
1085
+ #New Wedyan
1086
+ #default_system_role = f"You are a professional {classification_type} expert, your role is to classify the provided text examples for {domain} domain."
1087
+ # System role customization
1088
# Default system role, built from the chosen task type and domain.
default_system_role = " ".join([
    f"You are a highly skilled {classification_type} expert.",
    f"Your task is to accurately classify the provided text examples within the {domain} domain.",
    "Ensure that all classifications are precise, context-aware, and aligned with domain-specific standards and best practices.",
])

# The user may edit the role; an emptied box falls back to the default.
system_role = st.text_area(
    "Modify System Role (optional)",
    value=default_system_role,
    key="system_role_input",
)
st.session_state['system_role'] = system_role or default_system_role

user_prompt = st.text_area("User prompt (optional)", key="label_instructions")

# Few-shot examples separated by blank lines; empty when none were given.
few_shot_text = ""
if few_shot_examples:
    few_shot_text = "\n\n".join(few_shot_examples)

# Examples to label, numbered from 1, one per line.
examples_text = "\n".join(
    f"{position}. {ex}" for position, ex in enumerate(examples_to_classify, start=1)
)
1104
+
1105
+ # Customize prompt template based on classification type
1106
# Select the prompt skeleton for the chosen task. The NER template asks for
# entities grouped under each entity type; every other task uses the
# "text. Label: [label]" classification template.
if classification_type == "Named Entity Recognition (NER)":
    template_variables = ["system_role", "labels", "few_shot_examples", "examples", "domain", "user_prompt"]
    template_text = "".join([
        "{system_role}\n",
        "- You are an expert at Named Entity Recognition (NER) for domain: {domain}.\n",
        "- Use these entity types: {labels}.\n\n",
        "### Output Format:\n",
        "Return each example followed by the entities you found in this format:\n",
        "'Example text.\nEntity types:\n",
        "Then group the entities under each label like this:\n",
        "\nPERSON – Angela Merkel, John Smith\n",
        "ORG – Google, United Nations\n",
        "DATE – January 1st, 2023\n",
        "... and so on.\n\n",
        "Each new entities group should be in a new line.\n",
        "If entity type {labels} is not found, do not write it in your response.\n",
        "- Do NOT output them inline after the text.\n",
        "- Do NOT repeat the sentence.\n",
        "- If no entities are found for a type, skip it.\n",
        "- Keep the format consistent.\n\n",
        "User Instructions:\n{user_prompt}\n\n",
        "Few-shot Examples:\n{few_shot_examples}\n\n",
        "Examples to analyze:\n{examples}",
    ])
else:
    template_variables = ["system_role", "classification_type", "labels", "few_shot_examples", "examples","domain", "user_prompt"]
    template_text = "".join([
        "{system_role}\n",
        "- Use the following instructions:\n",
        "- Use the following labels: {labels}.\n",
        "- Return the classified text followed by the label in this format: 'text. Label: [label]'\n",
        "- Do not provide any additional information or explanations\n",
        "- User prompt:\n {user_prompt}\n\n",
        "- Use user provided examples as guidence in the classification process:\n\n {few_shot_examples}\n",
        "- Examples to classify:\n{examples}\n\n",
        "- Think step by step then classify the examples",
    ])

label_prompt_template = PromptTemplate(
    input_variables=template_variables,
    template=template_text,
)
1224
+
1225
+ # Check if few_shot_examples is already a formatted string
1226
+ # Check if few_shot_examples is already a formatted string
1227
# Render the few-shot examples as one text block, whatever shape they arrive in.
# The "Entity types" layout is applied only to content/label dicts. Previously
# it was applied unconditionally after this chain, which discarded the result
# above and raised TypeError for plain-string examples.
if isinstance(few_shot_examples, str):
    # Already a formatted block.
    formatted_few_shot = few_shot_examples
elif isinstance(few_shot_examples, list) and all(isinstance(ex, str) for ex in few_shot_examples):
    # Pre-formatted strings (also covers the empty list, which yields "").
    formatted_few_shot = "\n".join(few_shot_examples)
elif isinstance(few_shot_examples, list) and all(
    isinstance(ex, dict) and 'content' in ex and 'label' in ex for ex in few_shot_examples
):
    # Structured examples: text first, then the entities under "Entity types".
    formatted_few_shot = "\n\n".join(
        f"{ex['content']}\n\nEntity types\n{ex['label']}" for ex in few_shot_examples
    )
else:
    # Unknown shape: leave the few-shot section empty rather than fail.
    formatted_few_shot = ""
1254
+
1255
+ ###########
1256
# Fill the template with the current inputs and show the result read-only.
# NOTE(review): this preview uses the un-numbered examples and
# formatted_few_shot, while the "Label Data" handler formats the template with
# examples_text and few_shot_text — confirm which variant is intended.
preview_fields = {
    "system_role": st.session_state['system_role'],
    "classification_type": classification_type,
    "domain": domain,
    "examples": "\n".join(examples_to_classify),
    "labels": ", ".join(labels),
    "user_prompt": user_prompt,
    "few_shot_examples": formatted_few_shot,
}
system_prompt = label_prompt_template.format(**preview_fields)

# Store the rendered prompt in session state, then display it.
st.session_state['system_prompt'] = system_prompt
st.write("System Prompt:")
st.text_area(
    "System Prompt",
    value=st.session_state['system_prompt'],
    height=300,
    disabled=True,
)
1273
+
1274
+
1275
+
1276
+ if st.button("🏷️ Label Data"):
1277
+ if examples_to_classify:
1278
+ with st.spinner("Labeling data..."):
1279
+ #Generate the system prompt based on classification type
1280
# Build the prompt that is sent to the model. Both task families fill the same
# fields; only the non-NER call also passes `classification_type`.
request_fields = {
    "system_role": st.session_state['system_role'],
    "labels": ", ".join(labels),
    "domain": domain,
    "few_shot_examples": few_shot_text,
    "examples": examples_text,
    "user_prompt": user_prompt,
}
if classification_type != "Named Entity Recognition (NER)":
    request_fields["classification_type"] = classification_type

system_prompt = label_prompt_template.format(**request_fields)
1362
+ try:
1363
# Send the prompt as a single system message and request a streamed reply.
request_messages = [{"role": "system", "content": system_prompt}]
stream = client.chat.completions.create(
    model=selected_model,
    messages=request_messages,
    temperature=temperature,
    stream=True,
    max_tokens=4000,
    top_p=0.9,
)

# Log the prompt in the chat history (recorded there under the "user" role).
st.session_state.messages.append({"role": "user", "content": system_prompt})

# Render the reply as it streams in, then log the complete text.
response = st.write_stream(stream)
st.session_state.messages.append({"role": "assistant", "content": response})
1379
+ # Display the labeled examples
1380
+ # # Optional: If you want to add it as a chat-style message log
1381
+ # preview_str = st.session_state.labeled_preview.to_markdown(index=False)
1382
+ # st.session_state.messages.append({"role": "assistant", "content": f"Here is a preview of the labeled examples:\n\n{preview_str}"})
1383
+
1384
+
1385
+ # # Stream response and append assistant message
1386
+ # #14/4/2024
1387
+ # response = st.write_stream(stream)
1388
+ # st.session_state.messages.append({"role": "assistant", "content": response})
1389
+
1390
+ # Initialize session state variables if they don't exist
1391
# Record the prompt that was sent and the reply that came back for THIS run.
# The previous "only if missing" guards kept the first response forever and
# never stored the prompt actually sent.
st.session_state.system_prompt = system_prompt
st.session_state.response = response

# Placeholders for the data-generation outputs are only created when absent,
# so existing generated data is left untouched.
for state_key, initial_value in (
    ('generated_examples', []),
    ('generated_examples_csv', None),
    ('generated_examples_json', None),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value
1405
+
1406
+
1407
+
1408
+
1409
+ # Save labeled examples to CSV
1410
+ #new 14/4/2025
1411
+ #labeled_examples = []
1412
+ # if classification_type == "Named Entity Recognition (NER)":
1413
+ # labeled_examples = []
1414
+ # for line in response.split('\n'):
1415
+ # if line.strip():
1416
+ # parts = line.rsplit('Entities:', 1)
1417
+ # if len(parts) == 2:
1418
+ # text = parts[0].strip()
1419
+ # entities = parts[1].strip()
1420
+ # if text and entities:
1421
+ # labeled_examples.append({
1422
+ # 'text': text,
1423
+ # 'entities': entities,
1424
+ # 'system_prompt': st.session_state.system_prompt,
1425
+ # 'system_role': st.session_state.system_role,
1426
+ # 'task_type': 'Named Entity Recognition (NER)',
1427
+ # 'Use few-shot example?': 'Yes' if use_few_shot else 'No',
1428
+ # })
1429
+
1430
+ #new 22/4/2025
1431
# Turn the model reply into export records.
few_shot_flag = 'Yes' if use_few_shot else 'No'
prompt_used = st.session_state.system_prompt
role_used = st.session_state.system_role

if classification_type == "Named Entity Recognition (NER)":
    # NER output is kept whole: a single record holding the full reply.
    labeled_examples = [{
        'ner_output': response.strip(),
        'system_prompt': prompt_used,
        'system_role': role_used,
        'task_type': 'Named Entity Recognition (NER)',
        'Use few-shot example?': few_shot_flag,
    }]
else:
    # Classification output is parsed line by line, splitting each line at its
    # last "Label:" marker.
    labeled_examples = []
    for raw_line in response.split('\n'):
        text_part, marker, label_part = raw_line.rpartition('Label:')
        text = text_part.strip()
        label = label_part.strip()
        # Ignore lines without the marker or with an empty side.
        if marker and text and label:
            labeled_examples.append({
                'text': text,
                'label': label,
                'system_prompt': prompt_used,
                'system_role': role_used,
                'task_type': 'Data Labeling',
                'Use few-shot example?': few_shot_flag,
            })
1461
+ # Save and provide download options
1462
# Offer the labeled records for download and preview them, if there are any.
if labeled_examples:
    st.session_state.labeled_examples = labeled_examples

    # CSV export: one row per record.
    df = pd.DataFrame(labeled_examples)
    st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')

    # JSON export: the records plus a metadata header describing the run.
    st.session_state.labeled_examples_json = json.dumps({
        "metadata": {
            "domain": domain,
            "labels": labels,
            "used_few_shot": use_few_shot,
            # Report the task that was actually run; this was hard-coded to
            # NER for every task type before.
            "task_type": classification_type,
            "timestamp": datetime.now().isoformat()
        },
        "examples": labeled_examples
    }, indent=2).encode('utf-8')

    st.download_button(
        "📥 Download Labeled Examples (CSV)",
        st.session_state.labeled_examples_csv,
        "labeled_examples.csv",
        "text/csv",
        key='download-labeled-csv'
    )

    st.markdown("""
    <div style='text-align: left; margin:15px 0; font-weight: 600; color: #666;'>. . . . . . or</div>
    """, unsafe_allow_html=True)

    st.download_button(
        "📥 Download Labeled Examples (JSON)",
        st.session_state.labeled_examples_json,
        "labeled_examples.json",
        "application/json",
        key='download-labeled-json'
    )

    # Tabular preview of the same records.
    st.markdown("##### 📋 Labeled Examples Preview")
    st.dataframe(df, use_container_width=True)
1527
+ # Display section
1528
+ #st.markdown("### 📋 Labeled Examples Preview")
1529
+ #st.dataframe(st.session_state.labeled_preview, use_container_width=True)
1530
+
1531
+
1532
+
1533
+ # if labeled_examples:
1534
+ # df = pd.DataFrame(labeled_examples)
1535
+ # csv = df.to_csv(index=False).encode('utf-8')
1536
+ # st.download_button(
1537
+ # "📥 Download Labeled Examples",
1538
+ # csv,
1539
+ # "labeled_examples.csv",
1540
+ # "text/csv",
1541
+ # key='download-labeled-csv'
1542
+ # )
1543
+ # # Add space and center the "or"
1544
+ # st.markdown("""
1545
+ # <div style='text-align: left; margin:15px 0; font-weight: 600; color: #666;'>. . . . . . or</div>
1546
+ # """, unsafe_allow_html=True)
1547
+
1548
+ # if labeled_examples:
1549
+ # df = pd.DataFrame(labeled_examples)
1550
+ # csv = df.to_csv(index=False).encode('utf-8')
1551
+ # st.download_button(
1552
+ # "📥 Download Labeled Examples",
1553
+ # csv,
1554
+ # "labeled_examples.json",
1555
+ # "text/json",
1556
+ # key='download-labeled-JSON'
1557
+ # )
1558
+
1559
# Follow-up step: ask what to do after labeling, then act on "Continue".
#
# The radio below has to exist: the "Continue" handler reads `follow_up`, and
# with the radio commented out the click raised NameError (reported to the
# user as a generic labeling error by the surrounding `except`).
st.markdown("---")
follow_up = st.radio(
    "What would you like to do next?",
    ["Label more data", "Data Generation"],
    key="labeling_follow_up",
)

if st.button("Continue"):
    # `st.experimental_rerun` is the legacy name of `st.rerun`; prefer the
    # current API and fall back so older Streamlit installs keep working.
    rerun = getattr(st, "rerun", None) or st.experimental_rerun

    if follow_up == "Label more data":
        # Clear the pending inputs so the user starts a fresh labeling round.
        st.session_state.examples_to_classify = []
        rerun()
    elif follow_up == "Data Generation":
        # The original assigned "Data Labeling" here, which kept the user on
        # the current task instead of switching to generation.
        # NOTE(review): assumes the generation task is identified by the
        # string "Data Generation" wherever task_choice is read — confirm.
        st.session_state.task_choice = "Data Generation"
        rerun()
1574
+
1575
+ except Exception as e:
1576
+ st.error("An error occurred during labeling.")
1577
+ st.error(f"Details: {e}")
1578
+ else:
1579
+ st.warning("Please enter at least one example to classify.")
1580
+
1581
+ #st.session_state.messages.append({"role": "assistant", "content": response})
1582
+
1583
+
1584
+
1585
+
1586
# Page footer: a horizontal rule followed by a centered attribution line.
footer_html = """
    <div style='text-align: center'>
        <p>Made with ❤️ by Wedyan AlSakran 2025</p>
    </div>
    """

st.markdown("---")
# Raw HTML is required for the centered <div>, hence unsafe_allow_html.
st.markdown(footer_html, unsafe_allow_html=True)