import streamlit as st
import pandas as pd
import os
import json
import base64
import random
from streamlit_pdf_viewer import pdf_viewer
from langchain.prompts import PromptTemplate
from datetime import datetime
from pathlib import Path
from openai import OpenAI
from dotenv import load_dotenv
import warnings
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
warnings.filterwarnings('ignore')
# Read the .env file first so TOKEN2 is available in the environment, then
# build an OpenAI-compatible client that targets the Hugging Face Inference API.
load_dotenv()
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1",
    api_key=os.environ.get('TOKEN2'),  # Hugging Face API token
)
##########################################################3
# import streamlit as st
# from transformers import AutoModelForCausalLM, AutoTokenizer
# import torch
# # Model selection dropdown
# selected_model = st.selectbox(
# "Select Model",
# ["meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
# "meta-llama/Llama-3.3-70B-Instruct",
# "meta-llama/Llama-3.2-3B-Instruct",
# "meta-llama/Llama-4-Scout-17B-16E-Instruct",
# "meta-llama/Meta-Llama-3-8B-Instruct",
# "meta-llama/Llama-3.1-70B-Instruct"],
# key='model_select'
# )
# @st.cache_resource # Cache the model to prevent reloading
# def load_model(model_name):
# try:
# # Optimized model loading configuration
# model = AutoModelForCausalLM.from_pretrained(
# model_name,
# torch_dtype=torch.float16, # Use half precision
# device_map="auto", # Automatic device mapping
# load_in_8bit=True, # Enable 8-bit quantization
# low_cpu_mem_usage=True, # Optimize CPU memory usage
# max_memory={0: "10GB"} # Limit GPU memory usage
# )
# tokenizer = AutoTokenizer.from_pretrained(
# model_name,
# padding_side="left",
# truncation_side="left"
# )
# return model, tokenizer
# except Exception as e:
# st.error(f"Error loading model: {str(e)}")
# return None, None
# # Load the selected model with optimizations
# if selected_model:
# model, tokenizer = load_model(selected_model)
# # Check if model loaded successfully
# if model is not None:
# st.success(f"Successfully loaded {selected_model}")
# else:
# st.warning("Please select a different model or check your hardware capabilities")
# # Function to generate text
# def generate_response(prompt, model, tokenizer):
# try:
# inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
# with torch.no_grad():
# outputs = model.generate(
# inputs["input_ids"],
# max_length=256,
# num_return_sequences=1,
# temperature=0.7,
# do_sample=True,
# pad_token_id=tokenizer.pad_token_id
# )
# response = tokenizer.decode(outputs[0], skip_special_tokens=True)
# return response
# except Exception as e:
# return f"Error generating response: {str(e)}"
############################################################
####new
# from openai import OpenAI
# client = OpenAI(
# base_url="https://router.huggingface.co/together/v1",
# api_key=os.environ.get('TOKEN2'),
# )
# completion = client.chat.completions.create(
# model="meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
# messages=[
# {
# "role": "user",
# "content": "What is the capital of France?"
# }
# ],
# max_tokens=512,
# )
# print(completion.choices[0].message)
#####
# Create necessary directories
# Make sure the working directories exist. `exist_ok=True` makes the call a
# no-op when the directory is already there and avoids the check-then-create
# race of a separate os.path.exists() test.
for dir_name in ['data', 'feedback']:
    os.makedirs(dir_name, exist_ok=True)
# Custom CSS
# Injects raw markup into the page (unsafe_allow_html=True lets HTML through).
# NOTE(review): the string passed here contains only a newline — presumably the
# <style> payload was lost; confirm against the intended stylesheet.
st.markdown("""
""", unsafe_allow_html=True)
# Helper functions
def read_csv_with_encoding(file, encodings=('utf-8', 'latin1', 'iso-8859-1', 'cp1252')):
    """Read a CSV into a DataFrame, trying several text encodings in turn.

    Args:
        file: Anything ``pandas.read_csv`` accepts (path or file-like object).
        encodings: Encodings to try, in order. The first one that decodes the
            data without a ``UnicodeDecodeError`` wins.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        UnicodeDecodeError: If none of the encodings can decode the data.
        ValueError: If ``encodings`` is empty.
    """
    # A failed attempt consumes a file-like object, so remember where it
    # started and rewind before every try; otherwise the retry parses nothing.
    can_rewind = hasattr(file, "seek") and hasattr(file, "tell")
    start_pos = file.tell() if can_rewind else None

    last_error = None
    for encoding in encodings:
        if can_rewind:
            file.seek(start_pos)
        try:
            return pd.read_csv(file, encoding=encoding)
        except UnicodeDecodeError as exc:
            last_error = exc

    if last_error is None:
        raise ValueError("No encodings were supplied")

    # UnicodeDecodeError takes five arguments (encoding, object, start, end,
    # reason); building it from a bare message would raise TypeError instead.
    raise UnicodeDecodeError(
        last_error.encoding,
        last_error.object,
        last_error.start,
        last_error.end,
        "Failed to read file with any supported encoding",
    ) from last_error
#def save_feedback(feedback_data):
#feedback_file = 'feedback/user_feedback.csv'
#feedback_df = pd.DataFrame([feedback_data])
#if os.path.exists(feedback_file):
#feedback_df.to_csv(feedback_file, mode='a', header=False, index=False)
#else:
#feedback_df.to_csv(feedback_file, index=False)
def reset_conversation():
    """Empty the chat history and forget the selected task.

    Resets the ``conversation`` and ``messages`` session entries to empty
    lists and removes ``task_choice`` when present. Always returns ``None``.
    """
    state = st.session_state
    for history_key in ("conversation", "messages"):
        state[history_key] = []
    if 'task_choice' in state:
        del state['task_choice']
    return None
#new 24 March
#user_input = st.text_input("Enter your prompt:")
###########33
# Seed the session-state entries the app reads; existing values are kept.
_session_defaults = (
    ("messages", list),
    ("examples_to_classify", list),
    ("system_role", str),
)
for _slot, _factory in _session_defaults:
    if _slot not in st.session_state:
        st.session_state[_slot] = _factory()

# Main app title
st.title("π€π¦ Text Data Labeling and Generation App")
# def embed_pdf_sidebar(pdf_path):
# with open(pdf_path, "rb") as f:
# base64_pdf = base64.b64encode(f.read()).decode('utf-8')
# pdf_display = f"""
#
# """
# st.markdown(pdf_display, unsafe_allow_html=True)
#
# Sidebar settings: header, instructions download and model picker.
# NOTE(review): indentation is not visible in this view, so how the statements
# below nest under the two `with st.sidebar:` headers cannot be confirmed here.
with st.sidebar:
st.title("βοΈ Settings")
#this last code works
with st.sidebar:
st.markdown("### πData Generation and Labeling Instructions")
#st.markdown("
π Instructions
", unsafe_allow_html=True)
# Offer the instructions PDF for download; the open binary handle is passed as
# the download payload.
# NOTE(review): open() raises FileNotFoundError when "User instructions.pdf" is
# not present in the working directory — confirm the file ships with the app.
with open("User instructions.pdf", "rb") as f:
st.download_button(
label="π Download Instructions PDF",
data=f,
#file_name="instructions.pdf",
file_name="User instructions.pdf",
mime="application/pdf"
)
# Model identifier chosen by the user; passed as `model=` to the
# chat-completions request further down.
selected_model = st.selectbox(
"Select Model",
["meta-llama/Llama-3.2-11B-Vision-Instruct","meta-llama/Meta-Llama-3-8B-Instruct-Turbo", "meta-llama/Llama-3.3-70B-Instruct", "meta-llama/Llama-3.2-3B-Instruct","meta-llama/Llama-4-Scout-17B-16E-Instruct", "meta-llama/Meta-Llama-3-8B-Instruct",
"meta-llama/Llama-3.1-70B-Instruct"],
key='model_select'
)
#################new oooo
# # Model selection dropdown
# selected_model = st.selectbox(
# "Select Model",
# [#"meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
# "meta-llama/Llama-3.2-3B-Instruct",
# "meta-llama/Llama-3.3-70B-Instruct",
# "meta-llama/Llama-3.2-3B-Instruct",
# "meta-llama/Llama-4-Scout-17B-16E-Instruct",
# "meta-llama/Meta-Llama-3-8B-Instruct",
# "meta-llama/Llama-3.1-70B-Instruct"],
# key='model_select'
# )
# @st.cache_resource # Cache the model to prevent reloading
# def load_model(model_name):
# try:
# # Optimized model loading configuration
# model = AutoModelForCausalLM.from_pretrained(
# model_name,
# torch_dtype=torch.float16, # Use half precision
# device_map="auto", # Automatic device mapping
# load_in_8bit=True, # Enable 8-bit quantization
# low_cpu_mem_usage=True, # Optimize CPU memory usage
# max_memory={0: "10GB"} # Limit GPU memory usage
# )
# tokenizer = AutoTokenizer.from_pretrained(
# model_name,
# padding_side="left",
# truncation_side="left"
# )
# return model, tokenizer
# except Exception as e:
# st.error(f"Error loading model: {str(e)}")
# return None, None
# # Load the selected model with optimizations
# if selected_model:
# model, tokenizer = load_model(selected_model)
# # Check if model loaded successfully
# if model is not None:
# st.success(f"Successfully loaded {selected_model}")
# else:
# st.warning("Please select a different model or check your hardware capabilities")
# # Function to generate text
# def generate_response(prompt, model, tokenizer):
# try:
# inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
# with torch.no_grad():
# outputs = model.generate(
# inputs["input_ids"],
# max_length=256,
# num_return_sequences=1,
# temperature=0.7,
# do_sample=True,
# pad_token_id=tokenizer.pad_token_id
# )
# response = tokenizer.decode(outputs[0], skip_special_tokens=True)
# return response
# except Exception as e:
# return f"Error generating response: {str(e)}"
# ################
# model = AutoModelForCausalLM.from_pretrained(
# "meta-llama/Meta-Llama-3-8B-Instruct",
# torch_dtype=torch.float16, # Use half precision
# device_map="auto", # Automatic device mapping
# load_in_8bit=True # Load in 8-bit precision
# )
# Sampling temperature slider (range 0.0-1.0, default 0.7); the value is passed
# as `temperature=` to the chat-completions request.
temperature = st.slider(
"Temperature",
0.0, 1.0, 0.7,
help="Controls randomness in generation"
)
# reset_conversation is registered as the click callback of this button.
st.button("π New Conversation", on_click=reset_conversation)
# Show the currently selected model together with an accuracy disclaimer.
with st.container():
st.markdown(f"""
Current Model: {selected_model}
Note: Generated content may be inaccurate or false. Check important info.
""", unsafe_allow_html=True)
# Address of the user-feedback form.
# NOTE(review): the f-string passed to st.sidebar.markdown below is empty and
# never references feedback_url — presumably the link markup was lost; confirm.
feedback_url = "https://docs.google.com/forms/d/e/1FAIpQLSdZ_5mwW-pjqXHgxR0xriyVeRhqdQKgb5c-foXlYAV55Rilsg/viewform?usp=header"
st.sidebar.markdown(
f'',
unsafe_allow_html=True
)
# Replay the stored chat history, one chat bubble per message.
for message in st.session_state.messages:
    bubble = st.chat_message(message["role"])
    with bubble:
        st.markdown(message["content"])

# Task chooser: shown only until a task has been picked.
if 'task_choice' not in st.session_state:
    col1, col2 = st.columns(2)
    task_buttons = (
        (col1, "π Data Generation", "gen_button", "Generate new data", "Data Generation"),
        (col2, "π·οΈ Data Labeling", "label_button", "Label existing data", "Data Labeling"),
    )
    for column, caption, widget_key, tooltip, task_name in task_buttons:
        with column:
            clicked = st.button(caption, key=widget_key, help=tooltip)
        if clicked:
            st.session_state.task_choice = task_name
if "task_choice" in st.session_state:
if st.session_state.task_choice == "Data Generation":
st.header("π Data Generation")
# 1. Domain selection
domain_selection = st.selectbox("Domain", [
"Restaurant reviews", "E-Commerce reviews", "News", "AG News", "Tourism", "Custom"
])
# 2. Handle custom domain input
custom_domain_valid = True # Assume valid until proven otherwise
if domain_selection == "Custom":
domain = st.text_input("Specify custom domain")
if not domain.strip():
st.error("Please specify a domain name.")
custom_domain_valid = False
else:
domain = domain_selection
# Classification type selection
classification_type = st.selectbox(
"Classification Type",
["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"]
)
# Labels setup based on classification type
#labels = []
labels = []
labels_valid = False
errors = []
def validate_binary_labels(labels):
    """Validate the two class names of a binary classification task.

    Args:
        labels: Sequence whose first two items are the class names.

    Returns:
        A list of error messages; empty when both names are non-blank and
        differ from each other (comparison ignores case and outer whitespace).
    """
    cleaned = [name.strip().lower() for name in labels]
    first, second = cleaned[0], cleaned[1]

    required = (
        (first, "First class name is required."),
        (second, "Second class name is required."),
    )
    problems = [message for value, message in required if not value]

    # Only report a clash when every name is filled in; blanks are already
    # covered by the "required" messages above.
    if all(cleaned) and first == second:
        problems.append("Class names must be different.")
    return problems
if classification_type == "Sentiment Analysis":
st.write("### Sentiment Analysis Labels (Fixed)")
col1, col2, col3 = st.columns(3)
with col1:
st.text_input("First class", "Positive", disabled=True)
with col2:
st.text_input("Second class", "Negative", disabled=True)
with col3:
st.text_input("Third class", "Neutral", disabled=True)
labels = ["Positive", "Negative", "Neutral"]
elif classification_type == "Binary Classification":
st.write("### Binary Classification Labels")
col1, col2 = st.columns(2)
with col1:
label_1 = st.text_input("First class", "Positive")
with col2:
label_2 = st.text_input("Second class", "Negative")
labels = [label_1, label_2]
errors = validate_binary_labels(labels)
if errors:
st.error("\n".join(errors))
else:
st.success("Binary class names are valid and unique!")
elif classification_type == "Multi-Class Classification":
st.write("### Multi-Class Classification Labels")
default_labels_by_domain = {
"News": ["Political", "Sports", "Entertainment", "Technology", "Business"],
"AG News": ["World", "Sports", "Business", "Sci/Tech"],
"Tourism": ["Accommodation", "Transportation", "Tourist Attractions",
"Food & Dining", "Local Experience", "Adventure Activities",
"Wellness & Spa", "Eco-Friendly Practices", "Family-Friendly",
"Luxury Tourism"],
"Restaurant reviews": ["Italian", "French", "American"],
"E-Commerce reviews": ["Mobile Phones & Accessories", "Laptops & Computers","Kitchen & Dining",
"Beauty & Personal Care", "Home & Furniture", "Clothing & Fashion",
"Shoes & Handbags", "Health & Wellness", "Electronics & Gadgets",
"Books & Stationery","Toys & Games", "Sports & Fitness",
"Grocery & Gourmet Food","Watches & Accessories", "Baby Products"]
}
num_classes = st.slider("Number of classes", 3, 15, 3)
# Get defaults for selected domain, or empty list
defaults = default_labels_by_domain.get(domain, [])
labels = []
errors = []
cols = st.columns(3)
for i in range(num_classes):
with cols[i % 3]:
default_value = defaults[i] if i < len(defaults) else ""
label_input = st.text_input(f"Class {i+1}", default_value)
normalized_label = label_input.strip().title()
if not normalized_label:
errors.append(f"Class {i+1} name is required.")
else:
labels.append(normalized_label)
# Check for duplicates (case-insensitive)
if len(labels) != len(set(labels)):
errors.append("Labels names must be unique (case-insensitive, normalized to Title Case).")
# Show validation results
if errors:
for error in errors:
st.error(error)
else:
st.success("All Labels names are valid and unique!")
labels_valid = not errors # Will be True only if there are no label errors
##############
#new 22/4/2025
# add additional attributes
add_attributes = st.checkbox("Add additional attributes (optional)")
additional_attributes = []
if add_attributes:
num_attributes = st.slider("Number of attributes to add", 1, 5, 1)
for i in range(num_attributes):
st.markdown(f"#### Attribute {i+1}")
attr_name = st.text_input(f"Name of attribute {i+1}", key=f"attr_name_{i}")
attr_topics = st.text_input(f"Topics (comma-separated) for {attr_name}", key=f"attr_topics_{i}")
if attr_name and attr_topics:
topics_list = [topic.strip() for topic in attr_topics.split(",") if topic.strip()]
additional_attributes.append({"attribute": attr_name, "topics": topics_list})
################
# Generation parameters
# Word-count bounds for each generated example, side by side.
col1, col2 = st.columns(2)
with col1:
    min_words = st.number_input("Min words", 1, 100, 20)
with col2:
    # "Max words" may not go below the chosen minimum. Its default must respect
    # that bound too: Streamlit rejects a default `value` smaller than
    # `min_value`, which a fixed default of 50 would be once "Min words" > 50.
    max_words = st.number_input("Max words", min_words, 100, max(50, min_words))
# Few-shot examples
use_few_shot = st.toggle("Use few-shot examples")
few_shot_examples = []
if use_few_shot:
num_examples = st.slider("Number of few-shot examples", 1, 10, 1)
for i in range(num_examples):
with st.expander(f"Example {i+1}"):
content = st.text_area(f"Content", key=f"few_shot_content_{i}")
label = st.selectbox(f"Label", labels, key=f"few_shot_label_{i}")
if content and label:
few_shot_examples.append({"content": content, "label": label})
num_to_generate = st.number_input("Number of examples", 1, 100, 10)
#sytem role after
# System role customization
#default_system_role = f"You are a professional {classification_type} expert, your role is to generate text examples for {domain} domain. Always generate unique diverse examples and do not repeat the generated data. The generated text should be between {min_words} to {max_words} words long."
# System role customization
default_system_role = (
f"You are a seasoned expert in {classification_type}, specializing in the {domain} domain. "
f" Your primary responsibility is to generate high-quality, diverse, and unique text examples "
f"tailored to this domain. Please ensure that each example adheres to the specified length "
f"requirements, ranging from {min_words} to {max_words} words, and avoid any repetition in the generated content."
)
system_role = st.text_area("Modify System Role (optional)",
value=default_system_role,
key="system_role_input")
st.session_state['system_role'] = system_role if system_role else default_system_role
# Labels initialization
#labels = []
user_prompt = st.text_area("User Prompt (optional)")
# Updated prompt template including system role
prompt_template = PromptTemplate(
input_variables=["system_role", "classification_type", "domain", "num_examples",
"min_words", "max_words", "labels", "user_prompt", "few_shot_examples", "additional_attributes"],
template=(
"{system_role}\n"
"- Use the following parameters:\n"
"- Generate {num_examples} examples\n"
"- Each example should be between {min_words} to {max_words} words long\n"
"- Use these labels: {labels}.\n"
"- Use the following additional attributes:\n"
"- {additional_attributes}\n"
"- Generate the examples in this format: 'Example text. Label: label'\n"
"- Do not include word counts or any additional information\n"
"- Always use your creativity and intelligence to generate unique and diverse text data\n"
"- In sentiment analysis, ensure that the sentiment classification is clearly identified as Positive, Negative, or Neutral. Do not leave the sentiment ambiguous.\n"
"- In binary sentiment analysis, classify text strictly as either Positive or Negative. Do not include or imply Neutral as an option.\n"
"- Write unique examples every time.\n"
"- DO NOT REPEAT your gnerated text. \n"
"- For each Output, describe it once and move to the next.\n"
"- List each Output only once, and avoid repeating details.\n"
"- Additional instructions: {user_prompt}\n\n"
"- Use the following examples as a reference in the generation process\n\n {few_shot_examples}. \n"
"- Think step by step, generate numbered examples, and check each newly generated example to ensure it has not been generated before. If it has, modify it"
)
)
# template=(
# "{system_role}\n"
# "- Use the following parameters:\n"
# "- Generate {num_examples} examples\n"
# "- Each example should be between {min_words} to {max_words} words long\n"
# "- Use these labels: {labels}.\n"
# "- Use the following additional attributes:\n"
# "{additional_attributes}\n"
# #"- Format each example like this: 'Example text. Label: [label]. Attribute1: [topic1]. Attribute2: [topic2]'\n"
# "- Generate the examples in this format: 'Example text. Label: label'\n"
# "- Additional instructions: {user_prompt}\n"
# "- Use these few-shot examples if provided:\n{few_shot_examples}\n"
# "- Think step by step and ensure examples are unique and not repeated."
# )
# )
##########new 22/4/2025
formatted_attributes = "\n".join([
f"- {attr['attribute']}: {', '.join(attr['topics'])}" for attr in additional_attributes
])
#######################
# Generate system prompt
system_prompt = prompt_template.format(
system_role=st.session_state['system_role'],
classification_type=classification_type,
domain=domain,
num_examples=num_to_generate,
min_words=min_words,
max_words=max_words,
labels=", ".join(labels),
user_prompt=user_prompt,
few_shot_examples="\n".join([f"{ex['content']}\nLabel: {ex['label']}" for ex in few_shot_examples]) if few_shot_examples else "",
additional_attributes=formatted_attributes
)
# Store system prompt in session state
st.session_state['system_prompt'] = system_prompt
# Display system prompt
st.write("System Prompt:")
st.text_area("Current System Prompt", value=st.session_state['system_prompt'],
height=400, disabled=True)
if st.button("π― Generate Examples"):
    # Validate the form before contacting the model. The first failing check
    # is reported and blocks the request, so no API call is made with an
    # incomplete or ambiguous configuration.
    cleaned_labels = [lbl.strip() for lbl in labels]
    validation_problem = None
    if domain_selection == "Custom" and not domain.strip():
        validation_problem = "Custom domain name is required."
    elif len({lbl.lower() for lbl in cleaned_labels}) != len(cleaned_labels):
        # Case-insensitive, matching the on-screen label validation.
        validation_problem = "Class names must be unique."
    elif any(not lbl for lbl in cleaned_labels):
        validation_problem = "All class labels must be filled in."

    if validation_problem:
        st.warning(validation_problem)
    else:
        with st.spinner("Generating examples..."):
            try:
                # Stream the completion for the prompt assembled above.
                stream = client.chat.completions.create(
                    model=selected_model,
                    messages=[{"role": "system", "content": st.session_state['system_prompt']}],
                    temperature=temperature,
                    stream=True,
                    max_tokens=4000,
                    top_p=0.9,
                )

                # Record the exchange in the chat history that is replayed at
                # the top of the page.
                st.session_state.messages.append({"role": "user", "content": system_prompt})
                response = st.write_stream(stream)
                st.session_state.messages.append({"role": "assistant", "content": response})

                # Always keep the latest response; storing it only when the
                # key was missing left a stale value after the first run.
                st.session_state.response = response
                for state_key, initial in (
                    ("generated_examples", []),
                    ("generated_examples_csv", None),
                    ("generated_examples_json", None),
                ):
                    if state_key not in st.session_state:
                        st.session_state[state_key] = initial

                # Parse "<text> Label: <label>" lines, splitting on the last
                # "Label:" so that text containing the word stays intact.
                examples_list = []
                for line in response.split('\n'):
                    text, separator, label = line.rpartition('Label:')
                    text, label = text.strip(), label.strip()
                    if separator and text and label:
                        examples_list.append({
                            'text': text,
                            'label': label,
                            'system_prompt': st.session_state.system_prompt,
                            'system_role': st.session_state.system_role,
                            'task_type': 'Data Generation',
                            'Use few-shot example?': 'Yes' if use_few_shot else 'No',
                        })

                if examples_list:
                    # Publish the parsed examples and their serialized forms.
                    st.session_state.generated_examples = examples_list
                    df = pd.DataFrame(examples_list)
                    st.session_state.generated_examples_csv = df.to_csv(index=False).encode('utf-8')
                    st.session_state.generated_examples_json = json.dumps(examples_list, indent=2).encode('utf-8')

                    st.download_button(
                        "π₯ Download Generated Examples (CSV)",
                        st.session_state.generated_examples_csv,
                        "generated_examples.csv",
                        "text/csv",
                        key='download-csv-persistent'
                    )
                    st.markdown("\n. . . . . . or\n", unsafe_allow_html=True)
                    st.download_button(
                        "π₯ Download Generated Examples (JSON)",
                        st.session_state.generated_examples_json,
                        "generated_examples.json",
                        "application/json",
                        key='download-json-persistent'
                    )
                else:
                    # Nothing matched the expected format, so there is nothing
                    # to download; say so instead of offering empty files.
                    st.warning("No examples in the expected 'Example text. Label: label' format were found in the response.")

                # Clicking this button reruns the script, which returns the
                # page to the form. No handler body is attached: the previous
                # one read `follow_up`, which is never assigned in this script,
                # and relied on the deprecated st.experimental_rerun().
                st.button("Continue")
            except Exception as e:
                # Top-level UI boundary: surface the failure to the user
                # rather than crashing the app.
                st.error("An error occurred during generation.")
                st.error(f"Details: {e}")
# Labeling process
elif st.session_state.task_choice == "Data Labeling":
st.header("π·οΈ Data Labeling")
domain_selection = st.selectbox("Domain", ["Restaurant reviews", "E-Commerce reviews", "News", "AG News", "Tourism", "Custom"])
# 2. Handle custom domain input
custom_domain_valid = True # Assume valid until proven otherwise
if domain_selection == "Custom":
domain = st.text_input("Specify custom domain")
if not domain.strip():
st.error("Please specify a domain name.")
custom_domain_valid = False
else:
domain = domain_selection
# Classification type selection
classification_type = st.selectbox(
"Classification Type",
["Sentiment Analysis", "Binary Classification", "Multi-Class Classification", "Named Entity Recognition (NER)"]
)
#NNew edit
# Labels setup based on classification type
labels = []
labels_valid = False
errors = []
if classification_type == "Sentiment Analysis":
st.write("### Sentiment Analysis Labels (Fixed)")
col1, col2, col3 = st.columns(3)
with col1:
label_1 = st.text_input("First class", "Positive", disabled=True)
with col2:
label_2 = st.text_input("Second class", "Negative", disabled=True)
with col3:
label_3 = st.text_input("Third class", "Neutral", disabled=True)
labels = ["Positive", "Negative", "Neutral"]
elif classification_type == "Binary Classification":
st.write("### Binary Classification Labels")
col1, col2 = st.columns(2)
with col1:
label_1 = st.text_input("First class", "Positive")
with col2:
label_2 = st.text_input("Second class", "Negative")
errors = []
labels = [label_1.strip(), label_2.strip()]
# Strip and lower-case labels for validation
label_1 = labels[0].strip()
label_2 = labels[1].strip()
# Check for empty class names
if not label_1:
errors.append("First class name is required.")
if not label_2:
errors.append("Second class name is required.")
# Check for duplicates (case insensitive)
if label_1.lower() == label_2.lower() and label_1 and label_2:
errors.append("Class names must be different.")
# Show errors or success
if errors:
for error in errors:
st.error(error)
else:
st.success("Binary class names are valid and unique!")
elif classification_type == "Multi-Class Classification":
st.write("### Multi-Class Classification Labels")
default_labels_by_domain = {
"News": ["Political", "Sports", "Entertainment", "Technology", "Business"],
"AG News": ["World", "Sports", "Business", "Sci/Tech"],
"Tourism": ["Accommodation", "Transportation", "Tourist Attractions",
"Food & Dining", "Local Experience", "Adventure Activities",
"Wellness & Spa", "Eco-Friendly Practices", "Family-Friendly",
"Luxury Tourism"],
"Restaurant reviews": ["Italian", "French", "American"],
"E-Commerce reviews": ["Mobile Phones & Accessories", "Laptops & Computers","Kitchen & Dining",
"Beauty & Personal Care", "Home & Furniture", "Clothing & Fashion",
"Shoes & Handbags", "Health & Wellness", "Electronics & Gadgets",
"Books & Stationery","Toys & Games", "Sports & Fitness",
"Grocery & Gourmet Food","Watches & Accessories", "Baby Products"]
}
# Ask user how many classes they want to define
num_classes = st.slider("Select the number of classes (labels)", min_value=3, max_value=10, value=3)
# Use default labels based on selected domain, if available
defaults = default_labels_by_domain.get(domain, [])
labels = []
errors = []
cols = st.columns(3) # For nicely arranged label inputs
for i in range(num_classes):
with cols[i % 3]: # Distribute inputs across columns
default_value = defaults[i] if i < len(defaults) else ""
label_input = st.text_input(f"Label {i + 1}", default_value)
normalized_label = label_input.strip().title()
if not normalized_label:
errors.append(f"Label {i + 1} is required.")
else:
labels.append(normalized_label)
# Check for duplicates (case-insensitive)
normalized_set = {label.lower() for label in labels}
if len(labels) != len(normalized_set):
errors.append("Label names must be unique (case-insensitive).")
# Show validation results
if errors:
for error in errors:
st.error(error)
else:
st.success("All label names are valid and unique!")
labels_valid = not errors # True if no validation errors
elif classification_type == "Named Entity Recognition (NER)":
# # NER entity options
# ner_entities = [
# "PERSON - Names of people, fictional characters, historical figures",
# "ORG - Companies, institutions, agencies, teams",
# "LOC - Physical locations (mountains, oceans, etc.)",
# "GPE - Countries, cities, states, political regions",
# "DATE - Calendar dates, years, centuries",
# "TIME - Times, durations",
# "MONEY - Monetary values with currency"
# ]
# selected_entities = st.multiselect(
# "Select entities to recognize",
# ner_entities,
# default=["PERSON - Names of people, fictional characters, historical figures",
# "ORG - Companies, institutions, agencies, teams",
# "LOC - Physical locations (mountains, oceans, etc.)",
# "GPE - Countries, cities, states, political regions",
# "DATE - Calendar dates, years, centuries",
# "TIME - Times, durations",
# "MONEY - Monetary values with currency"],
# key="ner_entity_selection"
# )
#new 22/4/2025
#if classification_type == "Named Entity Recognition (NER)":
use_few_shot = True
#new 22/4/2025
few_shot_examples = [
{"content": "Mount Everest is the tallest mountain in the world.", "label": "LOC: Mount Everest"},
{"content": "The President of the United States visited Paris last summer.", "label": "GPE: United States, GPE: Paris"},
{"content": "Amazon is expanding its offices in Berlin.", "label": "ORG: Amazon, GPE: Berlin"},
{"content": "J.K. Rowling wrote the Harry Potter books.", "label": "PERSON: J.K. Rowling"},
{"content": "Apple was founded in California in 1976.", "label": "ORG: Apple, GPE: California, DATE: 1976"},
{"content": "The Nile is the longest river in Africa.", "label": "LOC: Nile, GPE: Africa"},
{"content": "He arrived at 3 PM for the meeting.", "label": "TIME: 3 PM"},
{"content": "She bought the dress for $200.", "label": "MONEY: $200"},
{"content": "The event is scheduled for July 4th.", "label": "DATE: July 4th"},
{"content": "The World Health Organization is headquartered in Geneva.", "label": "ORG: World Health Organization, GPE: Geneva"}
]
###########
st.write("### Named Entity Recognition (NER) Entities")
# Predefined standard entities
ner_entities = [
"PERSON - Names of people, fictional characters, historical figures",
"ORG - Companies, institutions, agencies, teams",
"LOC - Physical locations (mountains, oceans, etc.)",
"GPE - Countries, cities, states, political regions",
"DATE - Calendar dates, years, centuries",
"TIME - Times, durations",
"MONEY - Monetary values with currency"
]
# User can add custom NER types
custom_ner_entities = []
if st.checkbox("Add custom NER entities?"):
num_custom_ner = st.slider("Number of custom NER entities", 1, 10, 1)
for i in range(num_custom_ner):
st.markdown(f"#### Custom Entity {i+1}")
custom_type = st.text_input(f"Entity type {i+1}", key=f"custom_ner_type_{i}")
custom_description = st.text_input(f"Description for {custom_type}", key=f"custom_ner_desc_{i}")
if custom_type and custom_description:
custom_ner_entities.append(f"{custom_type.upper()} - {custom_description}")
# Combine built-in and custom NERs
all_ner_options = ner_entities + custom_ner_entities
selected_entities = st.multiselect(
"Select entities to recognize",
all_ner_options,
default=ner_entities
)
# Extract entity type names (before the dash)
labels = [entity.split(" - ")[0].strip() for entity in selected_entities]
if not labels:
st.warning("Please select at least one entity type.")
labels = ["PERSON"]
##########
# # Extract just the entity type (before the dash)
# labels = [entity.split(" - ")[0] for entity in selected_entities]
# if not labels:
# st.warning("Please select at least one entity type")
# labels = ["PERSON"] # Default if nothing selected
#NNew edit
# elif classification_type == "Multi-Class Classification":
# st.write("### Multi-Class Classification Labels")
# default_labels_by_domain = {
# "News": ["Political", "Sports", "Entertainment", "Technology", "Business"],
# "AG News": ["World", "Sports", "Business", "Sci/Tech"],
# "Tourism": ["Accommodation", "Transportation", "Tourist Attractions",
# "Food & Dining", "Local Experience", "Adventure Activities",
# "Wellness & Spa", "Eco-Friendly Practices", "Family-Friendly",
# "Luxury Tourism"],
# "Restaurant reviews": ["Italian", "French", "American"]
# }
# num_classes = st.slider("Number of classes", 3, 10, 3)
# # Get defaults for selected domain, or empty list
# defaults = default_labels_by_domain.get(domain, [])
# labels = []
# errors = []
# cols = st.columns(3)
# for i in range(num_classes):
# with cols[i % 3]:
# default_value = defaults[i] if i < len(defaults) else ""
# label_input = st.text_input(f"Class {i+1}", default_value)
# normalized_label = label_input.strip().title()
# if not normalized_label:
# errors.append(f"Class {i+1} name is required.")
# else:
# labels.append(normalized_label)
# # Check for duplicates (case-insensitive)
# if len(labels) != len(set(labels)):
# errors.append("Labels names must be unique (case-insensitive, normalized to Title Case).")
# # Show validation results
# if errors:
# for error in errors:
# st.error(error)
# else:
# st.success("All Labels names are valid and unique!")
# labels_valid = not errors # Will be True only if there are no label errors
# else:
# num_classes = st.slider("Number of classes", 3, 23, 3, key="label_num_classes")
# labels = []
# cols = st.columns(3)
# for i in range(num_classes):
# with cols[i % 3]:
# label = st.text_input(f"Class {i+1}", f"Class_{i+1}", key=f"label_class_{i}")
# labels.append(label)
use_few_shot = st.toggle("Use few-shot examples for labeling")
few_shot_examples = []
if use_few_shot:
num_few_shot = st.slider("Number of few-shot examples", 1, 10, 1)
for i in range(num_few_shot):
with st.expander(f"Few-shot Example {i+1}"):
content = st.text_area(f"Content", key=f"label_few_shot_content_{i}")
label = st.selectbox(f"Label", labels, key=f"label_few_shot_label_{i}")
if content and label:
few_shot_examples.append(f"{content}\nLabel: {label}")
num_examples = st.number_input("Number of examples to classify", 1, 100, 1)
examples_to_classify = []
if num_examples <= 10:
for i in range(num_examples):
example = st.text_area(f"Example {i+1}", key=f"example_{i}")
if example:
examples_to_classify.append(example)
else:
examples_text = st.text_area(
"Enter examples (one per line)",
height=300,
help="Enter each example on a new line"
)
if examples_text:
examples_to_classify = [ex.strip() for ex in examples_text.split('\n') if ex.strip()]
if len(examples_to_classify) > num_examples:
examples_to_classify = examples_to_classify[:num_examples]
# Default system role, built from the selected task type and domain.
# The user may overwrite it in the text area below.
default_system_role = " ".join([
    f"You are a highly skilled {classification_type} expert.",
    f"Your task is to accurately classify the provided text examples within the {domain} domain.",
    "Ensure that all classifications are precise, context-aware, and aligned with domain-specific standards and best practices.",
])
system_role = st.text_area(
    "Modify System Role (optional)",
    value=default_system_role,
    key="system_role_input",
)
# An emptied text area falls back to the default role.
st.session_state['system_role'] = system_role or default_system_role

user_prompt = st.text_area("User prompt (optional)", key="label_instructions")

# Prompt fragments: few-shot examples separated by blank lines, and the
# examples to label as a 1-based numbered list.
few_shot_text = "\n\n".join(few_shot_examples)
examples_text = "\n".join(
    f"{position}. {ex}" for position, ex in enumerate(examples_to_classify, start=1)
)
# Select the prompt template for the chosen task.
# - NER: asks the model to group the entities it finds under each entity type.
# - Anything else: asks for one "text. Label: [label]" line per example.
# Both templates are filled later via label_prompt_template.format(...).
if classification_type == "Named Entity Recognition (NER)":
    label_prompt_template = PromptTemplate(
        input_variables=["system_role", "labels", "few_shot_examples", "examples", "domain", "user_prompt"],
        template=(
            "{system_role}\n"
            "- You are an expert at Named Entity Recognition (NER) for domain: {domain}.\n"
            "- Use these entity types: {labels}.\n\n"
            "### Output Format:\n"
            "Return each example followed by the entities you found in this format:\n"
            "'Example text.\nEntity types:\n"
            "Then group the entities under each label like this:\n"
            # The separator between a label and its entities was a garbled
            # (mis-decoded) character; it is written as a real arrow here so
            # the model sees a clean format example.
            "\nPERSON → Angela Merkel, John Smith\n"
            "ORG → Google, United Nations\n"
            "DATE → January 1st, 2023\n"
            "... and so on.\n\n"
            "Each new entities group should be in a new line.\n"
            "If entity type {labels} is not found, do not write it in your response.\n"
            "- Do NOT output them inline after the text.\n"
            "- Do NOT repeat the sentence.\n"
            "- If no entities are found for a type, skip it.\n"
            "- Keep the format consistent.\n\n"
            "User Instructions:\n{user_prompt}\n\n"
            "Few-shot Examples:\n{few_shot_examples}\n\n"
            "Examples to analyze:\n{examples}"
        ),
    )
else:
    label_prompt_template = PromptTemplate(
        # NOTE(review): "classification_type" and "domain" are declared here but
        # do not appear in the template text; they are kept because callers
        # pass them to format().
        input_variables=["system_role", "classification_type", "labels", "few_shot_examples", "examples", "domain", "user_prompt"],
        template=(
            "{system_role}\n"
            "- Use the following instructions:\n"
            "- Use the following labels: {labels}.\n"
            "- Return the classified text followed by the label in this format: 'text. Label: [label]'\n"
            "- Do not provide any additional information or explanations\n"
            "- User prompt:\n {user_prompt}\n\n"
            "- Use user provided examples as guidence in the classification process:\n\n {few_shot_examples}\n"
            "- Examples to classify:\n{examples}\n\n"
            "- Think step by step then classify the examples"
        ),
    )
# Normalise few_shot_examples into a single string for the prompt preview.
# Accepted shapes:
#   * str                      -> used as-is
#   * list[str]                -> already formatted entries, one per line
#   * list[dict] with 'content' and 'label' keys -> "Entity types" layout
# Anything else yields an empty string.
#
# The dict formatting used to run unconditionally after this chain, which
# raised TypeError for the list of plain strings collected from the few-shot
# widgets (a str cannot be indexed with 'content'). It now only runs for
# dict entries.
if isinstance(few_shot_examples, str):
    formatted_few_shot = few_shot_examples
elif isinstance(few_shot_examples, list) and all(isinstance(ex, str) for ex in few_shot_examples):
    formatted_few_shot = "\n".join(few_shot_examples)
elif isinstance(few_shot_examples, list) and all(
    isinstance(ex, dict) and 'content' in ex and 'label' in ex for ex in few_shot_examples
):
    formatted_few_shot = "\n\n".join(
        f"{ex['content']}\n\nEntity types\n{ex['label']}" for ex in few_shot_examples
    )
else:
    formatted_few_shot = ""

# Build the prompt shown in the read-only preview below.
# NOTE(review): the "Label Data" handler formats its own prompt from
# examples_text / few_shot_text (numbered examples), so this preview can
# differ from the text that is actually sent to the model.
system_prompt = label_prompt_template.format(
    system_role=st.session_state['system_role'],
    classification_type=classification_type,
    domain=domain,
    examples="\n".join(examples_to_classify),
    labels=", ".join(labels),
    user_prompt=user_prompt,
    few_shot_examples=formatted_few_shot,
)

# Keep the preview prompt in session state so later steps can read it.
st.session_state['system_prompt'] = system_prompt

st.write("System Prompt:")
st.text_area(
    "System Prompt",
    value=st.session_state['system_prompt'],
    height=300,
    disabled=True,
)
if st.button("π·οΈ Label Data"):
if examples_to_classify:
with st.spinner("Labeling data..."):
#Generate the system prompt based on classification type
# Build the prompt that is sent to the model. Both task types fill the same
# fields; only non-NER tasks additionally pass the task name.
prompt_fields = {
    "system_role": st.session_state['system_role'],
    "labels": ", ".join(labels),
    "domain": domain,
    "few_shot_examples": few_shot_text,
    "examples": examples_text,
    "user_prompt": user_prompt,
}
if classification_type != "Named Entity Recognition (NER)":
    prompt_fields["classification_type"] = classification_type
system_prompt = label_prompt_template.format(**prompt_fields)
try:
# Request a streamed completion; the whole prompt travels as one system message.
request_options = {
    "model": selected_model,
    "messages": [{"role": "system", "content": system_prompt}],
    "temperature": temperature,
    "stream": True,
    "max_tokens": 4000,
    "top_p": 0.9,
}
stream = client.chat.completions.create(**request_options)

# Log the prompt in the chat history, render the stream as it arrives,
# then log the completed reply.
chat_history = st.session_state.messages
chat_history.append({"role": "user", "content": system_prompt})
response = st.write_stream(stream)
chat_history.append({"role": "assistant", "content": response})
# Display the labeled examples
# # Optional: If you want to add it as a chat-style message log
# preview_str = st.session_state.labeled_preview.to_markdown(index=False)
# st.session_state.messages.append({"role": "assistant", "content": f"Here is a preview of the labeled examples:\n\n{preview_str}"})
# # Stream response and append assistant message
# #14/4/2024
# response = st.write_stream(stream)
# st.session_state.messages.append({"role": "assistant", "content": response})
# Record the prompt that was just sent and the reply that came back.
# These are assigned unconditionally: the preview step already stores a
# prompt under 'system_prompt' before the button is pressed, so a
# "only if missing" guard would never record the prompt actually sent, and
# the same guard on 'response' would keep the first reply forever.
st.session_state.system_prompt = system_prompt
st.session_state.response = response

# Create the remaining keys only when they do not exist yet, so values
# already stored under them are left untouched.
for state_key, initial_value in (
    ('generated_examples', []),
    ('generated_examples_csv', None),
    ('generated_examples_json', None),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value
# Turn the model reply into export rows.
# - NER: the whole reply is stored as a single row under 'ner_output'.
# - Other tasks: every line containing 'Label:' is split at its last
#   occurrence into text and label; lines without both parts are dropped.
few_shot_flag = 'Yes' if use_few_shot else 'No'
labeled_examples = []
if classification_type == "Named Entity Recognition (NER)":
    labeled_examples.append({
        'ner_output': response.strip(),
        'system_prompt': st.session_state.system_prompt,
        'system_role': st.session_state.system_role,
        'task_type': 'Named Entity Recognition (NER)',
        'Use few-shot example?': few_shot_flag,
    })
else:
    for raw_line in response.split('\n'):
        if not raw_line.strip():
            continue
        head, marker, tail = raw_line.rpartition('Label:')
        if not marker:
            # No 'Label:' on this line, so there is nothing to extract.
            continue
        text = head.strip()
        label = tail.strip()
        if text and label:
            # Key order is kept as-is; it determines the CSV column order.
            labeled_examples.append({
                'text': text,
                'label': label,
                'system_prompt': st.session_state.system_prompt,
                'system_role': st.session_state.system_role,
                'task_type': 'Data Labeling',
                'Use few-shot example?': few_shot_flag,
            })
# Save and provide download options
if labeled_examples:
# Update session state
# Keep the parsed rows in session state and prepare both download payloads.
st.session_state.labeled_examples = labeled_examples
df = pd.DataFrame(labeled_examples)

# CSV: one row per labeled example, UTF-8 encoded bytes.
st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')

# JSON: run metadata plus the rows. task_type reports the task that was
# actually selected; it was previously hard-coded to the NER name, which
# mislabeled exports produced by classification runs.
st.session_state.labeled_examples_json = json.dumps({
    "metadata": {
        "domain": domain,
        "labels": labels,
        "used_few_shot": use_few_shot,
        "task_type": classification_type,
        "timestamp": datetime.now().isoformat()
    },
    "examples": labeled_examples
}, indent=2).encode('utf-8')

# Download buttons
st.download_button(
    "📥 Download Labeled Examples (CSV)",
    st.session_state.labeled_examples_csv,
    "labeled_examples.csv",
    "text/csv",
    key='download-labeled-csv'
)
st.markdown("""