Data_Generation_LabelingCopy

Sleeping

App Files Files

Wedyan2023 commited on May 9

Commit

bc09f8c

verified ·

1 Parent(s): 2626d68

Create app111.py

Browse files

Files changed (1) hide show

app111.py +1618 -0

app111.py ADDED Viewed

	@@ -0,0 +1,1618 @@

+import streamlit as st
+import pandas as pd
+import os
+import json
+import base64
+import random
+from streamlit_pdf_viewer import pdf_viewer
+from langchain.prompts import PromptTemplate
+from datetime import datetime
+from pathlib import Path
+from openai import OpenAI
+from dotenv import load_dotenv
+import warnings
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+warnings.filterwarnings('ignore')
+# NOTE(review): the return value of this lookup is discarded, so the line has
+# no effect on the program as written.
+os.getenv("OAUTH_CLIENT_ID")
+# # Load environment variables and initialize the OpenAI client to use Hugging Face Inference API.
+# load_dotenv()
+# client = OpenAI(
+#     base_url="https://api-inference.huggingface.co/v1",
+#     #api_key=os.environ.get('TOKEN2') # Hugging Face API token
+#     api_key=os.environ.get('LLM')
+# )
+#######
+#from openai import OpenAI
+# OpenAI-compatible client pointed at the Hugging Face router; the API key is
+# read from the `LLM` environment variable.
+# NOTE(review): the base URL embeds meta-llama/Llama-3.3-70B-Instruct, while
+# later requests pass the model chosen in the sidebar as `model=` — confirm
+# that the other selectable models are reachable through this endpoint.
+client = OpenAI(
+    base_url="https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.3-70B-Instruct/v1",
+    #api_key="hf_xxxxxxxxxxxxxxxxxxxxxxxx",
+    api_key=os.environ.get('LLM')
+)
+# Test request issued at module level; the reply is printed to stdout.
+# NOTE(review): Streamlit re-executes the script on each interaction, so this
+# presumably sends a request on every rerun — confirm it is still needed.
+completion = client.chat.completions.create(
+    model="meta-llama/Llama-3.3-70B-Instruct",
+    messages=[
+        {
+            "role": "user",
+            "content": "What is the capital of France?"
+        }
+    ],
+)
+print(completion.choices[0].message)
+#######
+#####
+# from openai import OpenAI
+# client = OpenAI(
+#     base_url="https://router.huggingface.co/together/v1",
+#     #api_key="hf_XXXXX",
+#     api_key=os.environ.get('TOKEN2'), # Hugging Face API token
+# )
+# #meta-llama/Meta-Llama-3-8B-Instruct
+# completion = client.chat.completions.create(
+#     #model="meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
+#     model="meta-llama/Meta-Llama-3-8B-Instruct",
+#     messages=[
+#         {
+#             "role": "user",
+#             "content": "What is the capital of France?"
+#         }
+#     ],
+# )
+#print(completion.choices[0].message)
+#####
+##########################################################3
+# import streamlit as st
+# from transformers import AutoModelForCausalLM, AutoTokenizer
+# import torch
+# # Model selection dropdown
+# selected_model = st.selectbox(
+#     "Select Model",
+#     ["meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
+#      "meta-llama/Llama-3.3-70B-Instruct",
+#      "meta-llama/Llama-3.2-3B-Instruct",
+#      "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+#      "meta-llama/Meta-Llama-3-8B-Instruct",
+#      "meta-llama/Llama-3.1-70B-Instruct"],
+#     key='model_select'
+# )
+# @st.cache_resource  # Cache the model to prevent reloading
+# def load_model(model_name):
+#     try:
+#         # Optimized model loading configuration
+#         model = AutoModelForCausalLM.from_pretrained(
+#             model_name,
+#             torch_dtype=torch.float16,        # Use half precision
+#             device_map="auto",                # Automatic device mapping
+#             load_in_8bit=True,               # Enable 8-bit quantization
+#             low_cpu_mem_usage=True,          # Optimize CPU memory usage
+#             max_memory={0: "10GB"}           # Limit GPU memory usage
+#         )
+#         tokenizer = AutoTokenizer.from_pretrained(
+#             model_name,
+#             padding_side="left",
+#             truncation_side="left"
+#         )
+#         return model, tokenizer
+#     except Exception as e:
+#         st.error(f"Error loading model: {str(e)}")
+#         return None, None
+# # Load the selected model with optimizations
+# if selected_model:
+#     model, tokenizer = load_model(selected_model)
+#     # Check if model loaded successfully
+#     if model is not None:
+#         st.success(f"Successfully loaded {selected_model}")
+#     else:
+#         st.warning("Please select a different model or check your hardware capabilities")
+# # Function to generate text
+# def generate_response(prompt, model, tokenizer):
+#     try:
+#         inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
+#         with torch.no_grad():
+#             outputs = model.generate(
+#                 inputs["input_ids"],
+#                 max_length=256,
+#                 num_return_sequences=1,
+#                 temperature=0.7,
+#                 do_sample=True,
+#                 pad_token_id=tokenizer.pad_token_id
+#             )
+#         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+#         return response
+#     except Exception as e:
+#         return f"Error generating response: {str(e)}"
+############################################################
+####new
+# from openai import OpenAI
+# client = OpenAI(
+#     base_url="https://router.huggingface.co/together/v1",
+#     api_key=os.environ.get('TOKEN2'),
+# )
+# completion = client.chat.completions.create(
+#     model="meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
+#     messages=[
+#         {
+#             "role": "user",
+#             "content": "What is the capital of France?"
+#         }
+#     ],
+#     max_tokens=512,
+# )
+# print(completion.choices[0].message)
+#####
+# Create necessary directories
+# exist_ok=True keeps this idempotent across reruns and avoids the
+# check-then-create race of a separate os.path.exists() test.
+for dir_name in ['data', 'feedback']:
+    os.makedirs(dir_name, exist_ok=True)
+# Custom CSS
+# Page-wide style overrides injected as a raw <style> element (hence
+# unsafe_allow_html=True). The .sidebar-info class is used by the sidebar
+# info box further down; the other custom classes are declared here for use
+# by markup elsewhere in the app.
+st.markdown("""
+<style>
+    .stButton > button {
+        width: 100%;
+        margin-bottom: 10px;
+        background-color: #4CAF50;
+        color: white;
+        border: none;
+        padding: 10px;
+        border-radius: 5px;
+    }
+    .task-button {
+        background-color: #2196F3 !important;
+    }
+    .stSelectbox {
+        margin-bottom: 20px;
+    }
+    .output-container {
+        padding: 20px;
+        border-radius: 5px;
+        border: 1px solid #ddd;
+        margin: 10px 0;
+    }
+    .status-container {
+        padding: 10px;
+        border-radius: 5px;
+        margin: 10px 0;
+    }
+    .sidebar-info {
+        padding: 10px;
+        background-color: #f0f2f6;
+        border-radius: 5px;
+        margin: 10px 0;
+    }
+    .feedback-button {
+        background-color: #ff9800 !important;
+    }
+    .feedback-container {
+        padding: 15px;
+        background-color: #f5f5f5;
+        border-radius: 5px;
+        margin: 15px 0;
+    }
+</style>
+""", unsafe_allow_html=True)
+# Helper functions
+def read_csv_with_encoding(file):
+    """Read a CSV into a DataFrame, trying several text encodings in turn.
+
+    Args:
+        file: A filesystem path or a file-like object accepted by
+            ``pd.read_csv``.
+
+    Returns:
+        The DataFrame parsed with the first encoding that decodes the data.
+
+    Raises:
+        UnicodeDecodeError: If none of the candidate encodings can decode
+            the input.
+    """
+    encodings = ['utf-8', 'latin1', 'iso-8859-1', 'cp1252']
+    for encoding in encodings:
+        # A failed attempt may already have consumed part of a file-like
+        # object; rewind so every attempt parses from the first byte.
+        if hasattr(file, 'seek'):
+            file.seek(0)
+        try:
+            return pd.read_csv(file, encoding=encoding)
+        except UnicodeDecodeError:
+            continue
+    # UnicodeDecodeError takes (encoding, object, start, end, reason); calling
+    # it with only a message raises TypeError instead of the intended error.
+    raise UnicodeDecodeError(
+        ", ".join(encodings), b"", 0, 1,
+        "Failed to read file with any supported encoding",
+    )
+#def save_feedback(feedback_data):
+    #feedback_file = 'feedback/user_feedback.csv'
+    #feedback_df = pd.DataFrame([feedback_data])
+    #if os.path.exists(feedback_file):
+        #feedback_df.to_csv(feedback_file, mode='a', header=False, index=False)
+    #else:
+        #feedback_df.to_csv(feedback_file, index=False)
+def reset_conversation():
+    """Empty the stored chat history and forget the selected task.
+
+    Passed as the ``on_click`` callback of the "New Conversation" button.
+    Always returns ``None``.
+    """
+    state = st.session_state
+    state.conversation = []
+    state.messages = []
+    # Only remove the task choice when one has actually been stored.
+    if 'task_choice' in state:
+        del state['task_choice']
+    return None
+    #new 24 March
+    #user_input = st.text_input("Enter your prompt:")
+###########33
+# Initialize session state variables
+# Each key is seeded only when absent, so values already stored survive
+# Streamlit reruns; the factories give every key its own fresh default.
+for state_key, make_default in (
+    ("messages", list),
+    ("examples_to_classify", list),
+    ("system_role", str),
+):
+    if state_key not in st.session_state:
+        st.session_state[state_key] = make_default()
+# Main app title
+st.title("🤖🦙 Text Data Labeling and Generation App")
+# def embed_pdf_sidebar(pdf_path):
+#     with open(pdf_path, "rb") as f:
+#         base64_pdf = base64.b64encode(f.read()).decode('utf-8')
+#     pdf_display = f"""
+#         <iframe src="data:application/pdf;base64,{base64_pdf}"
+#         width="100%" height="400" type="application/pdf"></iframe>
+#     """
+#     st.markdown(pdf_display, unsafe_allow_html=True)
+#
+# Sidebar settings
+with st.sidebar:
+    st.title("⚙️ Settings")
+#this last code works
+with st.sidebar:
+    st.markdown("### 📘Data Generation and Labeling Instructions")
+    #st.markdown("<h4 style='color: #4A90E2;'>📘    Instructions</h4>", unsafe_allow_html=True)
+    # Read the PDF up front so that a missing file degrades to a warning
+    # instead of raising FileNotFoundError and aborting the page render.
+    try:
+        with open("User instructions.pdf", "rb") as f:
+            instructions_pdf = f.read()
+    except FileNotFoundError:
+        st.warning("Instructions PDF is not available.")
+    else:
+        st.download_button(
+            label="📄 Download Instructions PDF",
+            data=instructions_pdf,
+            #file_name="instructions.pdf",
+            file_name="User instructions.pdf",
+            mime="application/pdf"
+        )
+    selected_model = st.selectbox(
+        "Select Model",
+        ["meta-llama/Llama-Prompt-Guard-2-86M","mistralai/Mistral-7B-Instruct-v0.2", "meta-llama/Llama-3.2-11B-Vision-Instruct","meta-llama/Meta-Llama-3-8B-Instruct-Turbo", "meta-llama/Llama-3.3-70B-Instruct", "meta-llama/Llama-3.2-3B-Instruct","meta-llama/Llama-4-Scout-17B-16E-Instruct", "meta-llama/Meta-Llama-3-8B-Instruct",
+         "meta-llama/Llama-3.1-70B-Instruct"],
+        key='model_select'
+    )
+#################new oooo
+# # Model selection dropdown
+# selected_model = st.selectbox(
+#     "Select Model",
+#     [#"meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
+#      "meta-llama/Llama-3.2-3B-Instruct",
+#      "meta-llama/Llama-3.3-70B-Instruct",
+#      "meta-llama/Llama-3.2-3B-Instruct",
+#      "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+#      "meta-llama/Meta-Llama-3-8B-Instruct",
+#      "meta-llama/Llama-3.1-70B-Instruct"],
+#     key='model_select'
+# )
+# @st.cache_resource  # Cache the model to prevent reloading
+# def load_model(model_name):
+#     try:
+#         # Optimized model loading configuration
+#         model = AutoModelForCausalLM.from_pretrained(
+#             model_name,
+#             torch_dtype=torch.float16,        # Use half precision
+#             device_map="auto",                # Automatic device mapping
+#             load_in_8bit=True,               # Enable 8-bit quantization
+#             low_cpu_mem_usage=True,          # Optimize CPU memory usage
+#             max_memory={0: "10GB"}           # Limit GPU memory usage
+#         )
+#         tokenizer = AutoTokenizer.from_pretrained(
+#             model_name,
+#             padding_side="left",
+#             truncation_side="left"
+#         )
+#         return model, tokenizer
+#     except Exception as e:
+#         st.error(f"Error loading model: {str(e)}")
+#         return None, None
+# # Load the selected model with optimizations
+# if selected_model:
+#     model, tokenizer = load_model(selected_model)
+#     # Check if model loaded successfully
+#     if model is not None:
+#         st.success(f"Successfully loaded {selected_model}")
+#     else:
+#         st.warning("Please select a different model or check your hardware capabilities")
+# # Function to generate text
+# def generate_response(prompt, model, tokenizer):
+#     try:
+#         inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
+#         with torch.no_grad():
+#             outputs = model.generate(
+#                 inputs["input_ids"],
+#                 max_length=256,
+#                 num_return_sequences=1,
+#                 temperature=0.7,
+#                 do_sample=True,
+#                 pad_token_id=tokenizer.pad_token_id
+#             )
+#         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+#         return response
+#     except Exception as e:
+#         return f"Error generating response: {str(e)}"
+# ################
+# model = AutoModelForCausalLM.from_pretrained(
+#     "meta-llama/Meta-Llama-3-8B-Instruct",
+#     torch_dtype=torch.float16,  # Use half precision
+#     device_map="auto",          # Automatic device mapping
+#     load_in_8bit=True          # Load in 8-bit precision
+# )
+    # Sampling temperature (0.0-1.0, default 0.7); passed as `temperature=`
+    # to the chat-completion request.
+    temperature = st.slider(
+        "Temperature",
+        0.0, 1.0, 0.7,
+        help="Controls randomness in generation"
+    )
+    # reset_conversation runs as a callback when the button is clicked.
+    st.button("🔄 New Conversation", on_click=reset_conversation)
+    with st.container():
+        # Info box showing the selected model, styled with the .sidebar-info
+        # CSS class.
+        st.markdown(f"""
+           <div class="sidebar-info">
+               <h4>Current Model: {selected_model}</h4>
+               <p><em>Note: Generated content may be inaccurate or false. Check important info.</em></p>
+           </div>
+            """, unsafe_allow_html=True)
+    # External Google Form for user feedback, opened in a new browser tab.
+    feedback_url = "https://docs.google.com/forms/d/e/1FAIpQLSdZ_5mwW-pjqXHgxR0xriyVeRhqdQKgb5c-foXlYAV55Rilsg/viewform?usp=header"
+    st.sidebar.markdown(
+        f'<a href="{feedback_url}" target="_blank"><button style="width: 100%;">Feedback Form</button></a>',
+        unsafe_allow_html=True
+    )
+# Display conversation
+# Replay every stored message under its role so the history is redrawn on
+# each script run.
+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+# Main content
+# Until a task has been chosen, offer the two entry points side by side. The
+# choice is kept in session state under 'task_choice'.
+if 'task_choice' not in st.session_state:
+    col1, col2 = st.columns(2)
+    with col1:
+        if st.button("📝 Data Generation", key="gen_button", help="Generate new data"):
+            st.session_state.task_choice = "Data Generation"
+    with col2:
+        if st.button("🏷️ Data Labeling", key="label_button", help="Label existing data"):
+            st.session_state.task_choice = "Data Labeling"
+if "task_choice" in st.session_state:
+    if st.session_state.task_choice == "Data Generation":
+        st.header("📝 Data Generation")
+       # 1. Domain selection
+        domain_selection = st.selectbox("Domain", [
+            "Restaurant reviews", "E-Commerce reviews", "News", "AG News", "Tourism", "Custom"
+        ])
+        # 2. Handle custom domain input
+        custom_domain_valid = True  # Assume valid until proven otherwise
+        if domain_selection == "Custom":
+            domain = st.text_input("Specify custom domain")
+            if not domain.strip():
+                st.error("Please specify a domain name.")
+                custom_domain_valid = False
+        else:
+            domain = domain_selection
+        # Classification type selection
+        classification_type = st.selectbox(
+            "Classification Type",
+            ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"]
+        )
+        # Labels setup based on classification type
+        #labels = []
+        labels = []
+        labels_valid = False
+        errors = []
+        def validate_binary_labels(labels):
+            """Return the validation messages for a pair of class names.
+
+            Args:
+                labels: Sequence whose first two items are the class names.
+
+            Returns:
+                A list of error strings; empty when both names are filled in
+                and differ after trimming and lower-casing.
+            """
+            cleaned = [name.strip().lower() for name in labels]
+            problems = []
+            required_messages = (
+                (0, "First class name is required."),
+                (1, "Second class name is required."),
+            )
+            for position, message in required_messages:
+                if not labels[position].strip():
+                    problems.append(message)
+            # Only report a clash when every name is non-blank, so two empty
+            # inputs are not additionally flagged as duplicates.
+            if all(cleaned) and cleaned[0] == cleaned[1]:
+                problems.append("Class names must be different.")
+            return problems
+        if classification_type == "Sentiment Analysis":
+            st.write("### Sentiment Analysis Labels (Fixed)")
+            col1, col2, col3 = st.columns(3)
+            with col1:
+                st.text_input("First class", "Positive", disabled=True)
+            with col2:
+                st.text_input("Second class", "Negative", disabled=True)
+            with col3:
+                st.text_input("Third class", "Neutral", disabled=True)
+            labels = ["Positive", "Negative", "Neutral"]
+        elif classification_type == "Binary Classification":
+            st.write("### Binary Classification Labels")
+            col1, col2 = st.columns(2)
+            with col1:
+                label_1 = st.text_input("First class", "Positive")
+            with col2:
+                label_2 = st.text_input("Second class", "Negative")
+            labels = [label_1, label_2]
+            errors = validate_binary_labels(labels)
+            if errors:
+                st.error("\n".join(errors))
+            else:
+                st.success("Binary class names are valid and unique!")
+        elif classification_type == "Multi-Class Classification":
+            st.write("### Multi-Class Classification Labels")
+            default_labels_by_domain = {
+                "News": ["Political", "Sports", "Entertainment", "Technology", "Business"],
+                "AG News": ["World", "Sports", "Business", "Sci/Tech"],
+                "Tourism": ["Accommodation", "Transportation", "Tourist Attractions",
+                            "Food & Dining", "Local Experience", "Adventure Activities",
+                            "Wellness & Spa", "Eco-Friendly Practices", "Family-Friendly",
+                            "Luxury Tourism"],
+                "Restaurant reviews": ["Italian", "French", "American"],
+                "E-Commerce reviews": ["Mobile Phones & Accessories", "Laptops & Computers","Kitchen & Dining",
+                                       "Beauty & Personal Care", "Home & Furniture", "Clothing & Fashion",
+                                      "Shoes & Handbags", "Health & Wellness", "Electronics & Gadgets",
+                                       "Books & Stationery","Toys & Games", "Sports & Fitness",
+                                       "Grocery & Gourmet Food","Watches & Accessories", "Baby Products"]
+            }
+            num_classes = st.slider("Number of classes", 3, 15, 3)
+            # Get defaults for selected domain, or empty list
+            defaults = default_labels_by_domain.get(domain, [])
+            labels = []
+            errors = []
+            cols = st.columns(3)
+            for i in range(num_classes):
+                with cols[i % 3]:
+                    default_value = defaults[i] if i < len(defaults) else ""
+                    label_input = st.text_input(f"Class {i+1}", default_value)
+                    normalized_label = label_input.strip().title()
+                    if not normalized_label:
+                        errors.append(f"Class {i+1} name is required.")
+                    else:
+                        labels.append(normalized_label)
+            # Check for duplicates (case-insensitive)
+            if len(labels) != len(set(labels)):
+                errors.append("Labels names must be unique (case-insensitive, normalized to Title Case).")
+            # Show validation results
+            if errors:
+                for error in errors:
+                    st.error(error)
+            else:
+                st.success("All Labels names are valid and unique!")
+            labels_valid = not errors  # Will be True only if there are no label errors
+                    ##############
+    #new 22/4/2025
+    # add additional attributes
+        add_attributes = st.checkbox("Add additional attributes (optional)")
+        additional_attributes = []
+        if add_attributes:
+            num_attributes = st.slider("Number of attributes to add", 1, 5, 1)
+            for i in range(num_attributes):
+                st.markdown(f"#### Attribute {i+1}")
+                attr_name = st.text_input(f"Name of attribute {i+1}", key=f"attr_name_{i}")
+                attr_topics = st.text_input(f"Topics (comma-separated) for {attr_name}", key=f"attr_topics_{i}")
+                if attr_name and attr_topics:
+                    topics_list = [topic.strip() for topic in attr_topics.split(",") if topic.strip()]
+                    additional_attributes.append({"attribute": attr_name, "topics": topics_list})
+################
+        # Generation parameters
+        col1, col2 = st.columns(2)
+        with col1:
+            min_words = st.number_input("Min words", 1, 100, 20)
+        with col2:
+            max_words = st.number_input("Max words", min_words, 100, 50)
+        # Few-shot examples
+        use_few_shot = st.toggle("Use few-shot examples")
+        few_shot_examples = []
+        if use_few_shot:
+            num_examples = st.slider("Number of few-shot examples", 1, 10, 1)
+            for i in range(num_examples):
+                with st.expander(f"Example {i+1}"):
+                    content = st.text_area(f"Content", key=f"few_shot_content_{i}")
+                    label = st.selectbox(f"Label", labels, key=f"few_shot_label_{i}")
+                    if content and label:
+                        few_shot_examples.append({"content": content, "label": label})
+        num_to_generate = st.number_input("Number of examples", 1, 100, 10)
+        #sytem role after
+         # System role customization
+        #default_system_role = f"You are a professional {classification_type} expert, your role is to generate text examples for {domain} domain. Always generate unique diverse examples and do not repeat the generated data. The generated text should be between {min_words} to {max_words} words long."
+        # System role customization
+        default_system_role = (
+    f"You are a seasoned expert in {classification_type}, specializing in the {domain} domain. "
+    f" Your primary responsibility is to generate high-quality, diverse, and unique text examples "
+    f"tailored to this domain. Please ensure that each example adheres to the specified length "
+    f"requirements, ranging from {min_words} to {max_words} words, and avoid any repetition in the generated content."
+)
+        system_role = st.text_area("Modify System Role (optional)",
+                                value=default_system_role,
+                                key="system_role_input")
+        st.session_state['system_role'] = system_role if system_role else default_system_role
+        # Labels initialization
+        #labels = []
+        user_prompt = st.text_area("User Prompt (optional)")
+        # Updated prompt template including system role
+        prompt_template = PromptTemplate(
+            input_variables=["system_role", "classification_type", "domain", "num_examples",
+                           "min_words", "max_words", "labels", "user_prompt", "few_shot_examples", "additional_attributes"],
+            template=(
+               "{system_role}\n"
+                 "- Use the following parameters:\n"
+                "- Generate {num_examples} examples\n"
+                "- Each example should be between {min_words} to {max_words} words long\n"
+                "- Use these labels: {labels}.\n"
+                "- Use the following additional attributes:\n"
+                "- {additional_attributes}\n"
+                "- Generate the examples in this format: 'Example text. Label: label'\n"
+                "- Do not include word counts or any additional information\n"
+                "- Always use your creativity and intelligence to generate unique and diverse text data\n"
+                "- In sentiment analysis, ensure that the sentiment classification is clearly identified as Positive, Negative, or Neutral. Do not leave the sentiment ambiguous.\n"
+                "- In binary sentiment analysis, classify text strictly as either Positive or Negative. Do not include or imply Neutral as an option.\n"
+                "- Write unique examples every time.\n"
+                "- DO NOT REPEAT your gnerated text. \n"
+                "- For each Output, describe it once and move to the next.\n"
+                "- List each Output only once, and avoid repeating details.\n"
+                "- Additional instructions: {user_prompt}\n\n"
+                "- Use the following examples as a reference in the generation process\n\n {few_shot_examples}. \n"
+                "- Think step by step, generate numbered examples, and check each newly generated example to ensure it has not been generated before. If it has, modify it"
+             )
+         )
+        #    template=(
+        #         "{system_role}\n"
+        #         "- Use the following parameters:\n"
+        #         "- Generate {num_examples} examples\n"
+        #         "- Each example should be between {min_words} to {max_words} words long\n"
+        #         "- Use these labels: {labels}.\n"
+        #         "- Use the following additional attributes:\n"
+        #         "{additional_attributes}\n"
+        #         #"- Format each example like this: 'Example text. Label: [label]. Attribute1: [topic1]. Attribute2: [topic2]'\n"
+        #         "- Generate the examples in this format: 'Example text. Label: label'\n"
+        #         "- Additional instructions: {user_prompt}\n"
+        #         "- Use these few-shot examples if provided:\n{few_shot_examples}\n"
+        #         "- Think step by step and ensure examples are unique and not repeated."
+        #     )
+        # )
+        ##########new 22/4/2025
+        formatted_attributes = "\n".join([
+            f"- {attr['attribute']}: {', '.join(attr['topics'])}" for attr in additional_attributes
+        ])
+#######################
+        # Generate system prompt
+        system_prompt = prompt_template.format(
+            system_role=st.session_state['system_role'],
+            classification_type=classification_type,
+            domain=domain,
+            num_examples=num_to_generate,
+            min_words=min_words,
+            max_words=max_words,
+            labels=", ".join(labels),
+            user_prompt=user_prompt,
+            few_shot_examples="\n".join([f"{ex['content']}\nLabel: {ex['label']}" for ex in few_shot_examples]) if few_shot_examples else "",
+            additional_attributes=formatted_attributes
+        )
+        # Store system prompt in session state
+        st.session_state['system_prompt'] = system_prompt
+        # Display system prompt
+        st.write("System Prompt:")
+        st.text_area("Current System Prompt", value=st.session_state['system_prompt'],
+                    height=400, disabled=True)
+        if st.button("🎯 Generate Examples"):
+            # Validate the form before calling the model. Previously the
+            # warnings were displayed but the request was sent regardless, and
+            # label errors collected while building the form were ignored.
+            if domain_selection == "Custom" and not domain.strip():
+                validation_problem = "Custom domain name is required."
+            elif len(labels) != len(set(labels)):
+                validation_problem = "Class names must be unique."
+            elif any(not lbl.strip() for lbl in labels):
+                validation_problem = "All class labels must be filled in."
+            elif errors:
+                # `errors` holds the messages produced by the label inputs
+                # above (e.g. blank multi-class names, which never reach
+                # `labels`).
+                validation_problem = "Please fix the label errors before generating examples."
+            else:
+                validation_problem = None
+            if validation_problem:
+                st.warning(validation_problem)
+            else:
+                with st.spinner("Generating examples..."):
+                    try:
+                        stream = client.chat.completions.create(
+                            model=selected_model,
+                            messages=[{"role": "system", "content": st.session_state['system_prompt']}],
+                            temperature=temperature,
+                            stream=True,
+                            max_tokens=4000,
+                            top_p=0.9,
+                        )
+                        # Record the prompt, then stream the reply to the page
+                        # and record the full text once it has finished.
+                        st.session_state.messages.append({"role": "user", "content": system_prompt})
+                        response = st.write_stream(stream)
+                        st.session_state.messages.append({"role": "assistant", "content": response})
+                        # Initialize session state variables if they don't exist
+                        if 'system_prompt' not in st.session_state:
+                            st.session_state.system_prompt = system_prompt
+                        if 'response' not in st.session_state:
+                            st.session_state.response = response
+                        if 'generated_examples' not in st.session_state:
+                            st.session_state.generated_examples = []
+                        if 'generated_examples_csv' not in st.session_state:
+                            st.session_state.generated_examples_csv = None
+                        if 'generated_examples_json' not in st.session_state:
+                            st.session_state.generated_examples_json = None
+                        # Parse response and generate examples list. Each usable
+                        # line looks like "<text> Label: <label>"; rsplit keeps
+                        # any earlier "Label:" inside the example text itself.
+                        examples_list = []
+                        for line in response.split('\n'):
+                            parts = line.rsplit('Label:', 1)
+                            if len(parts) != 2:
+                                continue
+                            text = parts[0].strip()
+                            label = parts[1].strip()
+                            if text and label:
+                                examples_list.append({
+                                    'text': text,
+                                    'label': label,
+                                    'system_prompt': st.session_state.system_prompt,
+                                    'system_role': st.session_state.system_role,
+                                    'task_type': 'Data Generation',
+                                    'Use few-shot example?': 'Yes' if use_few_shot else 'No',
+                                })
+                        if examples_list:
+                            # Update session state with new data
+                            st.session_state.generated_examples = examples_list
+                            # Generate CSV and JSON data
+                            df = pd.DataFrame(examples_list)
+                            st.session_state.generated_examples_csv = df.to_csv(index=False).encode('utf-8')
+                            st.session_state.generated_examples_json = json.dumps(examples_list, indent=2).encode('utf-8')
+                            st.download_button(
+                                "📥 Download Generated Examples (CSV)",
+                                st.session_state.generated_examples_csv,
+                                "generated_examples.csv",
+                                "text/csv",
+                                key='download-csv-persistent'
+                            )
+                            # Separator between the two download buttons. The
+                            # string's inner indentation is kept verbatim so
+                            # the emitted markup is unchanged.
+                            st.markdown("""
+                        <div style='text-align: left; margin:15px 0; font-weight: 600; color: #666;'>. . . . . .         or</div>
+                        """, unsafe_allow_html=True)
+                            st.download_button(
+                                "📥 Download Generated Examples (JSON)",
+                                st.session_state.generated_examples_json,
+                                "generated_examples.json",
+                                "application/json",
+                                key='download-json-persistent'
+                            )
+                        # NOTE(review): `follow_up` is not defined anywhere in
+                        # the visible code, and this button lives inside another
+                        # button's branch, so its body presumably never runs —
+                        # confirm the intended follow-up flow.
+                        if st.button("Continue"):
+                            if follow_up == "Generate more examples":
+                                # st.rerun replaces st.experimental_rerun.
+                                st.rerun()
+                            elif follow_up == "Data Labeling":
+                                st.session_state.task_choice = "Data Labeling"
+                                st.rerun()
+                    except Exception as e:
+                        st.error("An error occurred during generation.")
+                        st.error(f"Details: {e}")
+# Labeling process
+    elif st.session_state.task_choice == "Data Labeling":
+        st.header("🏷️ Data Labeling")
+        domain_selection = st.selectbox("Domain", ["Restaurant reviews", "E-Commerce reviews", "News", "AG News", "Tourism", "Custom"])
+            # 2. Handle custom domain input
+        custom_domain_valid = True  # Assume valid until proven otherwise
+        if domain_selection == "Custom":
+            domain = st.text_input("Specify custom domain")
+            if not domain.strip():
+                st.error("Please specify a domain name.")
+                custom_domain_valid = False
+        else:
+            domain = domain_selection
+        # Classification type selection
+        classification_type = st.selectbox(
+            "Classification Type",
+            ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification", "Named Entity Recognition (NER)"]
+        )
+#NNew edit
+        # Labels setup based on classification type
+        labels = []
+        labels_valid = False
+        errors = []
+        if classification_type == "Sentiment Analysis":
+            st.write("### Sentiment Analysis Labels (Fixed)")
+            col1, col2, col3 = st.columns(3)
+            with col1:
+                label_1 = st.text_input("First class", "Positive", disabled=True)
+            with col2:
+                label_2 = st.text_input("Second class", "Negative", disabled=True)
+            with col3:
+                label_3 = st.text_input("Third class", "Neutral", disabled=True)
+            labels = ["Positive", "Negative", "Neutral"]
+        elif classification_type == "Binary Classification":
+            st.write("### Binary Classification Labels")
+            col1, col2 = st.columns(2)
+            with col1:
+                label_1 = st.text_input("First class", "Positive")
+            with col2:
+                label_2 = st.text_input("Second class", "Negative")
+            errors = []
+            labels = [label_1.strip(), label_2.strip()]
+            # Strip and lower-case labels for validation
+            label_1 = labels[0].strip()
+            label_2 = labels[1].strip()
+            # Check for empty class names
+            if not label_1:
+                errors.append("First class name is required.")
+            if not label_2:
+                errors.append("Second class name is required.")
+            # Check for duplicates (case insensitive)
+            if label_1.lower() == label_2.lower() and label_1 and label_2:
+                errors.append("Class names must be different.")
+            # Show errors or success
+            if errors:
+                for error in errors:
+                    st.error(error)
+            else:
+                st.success("Binary class names are valid and unique!")
+        elif classification_type == "Multi-Class Classification":
+                st.write("### Multi-Class Classification Labels")
+                default_labels_by_domain = {
+                    "News": ["Political", "Sports", "Entertainment", "Technology", "Business"],
+                    "AG News": ["World", "Sports", "Business", "Sci/Tech"],
+                    "Tourism": ["Accommodation", "Transportation", "Tourist Attractions",
+                                "Food & Dining", "Local Experience", "Adventure Activities",
+                                "Wellness & Spa", "Eco-Friendly Practices", "Family-Friendly",
+                                "Luxury Tourism"],
+                    "Restaurant reviews": ["Italian", "French", "American"],
+                    "E-Commerce reviews": ["Mobile Phones & Accessories", "Laptops & Computers","Kitchen & Dining",
+                                       "Beauty & Personal Care", "Home & Furniture", "Clothing & Fashion",
+                                      "Shoes & Handbags", "Health & Wellness", "Electronics & Gadgets",
+                                       "Books & Stationery","Toys & Games", "Sports & Fitness",
+                                       "Grocery & Gourmet Food","Watches & Accessories", "Baby Products"]
+            }
+                # Ask user how many classes they want to define
+                num_classes = st.slider("Select the number of classes (labels)", min_value=3, max_value=10, value=3)
+                # Use default labels based on selected domain, if available
+                defaults = default_labels_by_domain.get(domain, [])
+                labels = []
+                errors = []
+                cols = st.columns(3)  # For nicely arranged label inputs
+                for i in range(num_classes):
+                    with cols[i % 3]:  # Distribute inputs across columns
+                        default_value = defaults[i] if i < len(defaults) else ""
+                        label_input = st.text_input(f"Label {i + 1}", default_value)
+                        normalized_label = label_input.strip().title()
+                        if not normalized_label:
+                            errors.append(f"Label {i + 1} is required.")
+                        else:
+                            labels.append(normalized_label)
+                # Check for duplicates (case-insensitive)
+                normalized_set = {label.lower() for label in labels}
+                if len(labels) != len(normalized_set):
+                    errors.append("Label names must be unique (case-insensitive).")
+                # Show validation results
+                if errors:
+                    for error in errors:
+                        st.error(error)
+                else:
+                    st.success("All label names are valid and unique!")
+                labels_valid = not errors  # True if no validation errors
+        elif classification_type == "Named Entity Recognition (NER)":
+            # # NER entity options
+            # ner_entities = [
+            #     "PERSON - Names of people, fictional characters, historical figures",
+            #     "ORG - Companies, institutions, agencies, teams",
+            #     "LOC - Physical locations (mountains, oceans, etc.)",
+            #     "GPE - Countries, cities, states, political regions",
+            #     "DATE - Calendar dates, years, centuries",
+            #     "TIME - Times, durations",
+            #     "MONEY - Monetary values with currency"
+            # ]
+            # selected_entities = st.multiselect(
+            #      "Select entities to recognize",
+            #     ner_entities,
+            #     default=["PERSON - Names of people, fictional characters, historical figures",
+            #              "ORG - Companies, institutions, agencies, teams",
+            #             "LOC - Physical locations (mountains, oceans, etc.)",
+            #             "GPE - Countries, cities, states, political regions",
+            #             "DATE - Calendar dates, years, centuries",
+            #             "TIME - Times, durations",
+            #             "MONEY - Monetary values with currency"],
+            #             key="ner_entity_selection"
+            # )
+            #new 22/4/2025
+            #if classification_type == "Named Entity Recognition (NER)":
+            use_few_shot = True
+            #new 22/4/2025
+            few_shot_examples = [
+                {"content": "Mount Everest is the tallest mountain in the world.", "label": "LOC: Mount Everest"},
+                {"content": "The President of the United States visited Paris last summer.", "label": "GPE: United States, GPE: Paris"},
+                {"content": "Amazon is expanding its offices in Berlin.", "label": "ORG: Amazon, GPE: Berlin"},
+                {"content": "J.K. Rowling wrote the Harry Potter books.", "label": "PERSON: J.K. Rowling"},
+                {"content": "Apple was founded in California in 1976.", "label": "ORG: Apple, GPE: California, DATE: 1976"},
+                {"content": "The Nile is the longest river in Africa.", "label": "LOC: Nile, GPE: Africa"},
+                {"content": "He arrived at 3 PM for the meeting.", "label": "TIME: 3 PM"},
+                {"content": "She bought the dress for $200.", "label": "MONEY: $200"},
+                {"content": "The event is scheduled for July 4th.", "label": "DATE: July 4th"},
+                {"content": "The World Health Organization is headquartered in Geneva.", "label": "ORG: World Health Organization, GPE: Geneva"}
+            ]
+###########
+            st.write("### Named Entity Recognition (NER) Entities")
+            # Predefined standard entities
+            ner_entities = [
+                "PERSON - Names of people, fictional characters, historical figures",
+                "ORG - Companies, institutions, agencies, teams",
+                "LOC - Physical locations (mountains, oceans, etc.)",
+                "GPE - Countries, cities, states, political regions",
+                "DATE - Calendar dates, years, centuries",
+                "TIME - Times, durations",
+                "MONEY - Monetary values with currency"
+            ]
+            # User can add custom NER types
+            custom_ner_entities = []
+            if st.checkbox("Add custom NER entities?"):
+                num_custom_ner = st.slider("Number of custom NER entities", 1, 10, 1)
+                for i in range(num_custom_ner):
+                    st.markdown(f"#### Custom Entity {i+1}")
+                    custom_type = st.text_input(f"Entity type {i+1}", key=f"custom_ner_type_{i}")
+                    custom_description = st.text_input(f"Description for {custom_type}", key=f"custom_ner_desc_{i}")
+                    if custom_type and custom_description:
+                        custom_ner_entities.append(f"{custom_type.upper()} - {custom_description}")
+            # Combine built-in and custom NERs
+            all_ner_options = ner_entities + custom_ner_entities
+            selected_entities = st.multiselect(
+                "Select entities to recognize",
+                all_ner_options,
+                default=ner_entities
+            )
+            # Extract entity type names (before the dash)
+            labels = [entity.split(" - ")[0].strip() for entity in selected_entities]
+            if not labels:
+                st.warning("Please select at least one entity type.")
+                labels = ["PERSON"]
+            ##########
+            # # Extract just the entity type (before the dash)
+            # labels = [entity.split(" - ")[0] for entity in selected_entities]
+            # if not labels:
+            #     st.warning("Please select at least one entity type")
+            #     labels = ["PERSON"]  # Default if nothing selected
+    #NNew edit
+            # elif classification_type == "Multi-Class Classification":
+            #     st.write("### Multi-Class Classification Labels")
+            #     default_labels_by_domain = {
+            #         "News": ["Political", "Sports", "Entertainment", "Technology", "Business"],
+            #         "AG News": ["World", "Sports", "Business", "Sci/Tech"],
+            #         "Tourism": ["Accommodation", "Transportation", "Tourist Attractions",
+            #                     "Food & Dining", "Local Experience", "Adventure Activities",
+            #                     "Wellness & Spa", "Eco-Friendly Practices", "Family-Friendly",
+            #                     "Luxury Tourism"],
+            #         "Restaurant reviews": ["Italian", "French", "American"]
+            #     }
+            #     num_classes = st.slider("Number of classes", 3, 10, 3)
+            #     # Get defaults for selected domain, or empty list
+            #     defaults = default_labels_by_domain.get(domain, [])
+            #     labels = []
+            #     errors = []
+            #     cols = st.columns(3)
+            #     for i in range(num_classes):
+            #         with cols[i % 3]:
+            #             default_value = defaults[i] if i < len(defaults) else ""
+            #             label_input = st.text_input(f"Class {i+1}", default_value)
+            #             normalized_label = label_input.strip().title()
+            #             if not normalized_label:
+            #                 errors.append(f"Class {i+1} name is required.")
+            #             else:
+            #                 labels.append(normalized_label)
+            #     # Check for duplicates (case-insensitive)
+            #     if len(labels) != len(set(labels)):
+                #     errors.append("Labels names must be unique (case-insensitive, normalized to Title Case).")
+                # # Show validation results
+                # if errors:
+                #     for error in errors:
+                #         st.error(error)
+                # else:
+                #     st.success("All Labels names are valid and unique!")
+                # labels_valid = not errors  # Will be True only if there are no label errors
+        # else:
+        #     num_classes = st.slider("Number of classes", 3, 23, 3, key="label_num_classes")
+        #     labels = []
+        #     cols = st.columns(3)
+        #     for i in range(num_classes):
+        #         with cols[i % 3]:
+        #             label = st.text_input(f"Class {i+1}", f"Class_{i+1}", key=f"label_class_{i}")
+        #             labels.append(label)
+        use_few_shot = st.toggle("Use few-shot examples for labeling")
+        few_shot_examples = []
+        if use_few_shot:
+            num_few_shot = st.slider("Number of few-shot examples", 1, 10, 1)
+            for i in range(num_few_shot):
+                with st.expander(f"Few-shot Example {i+1}"):
+                    content = st.text_area(f"Content", key=f"label_few_shot_content_{i}")
+                    label = st.selectbox(f"Label", labels, key=f"label_few_shot_label_{i}")
+                    if content and label:
+                        few_shot_examples.append(f"{content}\nLabel: {label}")
+        num_examples = st.number_input("Number of examples to classify", 1, 100, 1)
+        examples_to_classify = []
+        if num_examples <= 10:
+            for i in range(num_examples):
+                example = st.text_area(f"Example {i+1}", key=f"example_{i}")
+                if example:
+                    examples_to_classify.append(example)
+        else:
+            examples_text = st.text_area(
+                "Enter examples (one per line)",
+                height=300,
+                help="Enter each example on a new line"
+            )
+            if examples_text:
+                examples_to_classify = [ex.strip() for ex in examples_text.split('\n') if ex.strip()]
+                if len(examples_to_classify) > num_examples:
+                    examples_to_classify = examples_to_classify[:num_examples]
+        #New Wedyan
+        #default_system_role = f"You are a professional {classification_type} expert, your role is to classify the provided text examples for {domain} domain."
+         # System role customization
+        default_system_role = (f"You are a highly skilled {classification_type} expert."
+        f" Your task is to accurately classify the provided text examples within the {domain} domain."
+        f" Ensure that all classifications are precise, context-aware, and aligned with domain-specific standards and best practices."
+    )
+        system_role = st.text_area("Modify System Role (optional)",
+                                value=default_system_role,
+                                key="system_role_input")
+        st.session_state['system_role'] = system_role if system_role else default_system_role
+        # Labels initialization
+        #labels = []
+        ####
+        user_prompt = st.text_area("User prompt (optional)", key="label_instructions")
+        few_shot_text = "\n\n".join(few_shot_examples) if few_shot_examples else ""
+        examples_text = "\n".join([f"{i+1}. {ex}" for i, ex in enumerate(examples_to_classify)])
+        # Customize prompt template based on classification type
+        if classification_type == "Named Entity Recognition (NER)":
+            # label_prompt_template = PromptTemplate(
+            #     input_variables=["system_role", "labels", "few_shot_examples", "examples", "domain", "user_prompt"],
+            #     template=(
+            #         "{system_role}\n"
+            #         #"- You are a professional Named Entity Recognition (NER) expert in {domain} domain. Your role is to identify and extract the following entity types: {labels}.\n"
+            #         "- For each text example provided, identify all entities of the requested types.\n"
+            #         "- Use the following entities: {labels}.\n"
+            #         "- Return each example followed by the entities you found in this format: 'Example text.\n \n Entities:\n [ENTITY_TYPE: entity text\n\n, ENTITY_TYPE: entity text\n\n, ...] or [No entities found]'\n"
+            #         "- If no entities of the requested types are found, indicate 'No entities found' in this text.\n"
+            #         "- Be precise about entity boundaries - don't include unnecessary words.\n"
+            #         "- Do not provide any additional information or explanations.\n"
+            #         "- Additional instructions:\n {user_prompt}\n\n"
+            #         "- Use user few-shot examples as guidance if provided:\n{few_shot_examples}\n\n"
+            #         "- Examples to analyze:\n{examples}\n\n"
+            #         "Output:\n"
+            #     )
+            # )
+            #new 22/4/2025
+            # label_prompt_template = PromptTemplate(
+            #     input_variables=["system_role", "labels", "few_shot_examples", "examples", "domain", "user_prompt"],
+            #     template=(
+            #         "{system_role}\n"
+            #         "- You are performing Named Entity Recognition (NER) in the domain of {domain}.\n"
+            #         "- Use the following entity types: {labels}.\n\n"
+            #         "### Reasoning Steps:\n"
+            #         "1. Read the example carefully.\n"
+            #         "2. For each named entity mentioned, determine its meaning and role in the sentence.\n"
+            #         "3. Think about the **context**: Is it a physical location (LOC)? A geopolitical region (GPE)? A person (PERSON)?\n"
+            #         "4. Based on the definition of each label, assign the most **specific and correct** label.\n\n"
+            #         "For example:\n"
+            #         "- 'Mount Everest' → LOC (it's a mountain)\n"
+            #         "- 'France' → GPE (it's a country)\n"
+            #         "- 'Microsoft' → ORG\n"
+            #         "- 'John Smith' → PERSON\n\n"
+            #         "- Return each example followed by the entities you found in this format:\n"
+            #         "'Example text.'\nEntities: [ENTITY_TYPE: entity text, ENTITY_TYPE: entity text, ...] or [No entities found]\n"
+            #         "- If no entities of the requested types are found, return 'No entities found'.\n"
+            #         "- Be precise about entity boundaries - don't include extra words.\n"
+            #         "- Do not explain or justify your answers.\n\n"
+            #         "Additional instructions:\n{user_prompt}\n\n"
+            #         "Few-shot examples:\n{few_shot_examples}\n\n"
+            #         "Examples to label:\n{examples}\n"
+            #         "Output:\n"
+            #     )
+            #)
+            # label_prompt_template = PromptTemplate(
+            #     input_variables=["system_role", "labels", "few_shot_examples", "examples", "domain", "user_prompt"],
+            #     template=(
+            #         "{system_role}\n"
+            #         "- You are an expert at Named Entity Recognition (NER) for domain: {domain}.\n"
+            #         "- Use these entity types: {labels}.\n\n"
+            #         "### Output Format:\n"
+            #         # "Return each example followed by the entities you found in this format: 'Example text.\n Entities:\n [ENTITY_TYPE: entity text\n\"
+            #         "Return each example followed by the entities you found in this format: 'Example text.\n 'Entity types:\n "Then group the entities under each label like this:\n" "
+            #         #"Then Start with this line exactly: 'Entity types\n'\n"
+            #         #"Then group the entities under each label like this:\n"
+            #         "\n PERSON – Angela Merkel, John Smith\n\n"
+            #         "\ ORG – Google, United Nations\n\n"
+            #         "\n DATE – January 1st, 2023\n\n"
+            #         "\n ... and so on.\n\n"
+            #         "If entity {labels} not found, do not write it in your response\n"
+            #         "- Do NOT output them inline after the text.\n"
+            #         "- Do NOT repeat the sentence.\n"
+            #         "- If no entities are found for a type, skip it.\n"
+            #         "- Keep the format consistent.\n\n"
+            #         "User Instructions:\n{user_prompt}\n\n"
+            #         "Few-shot Examples:\n{few_shot_examples}\n\n"
+            #         "Examples to analyze:\n{examples}"
+            #       )
+            # )
+            label_prompt_template = PromptTemplate(
+                input_variables=["system_role", "labels", "few_shot_examples", "examples", "domain", "user_prompt"],
+                template=(
+                    "{system_role}\n"
+                    "- You are an expert at Named Entity Recognition (NER) for domain: {domain}.\n"
+                    "- Use these entity types: {labels}.\n\n"
+                    "### Output Format:\n"
+                    "Return each example followed by the entities you found in this format:\n"
+                    "'Example text.\nEntity types:\n"
+                    "Then group the entities under each label like this:\n"
+                    "\nPERSON – Angela Merkel, John Smith\n"
+                    "ORG – Google, United Nations\n"
+                    "DATE – January 1st, 2023\n"
+                    "... and so on.\n\n"
+                    "Each new entities group should be in a new line.\n"
+                    "If entity type {labels} is not found, do not write it in your response.\n"
+                    "- Do NOT output them inline after the text.\n"
+                    "- Do NOT repeat the sentence.\n"
+                    "- If no entities are found for a type, skip it.\n"
+                    "- Keep the format consistent.\n\n"
+                    "User Instructions:\n{user_prompt}\n\n"
+                    "Few-shot Examples:\n{few_shot_examples}\n\n"
+                    "Examples to analyze:\n{examples}"
+                )
+            )
+#######
+        else:
+                label_prompt_template = PromptTemplate(
+                input_variables=["system_role", "classification_type", "labels", "few_shot_examples", "examples","domain", "user_prompt"],
+                template=(
+                    #"- Let'\s think step by step:"
+                    "{system_role}\n"
+                   # "- You are a professional {classification_type} expert in {domain} domain. Your role is to classify the following examples using these labels: {labels}.\n"
+                    "- Use the following instructions:\n"
+                    "- Use the following labels: {labels}.\n"
+                    "- Return the classified text followed by the label in this format: 'text. Label: [label]'\n"
+                    "- Do not provide any additional information or explanations\n"
+                    "- User prompt:\n {user_prompt}\n\n"
+                    "- Use user provided examples as guidence in the classification process:\n\n {few_shot_examples}\n"
+                    "- Examples to classify:\n{examples}\n\n"
+                    "- Think step by step then classify the examples"
+                    #"Output:\n"
+                ))
+        # Check if few_shot_examples is already a formatted string
+           # Check if few_shot_examples is already a formatted string
+        if isinstance(few_shot_examples, str):
+            formatted_few_shot = few_shot_examples
+        # If it's a list of already formatted strings
+        elif isinstance(few_shot_examples, list) and all(isinstance(ex, str) for ex in few_shot_examples):
+            formatted_few_shot = "\n".join(few_shot_examples)
+        # If it's a list of dictionaries with 'content' and 'label' keys
+        elif isinstance(few_shot_examples, list) and all(isinstance(ex, dict) and 'content' in ex and 'label' in ex for ex in few_shot_examples):
+            formatted_few_shot = "\n".join([f"{ex['content']}\nLabel: {ex['label']}" for ex in few_shot_examples])
+        else:
+            formatted_few_shot = ""
+# #new 22/4/2025
+#         few_shot_examples = [
+#             {"content": "Mount Everest is the tallest mountain in the world.", "label": "LOC: Mount Everest"},
+#             {"content": "The President of the United States visited Paris last summer.", "label": "GPE: United States, GPE: Paris"},
+#             {"content": "Amazon is expanding its offices in Berlin.", "label": "ORG: Amazon, GPE: Berlin"},
+#             {"content": "J.K. Rowling wrote the Harry Potter books.", "label": "PERSON: J.K. Rowling"},
+#             {"content": "Apple was founded in California in 1976.", "label": "ORG: Apple, GPE: California, DATE: 1976"},
+#             {"content": "The Nile is the longest river in Africa.", "label": "LOC: Nile, GPE: Africa"},
+#             {"content": "He arrived at 3 PM for the meeting.", "label": "TIME: 3 PM"},
+#             {"content": "She bought the dress for $200.", "label": "MONEY: $200"},
+#             {"content": "The event is scheduled for July 4th.", "label": "DATE: July 4th"},
+#             {"content": "The World Health Organization is headquartered in Geneva.", "label": "ORG: World Health Organization, GPE: Geneva"}
+#         ]
+# ###########
+# new 22/4/2025
+        #formatted_few_shot = "\n".join([f"{ex['content']}\nEntities: [{ex['label']}]" for ex in few_shot_examples])
+        formatted_few_shot = "\n\n".join([f"{ex['content']}\n\nEntity types\n{ex['label']}" for ex in few_shot_examples])
+        ###########
+        system_prompt = label_prompt_template.format(
+            system_role=st.session_state['system_role'],
+            classification_type=classification_type,
+            domain=domain,
+            examples="\n".join(examples_to_classify),
+            labels=", ".join(labels),
+            user_prompt=user_prompt,
+            few_shot_examples=formatted_few_shot
+        )
+        # Step 2: Store the system_prompt in st.session_state
+        st.session_state['system_prompt'] = system_prompt
+#::contentReference[oaicite:0]{index=0}
+        st.write("System Prompt:")
+        #st.code(system_prompt)
+        #st.code(st.session_state['system_prompt'])
+        st.text_area("System Prompt", value=st.session_state['system_prompt'], height=300, max_chars=None, key=None, help=None, disabled=True)
+        if st.button("🏷️ Label Data"):
+            if examples_to_classify:
+                with st.spinner("Labeling data..."):
+                    #Generate the system prompt based on classification type
+                    if classification_type == "Named Entity Recognition (NER)":
+                        system_prompt = label_prompt_template.format(
+                            system_role=st.session_state['system_role'],
+                            labels=", ".join(labels),
+                            domain = domain,
+                            few_shot_examples=few_shot_text,
+                            examples=examples_text,
+                            user_prompt=user_prompt
+                    #new
+                            #'Use few-shot example?': 'Yes' if use_few_shot else 'No',
+                       )
+                    # if classification_type == "Named Entity Recognition (NER)":
+                    #     # Step 1: Split the full response by example
+                    #     raw_outputs = [block.strip() for block in response.strip().split("Entity types") if block.strip()]
+                    #     inputs = [ex.strip() for ex in examples_to_classify]
+                    #     # Step 2: Match inputs with NER output blocks
+                    #     labeled_examples = []
+                    #     for i, (text, output_block) in enumerate(zip(inputs, raw_outputs)):
+                    #         labeled_examples.append({
+                    #             'text': text,
+                    #             'entities': f"Entity types\n{output_block.strip()}",
+                    #             'system_prompt': st.session_state.system_prompt,
+                    #             'system_role': st.session_state.system_role,
+                    #             'task_type': 'Named Entity Recognition (NER)',
+                    #             'Use few-shot example?': 'Yes' if use_few_shot else 'No',
+                    #         })
+                    # if classification_type == "Named Entity Recognition (NER)":
+                    #     # Step 1: Split the full response by example
+                    #     raw_outputs = [block.strip() for block in response.strip().split("Entity types") if block.strip()]
+                    #     inputs = [ex.strip() for ex in examples_to_classify]
+                    #     # Step 2: Match inputs with NER output blocks
+                    #     labeled_examples = []
+                    #     for i, (text, output_block) in enumerate(zip(inputs, raw_outputs)):
+                    #         labeled_examples.append({
+                    #             'text': text,
+                    #             'entities': f"Entity types\n{output_block.strip()}",
+                    #             'system_prompt': st.session_state.system_prompt,
+                    #             'system_role': st.session_state.system_role,
+                    #             'task_type': 'Named Entity Recognition (NER)',
+                    #             'Use few-shot example?': 'Yes' if use_few_shot else 'No',
+                    #         })
+                    # import re
+                    # if classification_type == "Named Entity Recognition (NER)":
+                    #     # Use regex to split on "Entity types" while keeping it attached to each block
+                    #     blocks = re.split(r"(Entity types)", response.strip())
+                    #     # Recombine 'Entity types' with each block after splitting
+                    #     raw_outputs = [
+                    #         (blocks[i] + blocks[i+1]).strip()
+                    #         for i in range(1, len(blocks) - 1, 2)
+                    #     ]
+                    #     inputs = [ex.strip() for ex in examples_to_classify]
+                    #     labeled_examples = []
+                    #     for i, (text, output_block) in enumerate(zip(inputs, raw_outputs)):
+                    #         labeled_examples.append({
+                    #             'text': text,
+                    #             'entities': output_block,
+                    #             'system_prompt': st.session_state.system_prompt,
+                    #             'system_role': st.session_state.system_role,
+                    #             'task_type': 'Named Entity Recognition (NER)',
+                    #             'Use few-shot example?': 'Yes' if use_few_shot else 'No',
+                    #         })
+                    else:
+                        system_prompt = label_prompt_template.format(
+                            classification_type=classification_type,
+                            system_role=st.session_state['system_role'],
+                            domain = domain,
+                            labels=", ".join(labels),
+                            few_shot_examples=few_shot_text,
+                            examples=examples_text,
+                            user_prompt=user_prompt
+                        )
+                    try:
+                        stream = client.chat.completions.create(
+                            model=selected_model,
+                            messages=[{"role": "system", "content": system_prompt}],
+                            temperature=temperature,
+                            stream=True,
+                            #max_tokens=20000,
+                            max_tokens=4000,
+                            top_p = 0.9,
+                        )
+                        #new 24 March
+                        # Append user message
+                        st.session_state.messages.append({"role": "user", "content": system_prompt})
+                        #################
+                        response = st.write_stream(stream)
+                        st.session_state.messages.append({"role": "assistant", "content": response})
+                         # Display the labeled examples
+                        #    # Optional: If you want to add it as a chat-style message log
+                        # preview_str = st.session_state.labeled_preview.to_markdown(index=False)
+                        # st.session_state.messages.append({"role": "assistant", "content": f"Here is a preview of the labeled examples:\n\n{preview_str}"})
+                        # # Stream response and append assistant message
+                        # #14/4/2024
+                        # response = st.write_stream(stream)
+                        # st.session_state.messages.append({"role": "assistant", "content": response})
+                          # Initialize session state variables if they don't exist
+                        if 'system_prompt' not in st.session_state:
+                            st.session_state.system_prompt = system_prompt
+                        if 'response' not in st.session_state:
+                            st.session_state.response = response
+                        if 'generated_examples' not in st.session_state:
+                            st.session_state.generated_examples = []
+                        if 'generated_examples_csv' not in st.session_state:
+                            st.session_state.generated_examples_csv = None
+                        if 'generated_examples_json' not in st.session_state:
+                            st.session_state.generated_examples_json = None
+                        # Save labeled examples to CSV
+                        #new 14/4/2025
+                        #labeled_examples = []
+                        # if classification_type == "Named Entity Recognition (NER)":
+                        #     labeled_examples = []
+                        #     for line in response.split('\n'):
+                        #         if line.strip():
+                        #             parts = line.rsplit('Entities:', 1)
+                        #             if len(parts) == 2:
+                        #                 text = parts[0].strip()
+                        #                 entities = parts[1].strip()
+                        #                 if text and entities:
+                        #                     labeled_examples.append({
+                        #                         'text': text,
+                        #                         'entities': entities,
+                        #                         'system_prompt': st.session_state.system_prompt,
+                        #                         'system_role': st.session_state.system_role,
+                        #                         'task_type': 'Named Entity Recognition (NER)',
+                        #                         'Use few-shot example?': 'Yes' if use_few_shot else 'No',
+                        #                     })
+                                            #new 22/4/2025
+                        labeled_examples = []
+                        if classification_type == "Named Entity Recognition (NER)":
+                            labeled_examples = [{
+                                'ner_output': response.strip(),
+                                'system_prompt': st.session_state.system_prompt,
+                                'system_role': st.session_state.system_role,
+                                'task_type': 'Named Entity Recognition (NER)',
+                                'Use few-shot example?': 'Yes' if use_few_shot else 'No',
+                            }]
+                                            ######
+                        else:
+                            labeled_examples = []
+                            for line in response.split('\n'):
+                                if line.strip():
+                                    parts = line.rsplit('Label:', 1)
+                                    if len(parts) == 2:
+                                        text = parts[0].strip()
+                                        label = parts[1].strip()
+                                        if text and label:
+                                            labeled_examples.append({
+                                                'text': text,
+                                                'label': label,
+                                                'system_prompt': st.session_state.system_prompt,
+                                                'system_role': st.session_state.system_role,
+                                                'task_type': 'Data Labeling',
+                                                'Use few-shot example?': 'Yes' if use_few_shot else 'No',
+                                            })
+                       # Save and provide download options
+                        if labeled_examples:
+                            # Update session state
+                            st.session_state.labeled_examples = labeled_examples
+                            # Convert to CSV and JSON
+                            df = pd.DataFrame(labeled_examples)
+                            #new 22/4/2025
+                            # CSV
+                            st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')
+                            # JSON
+                            st.session_state.labeled_examples_json = json.dumps({
+                                "metadata": {
+                                    "domain": domain,
+                                    "labels": labels,
+                                    "used_few_shot": use_few_shot,
+                                    "task_type": "Named Entity Recognition (NER)",
+                                    "timestamp": datetime.now().isoformat()
+                                },
+                                "examples": labeled_examples
+                            }, indent=2).encode('utf-8')
+                            ############
+                            # CSV
+                            # st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')
+                            # # JSON
+                            # st.session_state.labeled_examples_json = json.dumps({
+                            #     "metadata": {
+                            #         "domain": domain,
+                            #         "labels": labels,
+                            #         "used_few_shot": use_few_shot,
+                            #         "task_type": "Named Entity Recognition (NER)",
+                            #         "timestamp": datetime.now().isoformat()
+                            #     },
+                            #     "examples": labeled_examples
+                            # }, indent=2).encode('utf-8')
+                                                        ########
+                            # st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')
+                            # st.session_state.labeled_examples_json = json.dumps(labeled_examples, indent=2).encode('utf-8')
+                            # Download buttons
+                            st.download_button(
+                                "📥 Download Labeled Examples (CSV)",
+                                st.session_state.labeled_examples_csv,
+                                "labeled_examples.csv",
+                                "text/csv",
+                                key='download-labeled-csv'
+                            )
+                            st.markdown("""
+                            <div style='text-align: left; margin:15px 0; font-weight: 600; color: #666;'>. . . . . .         or</div>
+                            """, unsafe_allow_html=True)
+                            st.download_button(
+                                "📥 Download Labeled Examples (JSON)",
+                                st.session_state.labeled_examples_json,
+                                "labeled_examples.json",
+                                "application/json",
+                                key='download-labeled-json'
+                            )
+                            # Display the labeled examples
+                            st.markdown("##### 📋 Labeled Examples Preview")
+                            st.dataframe(df, use_container_width=True)
+                            # Display section
+                            #st.markdown("### 📋 Labeled Examples Preview")
+                            #st.dataframe(st.session_state.labeled_preview, use_container_width=True)
+                        # if labeled_examples:
+                        #     df = pd.DataFrame(labeled_examples)
+                        #     csv = df.to_csv(index=False).encode('utf-8')
+                        #     st.download_button(
+                        #         "📥 Download Labeled Examples",
+                        #         csv,
+                        #         "labeled_examples.csv",
+                        #         "text/csv",
+                        #         key='download-labeled-csv'
+                        #     )
+                        # # Add space and center the "or"
+                        # st.markdown("""
+                        # <div style='text-align: left; margin:15px 0; font-weight: 600; color: #666;'>. . . . . .         or</div>
+                        # """, unsafe_allow_html=True)
+                        # if labeled_examples:
+                        #     df = pd.DataFrame(labeled_examples)
+                        #     csv = df.to_csv(index=False).encode('utf-8')
+                        #     st.download_button(
+                        #         "📥 Download Labeled Examples",
+                        #         csv,
+                        #         "labeled_examples.json",
+                        #         "text/json",
+                        #         key='download-labeled-JSON'
+                        #     )
+                        # Add follow-up interaction options
+                        #st.markdown("---")
+                        #follow_up = st.radio(
+                            #"What would you like to do next?",
+                             #["Label more data", "Data Generation"],
+                           # key="labeling_follow_up"
+                      #  )
+                        if st.button("Continue"):
+                            if follow_up == "Label more data":
+                                st.session_state.examples_to_classify = []
+                                st.experimental_rerun()
+                            elif follow_up == "Data Generation":
+                                st.session_state.task_choice = "Data Labeling"
+                                st.experimental_rerun()
+                    except Exception as e:
+                        st.error("An error occurred during labeling.")
+                        st.error(f"Details: {e}")
+            else:
+                st.warning("Please enter at least one example to classify.")
+    #st.session_state.messages.append({"role": "assistant", "content": response})
+# Footer
+st.markdown("---")
+st.markdown(
+    """
+    <div style='text-align: center'>
+        <p>Made with ❤️ by Wedyan AlSakran 2025</p>
+    </div>
+    """,
+    unsafe_allow_html=True
+)