Data_Generation_LabelingCopy

Sleeping

App Files Files

Wedyan2023 committed on Oct 24, 2024

Commit

dc141e7

verified ·

1 Parent(s): 97edc95

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -104

app.py CHANGED Viewed

@@ -1,111 +1,74 @@
-""" Simple Chatbot
-@author: Nigel Gebodh
-@email: [email protected]
-"""
-import numpy as np
 import streamlit as st
 from openai import OpenAI
-import os
-from dotenv import load_dotenv
-load_dotenv()
-# Initialize the client
-client = OpenAI(
-    base_url="https://api-inference.huggingface.co/v1",
-    api_key=os.environ.get('HUGGINGFACEHUB_API_TOKEN')  # Add your Huggingface token here
-)
-# Supported models
-model_links = {
-    "Meta-Llama-3-8B": "meta-llama/Meta-Llama-3-8B-Instruct"
-}
-# Reset conversation
-def reset_conversation():
-    st.session_state.conversation = []
     st.session_state.messages = []
-    return None
-# Sidebar for model selection
-selected_model = st.sidebar.selectbox("Select Model", list(model_links.keys()))
-# Temperature slider
-temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, 0.5)
-# Reset button
-st.sidebar.button('Reset Chat', on_click=reset_conversation)
-# Model description
-st.sidebar.write(f"You're now chatting with **{selected_model}**")
-st.sidebar.markdown("*Generated content may be inaccurate or false.*")
-# Chat initialization
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-# Display chat messages
-for message in st.session_state.messages:
-    with st.chat_message(message["role"]):
-        st.markdown(message["content"])
-# Main logic to choose between data generation and data labeling
-task_choice = st.selectbox("Choose Task", ["Data Generation", "Data Labeling"])
-if task_choice == "Data Generation":
-    classification_type = st.selectbox(
-        "Choose Classification Type",
         ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"]
     )
     if classification_type == "Sentiment Analysis":
-        st.write("Sentiment Analysis: Positive, Negative, Neutral")
         labels = ["Positive", "Negative", "Neutral"]
     elif classification_type == "Binary Classification":
-        label_1 = st.text_input("Enter first class")
-        label_2 = st.text_input("Enter second class")
-        labels = [label_1, label_2]
     elif classification_type == "Multi-Class Classification":
-        num_classes = st.slider("How many classes?", 3, 10, 3)
-        labels = [st.text_input(f"Class {i+1}") for i in range(num_classes)]
-    domain = st.selectbox("Choose Domain", ["Restaurant reviews", "E-commerce reviews", "Custom"])
     if domain == "Custom":
-        domain = st.text_input("Specify custom domain")
-    min_words = st.number_input("Minimum words per example", min_value=10, max_value=90, value=10)
-    max_words = st.number_input("Maximum words per example", min_value=10, max_value=90, value=90)
-    few_shot = st.radio("Do you want to use few-shot examples?", ["Yes", "No"])
-    if few_shot == "Yes":
-        num_examples = st.slider("How many few-shot examples?", 1, 5, 1)
-        few_shot_examples = [
-            {"content": st.text_area(f"Example {i+1}"), "label": st.selectbox(f"Label for example {i+1}", labels)}
-            for i in range(num_examples)
-        ]
-    else:
-        few_shot_examples = []
-    # Ask the user how many examples they need
-    num_to_generate = st.number_input("How many examples to generate?", min_value=1, max_value=50, value=10)
-    # System prompt generation
-    system_prompt = f"You are a professional {classification_type.lower()} expert. Your role is to generate {num_to_generate} data examples for {domain}. "
-    system_prompt += f"Each example should have a label and consist of between {min_words} and {max_words} words. "
-    system_prompt += "Use the following labels: " + ", ".join(labels) + ". "
-    if few_shot_examples:
-        system_prompt += "Use the following few-shot examples as a reference:\n"
-        for example in few_shot_examples:
-            system_prompt += f"Example: {example['content']}, Label: {example['label']}\n"
-    system_prompt += "Please only provide the examples in the following format:\n"
-    system_prompt += "Example: <text>, Label: <label>\n"
-    st.write("System Prompt:")
-    st.code(system_prompt)
     if st.button("Generate Examples"):
         all_generated_examples = []
         remaining_examples = num_to_generate
@@ -114,26 +77,40 @@ if task_choice == "Data Generation":
             while remaining_examples > 0:
                 chunk_size = min(remaining_examples, 5)
                 try:
                     st.session_state.messages.append({"role": "system", "content": system_prompt})
                     stream = client.chat.completions.create(
-                        model=model_links[selected_model],
                         messages=[
                             {"role": m["role"], "content": m["content"]}
                             for m in st.session_state.messages
                         ],
-                        temperature=temp_values,
                         stream=True,
                         max_tokens=3000,
                     )
-                    response = st.write_stream(stream)
-                    # Split the response into individual examples, assuming each example starts with 'Example: '
                     generated_examples = response.split("Example: ")[1:chunk_size+1]  # Extract up to the chunk size
                     # Store the new examples
-                    all_generated_examples.extend(generated_examples)
                     remaining_examples -= chunk_size
                 except Exception as e:
@@ -141,16 +118,12 @@ if task_choice == "Data Generation":
                     st.write(e)
                     break
-        # Display all generated examples
         for idx, example in enumerate(all_generated_examples):
             st.write(f"Example {idx+1}: {example.strip()}")
-        # Update session state to prevent repetition of old prompts
-        st.session_state.messages = []  # Clear messages after each generation
-else:
-    # Data labeling workflow (for future implementation based on classification)
-    st.write("Data Labeling functionality will go here.")

 import streamlit as st
 from openai import OpenAI
+# Initialize session state
+# The list is created only when the 'messages' key is absent, so an existing
+# list stored in st.session_state is left untouched.
+if 'messages' not in st.session_state:
     st.session_state.messages = []
+# Function to generate system prompt based on user inputs
+def create_system_prompt(classification_type, num_to_generate, domain, min_words, max_words, labels):
+    """Build the system prompt describing the examples the model should write.
+
+    Args:
+        classification_type: Task name; it is lower-cased inside the prompt.
+        num_to_generate: Number of examples the prompt asks for.
+        domain: Subject area the examples should belong to.
+        min_words: One bound of the allowed words-per-example range.
+        max_words: The other bound of the allowed words-per-example range.
+        labels: Iterable of label strings. Entries that are empty or only
+            whitespace are left out; the rest are stripped of surrounding
+            whitespace.
+
+    Returns:
+        The full prompt as one string, ending with the expected output format.
+    """
+    # A text input left blank yields "", which would otherwise show up in the
+    # prompt as "Use the following labels: , .".
+    usable_labels = [label.strip() for label in labels if label and label.strip()]
+    # The bounds may arrive in the wrong order; always state a valid range.
+    fewest_words, most_words = sorted((min_words, max_words))
+    prompt_parts = [
+        f"You are a professional {classification_type.lower()} expert. Your role is to generate exactly {num_to_generate} data examples for {domain}. ",
+        f"Each example should consist of between {fewest_words} and {most_words} words. ",
+        "Use the following labels: " + ", ".join(usable_labels) + ". Please do not add any extra commentary or explanation. ",
+        "Format each example like this: \nExample: <text>, Label: <label>\n",
+    ]
+    return "".join(prompt_parts)
+# OpenAI client setup (replace with your OpenAI API credentials)
+# NOTE(review): 'YOUR_API_KEY' is a placeholder literal, so requests will
+# presumably be rejected until a real key is supplied. Prefer reading the key
+# from the environment or a secrets store over committing it to this file.
+client = OpenAI(api_key='YOUR_API_KEY')
+# App title
+st.title("Data Generation for Classification")
+# Choice between Data Generation or Data Labeling
+# NOTE(review): only the "Data Generation" choice is tested below; no branch
+# for "Data Labeling" is visible in this view.
+mode = st.radio("Choose Task:", ["Data Generation", "Data Labeling"])
+# Data Generation form: collects the task type, labels, domain, length limits,
+# optional few-shot examples and the requested count, then builds the prompt.
+if mode == "Data Generation":
+    # Step 1: Choose Classification Type
+    classification_type = st.radio(
+        "Select Classification Type:",
         ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"]
     )
+    # Step 2: Choose labels based on classification type
     if classification_type == "Sentiment Analysis":
         labels = ["Positive", "Negative", "Neutral"]
     elif classification_type == "Binary Classification":
+        class1 = st.text_input("Enter First Class for Binary Classification")
+        class2 = st.text_input("Enter Second Class for Binary Classification")
+        labels = [class1, class2]
     elif classification_type == "Multi-Class Classification":
+        num_classes = st.slider("Number of Classes (Max 10):", 2, 10, 3)
+        labels = [st.text_input(f"Enter Class {i+1}") for i in range(num_classes)]
+    # Step 3: Choose the domain
+    domain = st.radio(
+        "Select Domain:",
+        ["Restaurant reviews", "E-commerce reviews", "Custom"]
+    )
     if domain == "Custom":
+        domain = st.text_input("Enter Custom Domain")
+    # Step 4: Specify example length (min and max words)
+    min_words = st.slider("Minimum Words per Example", 10, 90, 20)
+    max_words = st.slider("Maximum Words per Example", 10, 90, 40)
+    # The two sliders are independent, so the minimum can end up above the
+    # maximum; swap them so the prompt never states an impossible range.
+    if min_words > max_words:
+        st.warning("Minimum words exceeded maximum words; the two values were swapped.")
+        min_words, max_words = max_words, min_words
+    # Step 5: Ask if user wants few-shot examples
+    use_few_shot = st.checkbox("Use Few-Shot Examples?")
+    few_shot_examples = []
+    if use_few_shot:
+        num_few_shots = st.slider("Number of Few-Shot Examples (Max 5):", 1, 5, 2)
+        for i in range(num_few_shots):
+            example_text = st.text_area(f"Enter Example {i+1} Text")
+            example_label = st.selectbox(f"Select Label for Example {i+1}", labels)
+            # Stored in the same "Example: ..., Label: ..." shape the prompt asks for.
+            few_shot_examples.append(f"Example: {example_text}, Label: {example_label}")
+    # Step 6: Specify the number of examples to generate
+    num_to_generate = st.number_input("Number of Examples to Generate", min_value=1, max_value=50, value=10)
+    # Step 7: Generate system prompt based on the inputs
+    system_prompt = create_system_prompt(classification_type, num_to_generate, domain, min_words, max_words, labels)
     if st.button("Generate Examples"):
         all_generated_examples = []
         remaining_examples = num_to_generate
             while remaining_examples > 0:
                 chunk_size = min(remaining_examples, 5)
                 try:
+                    # Add system and user messages to session state
                     st.session_state.messages.append({"role": "system", "content": system_prompt})
+                    # Add few-shot examples to the system prompt
+                    if few_shot_examples:
+                        for example in few_shot_examples:
+                            st.session_state.messages.append({"role": "user", "content": example})
+                    # Stream API request to generate examples
                     stream = client.chat.completions.create(
+                        model="gpt-3.5-turbo",
                         messages=[
                             {"role": m["role"], "content": m["content"]}
                             for m in st.session_state.messages
                         ],
+                        temperature=0.7,
                         stream=True,
                         max_tokens=3000,
                     )
+                    # Capture streamed response
+                    response = ""
+                    for chunk in stream:
+                        if 'content' in chunk['choices'][0]['delta']:
+                            response += chunk['choices'][0]['delta']['content']
+                    # Split response into individual examples by "Example: "
                     generated_examples = response.split("Example: ")[1:chunk_size+1]  # Extract up to the chunk size
+                    # Clean up the extracted examples
+                    cleaned_examples = [f"Example {i+1}: {ex.strip()}" for i, ex in enumerate(generated_examples)]
                     # Store the new examples
+                    all_generated_examples.extend(cleaned_examples)
                     remaining_examples -= chunk_size
                 except Exception as e:
                     st.write(e)
                     break
+        # Display all generated examples properly formatted
         for idx, example in enumerate(all_generated_examples):
             st.write(f"Example {idx+1}: {example.strip()}")
+        # Clear session state to avoid repetition of old prompts
+        st.session_state.messages = []  # Reset after each generation