Wedyan2023 committed on
Commit dc141e7 · verified · 1 Parent(s): 97edc95

Update app.py

Files changed (1)
  1. app.py +77 -104
app.py CHANGED
@@ -1,111 +1,74 @@
-""" Simple Chatbot
-@author: Nigel Gebodh
-@email: [email protected]
-
-"""
-import numpy as np
 import streamlit as st
 from openai import OpenAI
-import os
-from dotenv import load_dotenv
-
-load_dotenv()
-
-# Initialize the client
-client = OpenAI(
-    base_url="https://api-inference.huggingface.co/v1",
-    api_key=os.environ.get('HUGGINGFACEHUB_API_TOKEN') # Add your Huggingface token here
-)
 
-# Supported models
-model_links = {
-    "Meta-Llama-3-8B": "meta-llama/Meta-Llama-3-8B-Instruct"
-}
-
-# Reset conversation
-def reset_conversation():
-    st.session_state.conversation = []
+# Initialize session state
+if 'messages' not in st.session_state:
     st.session_state.messages = []
-    return None
-
-# Sidebar for model selection
-selected_model = st.sidebar.selectbox("Select Model", list(model_links.keys()))
-
-# Temperature slider
-temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, 0.5)
 
-# Reset button
-st.sidebar.button('Reset Chat', on_click=reset_conversation)
+# Function to generate system prompt based on user inputs
+def create_system_prompt(classification_type, num_to_generate, domain, min_words, max_words, labels):
+    system_prompt = f"You are a professional {classification_type.lower()} expert. Your role is to generate exactly {num_to_generate} data examples for {domain}. "
+    system_prompt += f"Each example should consist of between {min_words} and {max_words} words. "
+    system_prompt += "Use the following labels: " + ", ".join(labels) + ". Please do not add any extra commentary or explanation. "
+    system_prompt += "Format each example like this: \nExample: <text>, Label: <label>\n"
+    return system_prompt
+
+# OpenAI client setup (replace with your OpenAI API credentials)
+client = OpenAI(api_key='YOUR_API_KEY')
 
-# Model description
-st.sidebar.write(f"You're now chatting with **{selected_model}**")
-st.sidebar.markdown("*Generated content may be inaccurate or false.*")
+# App title
+st.title("Data Generation for Classification")
 
-# Chat initialization
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-
-# Display chat messages
-for message in st.session_state.messages:
-    with st.chat_message(message["role"]):
-        st.markdown(message["content"])
+# Choice between Data Generation or Data Labeling
+mode = st.radio("Choose Task:", ["Data Generation", "Data Labeling"])
 
-# Main logic to choose between data generation and data labeling
-task_choice = st.selectbox("Choose Task", ["Data Generation", "Data Labeling"])
-
-if task_choice == "Data Generation":
-    classification_type = st.selectbox(
-        "Choose Classification Type",
+if mode == "Data Generation":
+    # Step 1: Choose Classification Type
+    classification_type = st.radio(
+        "Select Classification Type:",
         ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"]
     )
-
+
+    # Step 2: Choose labels based on classification type
     if classification_type == "Sentiment Analysis":
-        st.write("Sentiment Analysis: Positive, Negative, Neutral")
        labels = ["Positive", "Negative", "Neutral"]
    elif classification_type == "Binary Classification":
-        label_1 = st.text_input("Enter first class")
-        label_2 = st.text_input("Enter second class")
-        labels = [label_1, label_2]
+        class1 = st.text_input("Enter First Class for Binary Classification")
+        class2 = st.text_input("Enter Second Class for Binary Classification")
+        labels = [class1, class2]
    elif classification_type == "Multi-Class Classification":
-        num_classes = st.slider("How many classes?", 3, 10, 3)
-        labels = [st.text_input(f"Class {i+1}") for i in range(num_classes)]
-
-    domain = st.selectbox("Choose Domain", ["Restaurant reviews", "E-commerce reviews", "Custom"])
+        num_classes = st.slider("Number of Classes (Max 10):", 2, 10, 3)
+        labels = [st.text_input(f"Enter Class {i+1}") for i in range(num_classes)]
+
+    # Step 3: Choose the domain
+    domain = st.radio(
+        "Select Domain:",
+        ["Restaurant reviews", "E-commerce reviews", "Custom"]
+    )
    if domain == "Custom":
-        domain = st.text_input("Specify custom domain")
-
-    min_words = st.number_input("Minimum words per example", min_value=10, max_value=90, value=10)
-    max_words = st.number_input("Maximum words per example", min_value=10, max_value=90, value=90)
-
-    few_shot = st.radio("Do you want to use few-shot examples?", ["Yes", "No"])
-    if few_shot == "Yes":
-        num_examples = st.slider("How many few-shot examples?", 1, 5, 1)
-        few_shot_examples = [
-            {"content": st.text_area(f"Example {i+1}"), "label": st.selectbox(f"Label for example {i+1}", labels)}
-            for i in range(num_examples)
-        ]
-    else:
-        few_shot_examples = []
-
-    # Ask the user how many examples they need
-    num_to_generate = st.number_input("How many examples to generate?", min_value=1, max_value=50, value=10)
-
-    # System prompt generation
-    system_prompt = f"You are a professional {classification_type.lower()} expert. Your role is to generate {num_to_generate} data examples for {domain}. "
-    system_prompt += f"Each example should have a label and consist of between {min_words} and {max_words} words. "
-    system_prompt += "Use the following labels: " + ", ".join(labels) + ". "
-
-    if few_shot_examples:
-        system_prompt += "Use the following few-shot examples as a reference:\n"
-        for example in few_shot_examples:
-            system_prompt += f"Example: {example['content']}, Label: {example['label']}\n"
+        domain = st.text_input("Enter Custom Domain")
+
+    # Step 4: Specify example length (min and max words)
+    min_words = st.slider("Minimum Words per Example", 10, 90, 20)
+    max_words = st.slider("Maximum Words per Example", 10, 90, 40)
+
+    # Step 5: Ask if user wants few-shot examples
+    use_few_shot = st.checkbox("Use Few-Shot Examples?")
+
+    few_shot_examples = []
+    if use_few_shot:
+        num_few_shots = st.slider("Number of Few-Shot Examples (Max 5):", 1, 5, 2)
+        for i in range(num_few_shots):
+            example_text = st.text_area(f"Enter Example {i+1} Text")
+            example_label = st.selectbox(f"Select Label for Example {i+1}", labels)
+            few_shot_examples.append(f"Example: {example_text}, Label: {example_label}")
+
+    # Step 6: Specify the number of examples to generate
+    num_to_generate = st.number_input("Number of Examples to Generate", min_value=1, max_value=50, value=10)
+
+    # Step 7: Generate system prompt based on the inputs
+    system_prompt = create_system_prompt(classification_type, num_to_generate, domain, min_words, max_words, labels)
 
-    system_prompt += "Please only provide the examples in the following format:\n"
-    system_prompt += "Example: <text>, Label: <label>\n"
-
-    st.write("System Prompt:")
-    st.code(system_prompt)
-
    if st.button("Generate Examples"):
        all_generated_examples = []
        remaining_examples = num_to_generate
@@ -114,26 +77,40 @@ if task_choice == "Data Generation":
         while remaining_examples > 0:
             chunk_size = min(remaining_examples, 5)
             try:
+                # Add system and user messages to session state
                 st.session_state.messages.append({"role": "system", "content": system_prompt})
 
+                # Add few-shot examples as user messages
+                if few_shot_examples:
+                    for example in few_shot_examples:
+                        st.session_state.messages.append({"role": "user", "content": example})
+
+                # Stream API request to generate examples
                 stream = client.chat.completions.create(
-                    model=model_links[selected_model],
+                    model="gpt-3.5-turbo",
                     messages=[
                         {"role": m["role"], "content": m["content"]}
                         for m in st.session_state.messages
                     ],
-                    temperature=temp_values,
+                    temperature=0.7,
                     stream=True,
                     max_tokens=3000,
                 )
 
-                response = st.write_stream(stream)
+                # Capture streamed response (attribute access; stream chunks are objects, not dicts)
+                response = ""
+                for chunk in stream:
+                    if chunk.choices[0].delta.content:
+                        response += chunk.choices[0].delta.content
 
-                # Split the response into individual examples, assuming each example starts with 'Example: '
+                # Split response into individual examples by "Example: "
                 generated_examples = response.split("Example: ")[1:chunk_size+1]  # Extract up to the chunk size
 
+                # Clean up the extracted examples
+                cleaned_examples = [f"Example {i+1}: {ex.strip()}" for i, ex in enumerate(generated_examples)]
+
                 # Store the new examples
-                all_generated_examples.extend(generated_examples)
+                all_generated_examples.extend(cleaned_examples)
                 remaining_examples -= chunk_size
 
             except Exception as e:
@@ -141,16 +118,12 @@ if task_choice == "Data Generation":
                 st.write(e)
                 break
 
-        # Display all generated examples
+        # Display all generated examples properly formatted
         for idx, example in enumerate(all_generated_examples):
             st.write(f"Example {idx+1}: {example.strip()}")
-
-        # Update session state to prevent repetition of old prompts
-        st.session_state.messages = []  # Clear messages after each generation
-
-else:
-    # Data labeling workflow (for future implementation based on classification)
-    st.write("Data Labeling functionality will go here.")
+
+        # Clear session state to avoid repetition of old prompts
+        st.session_state.messages = []  # Reset after each generation
 
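Note: the streaming loop in this commit assumes the openai>=1.0 Python client (`from openai import OpenAI`), whose stream chunks are typed objects read with attribute access (chunk.choices[0].delta.content), not dict indexing. A minimal standalone sketch of that pattern; the model name, prompt, and key below are placeholders, not part of this commit:

from openai import OpenAI

client = OpenAI(api_key="YOUR_API_KEY")  # placeholder key, as in the commit

stream = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Say hello in five words."}],
    stream=True,
)

response = ""
for chunk in stream:
    delta = chunk.choices[0].delta
    if delta.content:  # content is None on role-only and final chunks
        response += delta.content
print(response)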
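The generation loop recovers examples by splitting the raw response on "Example: ", which silently drops malformed items and mis-splits if generated text itself contains that substring. A more defensive parse of the requested "Example: <text>, Label: <label>" format is sketched below; parse_examples is a hypothetical helper, not part of this commit:

import re

# One example per line: "Example: <text>, Label: <label>".
# The label group takes the rest of the line, so multi-word labels also match.
EXAMPLE_RE = re.compile(r"Example:\s*(?P<text>.+?),\s*Label:\s*(?P<label>[^\n]+)")

def parse_examples(response: str) -> list[tuple[str, str]]:
    """Return (text, label) pairs found in a model response."""
    return [(m.group("text").strip(), m.group("label").strip())
            for m in EXAMPLE_RE.finditer(response)]

demo = (
    "Example: The pasta was superb and the staff friendly, Label: Positive\n"
    "Example: Cold food, slow service, Label: Negative"
)
print(parse_examples(demo))
# [('The pasta was superb and the staff friendly', 'Positive'),
#  ('Cold food, slow service', 'Negative')]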