Wedyan2023 committed on
Commit
8c32e22
·
verified ·
1 Parent(s): cd976d7

Create app9.py

Browse files
Files changed (1) hide show
  1. app9.py +389 -0
app9.py ADDED
@@ -0,0 +1,389 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import os
4
+ from datetime import datetime
5
+ import random
6
+ from pathlib import Path
7
+ from openai import OpenAI
8
+ from dotenv import load_dotenv
9
+ from langchain_core.prompts import PromptTemplate
10
+
11
# Load settings from a local .env file (if present) before reading the token.
load_dotenv()

# OpenAI-compatible client pointed at the Hugging Face inference endpoint.
# The Hugging Face token is read from the GP_WED environment variable.
client = OpenAI(
    api_key=os.environ.get("GP_WED"),
    base_url="https://api-inference.huggingface.co/v1",
)
18
+
19
+ # Load environment variables
20
+ ##load_dotenv()
21
+ ##openai_api_key = os.getenv("OPENAI_API_KEY")
22
+
23
+ # Initialize OpenAI client
24
+ ##client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
25
+
26
# Page-wide CSS overrides (buttons, select boxes, output/status/sidebar boxes).
# Kept in a named constant so the injection call below stays short.
_CUSTOM_CSS = """
<style>
.stButton > button {
width: 100%;
margin-bottom: 10px;
background-color: #4CAF50;
color: white;
border: none;
padding: 10px;
border-radius: 5px;
}
.task-button {
background-color: #2196F3 !important;
}
.stSelectbox {
margin-bottom: 20px;
}
.output-container {
padding: 20px;
border-radius: 5px;
border: 1px solid #ddd;
margin: 10px 0;
}
.status-container {
padding: 10px;
border-radius: 5px;
margin: 10px 0;
}
.sidebar-info {
padding: 10px;
background-color: #f0f2f6;
border-radius: 5px;
margin: 10px 0;
}
</style>
"""

# unsafe_allow_html is required for the <style> tag to be emitted as HTML.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
63
+
64
# Make sure the directory used for CSV persistence exists.
# exist_ok=True replaces the separate "exists?" check, so two sessions starting
# at the same moment cannot race between the check and the creation.
os.makedirs('data', exist_ok=True)
67
+
68
def read_csv_with_encoding(file):
    """Read a CSV into a DataFrame, trying several text encodings in turn.

    Args:
        file: A filesystem path or a file-like object accepted by
            ``pd.read_csv``.

    Returns:
        The ``pandas.DataFrame`` parsed with the first encoding that decodes
        the input without error.

    Raises:
        UnicodeDecodeError: If no candidate encoding can decode the input.
    """
    # latin1 maps every possible byte, so it has to be the last resort;
    # placed earlier it would prevent cp1252 from ever being tried.
    encodings = ('utf-8', 'cp1252', 'latin1')

    # Remember where a seekable stream started so a failed attempt (which
    # consumes part of the stream) can be retried from the same position.
    seekable = getattr(file, 'seekable', None)
    start = file.tell() if callable(seekable) and seekable() else None

    last_error = None
    for encoding in encodings:
        if start is not None:
            file.seek(start)
        try:
            return pd.read_csv(file, encoding=encoding)
        except UnicodeDecodeError as exc:
            last_error = exc

    # UnicodeDecodeError requires (encoding, object, start, end, reason);
    # passing only a message would raise TypeError instead.
    raise UnicodeDecodeError(
        'unknown', b'', 0, 1,
        "Failed to read file with any supported encoding",
    ) from last_error
76
+
77
def save_to_csv(data, filename):
    """Write ``data`` to ``data/<filename>`` as CSV and return it as a DataFrame.

    Args:
        data: Anything accepted by the ``pd.DataFrame`` constructor.
        filename: File name (relative to the ``data`` directory) to write.

    Returns:
        The ``pandas.DataFrame`` built from ``data``.
    """
    df = pd.DataFrame(data)
    target = Path('data') / filename
    # Create the destination directory on demand so the function does not
    # depend on start-up code having created it already.
    target.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(target, index=False)
    return df
81
+
82
def load_from_csv(filename):
    """Load ``data/<filename>`` as a DataFrame, or an empty one if unreadable.

    Args:
        filename: File name relative to the ``data`` directory.

    Returns:
        The parsed ``pandas.DataFrame``, or an empty DataFrame when the file
        is missing, inaccessible, empty, or cannot be parsed/decoded.
    """
    try:
        return pd.read_csv(f'data/{filename}')
    except (OSError, ValueError):
        # OSError covers missing/inaccessible files; ValueError covers pandas'
        # EmptyDataError and ParserError as well as UnicodeDecodeError.
        # Unlike a bare ``except``, this lets KeyboardInterrupt/SystemExit through.
        return pd.DataFrame()
87
+
88
def reset_conversation():
    """Clear the conversation and message lists stored in the session state."""
    for state_key in ("conversation", "messages"):
        st.session_state[state_key] = []
92
+
93
# The message history must exist before any handler appends to it.
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# Page heading
st.title("🤖 LangChain-Based Data Interaction App")
99
+
100
# Sidebar: model choice, sampling temperature, reset button and an info box.
with st.sidebar:
    st.title("⚙️ Settings")

    selected_model = st.selectbox(
        label="Select Model",
        options=["meta-llama/Meta-Llama-3-8B-Instruct"],
        key='model_select',
    )

    temperature = st.slider(
        label="Temperature",
        min_value=0.0,
        max_value=1.0,
        value=0.5,
        help="Controls randomness in generation",
    )

    # The callback runs before the script reruns, wiping the stored history.
    st.button("🔄 Reset Conversation", on_click=reset_conversation)

    # Small HTML card showing which model is active plus a disclaimer.
    model_note_html = (
        "\n"
        "<div class=\"sidebar-info\">\n"
        "<h4>Current Model: {}</h4>\n"
        "<p><em>Note: Generated content may be inaccurate or false.</em></p>\n"
        "</div>\n"
    ).format(selected_model)

    with st.container():
        st.markdown(model_note_html, unsafe_allow_html=True)
125
+
126
# Task pickers: the chosen task is stored in session state so it is still
# known on later reruns, when the button itself no longer reports a click.
generation_col, labeling_col = st.columns(2)

with generation_col:
    generation_clicked = st.button("📝 Data Generation", key="gen_button", help="Generate new data")
    if generation_clicked:
        st.session_state["task_choice"] = "Data Generation"

with labeling_col:
    labeling_clicked = st.button("🏷️ Data Labeling", key="label_button", help="Label existing data")
    if labeling_clicked:
        st.session_state["task_choice"] = "Data Labeling"
136
+
137
def _pick_labels(classification_type, key_prefix):
    """Render the label inputs for a classification type and return the labels.

    Args:
        classification_type: One of "Sentiment Analysis",
            "Binary Classification" or "Multi-Class Classification".
        key_prefix: Prefix for the widget keys, keeping the generation and
            labeling screens' widgets distinct.

    Returns:
        A list of non-empty label strings.
    """
    if classification_type == "Sentiment Analysis":
        return ["Positive", "Negative", "Neutral"]

    if classification_type == "Binary Classification":
        first_col, second_col = st.columns(2)
        with first_col:
            label_1 = st.text_input("First class", "Positive", key=f"{key_prefix}_first")
        with second_col:
            label_2 = st.text_input("Second class", "Negative", key=f"{key_prefix}_second")
        # Fall back to the defaults if either box was cleared.
        return [label_1, label_2] if label_1 and label_2 else ["Positive", "Negative"]

    num_classes = st.slider("Number of classes", 3, 10, 3, key=f"{key_prefix}_num_classes")
    grid = st.columns(3)
    labels = []
    for i in range(num_classes):
        default_label = f"Class_{i+1}"
        with grid[i % 3]:
            entered = st.text_input(f"Class {i+1}", default_label, key=f"{key_prefix}_class_{i}")
        # A cleared box falls back to its default so no empty label reaches
        # the prompt (mirrors the binary-classification fallback).
        labels.append(entered.strip() or default_label)
    return labels


def _stream_completion(system_prompt, spinner_text, action, replace_last=False):
    """Send ``system_prompt`` to the model and stream the reply onto the page.

    Args:
        system_prompt: Fully formatted prompt, sent as the only (system) message.
        spinner_text: Text shown in the spinner while the request runs.
        action: Word used in the error message ("generation", "labeling", ...).
        replace_last: If True, drop the most recent stored message first, so
            the new reply replaces it rather than being added after it.
    """
    if replace_last and st.session_state.messages:
        st.session_state.messages = st.session_state.messages[:-1]

    with st.spinner(spinner_text):
        try:
            stream = client.chat.completions.create(
                model=selected_model,
                messages=[{"role": "system", "content": system_prompt}],
                temperature=temperature,
                stream=True,
                max_tokens=3000,
            )
            response = st.write_stream(stream)
            st.session_state.messages.append({"role": "assistant", "content": response})
        except Exception as e:
            # UI boundary: report the failure to the user instead of crashing.
            st.error(f"An error occurred during {action}.")
            st.error(f"Details: {e}")


def _render_data_generation():
    """Render the Data Generation screen and handle its two action buttons."""
    st.header("📝 Data Generation")

    classification_type = st.selectbox(
        "Classification Type",
        ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"]
    )
    labels = _pick_labels(classification_type, "gen")

    domain = st.selectbox("Domain", ["Restaurant reviews", "E-commerce reviews", "Custom"])
    if domain == "Custom":
        domain = st.text_input("Specify custom domain")

    min_col, max_col = st.columns(2)
    with min_col:
        min_words = st.number_input("Min words", 10, 90, 20)
    with max_col:
        # The default must not fall below min_value, otherwise Streamlit
        # rejects the widget as soon as "Min words" is raised above 50.
        max_words = st.number_input("Max words", min_words, 90, max(min_words, 50))

    few_shot_examples = []
    if st.toggle("Use few-shot examples"):
        num_few_shot = st.slider("Number of few-shot examples", 1, 5, 1)
        for i in range(num_few_shot):
            with st.expander(f"Example {i+1}"):
                content = st.text_area("Content", key=f"few_shot_content_{i}")
                label = st.selectbox("Label", labels, key=f"few_shot_label_{i}")
                if content and label:
                    few_shot_examples.append({"content": content, "label": label})

    num_to_generate = st.number_input("Number of examples", 1, 100, 10)
    user_prompt = st.text_area("Additional instructions (optional)")

    # Few-shot examples are only useful if the model actually sees them, so
    # they are rendered into the prompt in the same format requested of it.
    few_shot_section = ""
    if few_shot_examples:
        rendered = "\n".join(
            f"{example['content']} Label: {example['label']}" for example in few_shot_examples
        )
        few_shot_section = f"Reference examples to imitate:\n{rendered}\n\n"

    prompt_template = PromptTemplate(
        input_variables=[
            "classification_type", "domain", "num_examples", "min_words",
            "max_words", "labels", "user_prompt", "few_shot_section",
        ],
        template=(
            "You are a professional {classification_type} expert tasked with generating examples for {domain}.\n"
            "Use the following parameters:\n"
            "- Generate exactly {num_examples} examples\n"
            "- Each example MUST be between {min_words} and {max_words} words long\n"
            "- Use these labels: {labels}\n"
            "- Generate the examples in this format: 'Example text. Label: [label]'\n"
            "- Do not include word counts or any additional information\n"
            "Additional instructions: {user_prompt}\n\n"
            "{few_shot_section}"
            "Generate numbered examples:"
        )
    )

    def run_generation(spinner_text, action, replace_last=False):
        # An empty custom domain would yield a meaningless prompt.
        if not domain or not domain.strip():
            st.warning("Please specify a custom domain.")
            return
        system_prompt = prompt_template.format(
            classification_type=classification_type,
            domain=domain,
            num_examples=num_to_generate,
            min_words=min_words,
            max_words=max_words,
            labels=", ".join(labels),
            user_prompt=user_prompt,
            few_shot_section=few_shot_section,
        )
        _stream_completion(system_prompt, spinner_text, action, replace_last)

    generate_col, regenerate_col = st.columns(2)
    with generate_col:
        if st.button("🎯 Generate Examples"):
            run_generation("Generating examples...", "generation")
    with regenerate_col:
        if st.button("🔄 Regenerate"):
            run_generation("Regenerating examples...", "regeneration", replace_last=True)


def _render_data_labeling():
    """Render the Data Labeling screen and handle its two action buttons."""
    st.header("🏷️ Data Labeling")

    classification_type = st.selectbox(
        "Classification Type",
        ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"],
        key="label_class_type"
    )
    labels = _pick_labels(classification_type, "label")

    few_shot_examples = []
    if st.toggle("Use few-shot examples for labeling"):
        num_few_shot = st.slider("Number of few-shot examples", 1, 5, 1)
        for i in range(num_few_shot):
            with st.expander(f"Few-shot Example {i+1}"):
                content = st.text_area("Content", key=f"label_few_shot_content_{i}")
                label = st.selectbox("Label", labels, key=f"label_few_shot_label_{i}")
                if content and label:
                    few_shot_examples.append(f"{content}\nLabel: {label}")

    num_examples = st.number_input("Number of examples to classify", 1, 100, 1)

    examples_to_classify = []
    if num_examples <= 20:
        # Few examples: one text box per example.
        for i in range(num_examples):
            example = st.text_area(f"Example {i+1}", key=f"example_{i}")
            if example:
                examples_to_classify.append(example)
    else:
        # Many examples: a single box, one example per line.
        bulk_input = st.text_area(
            "Enter examples (one per line)",
            height=300,
            help="Enter each example on a new line"
        )
        if bulk_input:
            stripped_lines = (line.strip() for line in bulk_input.split('\n'))
            # Blank lines are ignored; anything beyond the requested count is cut.
            examples_to_classify = [line for line in stripped_lines if line][:num_examples]

    user_prompt = st.text_area("Additional instructions (optional)", key="label_instructions")

    # Only include the few-shot heading when there is something to show.
    few_shot_section = ""
    if few_shot_examples:
        few_shot_section = "Few-shot examples:\n" + "\n\n".join(few_shot_examples) + "\n\n"
    numbered_examples = "\n".join(
        f"{position}. {text}" for position, text in enumerate(examples_to_classify, start=1)
    )

    label_prompt_template = PromptTemplate(
        input_variables=["classification_type", "labels", "few_shot_section", "examples", "user_prompt"],
        template=(
            "You are a professional {classification_type} expert. Classify the following examples using these labels: {labels}.\n"
            "Instructions:\n"
            "- Return the numbered example followed by its classification in the format: 'Example text. Label: [label]'\n"
            "- Do not provide any additional information or explanations\n"
            "{user_prompt}\n\n"
            "{few_shot_section}"
            "Examples to classify:\n{examples}\n\n"
            "Output:\n"
        )
    )

    def run_labeling(spinner_text, action, replace_last=False):
        if not examples_to_classify:
            st.warning("Please enter at least one example to classify.")
            return
        system_prompt = label_prompt_template.format(
            classification_type=classification_type,
            labels=", ".join(labels),
            few_shot_section=few_shot_section,
            examples=numbered_examples,
            user_prompt=user_prompt,
        )
        _stream_completion(system_prompt, spinner_text, action, replace_last)

    label_col, relabel_col = st.columns(2)
    with label_col:
        if st.button("🏷️ Label Data"):
            run_labeling("Labeling data...", "labeling")
    with relabel_col:
        if st.button("🔄 Relabel"):
            run_labeling("Relabeling data...", "relabeling", replace_last=True)


# Show the screen for whichever task was last chosen; nothing is rendered
# until one of the task buttons has been clicked at least once.
if "task_choice" in st.session_state:
    if st.session_state.task_choice == "Data Generation":
        _render_data_generation()
    elif st.session_state.task_choice == "Data Labeling":
        _render_data_labeling()