Data_Generation_LabelingCopy

Running

App Files Files

Data_Generation_LabelingCopy / app10.py

Wedyan2023

Update app10.py

cc1fa22 verified 3 months ago

raw

history blame

8.76 kB

	## Update of app7.py

	import os
	import streamlit as st
	from openai import OpenAI
	from dotenv import load_dotenv
	from langchain_core.prompts import PromptTemplate

	# Load environment variables (python-dotenv) before reading the token below.
	load_dotenv()

	# OpenAI-compatible client pointed at the Hugging Face inference endpoint.
	# The credential comes from the TOKEN2 environment variable; set it to your
	# Hugging Face token.
	hf_base_url = "https://api-inference.huggingface.co/v1"
	hf_token = os.environ.get('TOKEN2')
	client = OpenAI(base_url=hf_base_url, api_key=hf_token)

	def reset_conversation():
	    """Reset the chat state kept in the Streamlit session.

	    Both the ``conversation`` and ``messages`` entries of
	    ``st.session_state`` are replaced with fresh empty lists.
	    """
	    for state_key in ("conversation", "messages"):
	        st.session_state[state_key] = []

	# Page header
	st.title("🤖 Text Data Generation & Labeling App")

	# Sidebar: model choice, sampling temperature and the reset button.
	# `selected_model` and `temperature` are read later when calling the API.
	with st.sidebar:
	    st.title("Settings")
	    selected_model = st.selectbox("Select Model", ["meta-llama/Meta-Llama-3-8B-Instruct"])
	    temperature = st.slider("Temperature", 0.0, 1.0, 0.5)
	    st.button("Reset Conversation", on_click=reset_conversation)
	    st.write(f"You're now chatting with {selected_model}")
	    st.markdown("Note: Generated content may be inaccurate or false.")

	# Make sure the message history exists before it is read
	if "messages" not in st.session_state:
	    st.session_state["messages"] = []

	# Replay the stored history, one chat bubble per message
	for entry in st.session_state["messages"]:
	    st.chat_message(entry["role"]).markdown(entry["content"])

	# Main logic: choose between Data Generation and Data Labeling.
	#
	# The two tasks share the same building blocks (label entry, few-shot entry,
	# streaming a completion), so those live in small helpers below and each task
	# only supplies its own widget captions and prompt.

	CLASSIFICATION_TYPES = ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"]
	SENTIMENT_LABELS = ["Positive", "Negative", "Neutral"]
	MAX_RESPONSE_TOKENS = 3000


	def _ask_labels(kind, binary_captions, count_caption, item_prefix):
	    """Render the label-entry widgets for ``kind`` and return the labels.

	    Args:
	        kind: One of ``CLASSIFICATION_TYPES``.
	        binary_captions: Captions of the two text inputs shown for
	            "Binary Classification".
	        count_caption: Caption of the class-count slider shown for
	            "Multi-Class Classification".
	        item_prefix: Caption prefix of each multi-class text input
	            (rendered as "<prefix> 1", "<prefix> 2", ...).

	    Returns:
	        A list of label strings. Entries may be empty while the user has not
	        typed anything yet.
	    """
	    if kind == "Sentiment Analysis":
	        return list(SENTIMENT_LABELS)
	    if kind == "Binary Classification":
	        return [st.text_input(caption) for caption in binary_captions]
	    num_classes = st.slider(count_caption, 3, 10, 3)
	    return [st.text_input(f"{item_prefix} {i + 1}") for i in range(num_classes)]


	def _ask_few_shot(radio_caption, count_caption, content_caption, label_caption, labels):
	    """Render the optional few-shot widgets and return the entered examples.

	    ``content_caption`` and ``label_caption`` are format strings containing
	    ``{n}``, which is replaced by the 1-based example number.

	    Returns:
	        A list of ``{"content": str, "label": str}`` dicts; empty when the
	        user answered "No".
	    """
	    if st.radio(radio_caption, ["Yes", "No"]) != "Yes":
	        return []
	    count = st.slider(count_caption, 1, 5, 1)
	    examples = []
	    for n in range(1, count + 1):
	        content = st.text_area(content_caption.format(n=n))
	        label = st.selectbox(label_caption.format(n=n), labels)
	        examples.append({"content": content, "label": label})
	    return examples


	def _few_shot_section(examples, heading):
	    """Format few-shot examples as a prompt section.

	    Examples whose content is blank are skipped. Returns an empty string when
	    nothing is left, otherwise ``heading`` followed by one
	    "Text: ... - Label: ..." line per example, ending with a newline so the
	    section can be dropped straight into a template.
	    """
	    lines = [
	        f"Text: {ex['content']} - Label: {ex['label']}"
	        for ex in examples
	        if ex["content"].strip()
	    ]
	    if not lines:
	        return ""
	    return "\n".join([heading, *lines]) + "\n"


	def _label_problems(labels):
	    """Return user-facing messages describing what is wrong with ``labels``."""
	    cleaned = [label.strip() for label in labels]
	    if not all(cleaned):
	        return ["Please fill in every class label."]
	    if len(set(cleaned)) != len(cleaned):
	        return ["Class labels must be unique."]
	    return []


	def _run_completion(prompt, failure_message):
	    """Send ``prompt`` as a system message and stream the reply to the page.

	    The prompt and, on success, the reply are appended to
	    ``st.session_state.messages`` so they show up in the replayed history.

	    Returns:
	        The value returned by ``st.write_stream``, or ``None`` if the request
	        failed (the error is reported on the page instead of raised).
	    """
	    st.session_state.messages.append({"role": "system", "content": prompt})
	    try:
	        stream = client.chat.completions.create(
	            model=selected_model,
	            messages=[{"role": "system", "content": prompt}],
	            temperature=temperature,
	            stream=True,
	            max_tokens=MAX_RESPONSE_TOKENS,
	        )
	        reply = st.write_stream(stream)
	    except Exception as e:  # UI boundary: report the failure instead of crashing the app
	        st.error(failure_message)
	        st.error(f"Details: {e}")
	        return None
	    st.session_state.messages.append({"role": "assistant", "content": reply})
	    return reply


	def _render_generation():
	    """Render the "Data Generation" form and run the generation on request."""
	    classification_type = st.selectbox("Choose Classification Type", CLASSIFICATION_TYPES)
	    labels = _ask_labels(
	        classification_type,
	        ("Enter first class", "Enter second class"),
	        "How many classes?",
	        "Class",
	    )

	    domain = st.selectbox("Choose Domain", ["Restaurant reviews", "E-commerce reviews", "Custom"])
	    if domain == "Custom":
	        domain = st.text_input("Specify custom domain")

	    min_words = st.number_input("Minimum words per example", min_value=10, max_value=90, value=10)
	    max_words = st.number_input("Maximum words per example", min_value=10, max_value=90, value=90)

	    few_shot_examples = _ask_few_shot(
	        "Use few-shot examples?",
	        "Number of few-shot examples",
	        "Example {n} Content",
	        "Example {n} Label",
	        labels,
	    )

	    num_to_generate = st.number_input("Number of examples to generate", 1, 100, 10)
	    user_prompt = st.text_area("Enter additional instructions", "")

	    # Construct the LangChain prompt. User-entered text is passed in as
	    # template variables (never concatenated into the template itself), so
	    # braces typed by the user are not treated as placeholders.
	    prompt_template = PromptTemplate(
	        input_variables=[
	            "classification_type", "domain", "num_examples", "min_words",
	            "max_words", "labels", "few_shot_examples", "user_prompt",
	        ],
	        template=(
	            "You are a professional {classification_type} expert tasked with generating examples for {domain}.\n"
	            "Use the following parameters:\n"
	            "- Number of examples: {num_examples}\n"
	            "- Word range: {min_words}-{max_words}\n"
	            "- Labels: {labels}\n"
	            "{few_shot_examples}"
	            "{user_prompt}"
	        ),
	    )
	    system_prompt = prompt_template.format(
	        classification_type=classification_type,
	        domain=domain,
	        num_examples=num_to_generate,
	        min_words=min_words,
	        max_words=max_words,
	        labels=", ".join(labels),
	        # The few-shot examples the user entered are now part of the prompt.
	        few_shot_examples=_few_shot_section(
	            few_shot_examples, "Use these examples as guidance:"
	        ),
	        user_prompt=user_prompt,
	    )

	    st.write("System Prompt:")
	    st.code(system_prompt)

	    if not st.button("Generate Examples"):
	        return

	    # Validate only on click so the form can be filled in without warnings.
	    problems = _label_problems(labels)
	    if not domain.strip():
	        problems.append("Please specify a domain.")
	    if min_words > max_words:
	        problems.append("Minimum words per example cannot exceed the maximum.")
	    if problems:
	        for problem in problems:
	            st.warning(problem)
	        return

	    with st.spinner("Generating..."):
	        _run_completion(system_prompt, "An error occurred during generation.")


	def _render_labeling():
	    """Render the "Data Labeling" form and classify the entered text on request."""
	    labeling_type = st.selectbox("Classification Type for Labeling", CLASSIFICATION_TYPES)
	    labels = _ask_labels(
	        labeling_type,
	        ("First label for classification", "Second label for classification"),
	        "Number of labels",
	        "Label",
	    )

	    few_shot_labeling_examples = _ask_few_shot(
	        "Add few-shot examples for labeling?",
	        "Number of few-shot labeling examples",
	        "Labeling Example {n} Content",
	        "Label for Example {n}",
	        labels,
	    )

	    text_to_classify = st.text_area("Enter text to classify")

	    if not st.button("Classify Text"):
	        return
	    if not text_to_classify.strip():
	        st.warning("Please enter text to classify.")
	        return
	    problems = _label_problems(labels)
	    if problems:
	        for problem in problems:
	            st.warning(problem)
	        return

	    # Construct the labeling prompt. Each instruction sits on its own line,
	    # and the few-shot section is omitted entirely when no examples exist.
	    labeling_prompt_template = PromptTemplate(
	        input_variables=["labeling_type", "labels", "few_shot_examples", "text_to_classify"],
	        template=(
	            "You are an expert in {labeling_type}. "
	            "Classify the following text using: {labels}.\n\n"
	            "Do NOT write additional information or commentary.\n"
	            "{few_shot_examples}"
	            "Write the classification as: {text_to_classify}. Label: [Label]\n"
	            "Classify this: {text_to_classify}"
	        ),
	    )
	    labeling_prompt = labeling_prompt_template.format(
	        labeling_type=labeling_type.lower(),
	        labels=", ".join(labels),
	        few_shot_examples=_few_shot_section(
	            few_shot_labeling_examples,
	            "Use these example classifications as guidance in the labeling process:",
	        ),
	        text_to_classify=text_to_classify,
	    )

	    # The streamed reply is the result; it is shown once and stored in the
	    # history by _run_completion, matching the generation task.
	    with st.spinner("Classifying..."):
	        _run_completion(labeling_prompt, "An error occurred during classification.")


	task_choice = st.selectbox("Choose Task", ["Data Generation", "Data Labeling"])

	if task_choice == "Data Generation":
	    _render_generation()
	elif task_choice == "Data Labeling":
	    _render_labeling()