Spaces:

echung682
/

rlhf_app_for_emotionAI_model

Sleeping

App Files Files Community

echung682 committed on Feb 26

Commit

c280082

verified ·

1 Parent(s): 89ee5f0

Create app.py

Browse files

Files changed (1) hide show

app.py +150 -0

app.py ADDED Viewed

	@@ -0,0 +1,150 @@

+import gradio as gr
+from transformers import (
+    AutoModelForSequenceClassification,
+    pipeline
+)
+from datasets import load_dataset
+import json
+import os
+import subprocess
# Load the fine-tuned emotion classifier once and hand that same instance to
# the pipeline, so the checkpoint weights are not loaded a second time.
model_ckpt = "echung682/finetuned-emotion-ai-model"
model = AutoModelForSequenceClassification.from_pretrained(model_ckpt)
# A model object (unlike a checkpoint name) carries no task or tokenizer hint,
# so both are passed explicitly; the tokenizer comes from the same checkpoint.
pipe = pipeline("text-classification", model=model, tokenizer=model_ckpt)
# Text dataset whose "train" split supplies the prompts shown to users.
emotion_dataset = load_dataset("echung682/emotion-analysis-tweets")
+#in order to keep the data persistent on HuggingFace repo
def save_to_repo():
    """Commit feedback_data/flagged.csv and push it to ``origin main``.

    Runs pull, add, commit and push in that order. The sequence is
    best-effort: every step is attempted even if an earlier one failed, and
    nothing is raised to the caller. A step that cannot be started or that
    exits with a non-zero status is reported on stdout instead of being
    silently ignored.
    """
    git_steps = (
        ["git", "pull", "origin", "main"],  # pull first to reduce push conflicts
        ["git", "add", "feedback_data/flagged.csv"],
        ["git", "commit", "-m", "Update flagged data"],
        ["git", "push", "origin", "main"],
    )
    for step in git_steps:
        command = " ".join(step)
        try:
            # Argument lists avoid going through a shell.
            exit_code = subprocess.run(step, check=False).returncode
        except OSError as err:  # e.g. the git executable is not available
            print(f"save_to_repo: could not run '{command}': {err}")
            continue
        if exit_code != 0:
            print(f"save_to_repo: '{command}' exited with status {exit_code}")
+'''
+in order to keep track of what the last prompt was that was given human feedback
+'''
def load_state():
    """Return the prompt index stored under "count" in state.json.

    Returns:
        The stored count, or 0 (the first prompt) when state.json is missing,
        empty, not valid JSON, not a JSON object, has no "count" key, or holds
        a "count" that is not a non-negative integer.
    """
    try:
        with open("state.json", "r") as f:
            state = json.load(f)
    except (FileNotFoundError, ValueError):
        # ValueError covers json.JSONDecodeError (empty or corrupt file) and
        # undecodable bytes; both are treated like a missing file.
        return 0

    count = state.get("count", 0) if isinstance(state, dict) else 0
    # bool is a subclass of int, so it is excluded explicitly. A negative
    # value would index the dataset from the end, so it is rejected as well.
    if isinstance(count, bool) or not isinstance(count, int) or count < 0:
        return 0
    return count
+# Save state to file
def save_state(count):
    """Persist ``count`` to state.json as ``{"count": count}``.

    The JSON is written to a temporary file in the current directory and then
    moved over state.json with os.replace, so a reader never sees a truncated
    or half-written file, and a failed write leaves the previous state intact.

    Args:
        count: The prompt index to store.
    """
    import tempfile  # local import: only needed for the temporary file

    fd, tmp_path = tempfile.mkstemp(prefix="state.", suffix=".tmp", dir=".")
    try:
        with os.fdopen(fd, "w") as f:
            json.dump({"count": count}, f)
        os.replace(tmp_path, "state.json")
    except BaseException:
        # Do not leave the temporary file behind if writing or renaming failed.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
def increment():
    """Advance the persisted prompt counter by one and return the new value."""
    next_count = load_state() + 1
    save_state(next_count)
    return next_count
def save_state_to_repo():
    """Commit state.json and push it to ``origin main``.

    Runs pull, add, commit and push in that order. The sequence is
    best-effort: every step is attempted even if an earlier one failed, and
    nothing is raised to the caller. A step that cannot be started or that
    exits with a non-zero status is reported on stdout instead of being
    silently ignored.
    """
    git_steps = (
        ["git", "pull", "origin", "main"],  # pull first to reduce push conflicts
        ["git", "add", "state.json"],
        ["git", "commit", "-m", "Update state"],
        ["git", "push", "origin", "main"],
    )
    for step in git_steps:
        command = " ".join(step)
        try:
            # Argument lists avoid going through a shell.
            exit_code = subprocess.run(step, check=False).returncode
        except OSError as err:  # e.g. the git executable is not available
            print(f"save_state_to_repo: could not run '{command}': {err}")
            continue
        if exit_code != 0:
            print(f"save_state_to_repo: '{command}' exited with status {exit_code}")
+'''
+keeping track of the prompt, options, and chosen option
+then increasing the index number (so it doesn't ask everyone to look at the same ones)
+writes the new data into the Gradio file
+pushes the new data and the index number into their respective files to keep track across multiple users
+'''
def updateDataset(prompt, option1, option2, flagged_option):
    """Record one human preference and publish it to the repo.

    Called when the user clicks the submit button.

    Args:
        prompt: The text the user was shown.
        option1: The first emotion offered.
        option2: The second emotion offered.
        flagged_option: The emotion the user picked; expected to equal
            ``option1`` or ``option2``.

    Returns:
        A 5-tuple ``(prompt, chosen, rejected, message, index)``. When
        ``flagged_option`` matches neither option (for example nothing was
        selected), nothing is written, the counter is not advanced, ``chosen``
        and ``rejected`` are empty strings, and ``index`` is the current
        counter value.
    """
    import csv  # local import: only this handler writes CSV

    if flagged_option == option1:
        chosen, rejected = option1, option2
    elif flagged_option == option2:
        chosen, rejected = option2, option1
    else:
        # No valid choice: recording a blank row and advancing the counter
        # would skip this prompt without collecting any feedback for it.
        return (
            prompt,
            "",
            "",
            "Please choose one of the two emotions before submitting.",
            load_state(),
        )

    index = increment()

    os.makedirs("feedback_data", exist_ok=True)
    with open("feedback_data/flagged.csv", "a", newline="", encoding="utf-8") as f:
        # csv.writer quotes fields containing commas, quotes or newlines, so
        # free-form prompt text cannot break the three-column layout.
        # lineterminator="\n" keeps the line ending used by earlier rows.
        csv.writer(f, lineterminator="\n").writerow([prompt, chosen, rejected])

    # Push the feedback file and the counter so they survive restarts.
    save_to_repo()
    save_state_to_repo()
    return prompt, chosen, rejected, "Submitted! Please answer another...", index
+'''
+finding the correct prompt based on the global index
+extracting the top two scoring emotions
+returning these
+'''
def emotion_analysis_data_collection():
    """Return the current prompt and the model's two highest-scoring emotions.

    The prompt is the "text" field of the training row at the index given by
    load_state(). The same text is both classified and returned.

    Returns:
        A 3-tuple ``(text, highest_scoring_label, second_highest_label)``.
    """
    prompt_index = load_state()
    # Index the row first so only this one example is read, not the whole column.
    text = emotion_dataset["train"][prompt_index]["text"]

    # top_k=None is passed so the pipeline reports more than the single best
    # label; each entry is read as a dict with "label" and "score".
    predictions = pipe(text, top_k=None)

    # Rank explicitly instead of relying on the order the pipeline returns.
    ranked = sorted(predictions, key=lambda entry: entry["score"], reverse=True)
    emotion_highestScore = ranked[0]["label"]
    emotion_secondHighestScore = ranked[1]["label"]
    return text, emotion_highestScore, emotion_secondHighestScore
+'''
+designing the gradio interface
+has the two options and a Radio object that will keep track of the chosen emotion
+'''
def _next_prompt():
    """Fetch the current prompt and its two emotions as values for the UI.

    Returns values for, in order: the prompt textbox, the two emotion
    textboxes, and the radio group (new choices, selection cleared).
    """
    text, first, second = emotion_analysis_data_collection()
    return text, first, second, gr.update(choices=[first, second], value=None)


with gr.Blocks() as survey:
    gr.Markdown(
        """
      # Please choose the emotion that best describes the prompt
      """
    )
    # Initial contents, computed once while the interface is being built.
    tweet, emotion_highestScore, emotion_secondHighestScore = emotion_analysis_data_collection()
    sentence = gr.Textbox(tweet, label="Prompt:", interactive=False)
    with gr.Row():
        emotion1 = gr.Textbox(emotion_highestScore, label="Emotion Choice 1:", interactive=False)
        emotion2 = gr.Textbox(emotion_secondHighestScore, label="Emotion Choice 2", interactive=False)
    options = gr.Radio([emotion_highestScore, emotion_secondHighestScore], label="Choose one:")
    submit_btn = gr.Button("Submit Choice")

    prompt_widgets = [sentence, emotion1, emotion2, options]
    result_widgets = [
        gr.Textbox(label="Prompt"),
        gr.Textbox(label="Chosen Response"),
        gr.Textbox(label="Rejected Response"),
        gr.Textbox(label="Confirmation Message"),
        gr.Textbox(label="Prompt Number"),
    ]

    # The values above are fixed when the app starts. Refresh them on every
    # page load so a visitor sees the prompt at the current counter.
    survey.load(fn=_next_prompt, outputs=prompt_widgets)

    # After a submission is recorded, show the next prompt so the user can
    # actually "answer another".
    submit_btn.click(
        fn=updateDataset,
        inputs=[sentence, emotion1, emotion2, options],
        outputs=result_widgets,
    ).then(fn=_next_prompt, outputs=prompt_widgets)
survey.launch()