echung682 committed on
Commit
00e27e2
·
verified ·
1 Parent(s): 987f3af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -46
app.py CHANGED
@@ -18,48 +18,64 @@ pipe = pipeline(model=model_ckpt)
18
  emotion_dataset = load_dataset("echung682/emotion-analysis-tweets")
19
 
20
  #in order to keep the data persistent on HuggingFace repo
 
21
  def save_to_repo():
22
- # Add & commit the latest flagged.csv file to the Hugging Face Space repo
23
- os.system("git pull origin main") # Pull latest changes (to avoid conflicts)
24
- os.system("git add feedback_data/flagged.csv")
25
- os.system('git commit -m "Update flagged data"')
26
- os.system("git push origin main") # Push updated file to the repo
 
 
 
 
27
 
28
- STATE_FILE = "state.json"
29
 
30
  '''
31
  in order to keep track of what the last prompt was that was given human feedback
32
  '''
33
  def load_state():
34
- if not os.path.exists(STATE_FILE): # Handle missing file
35
- return 0 # Default count value
36
-
37
  try:
38
- with open(STATE_FILE, "r") as f:
39
- content = f.read().strip() # Remove leading/trailing spaces
40
- if not content: # Handle empty file
41
- return 0 # Default count value
42
- return json.loads(content).get("count", 0) # Safely parse JSON
43
- except (json.JSONDecodeError, OSError): # Catch JSON errors or I/O issues
44
- return 0 # Default count value
45
- #if the file doesn't have count variable in it, then it will return 0, which is good - that's the first index
46
 
47
  # Save state to file
48
- def save_state(count):
49
  with open("state.json", "w") as f:
50
- json.dump({"count": count}, f)
51
-
52
- def increment():
53
- count = load_state()
54
- count += 1
55
- save_state(count)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  return count
57
 
58
  def save_state_to_repo():
59
- os.system("git pull origin main") # Pull latest changes (to avoid conflicts)
60
- os.system("git add state.json")
61
- os.system('git commit -m "Update state"')
62
- os.system("git push origin main") # Push updated file to the repo
 
 
 
63
 
64
 
65
  '''
@@ -80,7 +96,7 @@ def updateDataset(prompt, option1, option2, flagged_option):
80
  chosen = ""
81
  rejected = ""
82
 
83
- index = increment()
84
 
85
  with open("feedback_data/flagged.csv", "a") as f:
86
  f.write(f"{prompt},{chosen},{rejected}\n")
@@ -98,28 +114,28 @@ extracting the top two scoring emotions
98
  returning these
99
  '''
100
  def emotion_analysis_data_collection():
101
- index = load_state()
102
- result = pipe(emotion_dataset["train"]["text"][index], top_k = None)
103
- score_list = [] #empty list to hold the scores
104
- emotion_list = [] #empty list to hold the emotions
105
 
106
- for emotion in result:
107
- emotion_list.append(emotion["label"]) #extracting the emotions from the results
108
- score_list.append(emotion["score"]) #extracing the scores from the results
109
 
110
- emotion_dict = {}
111
- for index, value in enumerate(emotion_list):
112
- emotion_dict[value] = score_list[index]
113
 
114
- dictKeys_list = list(emotion_dict.keys())
115
- emotion_highestScore = dictKeys_list[0]
116
- emotion_secondHighestScore = dictKeys_list[1]
117
 
118
- #print(emotion_highestScore)
119
- #print(emotion_secondHighestScore)
120
- #print(" ")
121
 
122
- return emotion_dataset["train"]["text"][index], emotion_highestScore, emotion_secondHighestScore
123
 
124
 
125
 
 
18
  emotion_dataset = load_dataset("echung682/emotion-analysis-tweets")
19
 
20
  #in order to keep the data persistent on HuggingFace repo
21
+ #they are saved as secrets in my HuggingFace space because they shouldn't be visible in the code
22
  def save_to_repo():
23
+ try:
24
+ subprocess.run(["git", "config", "--global", "user.email", os.environ["GIT_EMAIL"]], check=True)
25
+ subprocess.run(["git", "config", "--global", "user.name", os.environ["GIT_USER"]], check=True)
26
+ subprocess.run(["git", "pull", "origin", "main"], check=True)
27
+ subprocess.run(["git", "add", "feedback_data/flagged.csv"], check=True)
28
+ subprocess.run(["git", "commit", "-m", "Update flagged data"], check=True)
29
+ subprocess.run(["git", "push", "origin", "main"], check=True)
30
+ except subprocess.CalledProcessError as e:
31
+ print(f"Git operation failed: {e}")
32
 
 
33
 
34
  '''
35
  in order to keep track of what the last prompt was that was given human feedback
36
  '''
37
  def load_state():
 
 
 
38
  try:
39
+ with open("state.json", "r") as f:
40
+ state = json.load(f)
41
+ return state.get("count", 0), state.get("processed_indices", [])
42
+ except FileNotFoundError:
43
+ return 0, []
 
 
 
44
 
45
  # Save state to file
46
def save_state(count, processed_indices):
    """Overwrite state.json with the current progress.

    Args:
        count: value stored under the "count" key.
        processed_indices: value stored under the "processed_indices" key,
            the list of prompts that were already processed.
    """
    snapshot = {
        "count": count,
        "processed_indices": processed_indices,
    }
    with open("state.json", "w") as state_file:
        json.dump(snapshot, state_file)
52
+
53
def get_next_prompt():
    """Pick the next dataset index that has not been processed and persist it.

    Loads (count, processed) with load_state(), advances count with
    wrap-around past every index already in processed, records the chosen
    index, then stores the state with save_state() and save_state_to_repo().

    Returns:
        The selected index into emotion_dataset["train"].
    """
    count, processed = load_state()
    dataset_size = len(emotion_dataset["train"])

    # If we've processed all prompts, start over
    if len(processed) >= dataset_size:
        processed = []

    # Membership is tested against a set so the skip loop stays linear in the
    # number of processed prompts instead of rescanning the list at each step.
    seen = set(processed)
    while count in seen:  # skipping the prompts that we already processed
        count = (count + 1) % dataset_size

    processed.append(count)
    save_state(count, processed)
    save_state_to_repo()

    return count
70
 
71
  def save_state_to_repo():
72
+ try:
73
+ subprocess.run(["git", "pull", "origin", "main"], check=True)
74
+ subprocess.run(["git", "add", "state.json"], check=True)
75
+ subprocess.run(["git", "commit", "-m", "Update state"], check=True)
76
+ subprocess.run(["git", "push", "origin", "main"], check=True)
77
+ except subprocess.CalledProcessError as e:
78
+ print(f"Git operation failed: {e}")
79
 
80
 
81
  '''
 
96
  chosen = ""
97
  rejected = ""
98
 
99
+ index = get_next_prompt()
100
 
101
  with open("feedback_data/flagged.csv", "a") as f:
102
  f.write(f"{prompt},{chosen},{rejected}\n")
 
114
  returning these
115
  '''
116
  def emotion_analysis_data_collection():
117
+ index = get_next_prompt()
118
+ result = pipe(emotion_dataset["train"]["text"][index], top_k = None)
119
+ score_list = [] #empty list to hold the scores
120
+ emotion_list = [] #empty list to hold the emotions
121
 
122
+ for emotion in result:
123
+ emotion_list.append(emotion["label"]) #extracting the emotions from the results
124
+ score_list.append(emotion["score"]) #extracing the scores from the results
125
 
126
+ emotion_dict = {}
127
+ for index, value in enumerate(emotion_list):
128
+ emotion_dict[value] = score_list[index]
129
 
130
+ dictKeys_list = list(emotion_dict.keys())
131
+ emotion_highestScore = dictKeys_list[0]
132
+ emotion_secondHighestScore = dictKeys_list[1]
133
 
134
+ #print(emotion_highestScore)
135
+ #print(emotion_secondHighestScore)
136
+ #print(" ")
137
 
138
+ return emotion_dataset["train"]["text"][index], emotion_highestScore, emotion_secondHighestScore
139
 
140
 
141