Archisman Karmakar committed on
Commit 853c736 · 1 Parent(s): 4d35689

2025.03.20.post1 MAJOR

Files changed (28)
  1. README.md +2 -2
  2. app_main_hf.py +14 -14
  3. dashboard.py +10 -8
  4. emotionMoodtag_analysis/__init__.py +0 -0
  5. emotionMoodtag_analysis/config/stage2_models.json +32 -0
  6. emotionMoodtag_analysis/emotion_analysis_main.py +317 -0
  7. emotionMoodtag_analysis/hmv_cfg_base_stage2/__init__.py +0 -0
  8. {sentiment_analysis/hmv_cfg_base_stage1 → emotionMoodtag_analysis/hmv_cfg_base_stage2}/imports.py +24 -24
  9. emotionMoodtag_analysis/hmv_cfg_base_stage2/model1.py +89 -0
  10. emotionMoodtag_analysis/hmv_cfg_base_stage2/model2.py +163 -0
  11. emotion_analysis.py +0 -9
  12. poetry.lock +15 -15
  13. pyproject.toml +1 -1
  14. pyprojectOLD.toml +2 -1
  15. requirements.txt +2 -2
  16. {sentiment_analysis → sentimentPolarity_analysis}/__init__.py +0 -0
  17. {sentiment_analysis → sentimentPolarity_analysis}/config/stage1_models.json +62 -62
  18. {sentiment_analysis → sentimentPolarity_analysis}/hmv_cfg_base_stage1/__init__.py +1 -1
  19. sentimentPolarity_analysis/hmv_cfg_base_stage1/imports.py +25 -0
  20. {sentiment_analysis → sentimentPolarity_analysis}/hmv_cfg_base_stage1/model1.py +85 -85
  21. {sentiment_analysis → sentimentPolarity_analysis}/hmv_cfg_base_stage1/model2.py +2 -2
  22. {sentiment_analysis → sentimentPolarity_analysis}/hmv_cfg_base_stage1/model3.py +0 -0
  23. {sentiment_analysis → sentimentPolarity_analysis}/hmv_cfg_base_stage1/model4.py +0 -0
  24. {sentiment_analysis → sentimentPolarity_analysis}/sentiment_analysis_main.py +0 -0
  25. sentiment_analysis/hmv_cfg_base_stage1/__pycache__/__init__.cpython-310.pyc +0 -0
  26. sentiment_analysis/hmv_cfg_base_stage1/__pycache__/__init__.cpython-312.pyc +0 -0
  27. sentiment_analysis/hmv_cfg_base_stage1/__pycache__/model1.cpython-310.pyc +0 -0
  28. sentiment_analysis/hmv_cfg_base_stage1/__pycache__/model1.cpython-312.pyc +0 -0
README.md CHANGED
@@ -1,8 +1,8 @@
  ---
  title: Tachygraphy Microtext Analysis And Normalization
  emoji: 💻
- colorFrom: purple
- colorTo: gray
+ colorFrom: orange
+ colorTo: red
  sdk: streamlit
  sdk_version: 1.43.2
  python_version: "3.12"
app_main_hf.py CHANGED
@@ -39,8 +39,8 @@ import importlib.util
  # sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))


- from emotion_analysis import show_emotion_analysis
- from sentiment_analysis.sentiment_analysis_main import show_sentiment_analysis
+ from emotionMoodtag_analysis.emotion_analysis_main import show_emotion_analysis
+ from sentimentPolarity_analysis.sentiment_analysis_main import show_sentiment_analysis
  from dashboard import show_dashboard


@@ -54,15 +54,15 @@ st.set_page_config(

  def free_memory():
      # """Free up CPU & GPU memory before loading a new model."""
-     global current_model, current_tokenizer
+     # global current_model, current_tokenizer

-     if current_model is not None:
-         del current_model  # Delete the existing model
-         current_model = None  # Reset reference
+     # if current_model is not None:
+     #     del current_model  # Delete the existing model
+     #     current_model = None  # Reset reference

-     if current_tokenizer is not None:
-         del current_tokenizer  # Delete the tokenizer
-         current_tokenizer = None
+     # if current_tokenizer is not None:
+     #     del current_tokenizer  # Delete the tokenizer
+     #     current_tokenizer = None

      gc.collect()  # Force garbage collection for CPU memory

@@ -149,19 +149,19 @@ def main():

      if selection == "Dashboard":
          st.cache_resource.clear()
-         # free_memory()
+         free_memory()
          show_dashboard()

      elif selection == "Stage 1: Sentiment Polarity Analysis":
          st.cache_resource.clear()
-         # free_memory()
+         free_memory()
          show_sentiment_analysis()

      elif selection == "Stage 2: Emotion Mood-tag Analysis":
          st.cache_resource.clear()
-         # free_memory()
-         # show_emotion_analysis()
-         st.write("This section is under development.")
+         free_memory()
+         show_emotion_analysis()
+         # st.write("This section is under development.")

      elif selection == "Stage 3: Text Transformation & Normalization":
          st.cache_resource.clear()
dashboard.py CHANGED
@@ -11,15 +11,15 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))

  def free_memory():
      # """Free up CPU & GPU memory before loading a new model."""
-     global current_model, current_tokenizer
+     # global current_model, current_tokenizer

-     if current_model is not None:
-         del current_model  # Delete the existing model
-         current_model = None  # Reset reference
+     # if current_model is not None:
+     #     del current_model  # Delete the existing model
+     #     current_model = None  # Reset reference

-     if current_tokenizer is not None:
-         del current_tokenizer  # Delete the tokenizer
-         current_tokenizer = None
+     # if current_tokenizer is not None:
+     #     del current_tokenizer  # Delete the tokenizer
+     #     current_tokenizer = None

      gc.collect()  # Force garbage collection for CPU memory

@@ -91,7 +91,9 @@ def show_dashboard():
      - Training Source: [GitHub @ Tachygraphy Micro-text Analysis & Normalization](https://github.com/ArchismanKarmakar/Tachygraphy-Microtext-Analysis-And-Normalization)
      - Kaggle Collections: [Kaggle @ Tachygraphy Micro-text Analysis & Normalization](https://www.kaggle.com/datasets/archismancoder/dataset-tachygraphy/data?select=Tachygraphy_MicroText-AIO-V3.xlsx)
      - Hugging Face Org: [Hugging Face @ Tachygraphy Micro-text Analysis & Normalization](https://huggingface.co/tachygraphy-microtrext-norm-org)
-     - Deployment: [Streamlit + Hugging Face @ GitHub](https://github.com/ArchismanKarmakar/Tachygraphy-Microtext-Analysis-And-Normalization-Deployment-Source-HuggingFace_Streamlit_JPX14032025)
+     - Deployment Source: [GitHub](https://github.com/ArchismanKarmakar/Tachygraphy-Microtext-Analysis-And-Normalization-Deployment-Source-HuggingFace_Streamlit_JPX14032025)
+     - Streamlit Deployment: [Streamlit](https://tachygraphy-microtext.streamlit.app/)
+     - Hugging Face Space Deployment: [Hugging Face Space](https://huggingface.co/spaces/tachygraphy-microtrext-norm-org/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder)
      """)

      create_footer()
emotionMoodtag_analysis/__init__.py ADDED
File without changes
emotionMoodtag_analysis/config/stage2_models.json ADDED
@@ -0,0 +1,32 @@
+ {
+     "1": {
+         "name": "DeBERTa v3 Base for Sequence Classification",
+         "type": "hf_automodel_finetuned_dbt3",
+         "module_path": "hmv_cfg_base_stage2.model1",
+         "hf_location": "tachygraphy-microtrext-norm-org/DeBERTa-v3-seqClassfication-LV2-EmotionMoodtags-Batch8",
+         "tokenizer_class": "DebertaV2Tokenizer",
+         "model_class": "DebertaV2ForSequenceClassification",
+         "problem_type": "regression",
+         "base_model": "microsoft/deberta-v3-base",
+         "base_model_class": "DebertaV2ForSequenceClassification",
+         "num_labels": 7,
+         "device": "cpu",
+         "load_function": "load_model",
+         "predict_function": "predict"
+     },
+     "2": {
+         "name": "DeBERTa v3 Base Custom Model with minimal Regularized Loss",
+         "type": "db3_base_custom",
+         "module_path": "hmv_cfg_base_stage2.model2",
+         "hf_location": "tachygraphy-microtrext-norm-org/DeBERTa-v3-Base-Cust-LV2-EmotionMoodtags-minRegLoss",
+         "tokenizer_class": "DebertaV2Tokenizer",
+         "model_class": "EmotionModel",
+         "problem_type": "regression",
+         "base_model": "microsoft/deberta-v3-base",
+         "base_model_class": "DebertaV2Model",
+         "num_labels": 7,
+         "device": "cpu",
+         "load_function": "load_model",
+         "predict_function": "predict"
+     }
+ }
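For reference: this registry is consumed dynamically by emotion_analysis_main.py below (its load_model_config() and import_from_module() functions). A minimal standalone sketch of that lookup, assuming the repo root is the working directory and the package directories are on sys.path:

    import importlib
    import json

    # Load the stage-2 registry (path assumed relative to the repo root).
    with open("emotionMoodtag_analysis/config/stage2_models.json", "r") as f:
        model_data = json.load(f)

    entry = model_data["1"]
    # Resolve the loader module and its entry points by name, as the app does.
    module = importlib.import_module(entry["module_path"])
    load_model = getattr(module, entry["load_function"])
    predict = getattr(module, entry["predict_function"])
    model, tokenizer = load_model()  # weights come from entry["hf_location"]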
emotionMoodtag_analysis/emotion_analysis_main.py ADDED
@@ -0,0 +1,317 @@
+ import os
+ import sys
+
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
+
+ from imports import *
+ import importlib.util
+ import os
+ import sys
+ import joblib
+ import time
+ import torch
+ # from transformers.utils import move_cache_to_trash
+ # from huggingface_hub import delete_cache
+ from transformers.utils.hub import TRANSFORMERS_CACHE
+ import shutil
+
+
+ # from hmv_cfg_base_stage1.model1 import load_model as load_model1
+ # from hmv_cfg_base_stage1.model1 import predict as predict1
+
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+ CONFIG_STAGE2 = os.path.join(BASE_DIR, "config", "stage2_models.json")
+ LOADERS_STAGE2 = os.path.join(BASE_DIR, "hmv-cfg-base-stage2")
+
+
+ EMOTION_MOODTAG_LABELS = [
+     "anger", "disgust", "fear", "joy", "neutral",
+     "sadness", "surprise"
+ ]
+
+ current_model = None
+ current_tokenizer = None
+
+
+ # Enabling Resource caching
+
+
+ # @st.cache_resource
+ def load_model_config():
+     with open(CONFIG_STAGE2, "r") as f:
+         model_data = json.load(f)
+
+     # Extract names for dropdown
+     model_options = {v["name"]: v for v in model_data.values()}
+     return model_data, model_options
+
+
+ MODEL_DATA, MODEL_OPTIONS = load_model_config()
+
+
+ # ✅ Dynamically Import Model Functions
+ def import_from_module(module_name, function_name):
+     try:
+         module = importlib.import_module(module_name)
+         return getattr(module, function_name)
+     except (ModuleNotFoundError, AttributeError) as e:
+         st.error(f"❌ Import Error: {e}")
+         return None
+
+
+ def free_memory():
+     # """Free up CPU & GPU memory before loading a new model."""
+     global current_model, current_tokenizer
+
+     if current_model is not None:
+         del current_model  # Delete the existing model
+         current_model = None  # Reset reference
+
+     if current_tokenizer is not None:
+         del current_tokenizer  # Delete the tokenizer
+         current_tokenizer = None
+
+     gc.collect()  # Force garbage collection for CPU memory
+
+     if torch.cuda.is_available():
+         torch.cuda.empty_cache()  # Free GPU memory
+         torch.cuda.ipc_collect()  # Clean up PyTorch GPU cache
+
+     # If running on CPU, reclaim memory using OS-level commands
+     try:
+         if torch.cuda.is_available() is False:
+             psutil.virtual_memory()  # Refresh memory stats
+     except Exception as e:
+         print(f"Memory cleanup error: {e}")
+
+     # Delete cached Hugging Face models
+     try:
+         cache_dir = TRANSFORMERS_CACHE
+         if os.path.exists(cache_dir):
+             shutil.rmtree(cache_dir)
+             print("Cache cleared!")
+     except Exception as e:
+         print(f"❌ Cache cleanup error: {e}")
+
+
+ def load_selected_model(model_name):
+     global current_model, current_tokenizer
+
+     # st.cache_resource.clear()
+
+     # free_memory()
+
+     # st.write("DEBUG: Available Models:", MODEL_OPTIONS.keys())  # ✅ See available models
+     # st.write("DEBUG: Selected Model:", MODEL_OPTIONS[model_name])  # ✅ Check selected model
+     # st.write("DEBUG: Model Name:", model_name)  # ✅ Check selected model
+
+     if model_name not in MODEL_OPTIONS:
+         st.error(f"⚠️ Model '{model_name}' not found in config!")
+         return None, None, None
+
+     model_info = MODEL_OPTIONS[model_name]
+     hf_location = model_info["hf_location"]
+
+     model_module = model_info["module_path"]
+     load_function = model_info["load_function"]
+     predict_function = model_info["predict_function"]
+
+     load_model_func = import_from_module(model_module, load_function)
+     predict_func = import_from_module(model_module, predict_function)
+
+     if load_model_func is None or predict_func is None:
+         st.error("❌ Model functions could not be loaded!")
+         return None, None, None
+
+     model, tokenizer = load_model_func()
+
+     current_model, current_tokenizer = model, tokenizer
+     return model, tokenizer, predict_func
+
+
+ def disable_ui():
+     st.components.v1.html(
+         """
+         <style>
+         #ui-disable-overlay {
+             position: fixed;
+             top: 0;
+             left: 0;
+             width: 100vw;
+             height: 100vh;
+             background-color: rgba(200, 200, 200, 0.5);
+             z-index: 9999;
+         }
+         </style>
+         <div id="ui-disable-overlay"></div>
+         """,
+         height=0,
+         scrolling=False
+     )
+
+
+ def enable_ui():
+     st.components.v1.html(
+         """
+         <script>
+         var overlay = document.getElementById("ui-disable-overlay");
+         if (overlay) {
+             overlay.parentNode.removeChild(overlay);
+         }
+         </script>
+         """,
+         height=0,
+         scrolling=False
+     )
+
+
+ # Function to increment progress dynamically
+ def update_progress(progress_bar, start, end, delay=0.1):
+     for i in range(start, end + 1, 5):  # Increment in steps of 5%
+         progress_bar.progress(i)
+         time.sleep(delay)  # Simulate processing time
+         # st.experimental_rerun()  # Refresh the page
+
+
+ # Function to update session state when model changes
+ def on_model_change():
+     st.session_state.model_changed = True  # Mark model as changed
+
+
+ # Function to update session state when text changes
+
+
+ def on_text_change():
+     st.session_state.text_changed = True  # Mark text as changed
+
+
+ # Initialize session state variables
+ if "selected_model" not in st.session_state:
+     st.session_state.selected_model = list(MODEL_OPTIONS.keys())[
+         0]  # Default model
+ if "user_input" not in st.session_state:
+     st.session_state.user_input = ""
+ if "last_processed_input" not in st.session_state:
+     st.session_state.last_processed_input = ""
+ if "model_changed" not in st.session_state:
+     st.session_state.model_changed = False
+ if "text_changed" not in st.session_state:
+     st.session_state.text_changed = False
+ if "disabled" not in st.session_state:
+     st.session_state.disabled = False
+
+
+ # Enabling Resource caching
+ def show_emotion_analysis():
+     st.title("Stage 2: Emotion Mood-tag Analysis")
+     st.write("This section handles emotion mood-tag analysis.")
+
+     # Model selection with change detection
+     selected_model = st.selectbox(
+         "Choose a model:", list(MODEL_OPTIONS.keys()), key="selected_model", on_change=on_model_change
+     )
+
+     # Text input with change detection
+     user_input = st.text_input(
+         "Enter text for emotions mood-tag analysis:", key="user_input", on_change=on_text_change
+     )
+     user_input_copy = user_input
+
+     # Only run inference if:
+     # 1. The text is NOT empty
+     # 2. The text has changed OR the model has changed
+     if user_input.strip() and (st.session_state.text_changed or st.session_state.model_changed):
+
+         # disable_ui()
+
+         # Reset session state flags
+         st.session_state.last_processed_input = user_input
+         st.session_state.model_changed = False
+         st.session_state.text_changed = False  # Store selected model
+
+         # ADD A DYNAMIC PROGRESS BAR
+         progress_bar = st.progress(0)
+         update_progress(progress_bar, 0, 10)
+         # status_text = st.empty()
+
+         # update_progress(0, 10)
+         # status_text.text("Loading model...")
+
+         # Make prediction
+
+         # model, tokenizer = load_model()
+         # model, tokenizer = load_selected_model(selected_model)
+         with st.spinner("Please wait..."):
+             model, tokenizer, predict_func = load_selected_model(selected_model)
+             device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+             if model is None:
+                 st.error(
+                     "⚠️ Error: Model failed to load! Check model selection or configuration.")
+                 st.stop()
+
+             # model.to(device)
+             if hasattr(model, "to"):
+                 model.to(device)
+
+             # predictions = predict(user_input, model, tokenizer, device)
+
+             predictions = predict_func(user_input, model, tokenizer, device)
+             print(predictions)
+
+         # Squeeze predictions to remove extra dimensions
+         predictions_array = predictions.squeeze()
+
+         # Convert to binary predictions (argmax)
+         binary_predictions = np.zeros_like(predictions_array)
+         max_indices = np.argmax(predictions_array)
+         binary_predictions[max_indices] = 1
+
+         # Update progress bar for prediction and model loading
+         update_progress(progress_bar, 10, 100)
+
+         # Display raw predictions
+         st.write(f"**Predicted Emotion Scores:** {predictions_array}")
+
+         # enable_ui()
+
+         # Display binary classification result
+         # st.write(f"**Predicted Sentiment:**")
+         # st.write(f"**NEGATIVE:** {binary_predictions[0]}, **NEUTRAL:** {binary_predictions[1]}, **POSITIVE:** {binary_predictions[2]}")
+         # st.write(f"**NEUTRAL:** {binary_predictions[1]}")
+         # st.write(f"**POSITIVE:** {binary_predictions[2]}")
+
+         # 1️⃣ **Polar Plot (Plotly)**
+         emotion_moodtags = predictions_array.tolist()
+         fig_polar = px.line_polar(
+             pd.DataFrame(dict(r=emotion_moodtags,
+                               theta=EMOTION_MOODTAG_LABELS)),
+             r='r', theta='theta', line_close=True
+         )
+         st.plotly_chart(fig_polar)
+
+         # 2️⃣ **Normalized Horizontal Bar Chart (Matplotlib)**
+         normalized_predictions = predictions_array / predictions_array.sum()
+
+         fig, ax = plt.subplots(figsize=(8, 2))
+         left = 0
+         for i in range(len(normalized_predictions)):
+             ax.barh(0, normalized_predictions[i], color=plt.cm.tab10(
+                 i), left=left, label=EMOTION_MOODTAG_LABELS[i])
+             left += normalized_predictions[i]
+
+         # Configure the chart
+         ax.set_xlim(0, 1)
+         ax.set_yticks([])
+         ax.set_xticks(np.arange(0, 1.1, 0.1))
+         ax.legend(loc='upper center', bbox_to_anchor=(
+             0.5, -0.15), ncol=len(EMOTION_MOODTAG_LABELS))
+         plt.title("Emotion Mood-tags Prediction Distribution")
+
+         # Display in Streamlit
+         st.pyplot(fig)
+
+         progress_bar.empty()
+
+
+ if __name__ == "__main__":
+     show_emotion_analysis()
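As a quick reference for the output format: the stage-2 predict functions return a (1, 7) array of scores clipped to [0, 1], in the order of EMOTION_MOODTAG_LABELS above. A minimal sketch of pairing scores with labels (the score values here are made up for illustration):

    import numpy as np

    EMOTION_MOODTAG_LABELS = ["anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise"]

    predictions = np.array([[0.62, 0.05, 0.10, 0.01, 0.15, 0.30, 0.02]])  # dummy scores
    scores = predictions.squeeze()
    # Print the labels ranked by predicted score, highest first.
    for label, score in sorted(zip(EMOTION_MOODTAG_LABELS, scores), key=lambda p: -p[1]):
        print(f"{label}: {score:.2f}")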
emotionMoodtag_analysis/hmv_cfg_base_stage2/__init__.py ADDED
File without changes
{sentiment_analysis/hmv_cfg_base_stage1 → emotionMoodtag_analysis/hmv_cfg_base_stage2}/imports.py RENAMED
@@ -1,25 +1,25 @@ (file renamed; all 25 lines re-emitted unchanged, line-ending churn only, shown once below)
  import os
  import sys

  sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))

  import streamlit as st
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel, DebertaV2Tokenizer, DebertaV2ForSequenceClassification, DebertaV2Model
  # import torch
  import numpy as np
  import matplotlib.pyplot as plt
  import plotly.express as px
  import pandas as pd
  import json
  import gc
  import psutil
  import importlib
  import importlib.util
  import asyncio
  # import pytorch_lightning as pl

  import safetensors
  from safetensors import load_file, save_file
  import json
  import huggingface_hub
  from huggingface_hub import hf_hub_download
emotionMoodtag_analysis/hmv_cfg_base_stage2/model1.py ADDED
@@ -0,0 +1,89 @@
+ import os
+ import sys
+
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
+
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+ CONFIG_STAGE2 = os.path.join(BASE_DIR, "..", "config", "stage2_models.json")
+
+ import torch
+ import torch.nn as nn
+ from imports import *
+ import torch.nn.functional as F
+
+
+ MODEL_OPTIONS = {
+     "1": {
+         "name": "DeBERTa v3 Base for Sequence Classification",
+         "type": "hf_automodel_finetuned_dbt3",
+         "module_path": "hmv_cfg_base_stage2.model1",
+         "hf_location": "tachygraphy-microtrext-norm-org/DeBERTa-v3-seqClassfication-LV2-EmotionMoodtags-Batch8",
+         "tokenizer_class": "DebertaV2Tokenizer",
+         "model_class": "DebertaV2ForSequenceClassification",
+         "problem_type": "regression",
+         "base_model": "microsoft/deberta-v3-base",
+         "base_model_class": "DebertaV2ForSequenceClassification",
+         "num_labels": 7,
+         "device": "cpu",
+         "load_function": "load_model",
+         "predict_function": "predict"
+     }
+ }
+
+
+ model_key = "1"
+ model_info = MODEL_OPTIONS[model_key]
+ hf_location = model_info["hf_location"]
+
+ tokenizer_class = globals()[model_info["tokenizer_class"]]
+ model_class = globals()[model_info["model_class"]]
+
+
+ @st.cache_resource
+ def load_model():
+     tokenizer = tokenizer_class.from_pretrained(hf_location)
+     print("Loading model 1")
+     model = model_class.from_pretrained(hf_location,
+                                         problem_type=model_info["problem_type"],
+                                         num_labels=model_info["num_labels"]
+                                         )
+     print("Model 1 loaded")
+
+     return model, tokenizer
+
+
+ def predict(text, model, tokenizer, device, max_len=128):
+     # Tokenize and pad the input text
+     inputs = tokenizer(
+         text,
+         add_special_tokens=True,
+         padding=True,
+         truncation=False,
+         return_tensors="pt",
+         return_token_type_ids=False,
+     ).to(device)  # Move input tensors to the correct device
+
+     with torch.no_grad():
+         outputs = model(**inputs)
+
+     # probabilities = outputs.logits.cpu().numpy()
+
+     # probabilities = torch.relu(outputs.logits)
+     # probabilities = torch.clamp(torch.tensor(probabilities), min=0.00000, max=1.00000).cpu().numpy()
+     # probabilities /= probabilities.sum()
+     # probabilities = probabilities.cpu().numpy()
+
+     # predictions = outputs.logits.cpu().numpy()
+
+     relu_logits = F.relu(outputs.logits)
+     clipped_logits = torch.clamp(relu_logits, max=1.00000000, min=0.00000000)
+     predictions = clipped_logits.cpu().numpy()
+
+     return predictions
+
+
+ if __name__ == "__main__":
+     model, tokenizer = load_model()
+     print("Model and tokenizer loaded successfully.")
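The post-processing in predict() above is a clipped ReLU rather than a sigmoid or softmax: negative logits collapse to 0 and anything above 1 is capped, which fits the registry's "problem_type": "regression" where each label score is trained toward [0, 1]. A standalone sketch of the same transform on dummy logits:

    import torch
    import torch.nn.functional as F

    logits = torch.tensor([[-0.4, 0.2, 1.7, 0.9, -1.1, 0.3, 0.05]])  # dummy values
    # Same transform as predict(): ReLU, then clamp to the [0, 1] range.
    clipped = torch.clamp(F.relu(logits), min=0.0, max=1.0)
    print(clipped)  # tensor([[0.0000, 0.2000, 1.0000, 0.9000, 0.0000, 0.3000, 0.0500]])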
emotionMoodtag_analysis/hmv_cfg_base_stage2/model2.py ADDED
@@ -0,0 +1,163 @@
+ from safetensors.torch import save_file, safe_open
+ from huggingface_hub import hf_hub_download
+ import json
+ import safetensors
+ from transformers import DebertaV2Model, DebertaV2Tokenizer
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import torch
+ import joblib
+ import importlib.util
+ from imports import *
+ import os
+ import sys
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
+
+
+ # from safetensors import load_file, save_file
+
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+ CONFIG_STAGE2 = os.path.join(BASE_DIR, "..", "config", "stage2_models.json")
+
+
+ MODEL_OPTIONS = {
+     "2": {
+         "name": "DeBERTa v3 Base Custom Model with minimal Regularized Loss",
+         "type": "db3_base_custom",
+         "module_path": "hmv_cfg_base_stage2.model2",
+         "hf_location": "tachygraphy-microtrext-norm-org/DeBERTa-v3-Base-Cust-LV2-EmotionMoodtags-minRegLoss",
+         "tokenizer_class": "DebertaV2Tokenizer",
+         "model_class": "EmotionModel",
+         "problem_type": "regression",
+         "base_model": "microsoft/deberta-v3-base",
+         "base_model_class": "DebertaV2Model",
+         "num_labels": 7,
+         "device": "cpu",
+         "load_function": "load_model",
+         "predict_function": "predict"
+     }
+ }
+
+
+ class EmotionModel(nn.Module):
+     def __init__(self, roberta_model, n_classes=7, dropout_rate=0.2):
+         super(EmotionModel, self).__init__()
+
+         self.roberta = roberta_model
+         self.drop = nn.Dropout(p=dropout_rate)
+         self.fc1 = nn.Linear(self.roberta.config.hidden_size, 512)
+         self.relu = nn.ReLU()
+         self.fc2 = nn.Linear(512, 256)
+         self.out = nn.Linear(256, n_classes)
+
+     def forward(self, input_ids, attention_mask):
+         output = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
+         # hidden_states = output.last_hidden_state
+
+         # Extract the [CLS] token representation (first token in the sequence)
+         cls_token_state = output.last_hidden_state[:, 0, :]
+         output = self.drop(cls_token_state)
+         output = self.relu(self.fc1(output))
+         output = self.drop(output)
+         output = self.relu(self.fc2(output))
+         # output = self.drop(output)
+         return self.out(output)
+
+     def save_pretrained(self, save_directory):
+         os.makedirs(save_directory, exist_ok=True)
+
+         model_weights = self.state_dict()
+         save_file(model_weights, os.path.join(save_directory, "model.safetensors"))
+
+         config = {
+             "hidden_size": self.roberta.config.hidden_size,
+             "num_labels": self.out.out_features,
+             "dropout_rate": self.drop.p,
+             "roberta_model": self.roberta.name_or_path,  # ✅ Save model name
+         }
+         with open(os.path.join(save_directory, "config.json"), "w") as f:
+             json.dump(config, f)
+
+         print(f"Model saved in {save_directory}")
+
+     @classmethod
+     @st.cache_resource
+     def load_pretrained(cls, model_path_or_repo):
+         # """Loads and caches the model (RoBERTa + EmotionModel) only when called."""
+         print(f"Loading model from {model_path_or_repo}...")
+
+         model_config_path = hf_hub_download(model_path_or_repo, "config.json")
+         model_weights_path = hf_hub_download(model_path_or_repo, "model.safetensors")
+
+         with open(model_config_path, "r") as f:
+             config = json.load(f)
+
+         print(f"Loading RoBERTa model: {config['roberta_model']}...")
+         roberta_model = DebertaV2Model.from_pretrained(
+             config["roberta_model"],
+         )
+
+         model = cls(
+             roberta_model, n_classes=config["num_labels"], dropout_rate=config["dropout_rate"]
+         )
+
+         with safe_open(model_weights_path, framework="pt", device="cpu") as f:
+             model_weights = {key: f.get_tensor(key) for key in f.keys()}
+         model.load_state_dict(model_weights)
+
+         print(f"Model loaded from {model_path_or_repo}")
+         return model
+
+
+ model_key = "2"
+ model_info = MODEL_OPTIONS[model_key]
+ hf_location = model_info["hf_location"]
+ base_model = model_info["base_model"]
+
+ tokenizer_class = globals()[model_info["tokenizer_class"]]
+ model_class = globals()[model_info["model_class"]]
+
+
+ @st.cache_resource
+ def load_model():
+     tokenizer = tokenizer_class.from_pretrained(hf_location)
+     print("Loading model 2")
+     model = EmotionModel.load_pretrained(hf_location)
+     print("Model 2 loaded")
+     # model.eval()
+
+     return model, tokenizer
+
+
+ def predict(text, model, tokenizer, device, max_len=128):
+     # model.eval()  # Set model to evaluation mode
+
+     # Tokenize and pad the input text
+     inputs = tokenizer(
+         text,
+         add_special_tokens=True,
+         padding=True,
+         truncation=False,
+         return_tensors="pt",
+         return_token_type_ids=False,
+     ).to(device)  # Move input tensors to the correct device
+
+     with torch.no_grad():
+         outputs = model(**inputs)
+
+     # Apply sigmoid activation (for BCEWithLogitsLoss)
+     # probabilities = torch.sigmoid(outputs).cpu().numpy()
+     # probabilities = outputs.cpu().numpy()
+
+     relu_logits = F.relu(outputs)
+     clipped_logits = torch.clamp(relu_logits, max=1.00000000, min=0.00000000)
+     probabilities = clipped_logits.cpu().numpy()
+
+     return probabilities
+
+
+ if __name__ == "__main__":
+     model, tokenizer = load_model()
+     print("Model and tokenizer loaded successfully.")
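A minimal end-to-end sketch of this module's load/predict pair, assuming the Hugging Face repo above is reachable and the module is run from within hmv_cfg_base_stage2 so that `from imports import *` resolves; the sample sentence is illustrative:

    import torch

    model, tokenizer = load_model()   # cached via st.cache_resource
    device = torch.device("cpu")      # matches MODEL_OPTIONS["2"]["device"]
    scores = predict("im so happy today!!", model, tokenizer, device)
    print(scores.shape)               # (1, 7): one clipped score per emotion label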
emotion_analysis.py DELETED
@@ -1,9 +0,0 @@
- import streamlit as st
-
- def show_emotion_analysis():
-     st.title("Stage 2: Emotion Mood-tag Analysis")
-     st.write("This section will handle emotion detection.")
-     # Add your emotion detection code here
-
- if __name__ == "__main__":
-     show_emotion_analysis()
poetry.lock CHANGED
@@ -2352,14 +2352,14 @@ files = [

  [[package]]
  name = "lightning-utilities"
- version = "0.14.1"
+ version = "0.14.2"
  description = "Lightning toolbox for across the our ecosystem."
  optional = false
  python-versions = ">=3.9"
  groups = ["main"]
  files = [
-     {file = "lightning_utilities-0.14.1-py3-none-any.whl", hash = "sha256:badc40a70d8e933706aa0b4f63b12392dbf8208728743a40b55edb5e81797311"},
-     {file = "lightning_utilities-0.14.1.tar.gz", hash = "sha256:9fb56c76dc07a46c075e1f78594fcb161091eae5944b34c2b43258d23c202791"},
+     {file = "lightning_utilities-0.14.2-py3-none-any.whl", hash = "sha256:da791fcaa731f651ec76a1a3b12994ed05af4d6841f2e78760233552709ef05d"},
+     {file = "lightning_utilities-0.14.2.tar.gz", hash = "sha256:0466a4f1bb9dff1c7190d4c7a32d1a8a1109f94fb816931efe8fb8b12bb0ab8d"},
  ]

  [package.dependencies]

@@ -4146,23 +4146,23 @@ files = [

  [[package]]
  name = "protobuf"
- version = "5.29.3"
+ version = "5.29.4"
  description = ""
  optional = false
  python-versions = ">=3.8"
  groups = ["main"]
  files = [
-     {file = "protobuf-5.29.3-cp310-abi3-win32.whl", hash = "sha256:3ea51771449e1035f26069c4c7fd51fba990d07bc55ba80701c78f886bf9c888"},
-     {file = "protobuf-5.29.3-cp310-abi3-win_amd64.whl", hash = "sha256:a4fa6f80816a9a0678429e84973f2f98cbc218cca434abe8db2ad0bffc98503a"},
-     {file = "protobuf-5.29.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:a8434404bbf139aa9e1300dbf989667a83d42ddda9153d8ab76e0d5dcaca484e"},
-     {file = "protobuf-5.29.3-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:daaf63f70f25e8689c072cfad4334ca0ac1d1e05a92fc15c54eb9cf23c3efd84"},
-     {file = "protobuf-5.29.3-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:c027e08a08be10b67c06bf2370b99c811c466398c357e615ca88c91c07f0910f"},
-     {file = "protobuf-5.29.3-cp38-cp38-win32.whl", hash = "sha256:84a57163a0ccef3f96e4b6a20516cedcf5bb3a95a657131c5c3ac62200d23252"},
-     {file = "protobuf-5.29.3-cp38-cp38-win_amd64.whl", hash = "sha256:b89c115d877892a512f79a8114564fb435943b59067615894c3b13cd3e1fa107"},
-     {file = "protobuf-5.29.3-cp39-cp39-win32.whl", hash = "sha256:0eb32bfa5219fc8d4111803e9a690658aa2e6366384fd0851064b963b6d1f2a7"},
-     {file = "protobuf-5.29.3-cp39-cp39-win_amd64.whl", hash = "sha256:6ce8cc3389a20693bfde6c6562e03474c40851b44975c9b2bf6df7d8c4f864da"},
-     {file = "protobuf-5.29.3-py3-none-any.whl", hash = "sha256:0a18ed4a24198528f2333802eb075e59dea9d679ab7a6c5efb017a59004d849f"},
-     {file = "protobuf-5.29.3.tar.gz", hash = "sha256:5da0f41edaf117bde316404bad1a486cb4ededf8e4a54891296f648e8e076620"},
+     {file = "protobuf-5.29.4-cp310-abi3-win32.whl", hash = "sha256:13eb236f8eb9ec34e63fc8b1d6efd2777d062fa6aaa68268fb67cf77f6839ad7"},
+     {file = "protobuf-5.29.4-cp310-abi3-win_amd64.whl", hash = "sha256:bcefcdf3976233f8a502d265eb65ea740c989bacc6c30a58290ed0e519eb4b8d"},
+     {file = "protobuf-5.29.4-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:307ecba1d852ec237e9ba668e087326a67564ef83e45a0189a772ede9e854dd0"},
+     {file = "protobuf-5.29.4-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:aec4962f9ea93c431d5714ed1be1c93f13e1a8618e70035ba2b0564d9e633f2e"},
+     {file = "protobuf-5.29.4-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:d7d3f7d1d5a66ed4942d4fefb12ac4b14a29028b209d4bfb25c68ae172059922"},
+     {file = "protobuf-5.29.4-cp38-cp38-win32.whl", hash = "sha256:1832f0515b62d12d8e6ffc078d7e9eb06969aa6dc13c13e1036e39d73bebc2de"},
+     {file = "protobuf-5.29.4-cp38-cp38-win_amd64.whl", hash = "sha256:476cb7b14914c780605a8cf62e38c2a85f8caff2e28a6a0bad827ec7d6c85d68"},
+     {file = "protobuf-5.29.4-cp39-cp39-win32.whl", hash = "sha256:fd32223020cb25a2cc100366f1dedc904e2d71d9322403224cdde5fdced0dabe"},
+     {file = "protobuf-5.29.4-cp39-cp39-win_amd64.whl", hash = "sha256:678974e1e3a9b975b8bc2447fca458db5f93a2fb6b0c8db46b6675b5b5346812"},
+     {file = "protobuf-5.29.4-py3-none-any.whl", hash = "sha256:3fde11b505e1597f71b875ef2fc52062b6a9740e5f7c8997ce878b6009145862"},
+     {file = "protobuf-5.29.4.tar.gz", hash = "sha256:4f1dfcd7997b31ef8f53ec82781ff434a28bf71d9102ddde14d076adcfc78c99"},
  ]

  [[package]]
pyproject.toml CHANGED
@@ -1,6 +1,6 @@
  [project]
  name = "tachygraphy-microtext-analysis-and-normalization"
- version = "2025.03.18.post5"
+ version = "2025.03.20.post1"
  description = ""
  authors = [
      { name = "Archisman Karmakar", email = "[email protected]" },
pyprojectOLD.toml CHANGED
@@ -1,6 +1,7 @@
  [project]
  name = "tachygraphy-microtext-analysis-and-normalization"
- version = "2025.03.18.post4_3"
+ version = "2025.03.18.post5"
+ # version = "2025.03.18.post4_3"
  # version = "2025.03.18.post3"
  # version = "2025.03.18.post2"
  # version = "2025.03.18.post1"
requirements.txt CHANGED
@@ -87,7 +87,7 @@ keras==3.9.0 ; python_version >= "3.12" and python_version < "4.0"
  keyring==25.6.0 ; python_version >= "3.12" and python_version < "4.0"
  kiwisolver==1.4.8 ; python_version >= "3.12" and python_version < "4.0"
  libclang==18.1.1 ; python_version >= "3.12" and python_version < "4.0"
- lightning-utilities==0.14.1 ; python_version >= "3.12" and python_version < "4.0"
+ lightning-utilities==0.14.2 ; python_version >= "3.12" and python_version < "4.0"
  locket==1.0.0 ; python_version >= "3.12" and python_version < "4.0"
  lxml==5.3.1 ; python_version >= "3.12" and python_version < "4.0"
  markdown-it-py==3.0.0 ; python_version >= "3.12" and python_version < "4.0"

@@ -145,7 +145,7 @@ portalocker==3.1.1 ; python_version >= "3.12" and python_version < "4.0"
  prometheus-client==0.21.1 ; python_version >= "3.12" and python_version < "4.0"
  prompt-toolkit==3.0.50 ; python_version >= "3.12" and python_version < "4.0"
  propcache==0.3.0 ; python_version >= "3.12" and python_version < "4.0"
- protobuf==5.29.3 ; python_version >= "3.12" and python_version < "4.0"
+ protobuf==5.29.4 ; python_version >= "3.12" and python_version < "4.0"
  psutil==7.0.0 ; python_version >= "3.12" and python_version < "4.0"
  ptyprocess==0.7.0 ; python_version >= "3.12" and python_version < "4.0" and sys_platform != "win32" and sys_platform != "emscripten"
  pure-eval==0.2.3 ; python_version >= "3.12" and python_version < "4.0"
{sentiment_analysis → sentimentPolarity_analysis}/__init__.py RENAMED
File without changes
{sentiment_analysis → sentimentPolarity_analysis}/config/stage1_models.json RENAMED
@@ -1,62 +1,62 @@ (file renamed; all 62 lines re-emitted unchanged, line-ending churn only, shown once below)
  {
      "1": {
          "name": "DeBERTa v3 Base for Sequence Classification",
          "type": "hf_automodel_finetuned_dbt3",
          "module_path": "hmv_cfg_base_stage1.model1",
          "hf_location": "tachygraphy-microtrext-norm-org/DeBERTa-v3-seqClassfication-LV1-SentimentPolarities-Batch8",
          "tokenizer_class": "DebertaV2Tokenizer",
          "model_class": "DebertaV2ForSequenceClassification",
          "problem_type": "multi_label_classification",
          "base_model": "microsoft/deberta-v3-base",
          "base_model_class": "DebertaV2ForSequenceClassification",
          "num_labels": 3,
          "device": "cpu",
          "load_function": "load_model",
          "predict_function": "predict"
      },
      "2": {
          "name": "DeBERTa v3 Base Custom Model with minimal Regularized Loss",
          "type": "db3_base_custom",
          "module_path": "hmv_cfg_base_stage1.model2",
          "hf_location": "tachygraphy-microtrext-norm-org/DeBERTa-v3-Base-Cust-LV1-SentimentPolarities-minRegLoss",
          "tokenizer_class": "DebertaV2Tokenizer",
          "model_class": "SentimentModel",
          "problem_type": "multi_label_classification",
          "base_model": "microsoft/deberta-v3-base",
          "base_model_class": "DebertaV2Model",
          "num_labels": 3,
          "device": "cpu",
          "load_function": "load_model",
          "predict_function": "predict"
      },
      "3": {
          "name": "BERT Base Uncased Custom Model",
          "type": "bert_base_uncased_custom",
          "module_path": "hmv_cfg_base_stage1.model3",
          "hf_location": "https://huggingface.co/tachygraphy-microtrext-norm-org/BERT-LV1-SentimentPolarities/resolve/main/saved_weights.pt",
          "tokenizer_class": "AutoTokenizer",
          "model_class": "BERT_architecture",
          "problem_type": "multi_label_classification",
          "base_model": "bert-base-uncased",
          "base_model_class": "AutoModel",
          "num_labels": 3,
          "device": "cpu",
          "load_function": "load_model",
          "predict_function": "predict"
      },
      "4": {
          "name": "LSTM Custom Model",
          "type": "lstm_uncased_custom",
          "module_path": "hmv_cfg_base_stage1.model4",
          "hf_location": "tachygraphy-microtrext-norm-org/LSTM-LV1-SentimentPolarities",
          "tokenizer_class": "",
          "model_class": "",
          "problem_type": "multi_label_classification",
          "base_model": "",
          "base_model_class": "",
          "num_labels": 3,
          "device": "cpu",
          "load_function": "load_model",
          "predict_function": "predict"
      }
  }
{sentiment_analysis → sentimentPolarity_analysis}/hmv_cfg_base_stage1/__init__.py RENAMED
@@ -1 +1 @@ (line re-emitted unchanged, line-ending churn only)
  # from . import model1
sentimentPolarity_analysis/hmv_cfg_base_stage1/imports.py ADDED
@@ -0,0 +1,25 @@
+ import os
+ import sys
+
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
+
+ import streamlit as st
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel, DebertaV2Tokenizer, DebertaV2ForSequenceClassification, DebertaV2Model
+ # import torch
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import plotly.express as px
+ import pandas as pd
+ import json
+ import gc
+ import psutil
+ import importlib
+ import importlib.util
+ import asyncio
+ # import pytorch_lightning as pl
+
+ import safetensors
+ from safetensors import load_file, save_file
+ import json
+ import huggingface_hub
+ from huggingface_hub import hf_hub_download
{sentiment_analysis → sentimentPolarity_analysis}/hmv_cfg_base_stage1/model1.py RENAMED
@@ -1,85 +1,85 @@ (file renamed; all 85 lines re-emitted unchanged, line-ending churn only, shown once below)
  import os
  import sys

  sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))

  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
  CONFIG_STAGE1 = os.path.join(BASE_DIR, "..", "config", "stage1_models.json")

  import torch
  import torch.nn as nn
  from imports import *
  import torch.nn.functional as F


  MODEL_OPTIONS = {
      "1": {
          "name": "DeBERTa v3 Base for Sequence Classification",
          "type": "hf_automodel_finetuned_dbt3",
          "module_path": "hmv_cfg_base_stage1.model1",
          "hf_location": "tachygraphy-microtrext-norm-org/DeBERTa-v3-seqClassfication-LV1-SentimentPolarities-Batch8",
          "tokenizer_class": "DebertaV2Tokenizer",
          "model_class": "DebertaV2ForSequenceClassification",
          "problem_type": "multi_label_classification",
          "base_model": "microsoft/deberta-v3-base",
          "base_model_class": "DebertaV2ForSequenceClassification",
          "num_labels": 3,
          "device": "cpu",
          "load_function": "load_model",
          "predict_function": "predict"
      }
  }


  model_key = "1"
  model_info = MODEL_OPTIONS[model_key]
  hf_location = model_info["hf_location"]

  tokenizer_class = globals()[model_info["tokenizer_class"]]
  model_class = globals()[model_info["model_class"]]


  @st.cache_resource
  def load_model():
      tokenizer = tokenizer_class.from_pretrained(hf_location)
      print("Loading model 1")
      model = model_class.from_pretrained(hf_location,
                                          problem_type=model_info["problem_type"],
                                          num_labels=model_info["num_labels"]
                                          )
      print("Model 1 loaded")

      return model, tokenizer


  def predict(text, model, tokenizer, device, max_len=128):
      # Tokenize and pad the input text
      inputs = tokenizer(
          text,
          add_special_tokens=True,
          padding=True,
          truncation=False,
          return_tensors="pt",
          return_token_type_ids=False,
      ).to(device)  # Move input tensors to the correct device

      with torch.no_grad():
          outputs = model(**inputs)

      # probabilities = outputs.logits.cpu().numpy()

      # probabilities = torch.relu(outputs.logits)
      # probabilities = torch.clamp(torch.tensor(probabilities), min=0.00000, max=1.00000).cpu().numpy()
      # probabilities /= probabilities.sum()
      # probabilities = probabilities.cpu().numpy()

      predictions = torch.sigmoid(outputs.logits).cpu().numpy()

      return predictions


  if __name__ == "__main__":
      model, tokenizer = load_model()
      print("Model and tokenizer loaded successfully.")
{sentiment_analysis → sentimentPolarity_analysis}/hmv_cfg_base_stage1/model2.py RENAMED
@@ -11,7 +11,7 @@ import joblib

  import torch
  import torch.nn as nn
- import torch.functional as F
+ import torch.nn.functional as F
  from transformers import DebertaV2Model, DebertaV2Tokenizer
  import safetensors
  # from safetensors import load_file, save_file

@@ -78,7 +78,7 @@ class SentimentModel(nn.Module):
      @classmethod
      @st.cache_resource
      def load_pretrained(cls, model_path_or_repo):
-         """Loads and caches the model (RoBERTa + SentimentModel) only when called."""
+         # """Loads and caches the model (RoBERTa + SentimentModel) only when called."""
          print(f"Loading model from {model_path_or_repo}...")

          model_config_path = hf_hub_download(model_path_or_repo, "config.json")
{sentiment_analysis → sentimentPolarity_analysis}/hmv_cfg_base_stage1/model3.py RENAMED
File without changes
{sentiment_analysis → sentimentPolarity_analysis}/hmv_cfg_base_stage1/model4.py RENAMED
File without changes
{sentiment_analysis → sentimentPolarity_analysis}/sentiment_analysis_main.py RENAMED
File without changes
sentiment_analysis/hmv_cfg_base_stage1/__pycache__/__init__.cpython-310.pyc DELETED
Binary file (186 Bytes)
 
sentiment_analysis/hmv_cfg_base_stage1/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (284 Bytes)
 
sentiment_analysis/hmv_cfg_base_stage1/__pycache__/model1.cpython-310.pyc DELETED
Binary file (1.95 kB)
 
sentiment_analysis/hmv_cfg_base_stage1/__pycache__/model1.cpython-312.pyc DELETED
Binary file (2.96 kB)