Archisman Karmakar committed
Commit 19dcfe5 · 1 Parent(s): e999632

2025.03.25.post1
.github/workflows/deploy_to_HF_space_DIRECT.yml CHANGED
@@ -76,8 +76,8 @@ jobs:
         env:
           HF_READ_WRITE_TOKEN: ${{ secrets.HF_READ_WRITE_TOKEN }}
         run: |
-          git remote add space https://huggingface.co/spaces/tachygraphy-microtext-normalization-iemk/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder
-          git push --force https://${{ secrets.HF_USERNAME }}:${{ secrets.HF_READ_WRITE_TOKEN }}@huggingface.co/spaces/tachygraphy-microtext-normalization-iemk/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder
+          git remote add space https://huggingface.co/spaces/Tachygraphy-Microtext-Normalization-IEMK25/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder
+          git push --force https://${{ secrets.HF_USERNAME }}:${{ secrets.HF_READ_WRITE_TOKEN }}@huggingface.co/spaces/Tachygraphy-Microtext-Normalization-IEMK25/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder



@@ -214,7 +214,7 @@ jobs:

       # - name: Clone Hugging Face Space repository
       #   run: |
-      #     git clone https://HF_USERNAME:${{ secrets.HF_TOKEN }}@huggingface.co/spaces/tachygraphy-microtext-normalization-iemk/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder hf-space
+      #     git clone https://HF_USERNAME:${{ secrets.HF_TOKEN }}@huggingface.co/spaces/Tachygraphy-Microtext-Normalization-IEMK25/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder hf-space

       # - name: Copy repository files to HF Space
       #   run: |
@@ -227,7 +227,7 @@ jobs:
       # # run: |
       # #   cd hf-space
       # #   git init
-      # #   git remote add origin https://huggingface.co/spaces/tachygraphy-microtext-normalization-iemk/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder
+      # #   git remote add origin https://huggingface.co/spaces/Tachygraphy-Microtext-Normalization-IEMK25/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder
       # #   git checkout -b main
       # #   git add .
       # #   git commit -m "Update deployment via GitHub Actions"
@@ -240,7 +240,7 @@ jobs:
       #   git init
       #   # Remove existing origin if it exists
       #   git remote remove origin || true
-      #   git remote add origin https://huggingface.co/spaces/tachygraphy-microtext-normalization-iemk/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder
+      #   git remote add origin https://huggingface.co/spaces/Tachygraphy-Microtext-Normalization-IEMK25/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder
      #   git checkout -b main
      #   git add .
      #   git commit -m "Update deployment via GitHub Actions"
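The renamed deploy step still force-pushes over HTTPS with the token embedded in the remote URL. For comparison, a minimal sketch of the same deployment through the huggingface_hub Python client (the repo id is the Space targeted above; reading HF_READ_WRITE_TOKEN from the environment mirrors the workflow's env block):

    # Sketch: upload the checkout to the Space via the Hub API instead of
    # `git push --force`. Assumes HF_READ_WRITE_TOKEN is set in the
    # environment, as in the workflow's env block above.
    import os
    from huggingface_hub import HfApi

    api = HfApi(token=os.environ["HF_READ_WRITE_TOKEN"])
    api.upload_folder(
        folder_path=".",  # root of the repository checkout
        repo_id="Tachygraphy-Microtext-Normalization-IEMK25/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder",
        repo_type="space",  # the target is a Space, not a model repo
        commit_message="Deploy via GitHub Actions",
    )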
.github/workflows/dfploy_to_HF_space_DOCKER CHANGED
@@ -28,7 +28,7 @@ jobs:


       - name: Build the Docker image
-        run: docker build -t huggingface.co/spaces/tachygraphy-microtext-normalization-iemk/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder .
+        run: docker build -t huggingface.co/spaces/Tachygraphy-Microtext-Normalization-IEMK25/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder .

       - name: Push the Docker image to Hugging Face
-        run: docker push huggingface.co/spaces/tachygraphy-microtext-normalization-iemk/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder
+        run: docker push huggingface.co/spaces/Tachygraphy-Microtext-Normalization-IEMK25/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder
app_main_hf.py CHANGED
@@ -50,6 +50,8 @@ from emotionMoodtag_analysis.emotion_analysis_main import show_emotion_analysis
 from sentimentPolarity_analysis.sentiment_analysis_main import show_sentiment_analysis
 from transformation_and_Normalization.transformationNormalization_main import transform_and_normalize
 from dashboard import show_dashboard
+from stacked_stacking_stages.stacking_stages import show_stacking_stages
+from data_collection_form.data_collector import show_data_collector


 # from text_transformation import show_text_transformation
@@ -138,8 +140,8 @@ def main():

     selection = option_menu(
         menu_title=None,  # No title for a sleek look
-        options=["Dashboard", "Stage 1: Sentiment Polarity Analysis", "Stage 2: Emotion Mood-tag Analysis", "Stage 3: Text Transformation & Normalization"],
-        icons=['house', 'diagram-3', "snow", 'activity'],
+        options=["Dashboard", "Stage 1: Sentiment Polarity Analysis", "Stage 2: Emotion Mood-tag Analysis", "Stage 3: Text Transformation & Normalization", "Stacked Stages", "Data Correction & Collection"],
+        icons=['house', 'diagram-3', "snow", 'activity', 'collection', 'database-up'],
         menu_icon="cast",  # Main menu icon
         default_index=0,  # Highlight the first option
         orientation="vertical",
@@ -210,6 +212,18 @@ def main():
         transform_and_normalize()
         # st.write("This section is under development.")

+    elif selection == "Stacked Stages":
+        # st.title("Stacked Stages")
+        # st.cache_resource.clear()
+        # free_memory()
+        show_stacking_stages()
+
+    elif selection == "Data Correction & Collection":
+        # st.title("Data Correction & Collection")
+        # st.cache_resource.clear()
+        # free_memory()
+        show_data_collector()
+


     # st.sidebar.title("Navigation")
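The two new pages slot into the app's select-and-dispatch navigation. A self-contained sketch of that pattern (the handler bodies are placeholders standing in for the real page modules):

    # Sketch of the option_menu navigation used in app_main_hf.py.
    import streamlit as st
    from streamlit_option_menu import option_menu

    def show_dashboard():
        st.write("dashboard page")

    def show_stacking_stages():
        st.write("stacked stages page")

    selection = option_menu(
        menu_title=None,
        options=["Dashboard", "Stacked Stages"],
        icons=["house", "collection"],  # Bootstrap icon names
        default_index=0,
        orientation="vertical",
    )

    # option_menu returns the selected label; dispatch on it.
    if selection == "Dashboard":
        show_dashboard()
    elif selection == "Stacked Stages":
        show_stacking_stages()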
dashboard.py CHANGED
@@ -44,8 +44,102 @@ def free_memory():
         print(f"❌ Cache cleanup error: {e}")


+def create_sample_example1():
+    st.write("""
+    ## Sample Example 1
+    """)
+    graph = """
+    digraph {
+        // Global graph settings with explicit DPI
+        graph [bgcolor="white", rankdir=TB, splines=true, nodesep=0.8, ranksep=0.8];
+        node [shape=box, style="rounded,filled", fontname="Helvetica", fontsize=9, margin="0.15,0.1"];
+
+        // Define nodes with custom colors
+        Input [label="Input:\nbruh, floods in Kerala, rescue ops non-stop 🚁", fillcolor="#ffe6de", fontcolor="#000000"];
+        Output [label="Output:\nBrother, the floods in Kerala are severe,\nand rescue operations are ongoing continuously.", fillcolor="#ffe6de", fontcolor="#000000"];
+        Sentiment [label="Sentiment:\nNEUTRAL", fillcolor="#ecdeff", fontcolor="black"];
+
+        // Emotion nodes with a uniform style
+        Anger [label="Anger: 0.080178231", fillcolor="#deffe1", fontcolor="black"];
+        Disgust [label="Disgust: 0.015257259", fillcolor="#deffe1", fontcolor="black"];
+        Fear [label="Fear: 0.601871967", fillcolor="#deffe1", fontcolor="black"];
+        Joy [label="Joy: 0.00410547", fillcolor="#deffe1", fontcolor="black"];
+        NeutralE [label="Neutral: 0.0341026", fillcolor="#deffe1", fontcolor="black"];
+        Sadness [label="Sadness: 0.245294735", fillcolor="#deffe1", fontcolor="black"];
+        Surprise [label="Surprise: 0.019189769", fillcolor="#deffe1", fontcolor="black"];
+
+        // Define edges with a consistent style
+        edge [color="#7a7a7a", penwidth=3];
+
+        // Establish the tree structure
+        Input -> Output;
+        Input -> Sentiment;
+        Sentiment -> Anger;
+        Sentiment -> Disgust;
+        Sentiment -> Fear;
+        Sentiment -> Joy;
+        Sentiment -> NeutralE;
+        Sentiment -> Sadness;
+        Sentiment -> Surprise;
+    }
+    """
+    st.graphviz_chart(graph)
+
+
+def create_sample_example2():
+    st.write("""
+    ## Sample Example 2
+    """)
+    graph = """
+    digraph {
+        // Global graph settings
+        graph [bgcolor="white", rankdir=TB, splines=true, nodesep=0.8, ranksep=0.8];
+        node [shape=box, style="rounded,filled", fontname="Helvetica", fontsize=9, margin="0.15,0.1"];
+
+        // Define nodes with custom colors
+        Input [label="Input:\nu rlly think all that talk means u tough? lol, when I step up, u ain't gon say sh*t", fillcolor="#ffe6de", fontcolor="black"];
+        Output [label="Output:\nyou really think all that talk makes you tough lol when i step up you are not going to say anything", fillcolor="#ffe6de", fontcolor="black"];
+        Sentiment [label="Sentiment:\nNEGATIVE", fillcolor="#ecdeff", fontcolor="black"];
+
+        // Emotion nodes with a uniform style
+        Anger [label="Anger: 0.14403291", fillcolor="#deffe1", fontcolor="black"];
+        Disgust [label="Disgust: 0.039282672", fillcolor="#deffe1", fontcolor="black"];
+        Fear [label="Fear: 0.014349542", fillcolor="#deffe1", fontcolor="black"];
+        Joy [label="Joy: 0.048965044", fillcolor="#deffe1", fontcolor="black"];
+        NeutralE [label="Neutral: 0.494852662", fillcolor="#deffe1", fontcolor="black"];
+        Sadness [label="Sadness: 0.021111647", fillcolor="#deffe1", fontcolor="black"];
+        Surprise [label="Surprise: 0.237405464", fillcolor="#deffe1", fontcolor="black"];
+
+        // Define edges with a consistent style
+        edge [color="#7a7a7a", penwidth=3];
+
+        // Establish the tree structure
+        Input -> Output;
+        Input -> Sentiment;
+        Sentiment -> Anger;
+        Sentiment -> Disgust;
+        Sentiment -> Fear;
+        Sentiment -> Joy;
+        Sentiment -> NeutralE;
+        Sentiment -> Sadness;
+        Sentiment -> Surprise;
+    }
+    """
+    st.graphviz_chart(graph)
+
+
+def create_project_overview():
+    # st.divider()
+    st.markdown("## Project Overview")
+    st.write(f"""
+    Tachygraphy—originally developed to expedite writing—has evolved over centuries. In the 1990s, it reappeared as micro-text, driving faster communication on social media with characteristics like 'Anytime, Anyplace, Anybody, and Anything (4A)'. This project focuses on the analysis and normalization of micro-text, a prevalent form of informal communication today. Its primary objective is to enhance Natural Language Processing (NLP) tasks by standardizing micro-text for better sentiment analysis, emotion analysis, and data extraction, and by normalizing it to an understandable form (4A message decoding).
+    """
+    )
+
+
 def create_footer():
-    st.divider()
+    # st.divider()
+    st.markdown("## About Us")

     # 🛠️ Layout using Streamlit columns
     col1, col2, col3 = st.columns([1, 1, 1])
@@ -90,14 +184,20 @@ def show_dashboard():
     st.write("""
    - Training Source: [GitHub @ Tachygraphy Micro-text Analysis & Normalization](https://github.com/ArchismanKarmakar/Tachygraphy-Microtext-Analysis-And-Normalization)
    - Kaggle Collections: [Kaggle @ Tachygraphy Micro-text Analysis & Normalization](https://www.kaggle.com/datasets/archismancoder/dataset-tachygraphy/data?select=Tachygraphy_MicroText-AIO-V3.xlsx)
-   - Hugging Face Org: [Hugging Face @ Tachygraphy Micro-text Analysis & Normalization](https://huggingface.co/tachygraphy-microtext-normalization-iemk)
+   - Hugging Face Org: [Hugging Face @ Tachygraphy Micro-text Analysis & Normalization](https://huggingface.co/Tachygraphy-Microtext-Normalization-IEMK25)
    - Deployment Source: [GitHub](https://github.com/ArchismanKarmakar/Tachygraphy-Microtext-Analysis-And-Normalization-Deployment-Source-HuggingFace_Streamlit_JPX14032025)
    - Streamlit Deployment: [Streamlit](https://tachygraphy-microtext.streamlit.app/)
-   - Hugging Face Space Deployment: [Hugging Face Space](https://huggingface.co/spaces/tachygraphy-microtext-normalization-iemk/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder)
+   - Hugging Face Space Deployment: [Hugging Face Space](https://huggingface.co/spaces/Tachygraphy-Microtext-Normalization-IEMK25/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder)
    """)

     create_footer()

+    create_project_overview()
+
+    create_sample_example1()
+
+    # create_sample_example2()
+

 def __main__():
     show_dashboard()
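Both sample-example helpers follow one pattern: compose a Graphviz DOT string, then hand it to st.graphviz_chart. A stripped-down sketch of that pattern (the labels here are illustrative, not taken from the dataset):

    # Sketch of the DOT-string pattern behind create_sample_example1/2.
    import streamlit as st

    def render_prediction_tree(input_text: str, output_text: str, sentiment: str) -> None:
        # Double braces escape literal { } inside the f-string; \\n becomes
        # the DOT newline escape inside node labels.
        graph = f"""
        digraph {{
            node [shape=box, style="rounded,filled", fontname="Helvetica"];
            Input [label="Input:\\n{input_text}"];
            Output [label="Output:\\n{output_text}"];
            Sentiment [label="Sentiment:\\n{sentiment}"];
            Input -> Output;
            Input -> Sentiment;
        }}
        """
        st.graphviz_chart(graph)

    render_prediction_tree("brb 5 min", "Be right back in five minutes.", "NEUTRAL")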
data_collection_form/__init__.py ADDED
File without changes
data_collection_form/data_collector.py ADDED
@@ -0,0 +1,387 @@
+import shutil
+from transformers.utils.hub import TRANSFORMERS_CACHE
+import torch
+import time
+import joblib
+import importlib.util
+from imports import *
+import os
+import sys
+import time
+import uuid
+import math
+
+from dotenv import load_dotenv
+# import psycopg2
+from supabase import create_client, Client
+from datetime import datetime, timezone
+from collections import OrderedDict
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
+
+env_path = os.path.join(os.path.dirname(__file__),
+                        "..", ".devcontainer", ".env")
+
+# from transformers.utils import move_cache_to_trash
+# from huggingface_hub import delete_cache
+
+
+# from hmv_cfg_base_stage1.model1 import load_model as load_model1
+# from hmv_cfg_base_stage1.model1 import predict as predict1
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+CONFIG_STAGE1 = os.path.join(BASE_DIR, "..", "sentimentPolarity_analysis", "config", "stage1_models.json")
+CONFIG_STAGE2 = os.path.join(BASE_DIR, "..", "emotionMoodtag_analysis", "config", "stage2_models.json")
+CONFIG_STAGE3 = os.path.join(BASE_DIR, "..", "transformation_and_Normalization", "config", "stage3_models.json")
+LOADERS_STAGE_COLLECTOR = os.path.join(BASE_DIR, "hmv_cfg_base_dlc")
+
+
+EMOTION_MOODTAG_LABELS = [
+    "anger", "disgust", "fear", "joy", "neutral",
+    "sadness", "surprise"
+]
+
+SENTIMENT_POLARITY_LABELS = [
+    "negative", "neutral", "positive"
+]
+
+
+current_model = None
+current_tokenizer = None
+
+
+# Enabling Resource caching
+
+# Load environment variables from .env
+load_dotenv()
+
+# @st.cache_resource
+# DATABASE_URL = os.environ.get("DATABASE_URL")
+
+# def get_connection():
+#     # """Establish a connection to the database."""
+#     # return psycopg2.connect(os.environ.get("DATABASE_URL"))
+#     supabase: Client = create_client(os.environ.get("SUPABASE_URL"), os.environ.get("anon_key"))
+#     return supabase
+
+# @st.cache_resource
+
+
+def load_model_config1():
+    with open(CONFIG_STAGE1, "r") as f:
+        model_data = json.load(f)
+
+    # Extract names for dropdown
+    # model_options is a dict mapping model name to its config
+    model_options = {v["name"]: v for v in model_data.values()}
+
+    # Create an OrderedDict and insert a default option at the beginning.
+    default_option = "--Select the model used for inference (if applicable)--"
+    model_options_with_default = OrderedDict()
+    model_options_with_default[default_option] = None  # or any placeholder value
+    # Add the rest of the options
+    for key, value in model_options.items():
+        model_options_with_default[key] = value
+
+    return model_data, model_options_with_default
+
+
+MODEL_DATA1, MODEL_OPTIONS1 = load_model_config1()
+
+
+def load_model_config2():
+    with open(CONFIG_STAGE2, "r") as f:
+        model_data = json.load(f)
+
+    # Extract names for dropdown
+    # model_options is a dict mapping model name to its config
+    model_options = {v["name"]: v for v in model_data.values()}
+
+    # Create an OrderedDict and insert a default option at the beginning.
+    default_option = "--Select the model used for inference (if applicable)--"
+    model_options_with_default = OrderedDict()
+    model_options_with_default[default_option] = None  # or any placeholder value
+    # Add the rest of the options
+    for key, value in model_options.items():
+        model_options_with_default[key] = value
+
+    return model_data, model_options_with_default
+
+MODEL_DATA2, MODEL_OPTIONS2 = load_model_config2()
+
+
+def load_model_config3():
+    with open(CONFIG_STAGE3, "r") as f:
+        model_data = json.load(f)
+
+    # Extract names for dropdown
+    # model_options is a dict mapping model name to its config
+    model_options = {v["name"]: v for v in model_data.values()}
+
+    # Create an OrderedDict and insert a default option at the beginning.
+    default_option = "--Select the model used for inference (if applicable)--"
+    model_options_with_default = OrderedDict()
+    model_options_with_default[default_option] = None  # or any placeholder value
+    # Add the rest of the options
+    for key, value in model_options.items():
+        model_options_with_default[key] = value
+
+    return model_data, model_options_with_default
+
+
+MODEL_DATA3, MODEL_OPTIONS3 = load_model_config3()
+
+
+# ✅ Dynamically Import Model Functions
+def import_from_module(module_name, function_name):
+    try:
+        module = importlib.import_module(module_name)
+        return getattr(module, function_name)
+    except (ModuleNotFoundError, AttributeError) as e:
+        st.error(f"❌ Import Error: {e}")
+        return None
+
+
+def free_memory():
+    # """Free up CPU & GPU memory before loading a new model."""
+    global current_model, current_tokenizer
+
+    if current_model is not None:
+        del current_model  # Delete the existing model
+        current_model = None  # Reset reference
+
+    if current_tokenizer is not None:
+        del current_tokenizer  # Delete the tokenizer
+        current_tokenizer = None
+
+    gc.collect()  # Force garbage collection for CPU memory
+
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()  # Free GPU memory
+        torch.cuda.ipc_collect()  # Clean up PyTorch GPU cache
+
+    # If running on CPU, reclaim memory using OS-level commands
+    try:
+        if torch.cuda.is_available() is False:
+            psutil.virtual_memory()  # Refresh memory stats
+    except Exception as e:
+        print(f"Memory cleanup error: {e}")
+
+    # Delete cached Hugging Face models
+    try:
+        cache_dir = TRANSFORMERS_CACHE
+        if os.path.exists(cache_dir):
+            shutil.rmtree(cache_dir)
+            print("Cache cleared!")
+    except Exception as e:
+        print(f"❌ Cache cleanup error: {e}")
+
+
+def disable_ui():
+    st.components.v1.html(
+        """
+        <style>
+        #ui-disable-overlay {
+            position: fixed;
+            top: 0;
+            left: 0;
+            width: 100vw;
+            height: 100vh;
+            background-color: rgba(200, 200, 200, 0.5);
+            z-index: 9999;
+        }
+        </style>
+        <div id="ui-disable-overlay"></div>
+        """,
+        height=0,
+        scrolling=False
+    )
+
+
+def enable_ui():
+    st.components.v1.html(
+        """
+        <script>
+        var overlay = document.getElementById("ui-disable-overlay");
+        if (overlay) {
+            overlay.parentNode.removeChild(overlay);
+        }
+        </script>
+        """,
+        height=0,
+        scrolling=False
+    )
+
+# Function to increment progress dynamically
+
+
+def get_env_variable(var_name):
+    # Try os.environ first (this covers local development and HF Spaces)
+    value = os.environ.get(var_name)
+    if value is None:
+        # Fall back to st.secrets if available (e.g., on Streamlit Cloud)
+        try:
+            value = st.secrets[var_name]
+        except KeyError:
+            value = None
+    return value
+
+
+def show_data_collector():
+    st.title("Data Correction & Collection Page")
+
+    st.error("New API keys are coming in Q2 2025 (May 1st); old API authentication will be deprecated and blocked by PostgREST.")
+    st.warning(
+        "This page is running in test mode, please be careful with your data.")
+    st.error("The database is running in debug log mode, please be careful with your data.")
+
+    with st.form("feedback_form", clear_on_submit=True, border=False):
+        st.write("### Data Collection Form")
+        st.write(
+            "#### If the predictions generated are wrong, please provide feedback to help improve the model.")
+
+        # Model selection dropdown for Stage 3
+        model_names3 = list(MODEL_OPTIONS3.keys())
+        selected_model3 = st.selectbox(
+            "Choose a model:", model_names3, key="selected_model_stage3"
+        )
+
+        # Text Feedback Inputs
+        col1, col2 = st.columns(2)
+        with col1:
+            feedback = st.text_input(
+                "Enter the correct expanded standard formal English text:",
+                key="feedback_input"
+            )
+        with col2:
+            feedback2 = st.text_input(
+                "Enter any one of the wrongly predicted texts:",
+                key="feedback_input2"
+            )
+
+        st.warning(
+            "The 'Correct' slider is for the probability of the actual label; the 'Wrong' slider is for the probability predicted by any model that got that label wrong.")
+
+        st.write("#### Sentiment Polarity Feedback (Select values between 0 and 1)")
+        SENTIMENT_POLARITY_LABELS = ["negative", "neutral", "positive"]
+
+        model_names1 = list(MODEL_OPTIONS1.keys())
+        selected_model1 = st.selectbox(
+            "Choose a model:", model_names1, key="selected_model_stage1"
+        )
+
+        sentiment_feedback = {}
+        # For sentiment, we have 3 labels so we can place them in one row.
+        sentiment_cols = st.columns(len(SENTIMENT_POLARITY_LABELS))
+        for idx, label in enumerate(SENTIMENT_POLARITY_LABELS):
+            with sentiment_cols[idx]:
+                st.write(f"**{label.capitalize()}**")
+                # Create two subcolumns for "Correct" and "Wrong"
+                subcol_correct, subcol_wrong = st.columns(2)
+                with subcol_correct:
+                    correct_value = st.slider(
+                        "Correct",
+                        min_value=0.0,
+                        max_value=1.0,
+                        value=0.33,  # default value
+                        step=0.01,
+                        format="%.2f",
+                        key=f"sentiment_{label}_correct"
+                    )
+                with subcol_wrong:
+                    wrong_value = st.slider(
+                        "Wrong",
+                        min_value=0.0,
+                        max_value=1.0,
+                        value=0.0,  # default value
+                        step=0.01,
+                        format="%.2f",
+                        key=f"sentiment_{label}_wrong"
+                    )
+                sentiment_feedback[label] = {"correct": correct_value, "wrong": wrong_value}
+
+        # st.write("**Collected Sentiment Feedback:**")
+        # st.write(sentiment_feedback)
+
+        # ---------------------------
+        # Emotion Feedback
+        # ---------------------------
+        st.write("#### Emotion Feedback (Select values between 0 and 1)")
+        EMOTION_MOODTAG_LABELS = [
+            "anger", "disgust", "fear", "joy", "neutral",
+            "sadness", "surprise"
+        ]
+
+        model_names2 = list(MODEL_OPTIONS2.keys())
+        selected_model2 = st.selectbox(
+            "Choose a model:", model_names2, key="selected_model_stage2"
+        )
+
+        emotion_feedback = {}
+        max_cols = 3  # Maximum number of emotion labels in one row
+        num_labels = len(EMOTION_MOODTAG_LABELS)
+        num_rows = math.ceil(num_labels / max_cols)
+
+        for row in range(num_rows):
+            # Get labels for this row.
+            row_labels = EMOTION_MOODTAG_LABELS[row * max_cols:(row + 1) * max_cols]
+            # Create main columns for each label in this row.
+            main_cols = st.columns(len(row_labels))
+            for idx, label in enumerate(row_labels):
+                with main_cols[idx]:
+                    st.write(f"**{label.capitalize()}**")
+                    # Create two subcolumns for correct and wrong values.
+                    subcol_correct, subcol_wrong = st.columns(2)
+                    with subcol_correct:
+                        correct_value = st.slider(
+                            "Correct",
+                            min_value=0.0,
+                            max_value=1.0,
+                            value=0.0,
+                            step=0.01,
+                            format="%.2f",
+                            key=f"emotion_{label}_correct"
+                        )
+                    with subcol_wrong:
+                        wrong_value = st.slider(
+                            "Wrong",
+                            min_value=0.0,
+                            max_value=1.0,
+                            value=0.0,
+                            step=0.01,
+                            format="%.2f",
+                            key=f"emotion_{label}_wrong"
+                        )
+                    emotion_feedback[label] = {"correct": correct_value, "wrong": wrong_value}
+
+        # Use form_submit_button instead of st.button inside a form
+        submit_feedback = st.form_submit_button("Submit Feedback")
+
+        if submit_feedback and feedback.strip() and feedback2.strip():
+            # Prepare data to insert
+            data_to_insert = {
+                "input_text": st.session_state.get("user_input_stage3", ""),
+                "correct_text_by_user": feedback,
+                "model_used": st.session_state.get("selected_model_stage3", "unknown"),
+                "wrong_pred_any": feedback2,
+                "sentiment_feedback": sentiment_feedback,
+                "emotion_feedback": emotion_feedback
+            }
+            st.error("Feedback submission is disabled in debug logging mode.")
+            # try:
+            #     from supabase import create_client, Client
+            #     from dotenv import load_dotenv
+            #     load_dotenv()  # or load_dotenv(dotenv_path=env_path) if you have a specific path
+            #     supabase: Client = create_client(
+            #         get_env_variable("SUPABASE_DB_TACHYGRAPHY_DB_URL"),
+            #         get_env_variable("SUPABASE_DB_TACHYGRAPHY_ANON_API_KEY")
+            #     )
+            #     response = supabase.table(
+            #         get_env_variable("SUPABASE_DB_TACHYGRAPHY_DB_STAGE3_TABLE")
+            #     ).insert(data_to_insert, returning="minimal").execute()
+            #     st.success("Feedback submitted successfully!")
+            # except Exception as e:
+            #     st.error(f"Feedback submission failed: {e}")
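The commented-out block above shows the intended submission path. A self-contained sketch of that Supabase insert, runnable outside Streamlit (the environment-variable names are copied from the commented block; the payload values are illustrative):

    # Sketch of the feedback insert that data_collector.py leaves commented
    # out. Env-var names follow the commented block above; the payload keys
    # mirror data_to_insert.
    import os
    from dotenv import load_dotenv
    from supabase import create_client, Client

    load_dotenv()
    supabase: Client = create_client(
        os.environ["SUPABASE_DB_TACHYGRAPHY_DB_URL"],
        os.environ["SUPABASE_DB_TACHYGRAPHY_ANON_API_KEY"],
    )

    data_to_insert = {
        "input_text": "brb 5 min",  # illustrative values
        "correct_text_by_user": "Be right back in five minutes.",
        "model_used": "DeBERTa v3 Base for Sequence Classification",
        "wrong_pred_any": "bring back five minutes",
        "sentiment_feedback": {"neutral": {"correct": 0.9, "wrong": 0.2}},
        "emotion_feedback": {"joy": {"correct": 0.1, "wrong": 0.6}},
    }

    table = os.environ["SUPABASE_DB_TACHYGRAPHY_DB_STAGE3_TABLE"]
    response = supabase.table(table).insert(data_to_insert).execute()
    print(response)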
data_collection_form/hmv_cfg_base_dcl/__init__.py ADDED
File without changes
data_collection_form/hmv_cfg_base_dcl/imports.py ADDED
@@ -0,0 +1,25 @@
+import os
+import sys
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
+
+import streamlit as st
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel
+# import torch
+import numpy as np
+import matplotlib.pyplot as plt
+import plotly.express as px
+import pandas as pd
+import json
+import gc
+import psutil
+import importlib
+import importlib.util
+import asyncio
+# import pytorch_lightning as pl
+
+import safetensors
+from safetensors import load_file, save_file
+import json
+import huggingface_hub
+from huggingface_hub import hf_hub_download
emotionMoodtag_analysis/config/stage2_models.json CHANGED
@@ -3,7 +3,7 @@
     "name": "DeBERTa v3 Base for Sequence Classification",
     "type": "hf_automodel_finetuned_dbt3",
     "module_path": "hmv_cfg_base_stage2.model1",
-    "hf_location": "tachygraphy-microtext-normalization-iemk/DeBERTa-v3-seqClassfication-LV2-EmotionMoodtags-Batch8",
+    "hf_location": "Tachygraphy-Microtext-Normalization-IEMK25/DeBERTa-v3-seqClassfication-LV2-EmotionMoodtags-Batch8",
     "tokenizer_class": "DebertaV2Tokenizer",
     "model_class": "DebertaV2ForSequenceClassification",
     "problem_type": "regression",
@@ -18,7 +18,7 @@
     "name": "DeBERTa v3 Base Custom Model with minimal Regularized Loss",
     "type": "db3_base_custom",
     "module_path": "hmv_cfg_base_stage2.model2",
-    "hf_location": "tachygraphy-microtext-normalization-iemk/DeBERTa-v3-Base-Cust-LV2-EmotionMoodtags-minRegLoss",
+    "hf_location": "Tachygraphy-Microtext-Normalization-IEMK25/DeBERTa-v3-Base-Cust-LV2-EmotionMoodtags-minRegLoss",
     "tokenizer_class": "DebertaV2Tokenizer",
     "model_class": "EmotionModel",
     "problem_type": "regression",
poetry.lock CHANGED
@@ -1249,14 +1249,14 @@ tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipyth

 [[package]]
 name = "faker"
-version = "37.0.2"
+version = "37.1.0"
 description = "Faker is a Python package that generates fake data for you."
 optional = false
 python-versions = ">=3.9"
 groups = ["main"]
 files = [
-    {file = "faker-37.0.2-py3-none-any.whl", hash = "sha256:8955706c56c28099585e9e2b6f814eb0a3a227eb36a2ee3eb9ab577c4764eacc"},
-    {file = "faker-37.0.2.tar.gz", hash = "sha256:948bd27706478d3aa0b6f9f58b9f25207098f6ca79852c7b49c44a8ced2bc59b"},
+    {file = "faker-37.1.0-py3-none-any.whl", hash = "sha256:dc2f730be71cb770e9c715b13374d80dbcee879675121ab51f9683d262ae9a1c"},
+    {file = "faker-37.1.0.tar.gz", hash = "sha256:ad9dc66a3b84888b837ca729e85299a96b58fdaef0323ed0baace93c9614af06"},
 ]

 [package.dependencies]
@@ -3152,24 +3152,20 @@ files = [

 [[package]]
 name = "narwhals"
-version = "1.31.0"
+version = "1.32.0"
 description = "Extremely lightweight compatibility layer between dataframe libraries"
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "narwhals-1.31.0-py3-none-any.whl", hash = "sha256:2a7b79bb5f511055c4c0142121fc0d4171ea171458e12d44dbd9c8fc6488e997"},
-    {file = "narwhals-1.31.0.tar.gz", hash = "sha256:333472e2562343dfdd27407ec9b5114a07c81d0416794e4ac6b703dd925c6a1a"},
+    {file = "narwhals-1.32.0-py3-none-any.whl", hash = "sha256:8bdbf3f76155887412eea04b0b06303856ac1aa3d9e8bda5b5e54612855fa560"},
+    {file = "narwhals-1.32.0.tar.gz", hash = "sha256:bd0aa41434737adb4b26f8593f3559abc7d938730ece010fe727b58bc363580d"},
 ]

 [package.extras]
-core = ["duckdb", "pandas", "polars", "pyarrow", "sqlframe"]
 cudf = ["cudf (>=24.10.0)"]
 dask = ["dask[dataframe] (>=2024.8)"]
-dev = ["covdefaults", "hypothesis", "mypy (>=1.15.0,<1.16.0)", "pandas-stubs (==2.2.3.250308)", "polars (==1.25.2)", "pre-commit", "pyarrow-stubs (==17.18)", "pyright", "pytest", "pytest-cov", "pytest-env", "pytest-randomly", "sqlframe (==3.24.1)", "typing-extensions", "uv"]
-docs = ["black", "duckdb", "jinja2", "markdown-exec[ansi]", "mkdocs", "mkdocs-autorefs", "mkdocs-material", "mkdocstrings-python (>=1.16)", "mkdocstrings[python]", "pandas", "polars (>=1.0.0)", "pyarrow"]
 duckdb = ["duckdb (>=1.0)"]
-extra = ["scikit-learn"]
 ibis = ["ibis-framework (>=6.0.0)", "packaging", "pyarrow-hotfix", "rich"]
 modin = ["modin"]
 pandas = ["pandas (>=0.25.3)"]
@@ -3177,8 +3173,6 @@ polars = ["polars (>=0.20.3)"]
 pyarrow = ["pyarrow (>=11.0.0)"]
 pyspark = ["pyspark (>=3.5.0)"]
 sqlframe = ["sqlframe (>=3.22.0)"]
-tests = ["covdefaults", "hypothesis", "pytest", "pytest-cov", "pytest-env", "pytest-randomly"]
-typing = ["hypothesis", "mypy (>=1.15.0,<1.16.0)", "pandas-stubs (==2.2.3.250308)", "polars (==1.25.2)", "pyarrow-stubs (==17.18)", "pyright", "pytest", "sqlframe (==3.24.1)", "typing-extensions", "uv"]

 [[package]]
 name = "nest-asyncio"
@@ -4617,14 +4611,14 @@ extra = ["pygments (>=2.19.1)"]

 [[package]]
 name = "pyparsing"
-version = "3.2.2"
+version = "3.2.3"
 description = "pyparsing module - Classes and methods to define and execute parsing grammars"
 optional = false
 python-versions = ">=3.9"
 groups = ["main"]
 files = [
-    {file = "pyparsing-3.2.2-py3-none-any.whl", hash = "sha256:6ab05e1cb111cc72acc8ed811a3ca4c2be2af8d7b6df324347f04fd057d8d793"},
-    {file = "pyparsing-3.2.2.tar.gz", hash = "sha256:2a857aee851f113c2de9d4bfd9061baea478cb0f1c7ca6cbf594942d6d111575"},
+    {file = "pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf"},
+    {file = "pyparsing-3.2.3.tar.gz", hash = "sha256:b9c13f1ab8b3b542f72e28f634bad4de758ab3ce4546e4301970ad6fa77c38be"},
 ]

 [package.extras]
@@ -4659,14 +4653,14 @@ six = ">=1.5"

 [[package]]
 name = "python-dotenv"
-version = "1.0.1"
+version = "1.1.0"
 description = "Read key-value pairs from a .env file and set them as environment variables"
 optional = false
-python-versions = ">=3.8"
+python-versions = ">=3.9"
 groups = ["main"]
 files = [
-    {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"},
-    {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"},
+    {file = "python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d"},
+    {file = "python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5"},
 ]

 [package.extras]
@@ -4705,14 +4699,14 @@ test = ["cloudpickle (>=1.3)", "coverage (==7.3.1)", "fastapi", "numpy (>=1.17.2

 [[package]]
 name = "pytz"
-version = "2025.1"
+version = "2025.2"
 description = "World timezone definitions, modern and historical"
 optional = false
 python-versions = "*"
 groups = ["main"]
 files = [
-    {file = "pytz-2025.1-py2.py3-none-any.whl", hash = "sha256:89dd22dca55b46eac6eda23b2d72721bf1bdfef212645d81513ef5d03038de57"},
-    {file = "pytz-2025.1.tar.gz", hash = "sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e"},
+    {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"},
+    {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"},
 ]

 [[package]]
@@ -6613,14 +6607,14 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,

 [[package]]
 name = "transformers"
-version = "4.50.0"
+version = "4.50.1"
 description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
 optional = false
 python-versions = ">=3.9.0"
 groups = ["main"]
 files = [
-    {file = "transformers-4.50.0-py3-none-any.whl", hash = "sha256:d75465d523a28bcfef0028c671f682edee29418ab9a5a15cf8a05171e7c54cb7"},
-    {file = "transformers-4.50.0.tar.gz", hash = "sha256:d4b0f587ec88825981103fee0a1e80230d956ecc8a7f3feeaafbe49a233c88b8"},
+    {file = "transformers-4.50.1-py3-none-any.whl", hash = "sha256:e9b9bd274518150528c1d745c7ebba72d27e4e52f2deffaa1fddebad6912da5d"},
+    {file = "transformers-4.50.1.tar.gz", hash = "sha256:6ee542d2cce7e1b6a06ae350599c27ddf2e6e45ec9d0cb42915b37fca3d6399a"},
 ]

 [package.dependencies]
pyproject.toml CHANGED
@@ -1,6 +1,6 @@
 [project]
 name = "tachygraphy-microtext-analysis-and-normalization"
-version = "2025.03.24.post1"
+version = "2025.03.25.post1"
 description = ""
 authors = [
     { name = "Archisman Karmakar", email = "[email protected]" },
pyprojectOLD.toml CHANGED
@@ -1,6 +1,7 @@
 [project]
 name = "tachygraphy-microtext-analysis-and-normalization"
-version = "2025.03.22.post1"
+version = "2025.03.24.post1"
+# version = "2025.03.22.post1"
 # version = "2025.03.21.post1"
 # version = "2025.03.18.post5"
 # version = "2025.03.18.post4_3"
requirements.txt CHANGED
@@ -45,7 +45,7 @@ entrypoints==0.4 ; python_version >= "3.12" and python_version < "4.0"
 et-xmlfile==2.0.0 ; python_version >= "3.12" and python_version < "4.0"
 evaluate==0.4.3 ; python_version >= "3.12" and python_version < "4.0"
 executing==2.2.0 ; python_version >= "3.12" and python_version < "4.0"
-faker==37.0.2 ; python_version >= "3.12" and python_version < "4.0"
+faker==37.1.0 ; python_version >= "3.12" and python_version < "4.0"
 fastjsonschema==2.21.1 ; python_version >= "3.12" and python_version < "4.0"
 favicon==0.7.0 ; python_version >= "3.12" and python_version < "4.0"
 filelock==3.18.0 ; python_version >= "3.12" and python_version < "4.0"
@@ -111,7 +111,7 @@ msgpack==1.1.0 ; python_version >= "3.12" and python_version < "4.0"
 multidict==6.2.0 ; python_version >= "3.12" and python_version < "4.0"
 multiprocess==0.70.16 ; python_version >= "3.12" and python_version < "4.0"
 namex==0.0.8 ; python_version >= "3.12" and python_version < "4.0"
-narwhals==1.31.0 ; python_version >= "3.12" and python_version < "4.0"
+narwhals==1.32.0 ; python_version >= "3.12" and python_version < "4.0"
 nest-asyncio==1.6.0 ; python_version >= "3.12" and python_version < "4.0"
 networkx==3.4.2 ; python_version >= "3.12" and python_version < "4.0"
 nltk==3.9.1 ; python_version >= "3.12" and python_version < "4.0"
@@ -164,12 +164,12 @@ pydantic==2.10.6 ; python_version >= "3.12" and python_version < "4.0"
 pydeck==0.9.1 ; python_version >= "3.12" and python_version < "4.0"
 pygments==2.19.1 ; python_version >= "3.12" and python_version < "4.0"
 pymdown-extensions==10.14.3 ; python_version >= "3.12" and python_version < "4.0"
-pyparsing==3.2.2 ; python_version >= "3.12" and python_version < "4.0"
+pyparsing==3.2.3 ; python_version >= "3.12" and python_version < "4.0"
 pyproject-hooks==1.2.0 ; python_version >= "3.12" and python_version < "4.0"
 python-dateutil==2.9.0.post0 ; python_version >= "3.12" and python_version < "4.0"
-python-dotenv==1.0.1 ; python_version >= "3.12" and python_version < "4.0"
+python-dotenv==1.1.0 ; python_version >= "3.12" and python_version < "4.0"
 pytorch-lightning==2.5.1 ; python_version >= "3.12" and python_version < "4.0"
-pytz==2025.1 ; python_version >= "3.12" and python_version < "4.0"
+pytz==2025.2 ; python_version >= "3.12" and python_version < "4.0"
 pywin32-ctypes==0.2.3 ; python_version >= "3.12" and python_version < "4.0" and sys_platform == "win32"
 pywin32==309 ; python_version >= "3.12" and python_version < "4.0" and (sys_platform == "win32" or platform_system == "Windows")
 pyyaml==6.0.2 ; python_version >= "3.12" and python_version < "4.0"
@@ -238,7 +238,7 @@ torchvision==0.21.0 ; python_version >= "3.12" and python_version < "4.0"
 tornado==6.4.2 ; python_version >= "3.12" and python_version < "4.0"
 tqdm==4.67.1 ; python_version >= "3.12" and python_version < "4.0"
 traitlets==5.14.3 ; python_version >= "3.12" and python_version < "4.0"
-transformers==4.50.0 ; python_version >= "3.12" and python_version < "4.0"
+transformers==4.50.1 ; python_version >= "3.12" and python_version < "4.0"
 triton==3.2.0 ; python_version >= "3.12" and python_version < "4.0" and platform_system == "Linux" and platform_machine == "x86_64"
 trove-classifiers==2025.3.19.19 ; python_version >= "3.12" and python_version < "4.0"
 typing-extensions==4.12.2 ; python_version >= "3.12" and python_version < "4.0"
sentimentPolarity_analysis/config/stage1_models.json CHANGED
@@ -3,7 +3,7 @@
     "name": "DeBERTa v3 Base for Sequence Classification",
     "type": "hf_automodel_finetuned_dbt3",
     "module_path": "hmv_cfg_base_stage1.model1",
-    "hf_location": "tachygraphy-microtext-normalization-iemk/DeBERTa-v3-seqClassfication-LV1-SentimentPolarities-Batch8",
+    "hf_location": "Tachygraphy-Microtext-Normalization-IEMK25/DeBERTa-v3-seqClassfication-LV1-SentimentPolarities-Batch8",
     "tokenizer_class": "DebertaV2Tokenizer",
     "model_class": "DebertaV2ForSequenceClassification",
     "problem_type": "multi_label_classification",
@@ -18,7 +18,7 @@
     "name": "DeBERTa v3 Base Custom Model with minimal Regularized Loss",
     "type": "db3_base_custom",
     "module_path": "hmv_cfg_base_stage1.model2",
-    "hf_location": "tachygraphy-microtext-normalization-iemk/DeBERTa-v3-Base-Cust-LV1-SentimentPolarities-minRegLoss",
+    "hf_location": "Tachygraphy-Microtext-Normalization-IEMK25/DeBERTa-v3-Base-Cust-LV1-SentimentPolarities-minRegLoss",
     "tokenizer_class": "DebertaV2Tokenizer",
     "model_class": "SentimentModel",
     "problem_type": "multi_label_classification",
@@ -33,7 +33,7 @@
     "name": "BERT Base Uncased Custom Model",
     "type": "bert_base_uncased_custom",
     "module_path": "hmv_cfg_base_stage1.model3",
-    "hf_location": "https://huggingface.co/tachygraphy-microtext-normalization-iemk/BERT-LV1-SentimentPolarities/resolve/main/saved_weights.pt",
+    "hf_location": "https://huggingface.co/Tachygraphy-Microtext-Normalization-IEMK25/BERT-LV1-SentimentPolarities/resolve/main/saved_weights.pt",
     "tokenizer_class": "AutoTokenizer",
     "model_class": "BERT_architecture",
     "problem_type": "multi_label_classification",
@@ -48,7 +48,7 @@
     "name": "LSTM Custom Model",
     "type": "lstm_uncased_custom",
     "module_path": "hmv_cfg_base_stage1.model4",
-    "hf_location": "tachygraphy-microtext-normalization-iemk/LSTM-LV1-SentimentPolarities",
+    "hf_location": "Tachygraphy-Microtext-Normalization-IEMK25/LSTM-LV1-SentimentPolarities",
     "tokenizer_class": "",
     "model_class": "",
     "problem_type": "multi_label_classification",
stacked_stacking_stages/__init__.py ADDED
File without changes
stacked_stacking_stages/hmv_cfg_base_stk_stg/__init__.py ADDED
File without changes
stacked_stacking_stages/hmv_cfg_base_stk_stg/imports.py ADDED
@@ -0,0 +1,25 @@
+import os
+import sys
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
+
+import streamlit as st
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel, AutoModelForSeq2SeqLM
+# import torch
+import numpy as np
+import matplotlib.pyplot as plt
+import plotly.express as px
+import pandas as pd
+import json
+import gc
+import psutil
+import importlib
+import importlib.util
+import asyncio
+# import pytorch_lightning as pl
+
+import safetensors
+from safetensors import load_file, save_file
+import json
+import huggingface_hub
+from huggingface_hub import hf_hub_download
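These imports pull in hf_hub_download, which the model loaders use to fetch weight files referenced by the hf_location entries above. A minimal sketch (repo id and filename come from the BERT entry in stage1_models.json earlier in this commit):

    # Sketch: fetch a weights file from the Hub, as the loaders that consume
    # these imports do.
    from huggingface_hub import hf_hub_download

    weights_path = hf_hub_download(
        repo_id="Tachygraphy-Microtext-Normalization-IEMK25/BERT-LV1-SentimentPolarities",
        filename="saved_weights.pt",
    )
    print(weights_path)  # local cache path to the downloaded file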
stacked_stacking_stages/stacking_stages.py ADDED
@@ -0,0 +1,774 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import shutil
2
+ from transformers.utils.hub import TRANSFORMERS_CACHE
3
+ import torch
4
+ import time
5
+ import joblib
6
+ import importlib.util
7
+ from imports import *
8
+ import os
9
+ import sys
10
+ import time
11
+ import uuid
12
+ import math
13
+
14
+ from dotenv import load_dotenv
15
+ # import psycopg2
16
+ from supabase import create_client, Client
17
+ from datetime import datetime, timezone
18
+ from collections import OrderedDict
19
+
20
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
21
+
22
+ env_path = os.path.join(os.path.dirname(__file__),
23
+ "..", ".devcontainer", ".env")
24
+
25
+ # from transformers.utils import move_cache_to_trash
26
+ # from huggingface_hub import delete_cache
27
+
28
+
29
+ # from hmv_cfg_base_stage1.model1 import load_model as load_model1
30
+ # from hmv_cfg_base_stage1.model1 import predict as predict1
31
+
32
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
33
+ CONFIG_STAGE1 = os.path.join(BASE_DIR, "..", "sentimentPolarity_analysis", "config", "stage1_models.json")
34
+ CONFIG_STAGE2 = os.path.join(BASE_DIR, "..", "emotionMoodtag_analysis", "config", "stage2_models.json")
35
+ CONFIG_STAGE3 = os.path.join(BASE_DIR, "..", "transformation_and_Normalization", "config", "stage3_models.json")
36
+ LOADERS_STAGE_COLLECTOR = os.path.join(BASE_DIR, "hmv_cfg_base_dlc")
37
+
38
+
39
+ EMOTION_MOODTAG_LABELS = [
40
+ "anger", "disgust", "fear", "joy", "neutral",
41
+ "sadness", "surprise"
42
+ ]
43
+
44
+ SENTIMENT_POLARITY_LABELS = [
45
+ "negative", "neutral", "positive"
46
+ ]
47
+
48
+
49
+ current_model = None
50
+ current_tokenizer = None
51
+
52
+
53
+ # Enabling Resource caching
54
+
55
+ # Load environment variables from .env
56
+ load_dotenv()
57
+
58
+ # @st.cache_resource
59
+ # DATABASE_URL = os.environ.get("DATABASE_URL")
60
+
61
+ # def get_connection():
62
+ # # """Establish a connection to the database."""
63
+ # # return psycopg2.connect(os.environ.get("DATABASE_URL"))
64
+ # supabase: Client = create_client(os.environ.get("SUPABASE_URL"), os.environ.get("anon_key"))
65
+ # return supabase
66
+
67
+ # @st.cache_resource
68
+
69
+
70
+ def load_model_config1():
71
+ with open(CONFIG_STAGE1, "r") as f:
72
+ model_data = json.load(f)
73
+ # Convert model_data values to a list and take only the first two entries
74
+ top2_data = list(model_data.values())[:2]
75
+ # Create a dictionary mapping from model name to its configuration for the top two models
76
+ model_options = {v["name"]: v for v in top2_data}
77
+ return top2_data, model_options
78
+
79
+
80
+
81
+ MODEL_DATA1, MODEL_OPTIONS1 = load_model_config1()
82
+
83
+ # MODEL_DATA1_1=MODEL_DATA1[0]
84
+ # MODEL_OPTIONS1_1=MODEL_OPTIONS1[0]
85
+
86
+
87
+ def load_model_config2():
88
+ with open(CONFIG_STAGE2, "r") as f:
89
+ model_data = json.load(f)
90
+ # Convert model_data values to a list and take only the first two entries
91
+ top2_data = list(model_data.values())[:2]
92
+ # Create a dictionary mapping from model name to its configuration for the top two models
93
+ model_options = {v["name"]: v for v in top2_data}
94
+ return top2_data, model_options
95
+
96
+
97
+ MODEL_DATA2, MODEL_OPTIONS2 = load_model_config2()
98
+
99
+ # MODEL_DATA2_1=MODEL_DATA2[0]
100
+ # MODEL_OPTIONS2_1=MODEL_OPTIONS2[0]
101
+
102
+
103
+ def load_model_config3():
104
+ with open(CONFIG_STAGE3, "r") as f:
105
+ model_data = json.load(f)
106
+ # Convert model_data values to a list and take only the first two entries
107
+ top2_data = list(model_data.values())[:2]
108
+ # Create a dictionary mapping from model name to its configuration for the top two models
109
+ model_options = {v["name"]: v for v in top2_data}
110
+ return top2_data, model_options
111
+
112
+
113
+
114
+ MODEL_DATA3, MODEL_OPTIONS3 = load_model_config3()
115
+
116
+ # MODEL_DATA3_1=MODEL_DATA3[0]
117
+ # MODEL_OPTIONS3_1=MODEL_OPTIONS3[0]
118
+
119
+
120
+ # ✅ Dynamically Import Model Functions
121
+ def import_from_module(module_name, function_name):
122
+ try:
123
+ module = importlib.import_module(module_name)
124
+ return getattr(module, function_name)
125
+ except (ModuleNotFoundError, AttributeError) as e:
126
+ st.error(f"❌ Import Error: {e}")
127
+ return None
128
+
129
+
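A usage sketch for `import_from_module`, assuming the stage-1 loader module named in the commented-out imports near the top of the file is on the import path:

```python
# Hypothetical usage; "hmv_cfg_base_stage1.model1" is taken from the
# commented-out imports above and may differ in the actual deployment.
load_model1 = import_from_module("hmv_cfg_base_stage1.model1", "load_model")
if load_model1 is not None:
    model, tokenizer = load_model1()
```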
130
+ def free_memory():
131
+ # """Free up CPU & GPU memory before loading a new model."""
132
+ global current_model, current_tokenizer
133
+
134
+ if current_model is not None:
135
+ del current_model # Delete the existing model
136
+ current_model = None # Reset reference
137
+
138
+ if current_tokenizer is not None:
139
+ del current_tokenizer # Delete the tokenizer
140
+ current_tokenizer = None
141
+
142
+ gc.collect() # Force garbage collection for CPU memory
143
+
144
+ if torch.cuda.is_available():
145
+ torch.cuda.empty_cache() # Free GPU memory
146
+ torch.cuda.ipc_collect() # Clean up PyTorch GPU cache
147
+
148
+ # On CPU-only runs, refresh memory stats (psutil only reports usage; it does not reclaim memory)
149
+ try:
150
+ if not torch.cuda.is_available():
151
+ psutil.virtual_memory() # Refresh memory stats
152
+ except Exception as e:
153
+ print(f"Memory cleanup error: {e}")
154
+
155
+ # Delete cached Hugging Face models
156
+ try:
157
+ cache_dir = TRANSFORMERS_CACHE
158
+ if os.path.exists(cache_dir):
159
+ shutil.rmtree(cache_dir)
160
+ print("Cache cleared!")
161
+ except Exception as e:
162
+ print(f"❌ Cache cleanup error: {e}")
163
+
164
+
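Deleting `TRANSFORMERS_CACHE` wholesale, as `free_memory` does above, forces every model to be re-downloaded on the next load. A gentler sketch using the `huggingface_hub` cache utilities, shown for comparison and not wired into the app:

```python
from huggingface_hub import scan_cache_dir

cache_info = scan_cache_dir()
# Collect every cached revision and delete them through the official API,
# which also reports how much space the deletion will free.
revisions = [rev.commit_hash
             for repo in cache_info.repos
             for rev in repo.revisions]
if revisions:
    strategy = cache_info.delete_revisions(*revisions)
    print(f"Will free {strategy.expected_freed_size_str}")
    strategy.execute()
```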
165
+ def load_selected_model1(model_name):
166
+ global current_model, current_tokenizer
167
+
168
+ # st.cache_resource.clear()
169
+
170
+ # free_memory()
171
+
172
+ # st.write("DEBUG: Available Models:", MODEL_OPTIONS.keys()) # ✅ See available models
173
+ # st.write("DEBUG: Selected Model:", MODEL_OPTIONS[model_name]) # ✅ Check selected model
174
+ # st.write("DEBUG: Model Name:", model_name) # ✅ Check selected model
175
+
176
+ if model_name not in MODEL_OPTIONS1:
177
+ st.error(f"⚠️ Model '{model_name}' not found in config!")
178
+ return None, None, None
179
+
180
+ model_info = MODEL_OPTIONS1[model_name]
181
+ hf_location = model_info["hf_location"]
182
+
183
+ model_module = model_info["module_path"]
184
+ load_function = model_info["load_function"]
185
+ predict_function = model_info["predict_function"]
186
+
187
+ load_model_func = import_from_module(model_module, load_function)
188
+ predict_func = import_from_module(model_module, predict_function)
189
+
190
+ if load_model_func is None or predict_func is None:
191
+ st.error("❌ Model functions could not be loaded!")
192
+ return None, None, None
193
+
194
+ model, tokenizer = load_model_func()
195
+
196
+ current_model, current_tokenizer = model, tokenizer
197
+ return model, tokenizer, predict_func
198
+
199
+ def load_selected_model2(model_name):
200
+ global current_model, current_tokenizer
201
+
202
+ # st.cache_resource.clear()
203
+
204
+ # free_memory()
205
+
206
+ # st.write("DEBUG: Available Models:", MODEL_OPTIONS.keys()) # ✅ See available models
207
+ # st.write("DEBUG: Selected Model:", MODEL_OPTIONS[model_name]) # ✅ Check selected model
208
+ # st.write("DEBUG: Model Name:", model_name) # ✅ Check selected model
209
+
210
+ if model_name not in MODEL_OPTIONS2:
211
+ st.error(f"⚠️ Model '{model_name}' not found in config!")
212
+ return None, None, None
213
+
214
+ model_info = MODEL_OPTIONS2[model_name]
215
+ hf_location = model_info["hf_location"]
216
+
217
+ model_module = model_info["module_path"]
218
+ load_function = model_info["load_function"]
219
+ predict_function = model_info["predict_function"]
220
+
221
+ load_model_func = import_from_module(model_module, load_function)
222
+ predict_func = import_from_module(model_module, predict_function)
223
+
224
+ if load_model_func is None or predict_func is None:
225
+ st.error("❌ Model functions could not be loaded!")
226
+ return None, None, None
227
+
228
+ model, tokenizer = load_model_func()
229
+
230
+ current_model, current_tokenizer = model, tokenizer
231
+ return model, tokenizer, predict_func
232
+
233
+ def load_selected_model3(model_name):
234
+ global current_model, current_tokenizer
235
+
236
+ # st.cache_resource.clear()
237
+
238
+ # free_memory()
239
+
240
+ # st.write("DEBUG: Available Models:", MODEL_OPTIONS.keys()) # ✅ See available models
241
+ # st.write("DEBUG: Selected Model:", MODEL_OPTIONS[model_name]) # ✅ Check selected model
242
+ # st.write("DEBUG: Model Name:", model_name) # ✅ Check selected model
243
+
244
+ if model_name not in MODEL_OPTIONS3:
245
+ st.error(f"⚠️ Model '{model_name}' not found in config!")
246
+ return None, None, None
247
+
248
+ model_info = MODEL_OPTIONS3[model_name]
249
+ hf_location = model_info["hf_location"]
250
+
251
+ model_module = model_info["module_path"]
252
+ load_function = model_info["load_function"]
253
+ predict_function = model_info["predict_function"]
254
+
255
+ load_model_func = import_from_module(model_module, load_function)
256
+ predict_func = import_from_module(model_module, predict_function)
257
+
258
+ if load_model_func is None or predict_func is None:
259
+ st.error("❌ Model functions could not be loaded!")
260
+ return None, None, None
261
+
262
+ model, tokenizer = load_model_func()
263
+
264
+ current_model, current_tokenizer = model, tokenizer
265
+ return model, tokenizer, predict_func
266
+
267
+
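`load_selected_model1/2/3` are identical apart from the options dict they consult, so a single parameterized loader could replace all three. A sketch (the generic function name is illustrative):

```python
def load_selected_model(model_name, model_options):
    """Generic version of the three stage-specific loaders above."""
    global current_model, current_tokenizer
    if model_name not in model_options:
        st.error(f"⚠️ Model '{model_name}' not found in config!")
        return None, None, None
    info = model_options[model_name]
    load_fn = import_from_module(info["module_path"], info["load_function"])
    predict_fn = import_from_module(info["module_path"], info["predict_function"])
    if load_fn is None or predict_fn is None:
        st.error("❌ Model functions could not be loaded!")
        return None, None, None
    model, tokenizer = load_fn()
    current_model, current_tokenizer = model, tokenizer
    return model, tokenizer, predict_fn

# e.g. model1, tok1, predict1 = load_selected_model(name, MODEL_OPTIONS1)
```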
268
+ def disable_ui():
269
+ st.components.v1.html(
270
+ """
271
+ <style>
272
+ #ui-disable-overlay {
273
+ position: fixed;
274
+ top: 0;
275
+ left: 0;
276
+ width: 100vw;
277
+ height: 100vh;
278
+ background-color: rgba(200, 200, 200, 0.5);
279
+ z-index: 9999;
280
+ }
281
+ </style>
282
+ <div id="ui-disable-overlay"></div>
283
+ """,
284
+ height=0,
285
+ scrolling=False
286
+ )
287
+
288
+
289
+ def enable_ui():
290
+ st.components.v1.html(
291
+ """
292
+ <script>
293
+ var overlay = document.getElementById("ui-disable-overlay");
294
+ if (overlay) {
295
+ overlay.parentNode.removeChild(overlay);
296
+ }
297
+ </script>
298
+ """,
299
+ height=0,
300
+ scrolling=False
301
+ )
302
+
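Note that each `st.components.v1.html` call renders inside its own sandboxed iframe, so the overlay injected by `disable_ui` only covers that zero-height iframe, and the removal script in `enable_ui` runs in a different iframe that cannot see the overlay node. The usual Streamlit-native pattern gates widgets on a session flag instead; a minimal sketch using the `disabled` key that is already initialized further down:

```python
# Sketch only: gate inputs on a session flag rather than injecting HTML.
st.session_state.disabled = True   # set while inference is running
st.text_input("Enter text:", disabled=st.session_state.get("disabled", False))
```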
303
+
304
+
305
+
306
+ def get_sentiment_emotion_graph_code(input_text, normalized_text, sentiment_array, emotion_array):
307
+ """
308
+ Returns a Graphviz code string representing:
309
+ - Input Text as the root
310
+ - Normalized Text as a child
311
+ - A Sentiment node with its probabilities as children (using SENTIMENT_POLARITY_LABELS)
312
+ - An Emotion node with its probabilities as children (using EMOTION_MOODTAG_LABELS)
313
+ - Arrows from each sentiment node to the Emotion node with fixed penwidths (5 for highest, 3 for middle, 1 for lowest)
314
+
315
+ Both sentiment_array and emotion_array are NumPy arrays (possibly nested, e.g. [[values]]),
316
+ so they are squeezed before use.
317
+ """
318
+ import numpy as np
319
+
320
+ # Flatten arrays in case they are nested
321
+ sentiment_flat = np.array(sentiment_array).squeeze()
322
+ emotion_flat = np.array(emotion_array).squeeze()
323
+
324
+ # Create pairs for each sentiment label with its probability
325
+ sentiment_pairs = list(zip(SENTIMENT_POLARITY_LABELS, sentiment_flat))
326
+ # Sort by probability (ascending)
327
+ sentiment_sorted = sorted(sentiment_pairs, key=lambda x: x[1])
328
+
329
+ # Create a penwidth map: label -> penwidth
330
+ penwidth_map = {}
331
+
332
+ # Collect all unique probabilities to handle ties
333
+ unique_probs = set(prob for _, prob in sentiment_sorted)
334
+
335
+ if len(unique_probs) == 1:
336
+ # All sentiments have the same probability; use mid-range width (e.g., 3) for all
337
+ for label, _ in sentiment_sorted:
338
+ penwidth_map[label] = 3
339
+ elif len(unique_probs) == 2:
340
+ # Two unique probabilities: assign min width 1 and max width 5 accordingly
341
+ min_prob = sentiment_sorted[0][1]
342
+ max_prob = sentiment_sorted[-1][1]
343
+ for label, prob in sentiment_sorted:
344
+ if prob == min_prob:
345
+ penwidth_map[label] = 1
346
+ else:
347
+ penwidth_map[label] = 5
348
+ else:
349
+ # For three distinct probabilities, assign 1 to the smallest, 3 to the middle, 5 to the largest.
350
+ penwidth_map[sentiment_sorted[0][0]] = 1
351
+ penwidth_map[sentiment_sorted[1][0]] = 3
352
+ penwidth_map[sentiment_sorted[2][0]] = 5
353
+
354
+ # Build the basic Graphviz structure
355
+ # Escape embedded double quotes up front; backslashes inside f-string
+ # expressions are a SyntaxError on Python versions below 3.12.
+ safe_input = input_text.replace('"', '\\"')
+ safe_normalized = normalized_text.replace('"', '\\"')
+ graph_code = f'''
356
+ digraph G {{
357
+ rankdir=TB;
358
+ node [shape=box, style="rounded,filled", fontname="Helvetica", fontsize=12];
359
+
360
+ Input [label="Input Text:\\n{safe_input}", fillcolor="#ffe6de", fontcolor="#000000"];
361
+ Normalized [label="Normalized Text:\\n{safe_normalized}", fillcolor="#ffe6de", fontcolor="#000000"];
362
+ Sentiment [label="Sentiment"];
363
+ Emotion [label="Emotion"];
364
+
365
+ Input -> Normalized;
366
+ Input -> Sentiment;
367
+ Sentiment -> Emotion;
368
+ '''
369
+
370
+ # Add sentiment nodes (displaying full values without truncation)
371
+ for label, prob in sentiment_pairs:
372
+ node_id = f"S_{label}"
373
+ graph_code += f'\n {node_id} [label="{label}: {prob}", fillcolor="#ecdeff", fontcolor="black"];'
374
+ graph_code += f'\n Sentiment -> {node_id};'
375
+
376
+ # Add emotion nodes (displaying full values)
377
+ for i, label in enumerate(EMOTION_MOODTAG_LABELS):
378
+ if i < len(emotion_flat):
379
+ prob = emotion_flat[i]
380
+ node_id = f"E_{label}"
381
+ graph_code += f'\n {node_id} [label="{label}: {prob}", fillcolor="#deffe1", fontcolor="black"];'
382
+ graph_code += f'\n Emotion -> {node_id};'
383
+
384
+ # Add arrows from each sentiment node to the Emotion node with fixed penwidth based on ranking
385
+ for label, prob in sentiment_pairs:
386
+ node_id = f"S_{label}"
387
+ pw = penwidth_map[label]
388
+ graph_code += f'\n {node_id} -> Emotion [penwidth={pw}];'
389
+
390
+ graph_code += "\n}"
391
+ return graph_code
392
+
393
+
394
+
395
+
396
+
397
+
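A usage sketch for `get_sentiment_emotion_graph_code` with made-up probabilities; the array shapes mirror the two label lists defined at the top of the file:

```python
import numpy as np

code = get_sentiment_emotion_graph_code(
    "gm hru",                                  # informal input (example)
    "Good morning, how are you?",              # normalized output (example)
    np.array([[0.10, 0.20, 0.70]]),            # negative / neutral / positive
    np.array([[0.02, 0.01, 0.02, 0.75, 0.10, 0.05, 0.05]]),  # 7 emotion labels
)
# st.graphviz_chart(code)
```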
398
+ def get_env_variable(var_name):
399
+ # Try os.environ first (this covers local development and HF Spaces)
400
+ value = os.environ.get(var_name)
401
+ if value is None:
402
+ # Fall back to st.secrets if available (e.g., on Streamlit Cloud)
403
+ try:
404
+ value = st.secrets[var_name]
405
+ except Exception:  # st.secrets may raise FileNotFoundError, not just KeyError, when no secrets file exists
406
+ value = None
407
+ return value
408
+
409
+
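`get_env_variable` lets the same code run on HF Spaces (plain environment variables) and on Streamlit Cloud (`st.secrets`). For example, resolving the Supabase URL referenced in the commented-out feedback code elsewhere in this commit:

```python
# Sketch; the variable name is taken from the commented-out feedback code.
db_url = get_env_variable("SUPABASE_DB_TACHYGRAPHY_DB_URL")
if db_url is None:
    st.warning("Database URL not configured; feedback storage disabled.")
```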
410
+ # Increment the progress bar dynamically, in 5% steps with a short delay
+ def update_progress(progress_bar, start, end, delay=0.1):
411
+ for i in range(start, end + 1, 5): # Increment in steps of 5%
412
+ progress_bar.progress(i)
413
+ time.sleep(delay) # Simulate processing time
414
+ # st.experimental_rerun() # Refresh the page
415
+
416
+
417
+ # Function to update session state when model changes
418
+ def on_model_change():
419
+ st.cache_data.clear()
420
+ st.cache_resource.clear()
421
+ free_memory()
422
+ st.session_state.model_changed = True # Mark model as changed
423
+
424
+ # Reset flags to trigger new prediction and show feedback form
425
+ st.session_state.prediction_generated = False
426
+ st.session_state.feedback_submitted = False
427
+ st.session_state.predictions = None
428
+ st.session_state.graphviz_code = None
429
+ st.session_state.last_processed_input = ""
430
+
431
+
432
+ # Function to update session state when text changes
433
+
434
+
435
+ def on_text_change():
436
+ st.session_state.text_changed = True # Mark text as changed
437
+
438
+ st.session_state.prediction_generated = False
439
+ st.session_state.feedback_submitted = False
440
+ st.session_state.predictions = None
441
+ st.session_state.graphviz_code = None
442
+ # st.session_state.last_processed_input = ""
443
+
444
+
445
+ def update_top_k_from_slider():
446
+ st.session_state.top_k = st.session_state.top_k_slider
447
+
448
+ st.session_state.prediction_generated = False
449
+ st.session_state.feedback_submitted = False
450
+ st.session_state.predictions = None
451
+ st.session_state.graphviz_code = None
452
+ # st.session_state.last_processed_input = ""
453
+
454
+
455
+ def update_top_k_from_input():
456
+ st.session_state.top_k = st.session_state.top_k_input
457
+
458
+ st.session_state.prediction_generated = False
459
+ st.session_state.feedback_submitted = False
460
+ st.session_state.predictions = None
461
+ st.session_state.graphviz_code = None
462
+ # st.session_state.last_processed_input = ""
463
+
464
+ def on_temperature_change():
465
+ st.session_state.prediction_generated = False
466
+ st.session_state.feedback_submitted = False
467
+ st.session_state.predictions = None
468
+ st.session_state.graphviz_code = None
469
+ # st.session_state.last_processed_input = ""
470
+
471
+ def on_top_p_change():
472
+ st.session_state.prediction_generated = False
473
+ st.session_state.feedback_submitted = False
474
+ st.session_state.predictions = None
475
+ st.session_state.graphviz_code = None
476
+ # st.session_state.last_processed_input = ""
477
+
478
+ def on_beam_checkbox_change():
479
+ st.session_state.prediction_generated = False
480
+ st.session_state.feedback_submitted = False
481
+ st.session_state.predictions = None
482
+ st.session_state.graphviz_code = None
483
+ # st.session_state.last_processed_input = ""
484
+
485
+ def on_enable_sampling_checkbox_change():
486
+ st.session_state.prediction_generated = False
487
+ st.session_state.feedback_submitted = False
488
+ st.session_state.predictions = None
489
+ st.session_state.graphviz_code = None
490
+ # st.session_state.last_processed_input = ""
491
+
492
+ def on_enable_earlyStopping_checkbox_change():
493
+ st.session_state.prediction_generated = False
494
+ st.session_state.feedback_submitted = False
495
+ st.session_state.predictions = None
496
+ st.session_state.graphviz_code = None
497
+ # st.session_state.last_processed_input = ""
498
+
499
+ def on_max_new_tokens_change():
500
+ st.session_state.prediction_generated = False
501
+ st.session_state.feedback_submitted = False
502
+ st.session_state.predictions = None
503
+ st.session_state.graphviz_code = None
504
+ # st.session_state.last_processed_input = ""
505
+
506
+ def on_num_return_sequences_change():
507
+ st.session_state.prediction_generated = False
508
+ st.session_state.feedback_submitted = False
509
+ st.session_state.predictions = None
510
+ st.session_state.graphviz_code = None
511
+ # st.session_state.last_processed_input = ""
512
+
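All of the `on_*_change` callbacks above share the same four-line reset body, so a single helper could back every widget's `on_change`. A sketch (the helper name is illustrative):

```python
def reset_prediction_state():
    """Shared body of the on_*_change callbacks above."""
    st.session_state.prediction_generated = False
    st.session_state.feedback_submitted = False
    st.session_state.predictions = None
    st.session_state.graphviz_code = None

# e.g. st.slider("Temperature:", 0.1, 2.0, 0.4, on_change=reset_prediction_state)
```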
513
+ # Initialize session state variables
514
+ if "selected_model1" not in st.session_state:
515
+ st.session_state.selected_model1 = list(MODEL_OPTIONS1.keys())[
516
+ 0] # Default model
517
+ if "selected_model2" not in st.session_state:
518
+ st.session_state.selected_model2 = list(MODEL_OPTIONS2.keys())[
519
+ 0]
520
+ if "selected_model3" not in st.session_state:
521
+ st.session_state.selected_model3 = list(MODEL_OPTIONS3.keys())[
522
+ 0]
523
+ if "user_input" not in st.session_state:
524
+ st.session_state.user_input = ""
525
+ if "last_processed_input" not in st.session_state:
526
+ st.session_state.last_processed_input = ""
527
+ if "model_changed" not in st.session_state:
528
+ st.session_state.model_changed = False
529
+ if "text_changed" not in st.session_state:
530
+ st.session_state.text_changed = False
531
+ if "disabled" not in st.session_state:
532
+ st.session_state.disabled = False
533
+
534
+ if "top_k" not in st.session_state:
535
+ st.session_state.top_k = 50
536
+
537
+
538
+ if "last_change" not in st.session_state:
539
+ st.session_state.last_change = time.time()
540
+ if "auto_predict_triggered" not in st.session_state:
541
+ st.session_state.auto_predict_triggered = False
542
+
543
+
544
+
545
+
546
+
547
+ def show_stacking_stages():
548
+ # No cache clearing here; caches are cleared only in the model-change callback.
549
+
550
+ # st.write(st.session_state)
551
+
552
+ if "last_change" not in st.session_state:
553
+ st.session_state.last_change = time.time()
554
+ if "auto_predict_triggered" not in st.session_state:
555
+ st.session_state.auto_predict_triggered = False
556
+
557
+
558
+ if "top_k" not in st.session_state:
559
+ st.session_state.top_k = 50
560
+
561
+ model_names1 = list(MODEL_OPTIONS1.keys())
562
+ model_names2 = list(MODEL_OPTIONS2.keys())
563
+ model_names3 = list(MODEL_OPTIONS3.keys())
564
+
565
+ st.title("Stacking all the best models together")
566
+
567
+ st.warning("If memory is low, this page may take a while to load or might fail too if memory overshoots or due to CUDA_Side_Device_Assertions.")
568
+
569
+ # Check if the stored selected model is valid; if not, reset it
570
+ if "selected_model1" in st.session_state:
571
+ if st.session_state.selected_model1 not in model_names1:
572
+ st.session_state.selected_model1 = model_names1[0]
573
+ else:
574
+ st.session_state.selected_model1 = model_names1[0]
575
+
576
+ if "selected_model2" in st.session_state:
577
+ if st.session_state.selected_model2 not in model_names2:
578
+ st.session_state.selected_model2 = model_names2[0]
579
+ else:
580
+ st.session_state.selected_model2 = model_names2[0]
581
+
582
+ if "selected_model3" in st.session_state:
583
+ if st.session_state.selected_model3 not in model_names3:
584
+ st.session_state.selected_model3 = model_names3[0]
585
+ else:
586
+ st.session_state.selected_model3 = model_names3[0]
587
+
588
+ # st.title("Stacking all the best models together")
589
+ st.write("This section handles the sentiment analysis and emotion analysis of informal text and then transformation and normalization of it into standard formal English.")
590
+
591
+ # Model selection with change detection; clearing cache happens in on_model_change()
592
+ col1, col2, col3 = st.columns(3)
593
+ with col1:
594
+ selected_model1 = st.selectbox(
595
+ "Choose a model:", model_names1, key="selected_model_stage1", on_change=on_model_change
596
+ )
597
+ with col2:
598
+ selected_model2 = st.selectbox(
599
+ "Choose a model:", model_names2, key="selected_model_stage2", on_change=on_model_change
600
+ )
601
+ with col3:
602
+ selected_model3 = st.selectbox(
603
+ "Choose a model:", model_names3, key="selected_model_stage3", on_change=on_model_change
604
+ )
605
+
606
+ # Text input with change detection
607
+ user_input = st.text_input(
608
+ "Enter text for emotions mood-tag analysis:", key="user_input_stage3", on_change=on_text_change
609
+ )
610
+
611
+ if st.session_state.get("last_processed_input", "") != user_input:
612
+ st.session_state.prediction_generated = False
613
+ st.session_state.feedback_submitted = False
614
+
615
+ st.markdown("#### Generation Parameters")
616
+ col1, col2 = st.columns(2)
617
+
618
+ with col1:
619
+ use_beam = st.checkbox("Use Beam Search", value=False, on_change=on_beam_checkbox_change)
620
+ if use_beam:
621
+ beams = st.number_input("Number of beams:", min_value=1, max_value=10, value=3, step=1, on_change=on_beam_checkbox_change)
622
+ do_sample = False
623
+ temp = None
624
+ top_p = None
625
+ top_k = None
626
+ else:
627
+ beams = None
628
+ do_sample = st.checkbox("Enable Sampling", value=True, on_change=on_enable_sampling_checkbox_change)
629
+ temp = st.slider("Temperature:", min_value=0.1, max_value=2.0, value=0.4, step=0.1, on_change=on_temperature_change) if do_sample else None
630
+
631
+ with col2:
632
+ top_p = st.slider("Top-p (nucleus sampling):", min_value=0.0, max_value=1.0, value=0.9, step=0.05, on_change=on_top_p_change) if (not use_beam and do_sample) else None
633
+ model_config = MODEL_OPTIONS3[selected_model3]
634
+ max_top_k = model_config.get("max_top_k", 50)
635
+ if not use_beam and do_sample:
636
+ col_slider, col_input = st.columns(2)
637
+ st.write("Top-K: Top K most probable tokens, recommended range: 10-60")
638
+ with col_slider:
639
+ top_k_slider = st.slider(
640
+ "Top-k (slider):",
641
+ min_value=0,
642
+ max_value=max_top_k,
643
+ value=st.session_state.top_k,
644
+ step=1,
645
+ key="top_k_slider",
646
+ on_change=update_top_k_from_slider
647
+ )
648
+ with col_input:
649
+ top_k_input = st.number_input(
650
+ "Top-k (number input):",
651
+ min_value=0,
652
+ max_value=max_top_k,
653
+ value=st.session_state.top_k,
654
+ step=1,
655
+ key="top_k_input",
656
+ on_change=update_top_k_from_input
657
+ )
658
+ final_top_k = st.session_state.top_k
659
+ else:
660
+ final_top_k = None
661
+
662
+ col_tokens, col_return = st.columns(2)
663
+ with col_tokens:
664
+ max_new_tokens = st.number_input("Max New Tokens:", min_value=1, value=1024, step=1, on_change=on_max_new_tokens_change)
665
+ early_stopping = st.checkbox("Early Stopping", value=True, on_change=on_enable_earlyStopping_checkbox_change)
666
+ with col_return:
667
+ if beams is not None:
668
+ num_return_sequences = st.number_input(
669
+ "Num Return Sequences:",
670
+ min_value=1,
671
+ max_value=beams,
672
+ value=1,
673
+ step=1,
674
+ on_change=on_num_return_sequences_change
675
+ )
676
+ else:
677
+ num_return_sequences = st.number_input(
678
+ "Num Return Sequences:",
679
+ min_value=1,
680
+ max_value=3,
681
+ value=1,
682
+ step=1,
683
+ on_change=on_num_return_sequences_change
684
+ )
685
+ user_input_copy = user_input
686
+
687
+ current_time = time.time()
688
+ if user_input.strip() and (current_time - st.session_state.last_change >= 1.25) and not st.session_state.get("prediction_generated", False):
689
+ st.session_state.last_processed_input = user_input
690
+
691
+ progress_bar = st.progress(0)
692
+ update_progress(progress_bar, 0, 10)
693
+ col_spinner, col_warning = st.columns(2)
694
+
695
+ with col_warning:
696
+ warning_placeholder = st.empty()
697
+ warning_placeholder.warning("Don't change the text data or any input parameters or switch models or pages while inference is loading...")
698
+
699
+ with col_spinner:
700
+ with st.spinner("Please wait, inference is loading..."):
701
+ model1, tokenizer1, predict_func1 = load_selected_model1(selected_model1)
702
+ model2, tokenizer2, predict_func2 = load_selected_model2(selected_model2)
703
+ model3, tokenizer3, predict_func3 = load_selected_model3(selected_model3)
704
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
705
+ if model1 is None:
706
+ st.error("⚠️ Error: Model 1 failed to load!")
707
+ st.stop()
708
+ if hasattr(model1, "to"):
709
+ model1.to(device)
710
+ if model2 is None:
711
+ st.error("⚠️ Error: Model 2 failed to load!")
712
+ st.stop()
713
+ if hasattr(model2, "to"):
714
+ model2.to(device)
715
+ if model3 is None:
716
+ st.error("⚠️ Error: Model 3 failed to load!")
717
+ st.stop()
718
+ if hasattr(model3, "to"):
719
+ model3.to(device)
720
+ predictions1 = predict_func1(user_input, model1, tokenizer1, device)
721
+ predictions2 = predict_func2(user_input, model2, tokenizer2, device)
722
+ predictions = predict_func3(
723
+ model3, tokenizer3, user_input, device,
724
+ num_return_sequences,
725
+ beams,
726
+ do_sample,
727
+ temp,
728
+ top_p,
729
+ final_top_k,
730
+ max_new_tokens,
731
+ early_stopping
732
+ )
733
+
734
+ update_progress(progress_bar, 10, 100)
735
+
736
+ warning_placeholder.empty()
737
+
738
+ st.session_state.predictions = predictions
739
+ st.session_state.predictions1 = predictions1
740
+ st.session_state.predictions2 = predictions2
741
+ print(predictions1)
742
+ print(predictions2)
743
+ if len(predictions) > 1:
744
+ st.write("### Most Probable Predictions:")
745
+ for i, pred in enumerate(predictions, start=1):
746
+ st.markdown(f"**Prediction Sequence {i}:** {pred}")
747
+ else:
748
+ st.write("### Predicted Sequence:")
749
+ st.write(predictions[0])
750
+
751
+ graph_code = get_sentiment_emotion_graph_code(user_input, predictions[0], predictions1, predictions2)
752
+ st.session_state.graphviz_code = graph_code
753
+
754
+ # Now display the graph from session state:
755
+ st.graphviz_chart(st.session_state.graphviz_code)
756
+ progress_bar.empty()
757
+ # else:
758
+ # st.info("Waiting for input to settle...")
759
+
760
+ # Mark that a prediction has been generated
761
+ st.session_state.prediction_generated = True
762
+
763
+ else:
764
+ # If predictions are already generated, display the stored ones
765
+ if st.session_state.get("predictions") and st.session_state.get("graphviz_code") and st.session_state.get("predictions2") and st.session_state.get("predictions1"):
766
+ predictions = st.session_state.predictions
767
+ if len(predictions) > 1:
768
+ st.write("### Most Probable Predictions:")
769
+ for i, pred in enumerate(predictions, start=1):
770
+ st.markdown(f"**Prediction Sequence {i}:** {pred}")
771
+ else:
772
+ st.write("### Predicted Sequence:")
773
+ st.write(predictions[0])
774
+ st.graphviz_chart(st.session_state.graphviz_code)
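The stage-3 `predict_func3` call above receives the generation knobs collected from the UI. A sketch of how they typically map onto `transformers`' `generate()`; this is a plausible implementation for illustration, not the actual code in `hmv_cfg_base_stage3`:

```python
# Hypothetical sketch of the stage-3 predict function's core.
inputs = tokenizer3(user_input, return_tensors="pt").to(device)
outputs = model3.generate(
    **inputs,
    num_beams=beams or 1,               # beam search when beams is set
    do_sample=bool(do_sample),          # sampling path otherwise
    temperature=temp,                   # None falls back to config defaults
    top_p=top_p,
    top_k=final_top_k,
    max_new_tokens=max_new_tokens,
    early_stopping=early_stopping,
    num_return_sequences=num_return_sequences,
)
predictions = tokenizer3.batch_decode(outputs, skip_special_tokens=True)
```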
transformation_and_Normalization/config/stage3_models.json CHANGED
@@ -3,7 +3,7 @@
3
  "name": "Facebook BART Base for Conditional Text Generation",
4
  "type": "hf_automodel_finetuned_fbtctg",
5
  "module_path": "hmv_cfg_base_stage3.model1",
6
- "hf_location": "tachygraphy-microtext-normalization-iemk/BART-base-HF-Seq2Seq-Trainer-Batch4",
7
  "tokenizer_class": "BartTokenizer",
8
  "model_class": "BartForConditionalGeneration",
9
  "problem_type": "text_transformamtion_and_normalization",
@@ -18,7 +18,7 @@
18
  "name": "Microsoft Prophet Net Uncased Large for Conditional Text Generation",
19
  "type": "hf_automodel_finetuned_mstctg",
20
  "module_path": "hmv_cfg_base_stage3.model2",
21
- "hf_location": "tachygraphy-microtext-normalization-iemk/ProphetNet_ForCondGen_Uncased_Large_HFTSeq2Seq_Batch4_ngram3",
22
  "tokenizer_class": "ProphetNetTokenizer",
23
  "model_class": "ProphetNetForConditionalGeneration",
24
  "problem_type": "text_transformamtion_and_normalization",
@@ -33,7 +33,7 @@
33
  "name": "Google T5 v1.1 Base for Conditional Text Generation",
34
  "type": "hf_automodel_finetuned_gt5tctg",
35
  "module_path": "hmv_cfg_base_stage3.model3",
36
- "hf_location": "tachygraphy-microtext-normalization-iemk/T5-1.1-HF-seq2seq-Trainer-Batch4",
37
  "tokenizer_class": "T5Tokenizer",
38
  "model_class": "T5ForConditionalGeneration",
39
  "problem_type": "text_transformamtion_and_normalization",
 
3
  "name": "Facebook BART Base for Conditional Text Generation",
4
  "type": "hf_automodel_finetuned_fbtctg",
5
  "module_path": "hmv_cfg_base_stage3.model1",
6
+ "hf_location": "Tachygraphy-Microtext-Normalization-IEMK25/BART-base-HF-Seq2Seq-Trainer-Batch4",
7
  "tokenizer_class": "BartTokenizer",
8
  "model_class": "BartForConditionalGeneration",
9
  "problem_type": "text_transformamtion_and_normalization",
 
18
  "name": "Microsoft Prophet Net Uncased Large for Conditional Text Generation",
19
  "type": "hf_automodel_finetuned_mstctg",
20
  "module_path": "hmv_cfg_base_stage3.model2",
21
+ "hf_location": "Tachygraphy-Microtext-Normalization-IEMK25/ProphetNet_ForCondGen_Uncased_Large_HFTSeq2Seq_Batch4_ngram3",
22
  "tokenizer_class": "ProphetNetTokenizer",
23
  "model_class": "ProphetNetForConditionalGeneration",
24
  "problem_type": "text_transformamtion_and_normalization",
 
33
  "name": "Google T5 v1.1 Base for Conditional Text Generation",
34
  "type": "hf_automodel_finetuned_gt5tctg",
35
  "module_path": "hmv_cfg_base_stage3.model3",
36
+ "hf_location": "Tachygraphy-Microtext-Normalization-IEMK25/T5-1.1-HF-seq2seq-Trainer-Batch4",
37
  "tokenizer_class": "T5Tokenizer",
38
  "model_class": "T5ForConditionalGeneration",
39
  "problem_type": "text_transformamtion_and_normalization",
transformation_and_Normalization/transformationNormalization_main.py CHANGED
@@ -36,6 +36,11 @@ EMOTION_MOODTAG_LABELS = [
36
  "sadness", "surprise"
37
  ]
38
 
 
 
 
 
 
39
  current_model = None
40
  current_tokenizer = None
41
 
@@ -490,54 +495,54 @@ def transform_and_normalize():
490
  st.write(predictions[0])
491
 
492
  # Only show the feedback form if a prediction has been generated
493
- if st.session_state.get("prediction_generated", False):
494
- if not st.session_state.get("feedback_submitted", False):
495
- with st.form("feedback_form", clear_on_submit=True, border=False):
496
- st.error("New API keys are coming in Q2 2025, May 1st, old API authentication will be deprecated and blocked by Postgrest.")
497
- st.warning("This form and database are running in test mode, please be careful with your data.")
498
- st.write("### Data Collection Form")
499
- st.write("#### If the predictions generated are wrong, please provide feedback to help improve the model.")
500
- col1, col2 = st.columns(2)
501
- with col1:
502
- feedback = st.text_input(
503
- "Enter the correct expanded standard formal English text:",
504
- key="feedback_input"
505
- )
506
- with col2:
507
- feedback2 = st.text_input(
508
- "Enter any one of the wrongly predicted text:",
509
- key="feedback_input2"
510
- )
511
- submit_feedback = st.form_submit_button("Submit Feedback")
512
- if submit_feedback and feedback.strip() and feedback2.strip():
513
- data_to_insert = {
514
- # "id" : str(uuid.uuid4()), # text
515
- # "created_at": datetime.now(timezone.utc).isoformat(), # timestamp
516
- "input_text": user_input, # text
517
- "correct_text_by_user": feedback, # text
518
- "model_used": selected_model, # text
519
- "wrong_pred_any": feedback2 if feedback2.strip() else ""
520
- }
521
- # Here we use the supabase client already created above
522
- # supabase = get_connection()
523
- # load_dotenv()
524
- # print("SUPABASE_URL:", os.environ.get("SUPABASE_URL"))
525
- # print("anon_key:", os.environ.get("anon_key"))
526
- # print("table3_name:", os.environ.get("table3_name"))
527
- # load_dotenv(dotenv_path=env_path)
528
- # load_dotenv()
529
- # supabase: Client = create_client(os.environ.get("SUPABASE_URL"), os.environ.get("anon_key"))
530
- # response = supabase.table(os.environ.get("table3_name")).insert(data_to_insert, returning="minimal").execute()
531
- try:
532
- supabase: Client = create_client(get_env_variable("SUPABASE_DB_TACHYGRAPHY_DB_URL"), get_env_variable("SUPABASE_DB_TACHYGRAPHY_ANON_API_KEY"))
533
- response = supabase.table(get_env_variable("SUPABASE_DB_TACHYGRAPHY_DB_STAGE3_TABLE")).insert(data_to_insert, returning="minimal").execute()
534
- st.success("Feedback submitted successfully!")
535
- st.session_state.feedback_submitted = True
536
- except Exception as e:
537
- st.error(f"Feedback submission failed: {e}")
538
 
539
- else:
540
- st.info("Feedback already submitted for this prediction.")
541
 
542
  if __name__ == "__main__":
543
  transform_and_normalize()
 
36
  "sadness", "surprise"
37
  ]
38
 
39
+ SENTIMENT_POLARITY_LABELS = [
40
+ "negative", "neutral", "positive"
41
+ ]
42
+
43
+
44
  current_model = None
45
  current_tokenizer = None
46
 
 
495
  st.write(predictions[0])
496
 
497
  # Only show the feedback form if a prediction has been generated
498
+ # if st.session_state.get("prediction_generated", False):
499
+ # if not st.session_state.get("feedback_submitted", False):
500
+ # with st.form("feedback_form", clear_on_submit=True, border=False):
501
+ # st.error("New API keys are coming in Q2 2025, May 1st, old API authentication will be deprecated and blocked by Postgrest.")
502
+ # st.warning("This form and database are running in test mode, please be careful with your data.")
503
+ # st.write("### Data Collection Form")
504
+ # st.write("#### If the predictions generated are wrong, please provide feedback to help improve the model.")
505
+ # col1, col2 = st.columns(2)
506
+ # with col1:
507
+ # feedback = st.text_input(
508
+ # "Enter the correct expanded standard formal English text:",
509
+ # key="feedback_input"
510
+ # )
511
+ # with col2:
512
+ # feedback2 = st.text_input(
513
+ # "Enter any one of the wrongly predicted text:",
514
+ # key="feedback_input2"
515
+ # )
516
+ # submit_feedback = st.form_submit_button("Submit Feedback")
517
+ # if submit_feedback and feedback.strip() and feedback2.strip():
518
+ # data_to_insert = {
519
+ # # "id" : str(uuid.uuid4()), # text
520
+ # # "created_at": datetime.now(timezone.utc).isoformat(), # timestamp
521
+ # "input_text": user_input, # text
522
+ # "correct_text_by_user": feedback, # text
523
+ # "model_used": selected_model, # text
524
+ # "wrong_pred_any": feedback2 if feedback2.strip() else ""
525
+ # }
526
+ # # Here we use the supabase client already created above
527
+ # # supabase = get_connection()
528
+ # # load_dotenv()
529
+ # # print("SUPABASE_URL:", os.environ.get("SUPABASE_URL"))
530
+ # # print("anon_key:", os.environ.get("anon_key"))
531
+ # # print("table3_name:", os.environ.get("table3_name"))
532
+ # # load_dotenv(dotenv_path=env_path)
533
+ # # load_dotenv()
534
+ # # supabase: Client = create_client(os.environ.get("SUPABASE_URL"), os.environ.get("anon_key"))
535
+ # # response = supabase.table(os.environ.get("table3_name")).insert(data_to_insert, returning="minimal").execute()
536
+ # try:
537
+ # supabase: Client = create_client(get_env_variable("SUPABASE_DB_TACHYGRAPHY_DB_URL"), get_env_variable("SUPABASE_DB_TACHYGRAPHY_ANON_API_KEY"))
538
+ # response = supabase.table(get_env_variable("SUPABASE_DB_TACHYGRAPHY_DB_STAGE3_TABLE")).insert(data_to_insert, returning="minimal").execute()
539
+ # st.success("Feedback submitted successfully!")
540
+ # st.session_state.feedback_submitted = True
541
+ # except Exception as e:
542
+ # st.error(f"Feedback submission failed: {e}")
543
 
544
+ # else:
545
+ # st.info("Feedback already submitted for this prediction.")
546
 
547
  if __name__ == "__main__":
548
  transform_and_normalize()