Archisman Karmakar committed
Commit 9bbbd14 · 1 Parent(s): 30a70ad

2025.03.21.post1 MAJOR

app_main_hf.py CHANGED
@@ -41,6 +41,7 @@ import importlib.util
 
 from emotionMoodtag_analysis.emotion_analysis_main import show_emotion_analysis
 from sentimentPolarity_analysis.sentiment_analysis_main import show_sentiment_analysis
+from transformation_and_Normalization.transformationNormalization_main import transform_and_normalize
 from dashboard import show_dashboard
 
 
@@ -89,6 +90,10 @@ def free_memory():
 
 
 def main():
+
+    if "current_page" not in st.session_state:
+        st.session_state.current_page = None
+
     # selection = option_menu(
     #     menu_title="Navigation",
     #     options=[
@@ -147,27 +152,34 @@ def main():
     #     # show_text_transformation()
     #     st.write("This section is under development.")
 
-    if selection == "Dashboard":
+
+
+    if st.session_state.current_page != selection:
         st.cache_resource.clear()
         free_memory()
+        st.session_state.current_page = selection
+
+    if selection == "Dashboard":
+        # st.cache_resource.clear()
+        # free_memory()
         show_dashboard()
 
     elif selection == "Stage 1: Sentiment Polarity Analysis":
-        st.cache_resource.clear()
-        free_memory()
+        # st.cache_resource.clear()
+        # free_memory()
         show_sentiment_analysis()
 
     elif selection == "Stage 2: Emotion Mood-tag Analysis":
-        st.cache_resource.clear()
-        free_memory()
+        # st.cache_resource.clear()
+        # free_memory()
        show_emotion_analysis()
         # st.write("This section is under development.")
 
     elif selection == "Stage 3: Text Transformation & Normalization":
-        st.cache_resource.clear()
+        # st.cache_resource.clear()
         # free_memory()
-        # show_text_transformation()
-        st.write("This section is under development.")
+        transform_and_normalize()
+        # st.write("This section is under development.")
 
 
 
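The navigation change above moves `st.cache_resource.clear()` and `free_memory()` out of the individual page branches: cleanup now runs once per page switch, tracked through `st.session_state.current_page`, rather than on every Streamlit rerun. A minimal sketch of the pattern, with a sidebar radio standing in for the app's `option_menu` navigation and a stubbed `free_memory`:

```python
import streamlit as st

def free_memory():
    """Placeholder for the app's model deletion / GC / CUDA cache cleanup."""

# Stand-in for the app's option_menu-based navigation widget.
selection = st.sidebar.radio(
    "Navigation",
    ["Dashboard",
     "Stage 1: Sentiment Polarity Analysis",
     "Stage 2: Emotion Mood-tag Analysis",
     "Stage 3: Text Transformation & Normalization"],
)

if "current_page" not in st.session_state:
    st.session_state.current_page = None

# Heavy cleanup fires only on an actual page change, not on every rerun
# of the same page (e.g. when a widget on the page is touched).
if st.session_state.current_page != selection:
    st.cache_resource.clear()
    free_memory()
    st.session_state.current_page = selection
```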
emotionMoodtag_analysis/emotion_analysis_main.py CHANGED
@@ -202,6 +202,16 @@ if "disabled" not in st.session_state:
 
 # Enabling Resource caching
 def show_emotion_analysis():
+
+    model_names = list(MODEL_OPTIONS.keys())
+
+    # Check if the stored selected model is valid; if not, reset it
+    if "selected_model" in st.session_state:
+        if st.session_state.selected_model not in model_names:
+            st.session_state.selected_model = model_names[0]
+    else:
+        st.session_state.selected_model = model_names[0]
+
     st.title("Stage 2: Emotion Mood-tag Analysis")
     st.write("This section handles emotion mood-tag analysis.")
 
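The same guard is added to both stage pages (see the matching hunk in sentiment_analysis_main.py below): all pages share the `"selected_model"` session key, so a model name carried over from another stage may be absent from this page's `MODEL_OPTIONS`, and `st.selectbox(..., key="selected_model")` would then fail. A compact equivalent of the reset logic, with a stand-in `MODEL_OPTIONS`:

```python
import streamlit as st

MODEL_OPTIONS = {"Model A": {}, "Model B": {}}  # stand-in for the page's real config

model_names = list(MODEL_OPTIONS.keys())
# st.session_state supports dict-style .get(); a missing key and a stale
# value both fall back to the first model registered for this page.
if st.session_state.get("selected_model") not in model_names:
    st.session_state.selected_model = model_names[0]
```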
pyproject.toml CHANGED
@@ -1,6 +1,6 @@
 [project]
 name = "tachygraphy-microtext-analysis-and-normalization"
-version = "2025.03.20.post1"
+version = "2025.03.21.post1"
 description = ""
 authors = [
     { name = "Archisman Karmakar", email = "[email protected]" },
pyprojectOLD.toml CHANGED
@@ -1,6 +1,7 @@
 [project]
 name = "tachygraphy-microtext-analysis-and-normalization"
-version = "2025.03.18.post5"
+version = "2025.03.21.post1"
+# version = "2025.03.18.post5"
 # version = "2025.03.18.post4_3"
 # version = "2025.03.18.post3"
 # version = "2025.03.18.post2"
sentimentPolarity_analysis/sentiment_analysis_main.py CHANGED
@@ -201,6 +201,15 @@ if "disabled" not in st.session_state:
 
 def show_sentiment_analysis():
 
+    model_names = list(MODEL_OPTIONS.keys())
+
+    # Check if the stored selected model is valid; if not, reset it
+    if "selected_model" in st.session_state:
+        if st.session_state.selected_model not in model_names:
+            st.session_state.selected_model = model_names[0]
+    else:
+        st.session_state.selected_model = model_names[0]
+
     st.title("Stage 1: Sentiment Polarity Analysis")
     st.write("This section handles sentiment analysis.")
 
transformation_and_Normalization/__init__.py ADDED
File without changes
transformation_and_Normalization/config/stage3_models.json ADDED
@@ -0,0 +1,17 @@
+{
+    "1": {
+        "name": "Facebook BART Base for Conditional Text Generation",
+        "type": "hf_automodel_finetuned_fbtctg",
+        "module_path": "hmv_cfg_base_stage3.model1",
+        "hf_location": "tachygraphy-microtrext-norm-org/BART-base-HF-Seq2Seq-Trainer-Batch4",
+        "tokenizer_class": "BartTokenizer",
+        "model_class": "BartForConditionalGeneration",
+        "problem_type": "text_transformamtion_and_normalization",
+        "base_model": "facebook/bart-base",
+        "base_model_class": "BartForConditionalGeneration",
+        "device": "cpu",
+        "max_top_k": 50265,
+        "load_function": "load_model",
+        "predict_function": "predict"
+    }
+}
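This registry drives the dynamic loader in transformationNormalization_main.py further down: the JSON is read once, the `name` fields become the dropdown labels, and `module_path` / `load_function` / `predict_function` are resolved via importlib. `max_top_k` is 50265, the size of the BART vocabulary, and caps the top-k widgets. A reading sketch (path assumed relative to the repo root):

```python
import json

# Sketch: how a consumer reads this registry.
with open("transformation_and_Normalization/config/stage3_models.json") as f:
    registry = json.load(f)

entry = registry["1"]
print(entry["name"])              # dropdown label
print(entry["module_path"])       # hmv_cfg_base_stage3.model1
print(entry["load_function"], entry["predict_function"])
```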
transformation_and_Normalization/hmv_cfg_base_stage3/__init__.py ADDED
File without changes
transformation_and_Normalization/hmv_cfg_base_stage3/imports.py ADDED
@@ -0,0 +1,25 @@
+import os
+import sys
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
+
+import streamlit as st
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel
+# import torch
+import numpy as np
+import matplotlib.pyplot as plt
+import plotly.express as px
+import pandas as pd
+import json
+import gc
+import psutil
+import importlib
+import importlib.util
+import asyncio
+# import pytorch_lightning as pl
+
+import safetensors
+from safetensors import load_file, save_file
+import json
+import huggingface_hub
+from huggingface_hub import hf_hub_download
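One caveat: as far as I know, the safetensors package exposes `load_file` / `save_file` per framework rather than at the top level, so `from safetensors import load_file, save_file` would raise ImportError at import time; the PyTorch variants would be (assumption based on the published safetensors API):

```python
from safetensors.torch import load_file, save_file
```

(`import json` also appears twice above, which is harmless but redundant.)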
transformation_and_Normalization/hmv_cfg_base_stage3/model1.py ADDED
@@ -0,0 +1,117 @@
+from transformers import BartForConditionalGeneration, BartTokenizer, AutoTokenizer, AutoModelForSequenceClassification, AutoModel
+import torch.nn.functional as F
+from imports import *
+import torch.nn as nn
+import torch
+import os
+import sys
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+CONFIG_STAGE2 = os.path.join(BASE_DIR, "..", "config", "stage2_models.json")
+
+
+MODEL_OPTIONS = {
+    "1": {
+        "name": "Facebook BART Base for Conditional Text Generation",
+        "type": "hf_automodel_finetuned_fbtctg",
+        "module_path": "hmv_cfg_base_stage3.model1",
+        "hf_location": "tachygraphy-microtrext-norm-org/BART-base-HF-Seq2Seq-Trainer-Batch4",
+        "tokenizer_class": "BartTokenizer",
+        "model_class": "BartForConditionalGeneration",
+        "problem_type": "text_transformamtion_and_normalization",
+        "base_model": "facebook/bart-base",
+        "base_model_class": "BartForConditionalGeneration",
+        "device": "cpu",
+        "max_top_k": 50265,
+        "load_function": "load_model",
+        "predict_function": "predict"
+    }
+}
+
+model_key = "1"
+model_info = MODEL_OPTIONS[model_key]
+hf_location = model_info["hf_location"]
+
+tokenizer_class = globals()[model_info["tokenizer_class"]]
+model_class = globals()[model_info["model_class"]]
+
+
+@st.cache_resource
+def load_model():
+    tokenizer = tokenizer_class.from_pretrained(hf_location)
+    print("Loading model 1")
+    model = model_class.from_pretrained(hf_location,
+                                        device_map=torch.device(
+                                            "cuda" if torch.cuda.is_available() else "cpu")
+                                        )
+    print("Model 1 loaded")
+
+    return model, tokenizer
+
+
+def predict(
+    model, tokenizer, text, device,
+    num_return_sequences=1,
+    beams=None,        # Beam search
+    do_sample=False,   # Sampling flag
+    temp=None,         # Temperature (only for sampling)
+    top_p=None,
+    top_k=None,
+    max_new_tokens=1024,
+    early_stopping=True
+):
+    # Tokenize input
+    padded = tokenizer(text, return_tensors='pt', truncation=False, padding=True).to(device)
+    input_ids = padded['input_ids'].to(device)
+    attention_mask = padded['attention_mask'].to(device)
+
+    # Validate arguments
+    if beams is not None and do_sample:
+        raise ValueError("Cannot use `beams` and `do_sample=True` together. Choose either beam search (`beams=5`) or sampling (`do_sample=True, temp=0.7`).")
+
+    if temp is not None and not do_sample:
+        raise ValueError("`temp` (temperature) can only be used in sampling mode (`do_sample=True`).")
+
+    if (top_p is not None or top_k is not None) and not do_sample:
+        raise ValueError("`top_p` and `top_k` can only be used in sampling mode (`do_sample=True`).")
+
+    # Beam search (Deterministic)
+    if beams is not None:
+        outputs = model.generate(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            max_new_tokens=max_new_tokens,
+            num_return_sequences=num_return_sequences,
+            num_beams=beams,
+            early_stopping=early_stopping,
+            do_sample=False  # No randomness
+        )
+
+    # Sampling Cases
+    else:
+        generate_args = {
+            "input_ids": input_ids,
+            "attention_mask": attention_mask,
+            "max_new_tokens": max_new_tokens,
+            "num_return_sequences": num_return_sequences,
+            "do_sample": True,  # Enable stochastic sampling
+            "temperature": temp if temp is not None else 0.7,  # Default temp if not passed
+        }
+
+        # Add `top_p` if set
+        if top_p is not None:
+            generate_args["top_p"] = top_p
+
+        # Add `top_k` if set
+        if top_k is not None:
+            generate_args["top_k"] = top_k
+
+        # Generate
+        outputs = model.generate(**generate_args)
+
+    # Decode predictions into human-readable text
+    predictions = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+
+    return predictions
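`predict()` enforces mutually exclusive decoding modes through its guards, and `CONFIG_STAGE2` looks carried over from the Stage 2 module (it is unused in this file). A hypothetical driver showing the two valid calling conventions:

```python
import torch

# Assumes load_model() and predict() from model1.py above.
model, tokenizer = load_model()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Deterministic: beam search. temp/top_p/top_k must stay None, or the
# validation above raises ValueError.
beam_outputs = predict(model, tokenizer, "gm, hru?", device,
                       num_return_sequences=2, beams=3)

# Stochastic: sampling with temperature, nucleus (top-p) and top-k filtering.
sampled_outputs = predict(model, tokenizer, "gm, hru?", device,
                          do_sample=True, temp=0.7, top_p=0.9, top_k=50)
```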
transformation_and_Normalization/transformationNormalization_main.py ADDED
@@ -0,0 +1,585 @@
+import shutil
+from transformers.utils.hub import TRANSFORMERS_CACHE
+import torch
+import time
+import joblib
+import importlib.util
+from imports import *
+import os
+import sys
+import time
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
+
+# from transformers.utils import move_cache_to_trash
+# from huggingface_hub import delete_cache
+
+
+# from hmv_cfg_base_stage1.model1 import load_model as load_model1
+# from hmv_cfg_base_stage1.model1 import predict as predict1
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+CONFIG_STAGE3 = os.path.join(BASE_DIR, "config", "stage3_models.json")
+LOADERS_STAGE3 = os.path.join(BASE_DIR, "hmv_cfg_base_stage3")
+
+
+EMOTION_MOODTAG_LABELS = [
+    "anger", "disgust", "fear", "joy", "neutral",
+    "sadness", "surprise"
+]
+
+current_model = None
+current_tokenizer = None
+
+
+# Enabling Resource caching
+
+
+# @st.cache_resource
+def load_model_config():
+    with open(CONFIG_STAGE3, "r") as f:
+        model_data = json.load(f)
+
+    # Extract names for dropdown
+    model_options = {v["name"]: v for v in model_data.values()}
+    return model_data, model_options
+
+
+MODEL_DATA, MODEL_OPTIONS = load_model_config()
+
+
+# ✅ Dynamically Import Model Functions
+def import_from_module(module_name, function_name):
+    try:
+        module = importlib.import_module(module_name)
+        return getattr(module, function_name)
+    except (ModuleNotFoundError, AttributeError) as e:
+        st.error(f"❌ Import Error: {e}")
+        return None
+
+
+def free_memory():
+    # """Free up CPU & GPU memory before loading a new model."""
+    global current_model, current_tokenizer
+
+    if current_model is not None:
+        del current_model  # Delete the existing model
+        current_model = None  # Reset reference
+
+    if current_tokenizer is not None:
+        del current_tokenizer  # Delete the tokenizer
+        current_tokenizer = None
+
+    gc.collect()  # Force garbage collection for CPU memory
+
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()  # Free GPU memory
+        torch.cuda.ipc_collect()  # Clean up PyTorch GPU cache
+
+    # If running on CPU, reclaim memory using OS-level commands
+    try:
+        if torch.cuda.is_available() is False:
+            psutil.virtual_memory()  # Refresh memory stats
+    except Exception as e:
+        print(f"Memory cleanup error: {e}")
+
+    # Delete cached Hugging Face models
+    try:
+        cache_dir = TRANSFORMERS_CACHE
+        if os.path.exists(cache_dir):
+            shutil.rmtree(cache_dir)
+            print("Cache cleared!")
+    except Exception as e:
+        print(f"❌ Cache cleanup error: {e}")
+
+
+def load_selected_model(model_name):
+    global current_model, current_tokenizer
+
+    # st.cache_resource.clear()
+
+    # free_memory()
+
+    # st.write("DEBUG: Available Models:", MODEL_OPTIONS.keys())  # ✅ See available models
+    # st.write("DEBUG: Selected Model:", MODEL_OPTIONS[model_name])  # ✅ Check selected model
+    # st.write("DEBUG: Model Name:", model_name)  # ✅ Check selected model
+
+    if model_name not in MODEL_OPTIONS:
+        st.error(f"⚠️ Model '{model_name}' not found in config!")
+        return None, None, None
+
+    model_info = MODEL_OPTIONS[model_name]
+    hf_location = model_info["hf_location"]
+
+    model_module = model_info["module_path"]
+    load_function = model_info["load_function"]
+    predict_function = model_info["predict_function"]
+
+    load_model_func = import_from_module(model_module, load_function)
+    predict_func = import_from_module(model_module, predict_function)
+
+    if load_model_func is None or predict_func is None:
+        st.error("❌ Model functions could not be loaded!")
+        return None, None, None
+
+    model, tokenizer = load_model_func()
+
+    current_model, current_tokenizer = model, tokenizer
+    return model, tokenizer, predict_func
+
+
+def disable_ui():
+    st.components.v1.html(
+        """
+        <style>
+        #ui-disable-overlay {
+            position: fixed;
+            top: 0;
+            left: 0;
+            width: 100vw;
+            height: 100vh;
+            background-color: rgba(200, 200, 200, 0.5);
+            z-index: 9999;
+        }
+        </style>
+        <div id="ui-disable-overlay"></div>
+        """,
+        height=0,
+        scrolling=False
+    )
+
+
+def enable_ui():
+    st.components.v1.html(
+        """
+        <script>
+        var overlay = document.getElementById("ui-disable-overlay");
+        if (overlay) {
+            overlay.parentNode.removeChild(overlay);
+        }
+        </script>
+        """,
+        height=0,
+        scrolling=False
+    )
+
+# Function to increment progress dynamically
+
+
+def update_progress(progress_bar, start, end, delay=0.1):
+    for i in range(start, end + 1, 5):  # Increment in steps of 5%
+        progress_bar.progress(i)
+        time.sleep(delay)  # Simulate processing time
+        # st.experimental_rerun()  # Refresh the page
+
+
+# Function to update session state when model changes
+def on_model_change():
+    st.cache_resource.clear()
+    free_memory()
+    st.session_state.model_changed = True  # Mark model as changed
+
+
+# Function to update session state when text changes
+
+
+def on_text_change():
+    st.session_state.text_changed = True  # Mark text as changed
+
+
+def update_top_k_from_slider():
+    st.session_state.top_k = st.session_state.top_k_slider
+
+
+def update_top_k_from_input():
+    st.session_state.top_k = st.session_state.top_k_input
+
+
+# Initialize session state variables
+if "selected_model" not in st.session_state:
+    st.session_state.selected_model = list(MODEL_OPTIONS.keys())[
+        0]  # Default model
+if "user_input" not in st.session_state:
+    st.session_state.user_input = ""
+if "last_processed_input" not in st.session_state:
+    st.session_state.last_processed_input = ""
+if "model_changed" not in st.session_state:
+    st.session_state.model_changed = False
+if "text_changed" not in st.session_state:
+    st.session_state.text_changed = False
+if "disabled" not in st.session_state:
+    st.session_state.disabled = False
+
+if "top_k" not in st.session_state:
+    st.session_state.top_k = 50
+
+
+if "last_change" not in st.session_state:
+    st.session_state.last_change = time.time()
+if "auto_predict_triggered" not in st.session_state:
+    st.session_state.auto_predict_triggered = False
+
+
+def transform_and_normalize():
+    # No cache clearing here—only in the model change callback!
+
+    # st.write(st.session_state)
+
+    if "top_k" not in st.session_state:
+        st.session_state.top_k = 50
+
+    model_names = list(MODEL_OPTIONS.keys())
+
+    # Check if the stored selected model is valid; if not, reset it
+    if "selected_model" in st.session_state:
+        if st.session_state.selected_model not in model_names:
+            st.session_state.selected_model = model_names[0]
+    else:
+        st.session_state.selected_model = model_names[0]
+
+    st.title("Stage 3: Text Transformation & Normalization")
+    st.write("This section handles the transformation and normalization of informal text into standard formal English.")
+
+    # Model selection with change detection; clearing cache happens in on_model_change()
+    selected_model = st.selectbox(
+        "Choose a model:", model_names, key="selected_model", on_change=on_model_change
+    )
+
+    # Text input with change detection
+    user_input = st.text_input(
+        "Enter text for emotions mood-tag analysis:", key="user_input", on_change=on_text_change
+    )
+
+    st.markdown("#### Generation Parameters")
+    col1, col2 = st.columns(2)
+
+    with col1:
+        use_beam = st.checkbox("Use Beam Search", value=False)
+        if use_beam:
+            beams = st.number_input("Number of beams:", min_value=1, max_value=10, value=3, step=1)
+            do_sample = False
+            temp = None
+            top_p = None
+            top_k = None
+        else:
+            beams = None
+            do_sample = st.checkbox("Enable Sampling", value=True)
+            temp = st.slider("Temperature:", min_value=0.1, max_value=2.0, value=0.4, step=0.1) if do_sample else None
+
+    with col2:
+        top_p = st.slider("Top-p (nucleus sampling):", min_value=0.0, max_value=1.0, value=0.9, step=0.05) if (not use_beam and do_sample) else None
+        model_config = MODEL_OPTIONS[selected_model]
+        max_top_k = model_config.get("max_top_k", 50)
+        if not use_beam and do_sample:
+            col_slider, col_input = st.columns(2)
+            st.write("Top-K: Top K most probable tokens, recommended range: 10-60")
+            with col_slider:
+                top_k_slider = st.slider(
+                    "Top-k (slider):",
+                    min_value=0,
+                    max_value=max_top_k,
+                    value=st.session_state.top_k,
+                    step=1,
+                    key="top_k_slider",
+                    on_change=update_top_k_from_slider
+                )
+            with col_input:
+                top_k_input = st.number_input(
+                    "Top-k (number input):",
+                    min_value=0,
+                    max_value=max_top_k,
+                    value=st.session_state.top_k,
+                    step=1,
+                    key="top_k_input",
+                    on_change=update_top_k_from_input
+                )
+            final_top_k = st.session_state.top_k
+        else:
+            final_top_k = None
+
+    col_tokens, col_return = st.columns(2)
+    with col_tokens:
+        max_new_tokens = st.number_input("Max New Tokens:", min_value=1, value=1024, step=1)
+        early_stopping = st.checkbox("Early Stopping", value=True)
+    with col_return:
+        if beams is not None:
+            num_return_sequences = st.number_input(
+                "Num Return Sequences:",
+                min_value=1,
+                max_value=beams,
+                value=1,
+                step=1
+            )
+        else:
+            num_return_sequences = st.number_input(
+                "Num Return Sequences:",
+                min_value=1,
+                max_value=3,
+                value=1,
+                step=1
+            )
+    user_input_copy = user_input
+
+    current_time = time.time()
+    if user_input.strip() and (current_time - st.session_state.last_change >= 1.5):
+        st.session_state.last_processed_input = user_input
+
+        progress_bar = st.progress(0)
+        update_progress(progress_bar, 0, 10)
+        with st.spinner("Predicting..."):
+            model, tokenizer, predict_func = load_selected_model(selected_model)
+            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+            if model is None:
+                st.error("⚠️ Error: Model failed to load!")
+                st.stop()
+            if hasattr(model, "to"):
+                model.to(device)
+            predictions = predict_func(
+                model, tokenizer, user_input, device,
+                num_return_sequences,
+                beams,
+                do_sample,
+                temp,
+                top_p,
+                final_top_k,
+                max_new_tokens,
+                early_stopping
+            )
+            update_progress(progress_bar, 10, 100)
+
+            if len(predictions) > 1:
+                st.write("### Multiple Predictions:")
+                for i, pred in enumerate(predictions, start=1):
+                    st.markdown(f"**Sequence {i}:** {pred}")
+            else:
+                st.write("### Prediction:")
+                st.write(predictions[0])
+            progress_bar.empty()
+    # else:
+    #     st.info("Waiting for input to settle...")
+
+if __name__ == "__main__":
+    transform_and_normalize()
+
+
+
+
+# # Main function to show the app
+# def transform_and_normalize():
+
+#     # st.cache_resource.clear()
+#     # free_memory()
+
+#     if "top_k" not in st.session_state:
+#         st.session_state.top_k = 50
+
+#     model_names = list(MODEL_OPTIONS.keys())
+
+#     # Check if the stored selected model is valid; if not, reset it
+#     if "selected_model" in st.session_state:
+#         if st.session_state.selected_model not in model_names:
+#             st.session_state.selected_model = model_names[0]
+#     else:
+#         st.session_state.selected_model = model_names[0]
+
+#     st.title("Stage 3: Text Transformation & Normalization")
+#     st.write("This section handles the transformation and normalization of informal text containing short-hands (microtexts), abbreviations, acronyms, slangs, multilingual conversational text etc. into readable, understandable standard formal English.")
+
+#     # Model selection with change detection
+#     selected_model = st.selectbox(
+#         "Choose a model:", model_names, key="selected_model", on_change=on_model_change
+#     )
+
+#     # Text input with change detection
+#     user_input = st.text_input(
+#         "Enter text for emotions mood-tag analysis:", key="user_input", on_change=on_text_change
+#     )
+
+#     st.markdown("#### Generation Parameters")
+#     col1, col2 = st.columns(2)
+
+#     with col1:
+#         use_beam = st.checkbox("Use Beam Search", value=False)
+#         if use_beam:
+#             beams = st.number_input(
+#                 "Number of beams:", min_value=1, value=5, step=1)
+#             do_sample = False
+#             temp = None
+#             top_p = None
+#             top_k = None
+#         else:
+#             beams = None
+#             do_sample = st.checkbox("Enable Sampling", value=True)
+#             temp = st.slider("Temperature:", min_value=0.1, max_value=2.0,
+#                              value=0.7, step=0.1) if do_sample else None
+
+#     with col2:
+#         top_p = st.slider("Top-p (nucleus sampling):", min_value=0.0, max_value=1.0,
+#                           value=0.9, step=0.05) if not use_beam and do_sample else None
+#         model_config = MODEL_OPTIONS[selected_model]
+#         max_top_k = model_config.get("max_top_k", 50)
+#         # top_k = st.number_input("Top-k:", min_value=0, value=50, step=1) if not use_beam and do_sample else None
+#         # top_k = st.slider("Top-k:", min_value=0, max_value=max_top_k, value=50, step=1) if (not use_beam and do_sample) else None
+
+#         if not use_beam and do_sample:
+
+#             col_slider, col_input = st.columns(2)
+
+#             with col_slider:
+#                 top_k_slider = st.slider(
+#                     "Top-k (slider):",
+#                     min_value=0,
+#                     max_value=max_top_k,
+#                     value=st.session_state.top_k,
+#                     step=1,
+#                     key="top_k_slider",
+#                     on_change=update_top_k_from_slider
+#                 )
+#             with col_input:
+#                 top_k_input = st.number_input(
+#                     "Top-k (number input):",
+#                     min_value=0,
+#                     max_value=max_top_k,
+#                     value=st.session_state.top_k,
+#                     step=1,
+#                     key="top_k_input",
+#                     on_change=update_top_k_from_input
+#                 )
+#             final_top_k = st.session_state.top_k
+#         else:
+#             final_top_k = None
+
+#     # max_new_tokens = st.number_input("Max New Tokens:", min_value=1, value=1024, step=1)
+#     # early_stopping = st.checkbox("Early Stopping", value=True)
+#     # num_return_sequences = st.number_input("Num Return Sequences:", min_value=1, value=1, step=1)
+
+#     col_tokens, col_return = st.columns(2)
+
+#     with col_tokens:
+#         max_new_tokens = st.number_input(
+#             "Max New Tokens:", min_value=1, value=1024, step=1)
+#         early_stopping = st.checkbox("Early Stopping", value=True)
+
+#     with col_return:
+#         if beams is not None:
+#             num_return_sequences = st.number_input(
+#                 "Num Return Sequences:",
+#                 min_value=1,
+#                 max_value=beams,
+#                 value=1,
+#                 step=1
+#             )
+#         else:
+#             num_return_sequences = st.number_input(
+#                 "Num Return Sequences:",
+#                 min_value=1,
+#                 max_value=3,
+#                 value=1,
+#                 step=1
+#             )
+
+#     user_input_copy = user_input
+
+
+
+#     current_time = time.time()
+#     if user_input.strip() and (current_time - st.session_state.last_change >= 1.5):
+#         # Reset change flag (if needed)
+#         st.session_state.last_processed_input = user_input
+
+#         progress_bar = st.progress(0)
+#         update_progress(progress_bar, 0, 10)
+#         with st.spinner("Predicting..."):
+#             model, tokenizer, predict_func = load_selected_model(selected_model)
+#             device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+#             if model is None:
+#                 st.error("⚠️ Error: Model failed to load!")
+#                 st.stop()
+#             if hasattr(model, "to"):
+#                 model.to(device)
+#             predictions = predict_func(
+#                 model, tokenizer, user_input, device,
+#                 num_return_sequences,
+#                 beams,
+#                 do_sample,
+#                 temp,
+#                 top_p,
+#                 final_top_k,
+#                 max_new_tokens,
+#                 early_stopping
+#             )
+#             update_progress(progress_bar, 10, 100)
+
+#             if len(predictions) > 1:
+#                 st.write("### Multiple Predictions:")
+#                 for i, pred in enumerate(predictions, start=1):
+#                     st.markdown(f"**Sequence {i}:** {pred}")
+#             else:
+#                 st.write("### Prediction:")
+#                 st.write(predictions[0])
+#             progress_bar.empty()
+#     else:
+#         st.info("Waiting for input to settle...")

+# Only run inference if:
+#  1. The text is NOT empty
+#  2. The text has changed OR the model has changed
+# auto_predict = False
+# if user_input.strip():
+#     if (user_input != st.session_state.last_processed_input) or st.session_state.model_changed:
+#         auto_predict = True
+
+# if auto_predict:
+#     run_inference = True
+# else:
+#     run_inference = st.button("Run Prediction")
+
+# if run_inference and user_input.strip():
+#     # Reset change flags and update last processed input
+#     st.session_state.last_processed_input = user_input
+#     st.session_state.model_changed = False
+#     st.session_state.text_changed = False
+
+#     progress_bar = st.progress(0)
+#     update_progress(progress_bar, 0, 10)
+
+#     with st.spinner("Please wait..."):
+#         model, tokenizer, predict_func = load_selected_model(selected_model)
+#         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+#         if model is None:
+#             st.error("⚠️ Error: Model failed to load! Check model selection or configuration.")
+#             st.stop()
+#         if hasattr(model, "to"):
+#             model.to(device)
+
+#         predictions = predict_func(
+#             model, tokenizer, user_input, device,
+#             num_return_sequences,
+#             beams,
+#             do_sample,
+#             temp,
+#             top_p,
+#             final_top_k,
+#             max_new_tokens,
+#             early_stopping
+#         )
+#         update_progress(progress_bar, 10, 100)
+
+#         if len(predictions) > 1:
+#             st.write("### Multiple Predicted Transformed & Normalized Texts:")
+#             for i, pred in enumerate(predictions, start=1):
+#                 st.markdown(f"**Sequence {i}:** {pred}")
+#         else:
+#             st.write("### Predicted Transformed & Normalized Text:")
+#             st.write(predictions[0])
+#         progress_bar.empty()
+
+
+# if __name__ == "__main__":
+#     # st.cache_resource.clear()
+#     # free_memory()
+#     transform_and_normalize()
+#     # show_dashboard()
+#     # show_emotion_analysis()
+#     # show_sentiment_analysis()
+#     # show_text_transformation()
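A note on the auto-predict gate: `current_time - st.session_state.last_change >= 1.5` reads as a typing debounce, but `last_change` is only initialized once and never refreshed in this file, so the condition holds permanently 1.5 s after the session starts and inference runs on every rerun with non-empty input. A sketch (an assumption, not part of this commit) of the callback change that would make the settle window track edits:

```python
import time
import streamlit as st

def on_text_change():
    st.session_state.text_changed = True
    st.session_state.last_change = time.time()  # restart the 1.5 s settle window
```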