Archisman Karmakar committed on
Commit 853c736 · 1 Parent(s): 4d35689

2025.03.20.post1 MAJOR

Files changed (28)
  1. README.md +2 -2
  2. app_main_hf.py +14 -14
  3. dashboard.py +10 -8
  4. emotionMoodtag_analysis/__init__.py +0 -0
  5. emotionMoodtag_analysis/config/stage2_models.json +32 -0
  6. emotionMoodtag_analysis/emotion_analysis_main.py +317 -0
  7. emotionMoodtag_analysis/hmv_cfg_base_stage2/__init__.py +0 -0
  8. {sentiment_analysis/hmv_cfg_base_stage1 → emotionMoodtag_analysis/hmv_cfg_base_stage2}/imports.py +24 -24
  9. emotionMoodtag_analysis/hmv_cfg_base_stage2/model1.py +89 -0
  10. emotionMoodtag_analysis/hmv_cfg_base_stage2/model2.py +163 -0
  11. emotion_analysis.py +0 -9
  12. poetry.lock +15 -15
  13. pyproject.toml +1 -1
  14. pyprojectOLD.toml +2 -1
  15. requirements.txt +2 -2
  16. {sentiment_analysis → sentimentPolarity_analysis}/__init__.py +0 -0
  17. {sentiment_analysis → sentimentPolarity_analysis}/config/stage1_models.json +62 -62
  18. {sentiment_analysis → sentimentPolarity_analysis}/hmv_cfg_base_stage1/__init__.py +1 -1
  19. sentimentPolarity_analysis/hmv_cfg_base_stage1/imports.py +25 -0
  20. {sentiment_analysis → sentimentPolarity_analysis}/hmv_cfg_base_stage1/model1.py +85 -85
  21. {sentiment_analysis → sentimentPolarity_analysis}/hmv_cfg_base_stage1/model2.py +2 -2
  22. {sentiment_analysis → sentimentPolarity_analysis}/hmv_cfg_base_stage1/model3.py +0 -0
  23. {sentiment_analysis → sentimentPolarity_analysis}/hmv_cfg_base_stage1/model4.py +0 -0
  24. {sentiment_analysis → sentimentPolarity_analysis}/sentiment_analysis_main.py +0 -0
  25. sentiment_analysis/hmv_cfg_base_stage1/__pycache__/__init__.cpython-310.pyc +0 -0
  26. sentiment_analysis/hmv_cfg_base_stage1/__pycache__/__init__.cpython-312.pyc +0 -0
  27. sentiment_analysis/hmv_cfg_base_stage1/__pycache__/model1.cpython-310.pyc +0 -0
  28. sentiment_analysis/hmv_cfg_base_stage1/__pycache__/model1.cpython-312.pyc +0 -0
README.md CHANGED
@@ -1,8 +1,8 @@
  ---
  title: Tachygraphy Microtext Analysis And Normalization
  emoji: 💻
- colorFrom: purple
- colorTo: gray
+ colorFrom: orange
+ colorTo: red
  sdk: streamlit
  sdk_version: 1.43.2
  python_version: "3.12"
app_main_hf.py CHANGED
@@ -39,8 +39,8 @@ import importlib.util
  # sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))


- from emotion_analysis import show_emotion_analysis
- from sentiment_analysis.sentiment_analysis_main import show_sentiment_analysis
+ from emotionMoodtag_analysis.emotion_analysis_main import show_emotion_analysis
+ from sentimentPolarity_analysis.sentiment_analysis_main import show_sentiment_analysis
  from dashboard import show_dashboard


@@ -54,15 +54,15 @@ st.set_page_config(

  def free_memory():
      # """Free up CPU & GPU memory before loading a new model."""
-     global current_model, current_tokenizer
+     # global current_model, current_tokenizer

-     if current_model is not None:
-         del current_model  # Delete the existing model
-         current_model = None  # Reset reference
+     # if current_model is not None:
+     #     del current_model  # Delete the existing model
+     #     current_model = None  # Reset reference

-     if current_tokenizer is not None:
-         del current_tokenizer  # Delete the tokenizer
-         current_tokenizer = None
+     # if current_tokenizer is not None:
+     #     del current_tokenizer  # Delete the tokenizer
+     #     current_tokenizer = None

      gc.collect()  # Force garbage collection for CPU memory

@@ -149,19 +149,19 @@ def main():

      if selection == "Dashboard":
          st.cache_resource.clear()
-         # free_memory()
+         free_memory()
          show_dashboard()

      elif selection == "Stage 1: Sentiment Polarity Analysis":
          st.cache_resource.clear()
-         # free_memory()
+         free_memory()
          show_sentiment_analysis()

      elif selection == "Stage 2: Emotion Mood-tag Analysis":
          st.cache_resource.clear()
-         # free_memory()
-         # show_emotion_analysis()
-         st.write("This section is under development.")
+         free_memory()
+         show_emotion_analysis()
+         # st.write("This section is under development.")

      elif selection == "Stage 3: Text Transformation & Normalization":
          st.cache_resource.clear()
dashboard.py CHANGED
@@ -11,15 +11,15 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))

  def free_memory():
      # """Free up CPU & GPU memory before loading a new model."""
-     global current_model, current_tokenizer
+     # global current_model, current_tokenizer

-     if current_model is not None:
-         del current_model  # Delete the existing model
-         current_model = None  # Reset reference
+     # if current_model is not None:
+     #     del current_model  # Delete the existing model
+     #     current_model = None  # Reset reference

-     if current_tokenizer is not None:
-         del current_tokenizer  # Delete the tokenizer
-         current_tokenizer = None
+     # if current_tokenizer is not None:
+     #     del current_tokenizer  # Delete the tokenizer
+     #     current_tokenizer = None

      gc.collect()  # Force garbage collection for CPU memory

@@ -91,7 +91,9 @@ def show_dashboard():
      - Training Source: [GitHub @ Tachygraphy Micro-text Analysis & Normalization](https://github.com/ArchismanKarmakar/Tachygraphy-Microtext-Analysis-And-Normalization)
      - Kaggle Collections: [Kaggle @ Tachygraphy Micro-text Analysis & Normalization](https://www.kaggle.com/datasets/archismancoder/dataset-tachygraphy/data?select=Tachygraphy_MicroText-AIO-V3.xlsx)
      - Hugging Face Org: [Hugging Face @ Tachygraphy Micro-text Analysis & Normalization](https://huggingface.co/tachygraphy-microtrext-norm-org)
-     - Deployment: [Streamlit + Hugging Face @ GitHub](https://github.com/ArchismanKarmakar/Tachygraphy-Microtext-Analysis-And-Normalization-Deployment-Source-HuggingFace_Streamlit_JPX14032025)
+     - Deployment Source: [GitHub](https://github.com/ArchismanKarmakar/Tachygraphy-Microtext-Analysis-And-Normalization-Deployment-Source-HuggingFace_Streamlit_JPX14032025)
+     - Streamlit Deployment: [Streamlit](https://tachygraphy-microtext.streamlit.app/)
+     - Hugging Face Space Deployment: [Hugging Face Space](https://huggingface.co/spaces/tachygraphy-microtrext-norm-org/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder)
      """)

      create_footer()
emotionMoodtag_analysis/__init__.py ADDED
File without changes
emotionMoodtag_analysis/config/stage2_models.json ADDED
@@ -0,0 +1,32 @@
+ {
+     "1": {
+         "name": "DeBERTa v3 Base for Sequence Classification",
+         "type": "hf_automodel_finetuned_dbt3",
+         "module_path": "hmv_cfg_base_stage2.model1",
+         "hf_location": "tachygraphy-microtrext-norm-org/DeBERTa-v3-seqClassfication-LV2-EmotionMoodtags-Batch8",
+         "tokenizer_class": "DebertaV2Tokenizer",
+         "model_class": "DebertaV2ForSequenceClassification",
+         "problem_type": "regression",
+         "base_model": "microsoft/deberta-v3-base",
+         "base_model_class": "DebertaV2ForSequenceClassification",
+         "num_labels": 7,
+         "device": "cpu",
+         "load_function": "load_model",
+         "predict_function": "predict"
+     },
+     "2": {
+         "name": "DeBERTa v3 Base Custom Model with minimal Regularized Loss",
+         "type": "db3_base_custom",
+         "module_path": "hmv_cfg_base_stage2.model2",
+         "hf_location": "tachygraphy-microtrext-norm-org/DeBERTa-v3-Base-Cust-LV2-EmotionMoodtags-minRegLoss",
+         "tokenizer_class": "DebertaV2Tokenizer",
+         "model_class": "EmotionModel",
+         "problem_type": "regression",
+         "base_model": "microsoft/deberta-v3-base",
+         "base_model_class": "DebertaV2Model",
+         "num_labels": 7,
+         "device": "cpu",
+         "load_function": "load_model",
+         "predict_function": "predict"
+     }
+ }
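For reference: this registry is consumed dynamically by emotion_analysis_main.py below (its load_model_config() and import_from_module() functions). A minimal standalone sketch of that lookup, assuming the repo root is the working directory and the package directories are on sys.path:

    import importlib
    import json

    # Load the stage-2 registry (path assumed relative to the repo root).
    with open("emotionMoodtag_analysis/config/stage2_models.json", "r") as f:
        model_data = json.load(f)

    entry = model_data["1"]
    # Resolve the loader module and its entry points by name, as the app does.
    module = importlib.import_module(entry["module_path"])
    load_model = getattr(module, entry["load_function"])
    predict = getattr(module, entry["predict_function"])
    model, tokenizer = load_model()  # weights come from entry["hf_location"]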
emotionMoodtag_analysis/emotion_analysis_main.py ADDED
@@ -0,0 +1,317 @@
+ import os
+ import sys
+
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
+
+ from imports import *
+ import importlib.util
+ import os
+ import sys
+ import joblib
+ import time
+ import torch
+ # from transformers.utils import move_cache_to_trash
+ # from huggingface_hub import delete_cache
+ from transformers.utils.hub import TRANSFORMERS_CACHE
+ import shutil
+
+
+ # from hmv_cfg_base_stage1.model1 import load_model as load_model1
+ # from hmv_cfg_base_stage1.model1 import predict as predict1
+
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+ CONFIG_STAGE2 = os.path.join(BASE_DIR, "config", "stage2_models.json")
+ LOADERS_STAGE2 = os.path.join(BASE_DIR, "hmv-cfg-base-stage2")
+
+
+ EMOTION_MOODTAG_LABELS = [
+     "anger", "disgust", "fear", "joy", "neutral",
+     "sadness", "surprise"
+ ]
+
+ current_model = None
+ current_tokenizer = None
+
+
+ # Enabling Resource caching
+
+
+ # @st.cache_resource
+ def load_model_config():
+     with open(CONFIG_STAGE2, "r") as f:
+         model_data = json.load(f)
+
+     # Extract names for dropdown
+     model_options = {v["name"]: v for v in model_data.values()}
+     return model_data, model_options
+
+
+ MODEL_DATA, MODEL_OPTIONS = load_model_config()
+
+
+ # ✅ Dynamically Import Model Functions
+ def import_from_module(module_name, function_name):
+     try:
+         module = importlib.import_module(module_name)
+         return getattr(module, function_name)
+     except (ModuleNotFoundError, AttributeError) as e:
+         st.error(f"❌ Import Error: {e}")
+         return None
+
+
+ def free_memory():
+     # """Free up CPU & GPU memory before loading a new model."""
+     global current_model, current_tokenizer
+
+     if current_model is not None:
+         del current_model  # Delete the existing model
+         current_model = None  # Reset reference
+
+     if current_tokenizer is not None:
+         del current_tokenizer  # Delete the tokenizer
+         current_tokenizer = None
+
+     gc.collect()  # Force garbage collection for CPU memory
+
+     if torch.cuda.is_available():
+         torch.cuda.empty_cache()  # Free GPU memory
+         torch.cuda.ipc_collect()  # Clean up PyTorch GPU cache
+
+     # If running on CPU, reclaim memory using OS-level commands
+     try:
+         if torch.cuda.is_available() is False:
+             psutil.virtual_memory()  # Refresh memory stats
+     except Exception as e:
+         print(f"Memory cleanup error: {e}")
+
+     # Delete cached Hugging Face models
+     try:
+         cache_dir = TRANSFORMERS_CACHE
+         if os.path.exists(cache_dir):
+             shutil.rmtree(cache_dir)
+             print("Cache cleared!")
+     except Exception as e:
+         print(f"❌ Cache cleanup error: {e}")
+
+
+ def load_selected_model(model_name):
+     global current_model, current_tokenizer
+
+     # st.cache_resource.clear()
+
+     # free_memory()
+
+     # st.write("DEBUG: Available Models:", MODEL_OPTIONS.keys())  # ✅ See available models
+     # st.write("DEBUG: Selected Model:", MODEL_OPTIONS[model_name])  # ✅ Check selected model
+     # st.write("DEBUG: Model Name:", model_name)  # ✅ Check selected model
+
+     if model_name not in MODEL_OPTIONS:
+         st.error(f"⚠️ Model '{model_name}' not found in config!")
+         return None, None, None
+
+     model_info = MODEL_OPTIONS[model_name]
+     hf_location = model_info["hf_location"]
+
+     model_module = model_info["module_path"]
+     load_function = model_info["load_function"]
+     predict_function = model_info["predict_function"]
+
+     load_model_func = import_from_module(model_module, load_function)
+     predict_func = import_from_module(model_module, predict_function)
+
+     if load_model_func is None or predict_func is None:
+         st.error("❌ Model functions could not be loaded!")
+         return None, None, None
+
+     model, tokenizer = load_model_func()
+
+     current_model, current_tokenizer = model, tokenizer
+     return model, tokenizer, predict_func
+
+
+ def disable_ui():
+     st.components.v1.html(
+         """
+         <style>
+         #ui-disable-overlay {
+             position: fixed;
+             top: 0;
+             left: 0;
+             width: 100vw;
+             height: 100vh;
+             background-color: rgba(200, 200, 200, 0.5);
+             z-index: 9999;
+         }
+         </style>
+         <div id="ui-disable-overlay"></div>
+         """,
+         height=0,
+         scrolling=False
+     )
+
+
+ def enable_ui():
+     st.components.v1.html(
+         """
+         <script>
+         var overlay = document.getElementById("ui-disable-overlay");
+         if (overlay) {
+             overlay.parentNode.removeChild(overlay);
+         }
+         </script>
+         """,
+         height=0,
+         scrolling=False
+     )
+
+
+ # Function to increment progress dynamically
+ def update_progress(progress_bar, start, end, delay=0.1):
+     for i in range(start, end + 1, 5):  # Increment in steps of 5%
+         progress_bar.progress(i)
+         time.sleep(delay)  # Simulate processing time
+         # st.experimental_rerun()  # Refresh the page
+
+
+ # Function to update session state when model changes
+ def on_model_change():
+     st.session_state.model_changed = True  # Mark model as changed
+
+
+ # Function to update session state when text changes
+
+
+ def on_text_change():
+     st.session_state.text_changed = True  # Mark text as changed
+
+
+ # Initialize session state variables
+ if "selected_model" not in st.session_state:
+     st.session_state.selected_model = list(MODEL_OPTIONS.keys())[
+         0]  # Default model
+ if "user_input" not in st.session_state:
+     st.session_state.user_input = ""
+ if "last_processed_input" not in st.session_state:
+     st.session_state.last_processed_input = ""
+ if "model_changed" not in st.session_state:
+     st.session_state.model_changed = False
+ if "text_changed" not in st.session_state:
+     st.session_state.text_changed = False
+ if "disabled" not in st.session_state:
+     st.session_state.disabled = False
+
+
+ # Enabling Resource caching
+ def show_emotion_analysis():
+     st.title("Stage 2: Emotion Mood-tag Analysis")
+     st.write("This section handles emotion mood-tag analysis.")
+
+     # Model selection with change detection
+     selected_model = st.selectbox(
+         "Choose a model:", list(MODEL_OPTIONS.keys()), key="selected_model", on_change=on_model_change
+     )
+
+     # Text input with change detection
+     user_input = st.text_input(
+         "Enter text for emotions mood-tag analysis:", key="user_input", on_change=on_text_change
+     )
+     user_input_copy = user_input
+
+     # Only run inference if:
+     # 1. The text is NOT empty
+     # 2. The text has changed OR the model has changed
+     if user_input.strip() and (st.session_state.text_changed or st.session_state.model_changed):
+
+         # disable_ui()
+
+         # Reset session state flags
+         st.session_state.last_processed_input = user_input
+         st.session_state.model_changed = False
+         st.session_state.text_changed = False  # Store selected model
+
+         # ADD A DYNAMIC PROGRESS BAR
+         progress_bar = st.progress(0)
+         update_progress(progress_bar, 0, 10)
+         # status_text = st.empty()
+
+         # update_progress(0, 10)
+         # status_text.text("Loading model...")
+
+         # Make prediction
+
+         # model, tokenizer = load_model()
+         # model, tokenizer = load_selected_model(selected_model)
+         with st.spinner("Please wait..."):
+             model, tokenizer, predict_func = load_selected_model(selected_model)
+             device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+             if model is None:
+                 st.error(
+                     "⚠️ Error: Model failed to load! Check model selection or configuration.")
+                 st.stop()
+
+             # model.to(device)
+             if hasattr(model, "to"):
+                 model.to(device)
+
+             # predictions = predict(user_input, model, tokenizer, device)
+
+             predictions = predict_func(user_input, model, tokenizer, device)
+             print(predictions)
+
+         # Squeeze predictions to remove extra dimensions
+         predictions_array = predictions.squeeze()
+
+         # Convert to binary predictions (argmax)
+         binary_predictions = np.zeros_like(predictions_array)
+         max_indices = np.argmax(predictions_array)
+         binary_predictions[max_indices] = 1
+
+         # Update progress bar for prediction and model loading
+         update_progress(progress_bar, 10, 100)
+
+         # Display raw predictions
+         st.write(f"**Predicted Emotion Scores:** {predictions_array}")
+
+         # enable_ui()
+
+         # Display binary classification result
+         # st.write(f"**Predicted Sentiment:**")
+         # st.write(f"**NEGATIVE:** {binary_predictions[0]}, **NEUTRAL:** {binary_predictions[1]}, **POSITIVE:** {binary_predictions[2]}")
+         # st.write(f"**NEUTRAL:** {binary_predictions[1]}")
+         # st.write(f"**POSITIVE:** {binary_predictions[2]}")
+
+         # 1️⃣ **Polar Plot (Plotly)**
+         emotion_moodtags = predictions_array.tolist()
+         fig_polar = px.line_polar(
+             pd.DataFrame(dict(r=emotion_moodtags,
+                               theta=EMOTION_MOODTAG_LABELS)),
+             r='r', theta='theta', line_close=True
+         )
+         st.plotly_chart(fig_polar)
+
+         # 2️⃣ **Normalized Horizontal Bar Chart (Matplotlib)**
+         normalized_predictions = predictions_array / predictions_array.sum()
+
+         fig, ax = plt.subplots(figsize=(8, 2))
+         left = 0
+         for i in range(len(normalized_predictions)):
+             ax.barh(0, normalized_predictions[i], color=plt.cm.tab10(
+                 i), left=left, label=EMOTION_MOODTAG_LABELS[i])
+             left += normalized_predictions[i]
+
+         # Configure the chart
+         ax.set_xlim(0, 1)
+         ax.set_yticks([])
+         ax.set_xticks(np.arange(0, 1.1, 0.1))
+         ax.legend(loc='upper center', bbox_to_anchor=(
+             0.5, -0.15), ncol=len(EMOTION_MOODTAG_LABELS))
+         plt.title("Emotion Mood-tags Prediction Distribution")
+
+         # Display in Streamlit
+         st.pyplot(fig)
+
+         progress_bar.empty()
+
+
+ if __name__ == "__main__":
+     show_emotion_analysis()
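As a quick reference for the output format: the stage-2 predict functions return a (1, 7) array of scores clipped to [0, 1], in the order of EMOTION_MOODTAG_LABELS above. A minimal sketch of pairing scores with labels (the score values here are made up for illustration):

    import numpy as np

    EMOTION_MOODTAG_LABELS = ["anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise"]

    predictions = np.array([[0.62, 0.05, 0.10, 0.01, 0.15, 0.30, 0.02]])  # dummy scores
    scores = predictions.squeeze()
    # Print the labels ranked by predicted score, highest first.
    for label, score in sorted(zip(EMOTION_MOODTAG_LABELS, scores), key=lambda p: -p[1]):
        print(f"{label}: {score:.2f}")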
emotionMoodtag_analysis/hmv_cfg_base_stage2/__init__.py ADDED
File without changes
{sentiment_analysis/hmv_cfg_base_stage1 → emotionMoodtag_analysis/hmv_cfg_base_stage2}/imports.py RENAMED
@@ -1,25 +1,25 @@ (file renamed; all 25 lines re-emitted unchanged, line-ending churn only, shown once below)
  import os
  import sys

  sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))

  import streamlit as st
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel, DebertaV2Tokenizer, DebertaV2ForSequenceClassification, DebertaV2Model
  # import torch
  import numpy as np
  import matplotlib.pyplot as plt
  import plotly.express as px
  import pandas as pd
  import json
  import gc
  import psutil
  import importlib
  import importlib.util
  import asyncio
  # import pytorch_lightning as pl

  import safetensors
  from safetensors import load_file, save_file
  import json
  import huggingface_hub
  from huggingface_hub import hf_hub_download
emotionMoodtag_analysis/hmv_cfg_base_stage2/model1.py ADDED
@@ -0,0 +1,89 @@
+ import os
+ import sys
+
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
+
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+ CONFIG_STAGE2 = os.path.join(BASE_DIR, "..", "config", "stage2_models.json")
+
+ import torch
+ import torch.nn as nn
+ from imports import *
+ import torch.nn.functional as F
+
+
+ MODEL_OPTIONS = {
+     "1": {
+         "name": "DeBERTa v3 Base for Sequence Classification",
+         "type": "hf_automodel_finetuned_dbt3",
+         "module_path": "hmv_cfg_base_stage2.model1",
+         "hf_location": "tachygraphy-microtrext-norm-org/DeBERTa-v3-seqClassfication-LV2-EmotionMoodtags-Batch8",
+         "tokenizer_class": "DebertaV2Tokenizer",
+         "model_class": "DebertaV2ForSequenceClassification",
+         "problem_type": "regression",
+         "base_model": "microsoft/deberta-v3-base",
+         "base_model_class": "DebertaV2ForSequenceClassification",
+         "num_labels": 7,
+         "device": "cpu",
+         "load_function": "load_model",
+         "predict_function": "predict"
+     }
+ }
+
+
+ model_key = "1"
+ model_info = MODEL_OPTIONS[model_key]
+ hf_location = model_info["hf_location"]
+
+ tokenizer_class = globals()[model_info["tokenizer_class"]]
+ model_class = globals()[model_info["model_class"]]
+
+
+ @st.cache_resource
+ def load_model():
+     tokenizer = tokenizer_class.from_pretrained(hf_location)
+     print("Loading model 1")
+     model = model_class.from_pretrained(hf_location,
+                                         problem_type=model_info["problem_type"],
+                                         num_labels=model_info["num_labels"]
+                                         )
+     print("Model 1 loaded")
+
+     return model, tokenizer
+
+
+ def predict(text, model, tokenizer, device, max_len=128):
+     # Tokenize and pad the input text
+     inputs = tokenizer(
+         text,
+         add_special_tokens=True,
+         padding=True,
+         truncation=False,
+         return_tensors="pt",
+         return_token_type_ids=False,
+     ).to(device)  # Move input tensors to the correct device
+
+     with torch.no_grad():
+         outputs = model(**inputs)
+
+     # probabilities = outputs.logits.cpu().numpy()
+
+     # probabilities = torch.relu(outputs.logits)
+     # probabilities = torch.clamp(torch.tensor(probabilities), min=0.00000, max=1.00000).cpu().numpy()
+     # probabilities /= probabilities.sum()
+     # probabilities = probabilities.cpu().numpy()
+
+     # predictions = outputs.logits.cpu().numpy()
+
+     relu_logits = F.relu(outputs.logits)
+     clipped_logits = torch.clamp(relu_logits, max=1.00000000, min=0.00000000)
+     predictions = clipped_logits.cpu().numpy()
+
+     return predictions
+
+
+ if __name__ == "__main__":
+     model, tokenizer = load_model()
+     print("Model and tokenizer loaded successfully.")
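The post-processing in predict() above is a clipped ReLU rather than a sigmoid or softmax: negative logits collapse to 0 and anything above 1 is capped, which fits the registry's "problem_type": "regression" where each label score is trained toward [0, 1]. A standalone sketch of the same transform on dummy logits:

    import torch
    import torch.nn.functional as F

    logits = torch.tensor([[-0.4, 0.2, 1.7, 0.9, -1.1, 0.3, 0.05]])  # dummy values
    # Same transform as predict(): ReLU, then clamp to the [0, 1] range.
    clipped = torch.clamp(F.relu(logits), min=0.0, max=1.0)
    print(clipped)  # tensor([[0.0000, 0.2000, 1.0000, 0.9000, 0.0000, 0.3000, 0.0500]])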
emotionMoodtag_analysis/hmv_cfg_base_stage2/model2.py ADDED
@@ -0,0 +1,163 @@
+ from safetensors.torch import save_file, safe_open
+ from huggingface_hub import hf_hub_download
+ import json
+ import safetensors
+ from transformers import DebertaV2Model, DebertaV2Tokenizer
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import torch
+ import joblib
+ import importlib.util
+ from imports import *
+ import os
+ import sys
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
+
+
+ # from safetensors import load_file, save_file
+
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+ CONFIG_STAGE2 = os.path.join(BASE_DIR, "..", "config", "stage2_models.json")
+
+
+ MODEL_OPTIONS = {
+     "2": {
+         "name": "DeBERTa v3 Base Custom Model with minimal Regularized Loss",
+         "type": "db3_base_custom",
+         "module_path": "hmv_cfg_base_stage2.model2",
+         "hf_location": "tachygraphy-microtrext-norm-org/DeBERTa-v3-Base-Cust-LV2-EmotionMoodtags-minRegLoss",
+         "tokenizer_class": "DebertaV2Tokenizer",
+         "model_class": "EmotionModel",
+         "problem_type": "regression",
+         "base_model": "microsoft/deberta-v3-base",
+         "base_model_class": "DebertaV2Model",
+         "num_labels": 7,
+         "device": "cpu",
+         "load_function": "load_model",
+         "predict_function": "predict"
+     }
+ }
+
+
+ class EmotionModel(nn.Module):
+     def __init__(self, roberta_model, n_classes=7, dropout_rate=0.2):
+         super(EmotionModel, self).__init__()
+
+         self.roberta = roberta_model
+         self.drop = nn.Dropout(p=dropout_rate)
+         self.fc1 = nn.Linear(self.roberta.config.hidden_size, 512)
+         self.relu = nn.ReLU()
+         self.fc2 = nn.Linear(512, 256)
+         self.out = nn.Linear(256, n_classes)
+
+     def forward(self, input_ids, attention_mask):
+         output = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
+         # hidden_states = output.last_hidden_state
+
+         # Extract the [CLS] token representation (first token in the sequence)
+         cls_token_state = output.last_hidden_state[:, 0, :]
+         output = self.drop(cls_token_state)
+         output = self.relu(self.fc1(output))
+         output = self.drop(output)
+         output = self.relu(self.fc2(output))
+         # output = self.drop(output)
+         return self.out(output)
+
+     def save_pretrained(self, save_directory):
+         os.makedirs(save_directory, exist_ok=True)
+
+         model_weights = self.state_dict()
+         save_file(model_weights, os.path.join(save_directory, "model.safetensors"))
+
+         config = {
+             "hidden_size": self.roberta.config.hidden_size,
+             "num_labels": self.out.out_features,
+             "dropout_rate": self.drop.p,
+             "roberta_model": self.roberta.name_or_path,  # ✅ Save model name
+         }
+         with open(os.path.join(save_directory, "config.json"), "w") as f:
+             json.dump(config, f)
+
+         print(f"Model saved in {save_directory}")
+
+     @classmethod
+     @st.cache_resource
+     def load_pretrained(cls, model_path_or_repo):
+         # """Loads and caches the model (RoBERTa + EmotionModel) only when called."""
+         print(f"Loading model from {model_path_or_repo}...")
+
+         model_config_path = hf_hub_download(model_path_or_repo, "config.json")
+         model_weights_path = hf_hub_download(model_path_or_repo, "model.safetensors")
+
+         with open(model_config_path, "r") as f:
+             config = json.load(f)
+
+         print(f"Loading RoBERTa model: {config['roberta_model']}...")
+         roberta_model = DebertaV2Model.from_pretrained(
+             config["roberta_model"],
+         )
+
+         model = cls(
+             roberta_model, n_classes=config["num_labels"], dropout_rate=config["dropout_rate"]
+         )
+
+         with safe_open(model_weights_path, framework="pt", device="cpu") as f:
+             model_weights = {key: f.get_tensor(key) for key in f.keys()}
+         model.load_state_dict(model_weights)
+
+         print(f"Model loaded from {model_path_or_repo}")
+         return model
+
+
+ model_key = "2"
+ model_info = MODEL_OPTIONS[model_key]
+ hf_location = model_info["hf_location"]
+ base_model = model_info["base_model"]
+
+ tokenizer_class = globals()[model_info["tokenizer_class"]]
+ model_class = globals()[model_info["model_class"]]
+
+
+ @st.cache_resource
+ def load_model():
+     tokenizer = tokenizer_class.from_pretrained(hf_location)
+     print("Loading model 2")
+     model = EmotionModel.load_pretrained(hf_location)
+     print("Model 2 loaded")
+     # model.eval()
+
+     return model, tokenizer
+
+
+ def predict(text, model, tokenizer, device, max_len=128):
+     # model.eval()  # Set model to evaluation mode
+
+     # Tokenize and pad the input text
+     inputs = tokenizer(
+         text,
+         add_special_tokens=True,
+         padding=True,
+         truncation=False,
+         return_tensors="pt",
+         return_token_type_ids=False,
+     ).to(device)  # Move input tensors to the correct device
+
+     with torch.no_grad():
+         outputs = model(**inputs)
+
+     # Apply sigmoid activation (for BCEWithLogitsLoss)
+     # probabilities = torch.sigmoid(outputs).cpu().numpy()
+     # probabilities = outputs.cpu().numpy()
+
+     relu_logits = F.relu(outputs)
+     clipped_logits = torch.clamp(relu_logits, max=1.00000000, min=0.00000000)
+     probabilities = clipped_logits.cpu().numpy()
+
+     return probabilities
+
+
+ if __name__ == "__main__":
+     model, tokenizer = load_model()
+     print("Model and tokenizer loaded successfully.")
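A minimal end-to-end sketch of this module's load/predict pair, assuming the Hugging Face repo above is reachable and the module is run from within hmv_cfg_base_stage2 so that `from imports import *` resolves; the sample sentence is illustrative:

    import torch

    model, tokenizer = load_model()   # cached via st.cache_resource
    device = torch.device("cpu")      # matches MODEL_OPTIONS["2"]["device"]
    scores = predict("im so happy today!!", model, tokenizer, device)
    print(scores.shape)               # (1, 7): one clipped score per emotion label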
emotion_analysis.py DELETED
@@ -1,9 +0,0 @@
- import streamlit as st
-
- def show_emotion_analysis():
-     st.title("Stage 2: Emotion Mood-tag Analysis")
-     st.write("This section will handle emotion detection.")
-     # Add your emotion detection code here
-
- if __name__ == "__main__":
-     show_emotion_analysis()
poetry.lock CHANGED
@@ -2352,14 +2352,14 @@ files = [

  [[package]]
  name = "lightning-utilities"
- version = "0.14.1"
+ version = "0.14.2"
  description = "Lightning toolbox for across the our ecosystem."
  optional = false
  python-versions = ">=3.9"
  groups = ["main"]
  files = [
-     {file = "lightning_utilities-0.14.1-py3-none-any.whl", hash = "sha256:badc40a70d8e933706aa0b4f63b12392dbf8208728743a40b55edb5e81797311"},
-     {file = "lightning_utilities-0.14.1.tar.gz", hash = "sha256:9fb56c76dc07a46c075e1f78594fcb161091eae5944b34c2b43258d23c202791"},
+     {file = "lightning_utilities-0.14.2-py3-none-any.whl", hash = "sha256:da791fcaa731f651ec76a1a3b12994ed05af4d6841f2e78760233552709ef05d"},
+     {file = "lightning_utilities-0.14.2.tar.gz", hash = "sha256:0466a4f1bb9dff1c7190d4c7a32d1a8a1109f94fb816931efe8fb8b12bb0ab8d"},
  ]

  [package.dependencies]

@@ -4146,23 +4146,23 @@ files = [

  [[package]]
  name = "protobuf"
- version = "5.29.3"
+ version = "5.29.4"
  description = ""
  optional = false
  python-versions = ">=3.8"
  groups = ["main"]
  files = [
-     {file = "protobuf-5.29.3-cp310-abi3-win32.whl", hash = "sha256:3ea51771449e1035f26069c4c7fd51fba990d07bc55ba80701c78f886bf9c888"},
-     {file = "protobuf-5.29.3-cp310-abi3-win_amd64.whl", hash = "sha256:a4fa6f80816a9a0678429e84973f2f98cbc218cca434abe8db2ad0bffc98503a"},
-     {file = "protobuf-5.29.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:a8434404bbf139aa9e1300dbf989667a83d42ddda9153d8ab76e0d5dcaca484e"},
-     {file = "protobuf-5.29.3-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:daaf63f70f25e8689c072cfad4334ca0ac1d1e05a92fc15c54eb9cf23c3efd84"},
-     {file = "protobuf-5.29.3-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:c027e08a08be10b67c06bf2370b99c811c466398c357e615ca88c91c07f0910f"},
-     {file = "protobuf-5.29.3-cp38-cp38-win32.whl", hash = "sha256:84a57163a0ccef3f96e4b6a20516cedcf5bb3a95a657131c5c3ac62200d23252"},
-     {file = "protobuf-5.29.3-cp38-cp38-win_amd64.whl", hash = "sha256:b89c115d877892a512f79a8114564fb435943b59067615894c3b13cd3e1fa107"},
-     {file = "protobuf-5.29.3-cp39-cp39-win32.whl", hash = "sha256:0eb32bfa5219fc8d4111803e9a690658aa2e6366384fd0851064b963b6d1f2a7"},
-     {file = "protobuf-5.29.3-cp39-cp39-win_amd64.whl", hash = "sha256:6ce8cc3389a20693bfde6c6562e03474c40851b44975c9b2bf6df7d8c4f864da"},
-     {file = "protobuf-5.29.3-py3-none-any.whl", hash = "sha256:0a18ed4a24198528f2333802eb075e59dea9d679ab7a6c5efb017a59004d849f"},
-     {file = "protobuf-5.29.3.tar.gz", hash = "sha256:5da0f41edaf117bde316404bad1a486cb4ededf8e4a54891296f648e8e076620"},
+     {file = "protobuf-5.29.4-cp310-abi3-win32.whl", hash = "sha256:13eb236f8eb9ec34e63fc8b1d6efd2777d062fa6aaa68268fb67cf77f6839ad7"},
+     {file = "protobuf-5.29.4-cp310-abi3-win_amd64.whl", hash = "sha256:bcefcdf3976233f8a502d265eb65ea740c989bacc6c30a58290ed0e519eb4b8d"},
+     {file = "protobuf-5.29.4-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:307ecba1d852ec237e9ba668e087326a67564ef83e45a0189a772ede9e854dd0"},
+     {file = "protobuf-5.29.4-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:aec4962f9ea93c431d5714ed1be1c93f13e1a8618e70035ba2b0564d9e633f2e"},
+     {file = "protobuf-5.29.4-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:d7d3f7d1d5a66ed4942d4fefb12ac4b14a29028b209d4bfb25c68ae172059922"},
+     {file = "protobuf-5.29.4-cp38-cp38-win32.whl", hash = "sha256:1832f0515b62d12d8e6ffc078d7e9eb06969aa6dc13c13e1036e39d73bebc2de"},
+     {file = "protobuf-5.29.4-cp38-cp38-win_amd64.whl", hash = "sha256:476cb7b14914c780605a8cf62e38c2a85f8caff2e28a6a0bad827ec7d6c85d68"},
+     {file = "protobuf-5.29.4-cp39-cp39-win32.whl", hash = "sha256:fd32223020cb25a2cc100366f1dedc904e2d71d9322403224cdde5fdced0dabe"},
+     {file = "protobuf-5.29.4-cp39-cp39-win_amd64.whl", hash = "sha256:678974e1e3a9b975b8bc2447fca458db5f93a2fb6b0c8db46b6675b5b5346812"},
+     {file = "protobuf-5.29.4-py3-none-any.whl", hash = "sha256:3fde11b505e1597f71b875ef2fc52062b6a9740e5f7c8997ce878b6009145862"},
+     {file = "protobuf-5.29.4.tar.gz", hash = "sha256:4f1dfcd7997b31ef8f53ec82781ff434a28bf71d9102ddde14d076adcfc78c99"},
  ]

  [[package]]
pyproject.toml CHANGED
@@ -1,6 +1,6 @@
  [project]
  name = "tachygraphy-microtext-analysis-and-normalization"
- version = "2025.03.18.post5"
+ version = "2025.03.20.post1"
  description = ""
  authors = [
      { name = "Archisman Karmakar", email = "[email protected]" },
pyprojectOLD.toml CHANGED
@@ -1,6 +1,7 @@
  [project]
  name = "tachygraphy-microtext-analysis-and-normalization"
- version = "2025.03.18.post4_3"
+ version = "2025.03.18.post5"
+ # version = "2025.03.18.post4_3"
  # version = "2025.03.18.post3"
  # version = "2025.03.18.post2"
  # version = "2025.03.18.post1"
requirements.txt CHANGED
@@ -87,7 +87,7 @@ keras==3.9.0 ; python_version >= "3.12" and python_version < "4.0"
  keyring==25.6.0 ; python_version >= "3.12" and python_version < "4.0"
  kiwisolver==1.4.8 ; python_version >= "3.12" and python_version < "4.0"
  libclang==18.1.1 ; python_version >= "3.12" and python_version < "4.0"
- lightning-utilities==0.14.1 ; python_version >= "3.12" and python_version < "4.0"
+ lightning-utilities==0.14.2 ; python_version >= "3.12" and python_version < "4.0"
  locket==1.0.0 ; python_version >= "3.12" and python_version < "4.0"
  lxml==5.3.1 ; python_version >= "3.12" and python_version < "4.0"
  markdown-it-py==3.0.0 ; python_version >= "3.12" and python_version < "4.0"

@@ -145,7 +145,7 @@ portalocker==3.1.1 ; python_version >= "3.12" and python_version < "4.0"
  prometheus-client==0.21.1 ; python_version >= "3.12" and python_version < "4.0"
  prompt-toolkit==3.0.50 ; python_version >= "3.12" and python_version < "4.0"
  propcache==0.3.0 ; python_version >= "3.12" and python_version < "4.0"
- protobuf==5.29.3 ; python_version >= "3.12" and python_version < "4.0"
+ protobuf==5.29.4 ; python_version >= "3.12" and python_version < "4.0"
  psutil==7.0.0 ; python_version >= "3.12" and python_version < "4.0"
  ptyprocess==0.7.0 ; python_version >= "3.12" and python_version < "4.0" and sys_platform != "win32" and sys_platform != "emscripten"
  pure-eval==0.2.3 ; python_version >= "3.12" and python_version < "4.0"
{sentiment_analysis → sentimentPolarity_analysis}/__init__.py RENAMED
File without changes
{sentiment_analysis → sentimentPolarity_analysis}/config/stage1_models.json RENAMED
@@ -1,62 +1,62 @@ (file renamed; all 62 lines re-emitted unchanged, line-ending churn only, shown once below)
  {
      "1": {
          "name": "DeBERTa v3 Base for Sequence Classification",
          "type": "hf_automodel_finetuned_dbt3",
          "module_path": "hmv_cfg_base_stage1.model1",
          "hf_location": "tachygraphy-microtrext-norm-org/DeBERTa-v3-seqClassfication-LV1-SentimentPolarities-Batch8",
          "tokenizer_class": "DebertaV2Tokenizer",
          "model_class": "DebertaV2ForSequenceClassification",
          "problem_type": "multi_label_classification",
          "base_model": "microsoft/deberta-v3-base",
          "base_model_class": "DebertaV2ForSequenceClassification",
          "num_labels": 3,
          "device": "cpu",
          "load_function": "load_model",
          "predict_function": "predict"
      },
      "2": {
          "name": "DeBERTa v3 Base Custom Model with minimal Regularized Loss",
          "type": "db3_base_custom",
          "module_path": "hmv_cfg_base_stage1.model2",
          "hf_location": "tachygraphy-microtrext-norm-org/DeBERTa-v3-Base-Cust-LV1-SentimentPolarities-minRegLoss",
          "tokenizer_class": "DebertaV2Tokenizer",
          "model_class": "SentimentModel",
          "problem_type": "multi_label_classification",
          "base_model": "microsoft/deberta-v3-base",
          "base_model_class": "DebertaV2Model",
          "num_labels": 3,
          "device": "cpu",
          "load_function": "load_model",
          "predict_function": "predict"
      },
      "3": {
          "name": "BERT Base Uncased Custom Model",
          "type": "bert_base_uncased_custom",
          "module_path": "hmv_cfg_base_stage1.model3",
          "hf_location": "https://huggingface.co/tachygraphy-microtrext-norm-org/BERT-LV1-SentimentPolarities/resolve/main/saved_weights.pt",
          "tokenizer_class": "AutoTokenizer",
          "model_class": "BERT_architecture",
          "problem_type": "multi_label_classification",
          "base_model": "bert-base-uncased",
          "base_model_class": "AutoModel",
          "num_labels": 3,
          "device": "cpu",
          "load_function": "load_model",
          "predict_function": "predict"
      },
      "4": {
          "name": "LSTM Custom Model",
          "type": "lstm_uncased_custom",
          "module_path": "hmv_cfg_base_stage1.model4",
          "hf_location": "tachygraphy-microtrext-norm-org/LSTM-LV1-SentimentPolarities",
          "tokenizer_class": "",
          "model_class": "",
          "problem_type": "multi_label_classification",
          "base_model": "",
          "base_model_class": "",
          "num_labels": 3,
          "device": "cpu",
          "load_function": "load_model",
          "predict_function": "predict"
      }
  }
{sentiment_analysis → sentimentPolarity_analysis}/hmv_cfg_base_stage1/__init__.py RENAMED
@@ -1 +1 @@ (line re-emitted unchanged, line-ending churn only)
  # from . import model1
sentimentPolarity_analysis/hmv_cfg_base_stage1/imports.py ADDED
@@ -0,0 +1,25 @@
+ import os
+ import sys
+
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
+
+ import streamlit as st
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel, DebertaV2Tokenizer, DebertaV2ForSequenceClassification, DebertaV2Model
+ # import torch
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import plotly.express as px
+ import pandas as pd
+ import json
+ import gc
+ import psutil
+ import importlib
+ import importlib.util
+ import asyncio
+ # import pytorch_lightning as pl
+
+ import safetensors
+ from safetensors import load_file, save_file
+ import json
+ import huggingface_hub
+ from huggingface_hub import hf_hub_download
{sentiment_analysis → sentimentPolarity_analysis}/hmv_cfg_base_stage1/model1.py RENAMED
@@ -1,85 +1,85 @@ (file renamed; all 85 lines re-emitted unchanged, line-ending churn only, shown once below)
  import os
  import sys

  sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))

  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
  CONFIG_STAGE1 = os.path.join(BASE_DIR, "..", "config", "stage1_models.json")

  import torch
  import torch.nn as nn
  from imports import *
  import torch.nn.functional as F


  MODEL_OPTIONS = {
      "1": {
          "name": "DeBERTa v3 Base for Sequence Classification",
          "type": "hf_automodel_finetuned_dbt3",
          "module_path": "hmv_cfg_base_stage1.model1",
          "hf_location": "tachygraphy-microtrext-norm-org/DeBERTa-v3-seqClassfication-LV1-SentimentPolarities-Batch8",
          "tokenizer_class": "DebertaV2Tokenizer",
          "model_class": "DebertaV2ForSequenceClassification",
          "problem_type": "multi_label_classification",
          "base_model": "microsoft/deberta-v3-base",
          "base_model_class": "DebertaV2ForSequenceClassification",
          "num_labels": 3,
          "device": "cpu",
          "load_function": "load_model",
          "predict_function": "predict"
      }
  }


  model_key = "1"
  model_info = MODEL_OPTIONS[model_key]
  hf_location = model_info["hf_location"]

  tokenizer_class = globals()[model_info["tokenizer_class"]]
  model_class = globals()[model_info["model_class"]]


  @st.cache_resource
  def load_model():
      tokenizer = tokenizer_class.from_pretrained(hf_location)
      print("Loading model 1")
      model = model_class.from_pretrained(hf_location,
                                          problem_type=model_info["problem_type"],
                                          num_labels=model_info["num_labels"]
                                          )
      print("Model 1 loaded")

      return model, tokenizer


  def predict(text, model, tokenizer, device, max_len=128):
      # Tokenize and pad the input text
      inputs = tokenizer(
          text,
          add_special_tokens=True,
          padding=True,
          truncation=False,
          return_tensors="pt",
          return_token_type_ids=False,
      ).to(device)  # Move input tensors to the correct device

      with torch.no_grad():
          outputs = model(**inputs)

      # probabilities = outputs.logits.cpu().numpy()

      # probabilities = torch.relu(outputs.logits)
      # probabilities = torch.clamp(torch.tensor(probabilities), min=0.00000, max=1.00000).cpu().numpy()
      # probabilities /= probabilities.sum()
      # probabilities = probabilities.cpu().numpy()

      predictions = torch.sigmoid(outputs.logits).cpu().numpy()

      return predictions


  if __name__ == "__main__":
      model, tokenizer = load_model()
      print("Model and tokenizer loaded successfully.")
{sentiment_analysis → sentimentPolarity_analysis}/hmv_cfg_base_stage1/model2.py RENAMED
@@ -11,7 +11,7 @@ import joblib

  import torch
  import torch.nn as nn
- import torch.functional as F
+ import torch.nn.functional as F
  from transformers import DebertaV2Model, DebertaV2Tokenizer
  import safetensors
  # from safetensors import load_file, save_file

@@ -78,7 +78,7 @@ class SentimentModel(nn.Module):
      @classmethod
      @st.cache_resource
      def load_pretrained(cls, model_path_or_repo):
-         """Loads and caches the model (RoBERTa + SentimentModel) only when called."""
+         # """Loads and caches the model (RoBERTa + SentimentModel) only when called."""
          print(f"Loading model from {model_path_or_repo}...")

          model_config_path = hf_hub_download(model_path_or_repo, "config.json")
{sentiment_analysis → sentimentPolarity_analysis}/hmv_cfg_base_stage1/model3.py RENAMED
File without changes
{sentiment_analysis → sentimentPolarity_analysis}/hmv_cfg_base_stage1/model4.py RENAMED
File without changes
{sentiment_analysis → sentimentPolarity_analysis}/sentiment_analysis_main.py RENAMED
File without changes
sentiment_analysis/hmv_cfg_base_stage1/__pycache__/__init__.cpython-310.pyc DELETED
Binary file (186 Bytes)
 
sentiment_analysis/hmv_cfg_base_stage1/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (284 Bytes)
 
sentiment_analysis/hmv_cfg_base_stage1/__pycache__/model1.cpython-310.pyc DELETED
Binary file (1.95 kB)
 
sentiment_analysis/hmv_cfg_base_stage1/__pycache__/model1.cpython-312.pyc DELETED
Binary file (2.96 kB)