Archisman Karmakar committed
Commit 19dcfe5 · 1 Parent(s): e999632

2025.03.25.post1
.github/workflows/deploy_to_HF_space_DIRECT.yml CHANGED
@@ -76,8 +76,8 @@ jobs:
         env:
           HF_READ_WRITE_TOKEN: ${{ secrets.HF_READ_WRITE_TOKEN }}
         run: |
-          git remote add space https://huggingface.co/spaces/tachygraphy-microtext-normalization-iemk/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder
-          git push --force https://${{ secrets.HF_USERNAME }}:${{ secrets.HF_READ_WRITE_TOKEN }}@huggingface.co/spaces/tachygraphy-microtext-normalization-iemk/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder
+          git remote add space https://huggingface.co/spaces/Tachygraphy-Microtext-Normalization-IEMK25/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder
+          git push --force https://${{ secrets.HF_USERNAME }}:${{ secrets.HF_READ_WRITE_TOKEN }}@huggingface.co/spaces/Tachygraphy-Microtext-Normalization-IEMK25/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder



@@ -214,7 +214,7 @@ jobs:

       # - name: Clone Hugging Face Space repository
       #   run: |
-      #     git clone https://HF_USERNAME:${{ secrets.HF_TOKEN }}@huggingface.co/spaces/tachygraphy-microtext-normalization-iemk/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder hf-space
+      #     git clone https://HF_USERNAME:${{ secrets.HF_TOKEN }}@huggingface.co/spaces/Tachygraphy-Microtext-Normalization-IEMK25/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder hf-space

       # - name: Copy repository files to HF Space
       #   run: |
@@ -227,7 +227,7 @@ jobs:
       # # run: |
       # #   cd hf-space
       # #   git init
-      # #   git remote add origin https://huggingface.co/spaces/tachygraphy-microtext-normalization-iemk/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder
+      # #   git remote add origin https://huggingface.co/spaces/Tachygraphy-Microtext-Normalization-IEMK25/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder
       # #   git checkout -b main
       # #   git add .
       # #   git commit -m "Update deployment via GitHub Actions"
@@ -240,7 +240,7 @@ jobs:
       #   git init
       #   # Remove existing origin if it exists
       #   git remote remove origin || true
-      #   git remote add origin https://huggingface.co/spaces/tachygraphy-microtext-normalization-iemk/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder
+      #   git remote add origin https://huggingface.co/spaces/Tachygraphy-Microtext-Normalization-IEMK25/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder
      #   git checkout -b main
      #   git add .
      #   git commit -m "Update deployment via GitHub Actions"
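The renamed deploy step still force-pushes over HTTPS with the token embedded in the remote URL. For comparison, a minimal sketch of the same deployment through the huggingface_hub Python client (the repo id is the Space targeted above; reading HF_READ_WRITE_TOKEN from the environment mirrors the workflow's env block):

    # Sketch: upload the checkout to the Space via the Hub API instead of
    # `git push --force`. Assumes HF_READ_WRITE_TOKEN is set in the
    # environment, as in the workflow's env block above.
    import os
    from huggingface_hub import HfApi

    api = HfApi(token=os.environ["HF_READ_WRITE_TOKEN"])
    api.upload_folder(
        folder_path=".",  # root of the repository checkout
        repo_id="Tachygraphy-Microtext-Normalization-IEMK25/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder",
        repo_type="space",  # the target is a Space, not a model repo
        commit_message="Deploy via GitHub Actions",
    )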
.github/workflows/dfploy_to_HF_space_DOCKER CHANGED
@@ -28,7 +28,7 @@ jobs:


       - name: Build the Docker image
-        run: docker build -t huggingface.co/spaces/tachygraphy-microtext-normalization-iemk/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder .
+        run: docker build -t huggingface.co/spaces/Tachygraphy-Microtext-Normalization-IEMK25/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder .

       - name: Push the Docker image to Hugging Face
-        run: docker push huggingface.co/spaces/tachygraphy-microtext-normalization-iemk/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder
+        run: docker push huggingface.co/spaces/Tachygraphy-Microtext-Normalization-IEMK25/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder
app_main_hf.py CHANGED
@@ -50,6 +50,8 @@ from emotionMoodtag_analysis.emotion_analysis_main import show_emotion_analysis
 from sentimentPolarity_analysis.sentiment_analysis_main import show_sentiment_analysis
 from transformation_and_Normalization.transformationNormalization_main import transform_and_normalize
 from dashboard import show_dashboard
+from stacked_stacking_stages.stacking_stages import show_stacking_stages
+from data_collection_form.data_collector import show_data_collector


 # from text_transformation import show_text_transformation
@@ -138,8 +140,8 @@ def main():

     selection = option_menu(
         menu_title=None,  # No title for a sleek look
-        options=["Dashboard", "Stage 1: Sentiment Polarity Analysis", "Stage 2: Emotion Mood-tag Analysis", "Stage 3: Text Transformation & Normalization"],
-        icons=['house', 'diagram-3', "snow", 'activity'],
+        options=["Dashboard", "Stage 1: Sentiment Polarity Analysis", "Stage 2: Emotion Mood-tag Analysis", "Stage 3: Text Transformation & Normalization", "Stacked Stages", "Data Correction & Collection"],
+        icons=['house', 'diagram-3', "snow", 'activity', 'collection', 'database-up'],
         menu_icon="cast",  # Main menu icon
         default_index=0,  # Highlight the first option
         orientation="vertical",
@@ -210,6 +212,18 @@ def main():
         transform_and_normalize()
         # st.write("This section is under development.")

+    elif selection == "Stacked Stages":
+        # st.title("Stacked Stages")
+        # st.cache_resource.clear()
+        # free_memory()
+        show_stacking_stages()
+
+    elif selection == "Data Correction & Collection":
+        # st.title("Data Correction & Collection")
+        # st.cache_resource.clear()
+        # free_memory()
+        show_data_collector()
+


     # st.sidebar.title("Navigation")
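The two new pages slot into the app's select-and-dispatch navigation. A self-contained sketch of that pattern (the handler bodies are placeholders standing in for the real page modules):

    # Sketch of the option_menu navigation used in app_main_hf.py.
    import streamlit as st
    from streamlit_option_menu import option_menu

    def show_dashboard():
        st.write("dashboard page")

    def show_stacking_stages():
        st.write("stacked stages page")

    selection = option_menu(
        menu_title=None,
        options=["Dashboard", "Stacked Stages"],
        icons=["house", "collection"],  # Bootstrap icon names
        default_index=0,
        orientation="vertical",
    )

    # option_menu returns the selected label; dispatch on it.
    if selection == "Dashboard":
        show_dashboard()
    elif selection == "Stacked Stages":
        show_stacking_stages()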
dashboard.py CHANGED
@@ -44,8 +44,102 @@ def free_memory():
         print(f"❌ Cache cleanup error: {e}")


+def create_sample_example1():
+    st.write("""
+    ## Sample Example 1
+    """)
+    graph = """
+    digraph {
+        // Global graph settings with explicit DPI
+        graph [bgcolor="white", rankdir=TB, splines=true, nodesep=0.8, ranksep=0.8];
+        node [shape=box, style="rounded,filled", fontname="Helvetica", fontsize=9, margin="0.15,0.1"];
+
+        // Define nodes with custom colors
+        Input [label="Input:\nbruh, floods in Kerala, rescue ops non-stop 🚁", fillcolor="#ffe6de", fontcolor="#000000"];
+        Output [label="Output:\nBrother, the floods in Kerala are severe,\nand rescue operations are ongoing continuously.", fillcolor="#ffe6de", fontcolor="#000000"];
+        Sentiment [label="Sentiment:\nNEUTRAL", fillcolor="#ecdeff", fontcolor="black"];
+
+        // Emotion nodes with a uniform style
+        Anger [label="Anger: 0.080178231", fillcolor="#deffe1", fontcolor="black"];
+        Disgust [label="Disgust: 0.015257259", fillcolor="#deffe1", fontcolor="black"];
+        Fear [label="Fear: 0.601871967", fillcolor="#deffe1", fontcolor="black"];
+        Joy [label="Joy: 0.00410547", fillcolor="#deffe1", fontcolor="black"];
+        NeutralE [label="Neutral: 0.0341026", fillcolor="#deffe1", fontcolor="black"];
+        Sadness [label="Sadness: 0.245294735", fillcolor="#deffe1", fontcolor="black"];
+        Surprise [label="Surprise: 0.019189769", fillcolor="#deffe1", fontcolor="black"];
+
+        // Define edges with a consistent style
+        edge [color="#7a7a7a", penwidth=3];
+
+        // Establish the tree structure
+        Input -> Output;
+        Input -> Sentiment;
+        Sentiment -> Anger;
+        Sentiment -> Disgust;
+        Sentiment -> Fear;
+        Sentiment -> Joy;
+        Sentiment -> NeutralE;
+        Sentiment -> Sadness;
+        Sentiment -> Surprise;
+    }
+    """
+    st.graphviz_chart(graph)
+
+
+def create_sample_example2():
+    st.write("""
+    ## Sample Example 2
+    """)
+    graph = """
+    digraph {
+        // Global graph settings
+        graph [bgcolor="white", rankdir=TB, splines=true, nodesep=0.8, ranksep=0.8];
+        node [shape=box, style="rounded,filled", fontname="Helvetica", fontsize=9, margin="0.15,0.1"];
+
+        // Define nodes with custom colors
+        Input [label="Input:\nu rlly think all that talk means u tough? lol, when I step up, u ain't gon say sh*t", fillcolor="#ffe6de", fontcolor="black"];
+        Output [label="Output:\nyou really think all that talk makes you tough lol when i step up you are not going to say anything", fillcolor="#ffe6de", fontcolor="black"];
+        Sentiment [label="Sentiment:\nNEGATIVE", fillcolor="#ecdeff", fontcolor="black"];
+
+        // Emotion nodes with a uniform style
+        Anger [label="Anger: 0.14403291", fillcolor="#deffe1", fontcolor="black"];
+        Disgust [label="Disgust: 0.039282672", fillcolor="#deffe1", fontcolor="black"];
+        Fear [label="Fear: 0.014349542", fillcolor="#deffe1", fontcolor="black"];
+        Joy [label="Joy: 0.048965044", fillcolor="#deffe1", fontcolor="black"];
+        NeutralE [label="Neutral: 0.494852662", fillcolor="#deffe1", fontcolor="black"];
+        Sadness [label="Sadness: 0.021111647", fillcolor="#deffe1", fontcolor="black"];
+        Surprise [label="Surprise: 0.237405464", fillcolor="#deffe1", fontcolor="black"];
+
+        // Define edges with a consistent style
+        edge [color="#7a7a7a", penwidth=3];
+
+        // Establish the tree structure
+        Input -> Output;
+        Input -> Sentiment;
+        Sentiment -> Anger;
+        Sentiment -> Disgust;
+        Sentiment -> Fear;
+        Sentiment -> Joy;
+        Sentiment -> NeutralE;
+        Sentiment -> Sadness;
+        Sentiment -> Surprise;
+    }
+    """
+    st.graphviz_chart(graph)
+
+
+def create_project_overview():
+    # st.divider()
+    st.markdown("## Project Overview")
+    st.write(f"""
+    Tachygraphy—originally developed to expedite writing—has evolved over centuries. In the 1990s, it reappeared as micro-text, driving faster communication on social media with characteristics like 'Anytime, Anyplace, Anybody, and Anything (4A)'. This project focuses on the analysis and normalization of micro-text, a prevalent form of informal communication today. Its primary objective is to enhance Natural Language Processing (NLP) tasks by standardizing micro-text for better sentiment analysis, emotion analysis, and data extraction, and by normalizing it to an understandable form (4A message decoding).
+    """
+    )
+
+
 def create_footer():
-    st.divider()
+    # st.divider()
+    st.markdown("## About Us")

     # 🛠️ Layout using Streamlit columns
     col1, col2, col3 = st.columns([1, 1, 1])
@@ -90,14 +184,20 @@ def show_dashboard():
     st.write("""
    - Training Source: [GitHub @ Tachygraphy Micro-text Analysis & Normalization](https://github.com/ArchismanKarmakar/Tachygraphy-Microtext-Analysis-And-Normalization)
    - Kaggle Collections: [Kaggle @ Tachygraphy Micro-text Analysis & Normalization](https://www.kaggle.com/datasets/archismancoder/dataset-tachygraphy/data?select=Tachygraphy_MicroText-AIO-V3.xlsx)
-   - Hugging Face Org: [Hugging Face @ Tachygraphy Micro-text Analysis & Normalization](https://huggingface.co/tachygraphy-microtext-normalization-iemk)
+   - Hugging Face Org: [Hugging Face @ Tachygraphy Micro-text Analysis & Normalization](https://huggingface.co/Tachygraphy-Microtext-Normalization-IEMK25)
    - Deployment Source: [GitHub](https://github.com/ArchismanKarmakar/Tachygraphy-Microtext-Analysis-And-Normalization-Deployment-Source-HuggingFace_Streamlit_JPX14032025)
    - Streamlit Deployment: [Streamlit](https://tachygraphy-microtext.streamlit.app/)
-   - Hugging Face Space Deployment: [Hugging Face Space](https://huggingface.co/spaces/tachygraphy-microtext-normalization-iemk/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder)
+   - Hugging Face Space Deployment: [Hugging Face Space](https://huggingface.co/spaces/Tachygraphy-Microtext-Normalization-IEMK25/Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder)
    """)

     create_footer()

+    create_project_overview()
+
+    create_sample_example1()
+
+    # create_sample_example2()
+

 def __main__():
     show_dashboard()
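Both sample-example helpers follow one pattern: compose a Graphviz DOT string, then hand it to st.graphviz_chart. A stripped-down sketch of that pattern (the labels here are illustrative, not taken from the dataset):

    # Sketch of the DOT-string pattern behind create_sample_example1/2.
    import streamlit as st

    def render_prediction_tree(input_text: str, output_text: str, sentiment: str) -> None:
        # Double braces escape literal { } inside the f-string; \\n becomes
        # the DOT newline escape inside node labels.
        graph = f"""
        digraph {{
            node [shape=box, style="rounded,filled", fontname="Helvetica"];
            Input [label="Input:\\n{input_text}"];
            Output [label="Output:\\n{output_text}"];
            Sentiment [label="Sentiment:\\n{sentiment}"];
            Input -> Output;
            Input -> Sentiment;
        }}
        """
        st.graphviz_chart(graph)

    render_prediction_tree("brb 5 min", "Be right back in five minutes.", "NEUTRAL")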
data_collection_form/__init__.py ADDED
File without changes
data_collection_form/data_collector.py ADDED
@@ -0,0 +1,387 @@
+import shutil
+from transformers.utils.hub import TRANSFORMERS_CACHE
+import torch
+import time
+import joblib
+import importlib.util
+from imports import *
+import os
+import sys
+import time
+import uuid
+import math
+
+from dotenv import load_dotenv
+# import psycopg2
+from supabase import create_client, Client
+from datetime import datetime, timezone
+from collections import OrderedDict
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
+
+env_path = os.path.join(os.path.dirname(__file__),
+                        "..", ".devcontainer", ".env")
+
+# from transformers.utils import move_cache_to_trash
+# from huggingface_hub import delete_cache
+
+
+# from hmv_cfg_base_stage1.model1 import load_model as load_model1
+# from hmv_cfg_base_stage1.model1 import predict as predict1
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+CONFIG_STAGE1 = os.path.join(BASE_DIR, "..", "sentimentPolarity_analysis", "config", "stage1_models.json")
+CONFIG_STAGE2 = os.path.join(BASE_DIR, "..", "emotionMoodtag_analysis", "config", "stage2_models.json")
+CONFIG_STAGE3 = os.path.join(BASE_DIR, "..", "transformation_and_Normalization", "config", "stage3_models.json")
+LOADERS_STAGE_COLLECTOR = os.path.join(BASE_DIR, "hmv_cfg_base_dlc")
+
+
+EMOTION_MOODTAG_LABELS = [
+    "anger", "disgust", "fear", "joy", "neutral",
+    "sadness", "surprise"
+]
+
+SENTIMENT_POLARITY_LABELS = [
+    "negative", "neutral", "positive"
+]
+
+
+current_model = None
+current_tokenizer = None
+
+
+# Enabling Resource caching
+
+# Load environment variables from .env
+load_dotenv()
+
+# @st.cache_resource
+# DATABASE_URL = os.environ.get("DATABASE_URL")
+
+# def get_connection():
+#     # """Establish a connection to the database."""
+#     # return psycopg2.connect(os.environ.get("DATABASE_URL"))
+#     supabase: Client = create_client(os.environ.get("SUPABASE_URL"), os.environ.get("anon_key"))
+#     return supabase
+
+# @st.cache_resource
+
+
+def load_model_config1():
+    with open(CONFIG_STAGE1, "r") as f:
+        model_data = json.load(f)
+
+    # Extract names for dropdown
+    # model_options is a dict mapping model name to its config
+    model_options = {v["name"]: v for v in model_data.values()}
+
+    # Create an OrderedDict and insert a default option at the beginning.
+    default_option = "--Select the model used for inference (if applicable)--"
+    model_options_with_default = OrderedDict()
+    model_options_with_default[default_option] = None  # or any placeholder value
+    # Add the rest of the options
+    for key, value in model_options.items():
+        model_options_with_default[key] = value
+
+    return model_data, model_options_with_default
+
+
+MODEL_DATA1, MODEL_OPTIONS1 = load_model_config1()
+
+
+def load_model_config2():
+    with open(CONFIG_STAGE2, "r") as f:
+        model_data = json.load(f)
+
+    # Extract names for dropdown
+    # model_options is a dict mapping model name to its config
+    model_options = {v["name"]: v for v in model_data.values()}
+
+    # Create an OrderedDict and insert a default option at the beginning.
+    default_option = "--Select the model used for inference (if applicable)--"
+    model_options_with_default = OrderedDict()
+    model_options_with_default[default_option] = None  # or any placeholder value
+    # Add the rest of the options
+    for key, value in model_options.items():
+        model_options_with_default[key] = value
+
+    return model_data, model_options_with_default
+
+MODEL_DATA2, MODEL_OPTIONS2 = load_model_config2()
+
+
+def load_model_config3():
+    with open(CONFIG_STAGE3, "r") as f:
+        model_data = json.load(f)
+
+    # Extract names for dropdown
+    # model_options is a dict mapping model name to its config
+    model_options = {v["name"]: v for v in model_data.values()}
+
+    # Create an OrderedDict and insert a default option at the beginning.
+    default_option = "--Select the model used for inference (if applicable)--"
+    model_options_with_default = OrderedDict()
+    model_options_with_default[default_option] = None  # or any placeholder value
+    # Add the rest of the options
+    for key, value in model_options.items():
+        model_options_with_default[key] = value
+
+    return model_data, model_options_with_default
+
+
+MODEL_DATA3, MODEL_OPTIONS3 = load_model_config3()
+
+
+# ✅ Dynamically Import Model Functions
+def import_from_module(module_name, function_name):
+    try:
+        module = importlib.import_module(module_name)
+        return getattr(module, function_name)
+    except (ModuleNotFoundError, AttributeError) as e:
+        st.error(f"❌ Import Error: {e}")
+        return None
+
+
+def free_memory():
+    # """Free up CPU & GPU memory before loading a new model."""
+    global current_model, current_tokenizer
+
+    if current_model is not None:
+        del current_model  # Delete the existing model
+        current_model = None  # Reset reference
+
+    if current_tokenizer is not None:
+        del current_tokenizer  # Delete the tokenizer
+        current_tokenizer = None
+
+    gc.collect()  # Force garbage collection for CPU memory
+
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()  # Free GPU memory
+        torch.cuda.ipc_collect()  # Clean up PyTorch GPU cache
+
+    # If running on CPU, reclaim memory using OS-level commands
+    try:
+        if torch.cuda.is_available() is False:
+            psutil.virtual_memory()  # Refresh memory stats
+    except Exception as e:
+        print(f"Memory cleanup error: {e}")
+
+    # Delete cached Hugging Face models
+    try:
+        cache_dir = TRANSFORMERS_CACHE
+        if os.path.exists(cache_dir):
+            shutil.rmtree(cache_dir)
+            print("Cache cleared!")
+    except Exception as e:
+        print(f"❌ Cache cleanup error: {e}")
+
+
+def disable_ui():
+    st.components.v1.html(
+        """
+        <style>
+        #ui-disable-overlay {
+            position: fixed;
+            top: 0;
+            left: 0;
+            width: 100vw;
+            height: 100vh;
+            background-color: rgba(200, 200, 200, 0.5);
+            z-index: 9999;
+        }
+        </style>
+        <div id="ui-disable-overlay"></div>
+        """,
+        height=0,
+        scrolling=False
+    )
+
+
+def enable_ui():
+    st.components.v1.html(
+        """
+        <script>
+        var overlay = document.getElementById("ui-disable-overlay");
+        if (overlay) {
+            overlay.parentNode.removeChild(overlay);
+        }
+        </script>
+        """,
+        height=0,
+        scrolling=False
+    )
+
+# Function to increment progress dynamically
+
+
+def get_env_variable(var_name):
+    # Try os.environ first (this covers local development and HF Spaces)
+    value = os.environ.get(var_name)
+    if value is None:
+        # Fall back to st.secrets if available (e.g., on Streamlit Cloud)
+        try:
+            value = st.secrets[var_name]
+        except KeyError:
+            value = None
+    return value
+
+
+def show_data_collector():
+    st.title("Data Correction & Collection Page")
+
+    st.error("New API keys are coming in Q2 2025 (May 1st); old API authentication will be deprecated and blocked by PostgREST.")
+    st.warning(
+        "This page is running in test mode, please be careful with your data.")
+    st.error("The database is running in debug log mode, please be careful with your data.")
+
+    with st.form("feedback_form", clear_on_submit=True, border=False):
+        st.write("### Data Collection Form")
+        st.write(
+            "#### If the predictions generated are wrong, please provide feedback to help improve the model.")
+
+        # Model selection dropdown for Stage 3
+        model_names3 = list(MODEL_OPTIONS3.keys())
+        selected_model3 = st.selectbox(
+            "Choose a model:", model_names3, key="selected_model_stage3"
+        )
+
+        # Text Feedback Inputs
+        col1, col2 = st.columns(2)
+        with col1:
+            feedback = st.text_input(
+                "Enter the correct expanded standard formal English text:",
+                key="feedback_input"
+            )
+        with col2:
+            feedback2 = st.text_input(
+                "Enter any one of the wrongly predicted texts:",
+                key="feedback_input2"
+            )
+
+        st.warning(
+            "The 'Correct' slider is for the probability of the actual label; the 'Wrong' slider is for the probability predicted by any model that got that label wrong.")
+
+        st.write("#### Sentiment Polarity Feedback (Select values between 0 and 1)")
+        SENTIMENT_POLARITY_LABELS = ["negative", "neutral", "positive"]
+
+        model_names1 = list(MODEL_OPTIONS1.keys())
+        selected_model1 = st.selectbox(
+            "Choose a model:", model_names1, key="selected_model_stage1"
+        )
+
+        sentiment_feedback = {}
+        # For sentiment, we have 3 labels so we can place them in one row.
+        sentiment_cols = st.columns(len(SENTIMENT_POLARITY_LABELS))
+        for idx, label in enumerate(SENTIMENT_POLARITY_LABELS):
+            with sentiment_cols[idx]:
+                st.write(f"**{label.capitalize()}**")
+                # Create two subcolumns for "Correct" and "Wrong"
+                subcol_correct, subcol_wrong = st.columns(2)
+                with subcol_correct:
+                    correct_value = st.slider(
+                        "Correct",
+                        min_value=0.0,
+                        max_value=1.0,
+                        value=0.33,  # default value
+                        step=0.01,
+                        format="%.2f",
+                        key=f"sentiment_{label}_correct"
+                    )
+                with subcol_wrong:
+                    wrong_value = st.slider(
+                        "Wrong",
+                        min_value=0.0,
+                        max_value=1.0,
+                        value=0.0,  # default value
+                        step=0.01,
+                        format="%.2f",
+                        key=f"sentiment_{label}_wrong"
+                    )
+                sentiment_feedback[label] = {"correct": correct_value, "wrong": wrong_value}
+
+        # st.write("**Collected Sentiment Feedback:**")
+        # st.write(sentiment_feedback)
+
+        # ---------------------------
+        # Emotion Feedback
+        # ---------------------------
+        st.write("#### Emotion Feedback (Select values between 0 and 1)")
+        EMOTION_MOODTAG_LABELS = [
+            "anger", "disgust", "fear", "joy", "neutral",
+            "sadness", "surprise"
+        ]
+
+        model_names2 = list(MODEL_OPTIONS2.keys())
+        selected_model2 = st.selectbox(
+            "Choose a model:", model_names2, key="selected_model_stage2"
+        )
+
+        emotion_feedback = {}
+        max_cols = 3  # Maximum number of emotion labels in one row
+        num_labels = len(EMOTION_MOODTAG_LABELS)
+        num_rows = math.ceil(num_labels / max_cols)
+
+        for row in range(num_rows):
+            # Get labels for this row.
+            row_labels = EMOTION_MOODTAG_LABELS[row * max_cols:(row + 1) * max_cols]
+            # Create main columns for each label in this row.
+            main_cols = st.columns(len(row_labels))
+            for idx, label in enumerate(row_labels):
+                with main_cols[idx]:
+                    st.write(f"**{label.capitalize()}**")
+                    # Create two subcolumns for correct and wrong values.
+                    subcol_correct, subcol_wrong = st.columns(2)
+                    with subcol_correct:
+                        correct_value = st.slider(
+                            "Correct",
+                            min_value=0.0,
+                            max_value=1.0,
+                            value=0.0,
+                            step=0.01,
+                            format="%.2f",
+                            key=f"emotion_{label}_correct"
+                        )
+                    with subcol_wrong:
+                        wrong_value = st.slider(
+                            "Wrong",
+                            min_value=0.0,
+                            max_value=1.0,
+                            value=0.0,
+                            step=0.01,
+                            format="%.2f",
+                            key=f"emotion_{label}_wrong"
+                        )
+                    emotion_feedback[label] = {"correct": correct_value, "wrong": wrong_value}
+
+        # Use form_submit_button instead of st.button inside a form
+        submit_feedback = st.form_submit_button("Submit Feedback")
+
+        if submit_feedback and feedback.strip() and feedback2.strip():
+            # Prepare data to insert
+            data_to_insert = {
+                "input_text": st.session_state.get("user_input_stage3", ""),
+                "correct_text_by_user": feedback,
+                "model_used": st.session_state.get("selected_model_stage3", "unknown"),
+                "wrong_pred_any": feedback2,
+                "sentiment_feedback": sentiment_feedback,
+                "emotion_feedback": emotion_feedback
+            }
+            st.error("Feedback submission is disabled in debug logging mode.")
+            # try:
+            #     from supabase import create_client, Client
+            #     from dotenv import load_dotenv
+            #     load_dotenv()  # or load_dotenv(dotenv_path=env_path) if you have a specific path
+            #     supabase: Client = create_client(
+            #         get_env_variable("SUPABASE_DB_TACHYGRAPHY_DB_URL"),
+            #         get_env_variable("SUPABASE_DB_TACHYGRAPHY_ANON_API_KEY")
+            #     )
+            #     response = supabase.table(
+            #         get_env_variable("SUPABASE_DB_TACHYGRAPHY_DB_STAGE3_TABLE")
+            #     ).insert(data_to_insert, returning="minimal").execute()
+            #     st.success("Feedback submitted successfully!")
+            # except Exception as e:
+            #     st.error(f"Feedback submission failed: {e}")
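The commented-out block above shows the intended submission path. A self-contained sketch of that Supabase insert, runnable outside Streamlit (the environment-variable names are copied from the commented block; the payload values are illustrative):

    # Sketch of the feedback insert that data_collector.py leaves commented
    # out. Env-var names follow the commented block above; the payload keys
    # mirror data_to_insert.
    import os
    from dotenv import load_dotenv
    from supabase import create_client, Client

    load_dotenv()
    supabase: Client = create_client(
        os.environ["SUPABASE_DB_TACHYGRAPHY_DB_URL"],
        os.environ["SUPABASE_DB_TACHYGRAPHY_ANON_API_KEY"],
    )

    data_to_insert = {
        "input_text": "brb 5 min",  # illustrative values
        "correct_text_by_user": "Be right back in five minutes.",
        "model_used": "DeBERTa v3 Base for Sequence Classification",
        "wrong_pred_any": "bring back five minutes",
        "sentiment_feedback": {"neutral": {"correct": 0.9, "wrong": 0.2}},
        "emotion_feedback": {"joy": {"correct": 0.1, "wrong": 0.6}},
    }

    table = os.environ["SUPABASE_DB_TACHYGRAPHY_DB_STAGE3_TABLE"]
    response = supabase.table(table).insert(data_to_insert).execute()
    print(response)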
data_collection_form/hmv_cfg_base_dcl/__init__.py ADDED
File without changes
data_collection_form/hmv_cfg_base_dcl/imports.py ADDED
@@ -0,0 +1,25 @@
+import os
+import sys
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
+
+import streamlit as st
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel
+# import torch
+import numpy as np
+import matplotlib.pyplot as plt
+import plotly.express as px
+import pandas as pd
+import json
+import gc
+import psutil
+import importlib
+import importlib.util
+import asyncio
+# import pytorch_lightning as pl
+
+import safetensors
+from safetensors import load_file, save_file
+import json
+import huggingface_hub
+from huggingface_hub import hf_hub_download
emotionMoodtag_analysis/config/stage2_models.json CHANGED
@@ -3,7 +3,7 @@
     "name": "DeBERTa v3 Base for Sequence Classification",
     "type": "hf_automodel_finetuned_dbt3",
     "module_path": "hmv_cfg_base_stage2.model1",
-    "hf_location": "tachygraphy-microtext-normalization-iemk/DeBERTa-v3-seqClassfication-LV2-EmotionMoodtags-Batch8",
+    "hf_location": "Tachygraphy-Microtext-Normalization-IEMK25/DeBERTa-v3-seqClassfication-LV2-EmotionMoodtags-Batch8",
     "tokenizer_class": "DebertaV2Tokenizer",
     "model_class": "DebertaV2ForSequenceClassification",
     "problem_type": "regression",
@@ -18,7 +18,7 @@
     "name": "DeBERTa v3 Base Custom Model with minimal Regularized Loss",
     "type": "db3_base_custom",
     "module_path": "hmv_cfg_base_stage2.model2",
-    "hf_location": "tachygraphy-microtext-normalization-iemk/DeBERTa-v3-Base-Cust-LV2-EmotionMoodtags-minRegLoss",
+    "hf_location": "Tachygraphy-Microtext-Normalization-IEMK25/DeBERTa-v3-Base-Cust-LV2-EmotionMoodtags-minRegLoss",
     "tokenizer_class": "DebertaV2Tokenizer",
     "model_class": "EmotionModel",
     "problem_type": "regression",
poetry.lock CHANGED
@@ -1249,14 +1249,14 @@ tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipyth

 [[package]]
 name = "faker"
-version = "37.0.2"
+version = "37.1.0"
 description = "Faker is a Python package that generates fake data for you."
 optional = false
 python-versions = ">=3.9"
 groups = ["main"]
 files = [
-    {file = "faker-37.0.2-py3-none-any.whl", hash = "sha256:8955706c56c28099585e9e2b6f814eb0a3a227eb36a2ee3eb9ab577c4764eacc"},
-    {file = "faker-37.0.2.tar.gz", hash = "sha256:948bd27706478d3aa0b6f9f58b9f25207098f6ca79852c7b49c44a8ced2bc59b"},
+    {file = "faker-37.1.0-py3-none-any.whl", hash = "sha256:dc2f730be71cb770e9c715b13374d80dbcee879675121ab51f9683d262ae9a1c"},
+    {file = "faker-37.1.0.tar.gz", hash = "sha256:ad9dc66a3b84888b837ca729e85299a96b58fdaef0323ed0baace93c9614af06"},
 ]

 [package.dependencies]
@@ -3152,24 +3152,20 @@ files = [

 [[package]]
 name = "narwhals"
-version = "1.31.0"
+version = "1.32.0"
 description = "Extremely lightweight compatibility layer between dataframe libraries"
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "narwhals-1.31.0-py3-none-any.whl", hash = "sha256:2a7b79bb5f511055c4c0142121fc0d4171ea171458e12d44dbd9c8fc6488e997"},
-    {file = "narwhals-1.31.0.tar.gz", hash = "sha256:333472e2562343dfdd27407ec9b5114a07c81d0416794e4ac6b703dd925c6a1a"},
+    {file = "narwhals-1.32.0-py3-none-any.whl", hash = "sha256:8bdbf3f76155887412eea04b0b06303856ac1aa3d9e8bda5b5e54612855fa560"},
+    {file = "narwhals-1.32.0.tar.gz", hash = "sha256:bd0aa41434737adb4b26f8593f3559abc7d938730ece010fe727b58bc363580d"},
 ]

 [package.extras]
-core = ["duckdb", "pandas", "polars", "pyarrow", "sqlframe"]
 cudf = ["cudf (>=24.10.0)"]
 dask = ["dask[dataframe] (>=2024.8)"]
-dev = ["covdefaults", "hypothesis", "mypy (>=1.15.0,<1.16.0)", "pandas-stubs (==2.2.3.250308)", "polars (==1.25.2)", "pre-commit", "pyarrow-stubs (==17.18)", "pyright", "pytest", "pytest-cov", "pytest-env", "pytest-randomly", "sqlframe (==3.24.1)", "typing-extensions", "uv"]
-docs = ["black", "duckdb", "jinja2", "markdown-exec[ansi]", "mkdocs", "mkdocs-autorefs", "mkdocs-material", "mkdocstrings-python (>=1.16)", "mkdocstrings[python]", "pandas", "polars (>=1.0.0)", "pyarrow"]
 duckdb = ["duckdb (>=1.0)"]
-extra = ["scikit-learn"]
 ibis = ["ibis-framework (>=6.0.0)", "packaging", "pyarrow-hotfix", "rich"]
 modin = ["modin"]
 pandas = ["pandas (>=0.25.3)"]
@@ -3177,8 +3173,6 @@ polars = ["polars (>=0.20.3)"]
 pyarrow = ["pyarrow (>=11.0.0)"]
 pyspark = ["pyspark (>=3.5.0)"]
 sqlframe = ["sqlframe (>=3.22.0)"]
-tests = ["covdefaults", "hypothesis", "pytest", "pytest-cov", "pytest-env", "pytest-randomly"]
-typing = ["hypothesis", "mypy (>=1.15.0,<1.16.0)", "pandas-stubs (==2.2.3.250308)", "polars (==1.25.2)", "pyarrow-stubs (==17.18)", "pyright", "pytest", "sqlframe (==3.24.1)", "typing-extensions", "uv"]

 [[package]]
 name = "nest-asyncio"
@@ -4617,14 +4611,14 @@ extra = ["pygments (>=2.19.1)"]

 [[package]]
 name = "pyparsing"
-version = "3.2.2"
+version = "3.2.3"
 description = "pyparsing module - Classes and methods to define and execute parsing grammars"
 optional = false
 python-versions = ">=3.9"
 groups = ["main"]
 files = [
-    {file = "pyparsing-3.2.2-py3-none-any.whl", hash = "sha256:6ab05e1cb111cc72acc8ed811a3ca4c2be2af8d7b6df324347f04fd057d8d793"},
-    {file = "pyparsing-3.2.2.tar.gz", hash = "sha256:2a857aee851f113c2de9d4bfd9061baea478cb0f1c7ca6cbf594942d6d111575"},
+    {file = "pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf"},
+    {file = "pyparsing-3.2.3.tar.gz", hash = "sha256:b9c13f1ab8b3b542f72e28f634bad4de758ab3ce4546e4301970ad6fa77c38be"},
 ]

 [package.extras]
@@ -4659,14 +4653,14 @@ six = ">=1.5"

 [[package]]
 name = "python-dotenv"
-version = "1.0.1"
+version = "1.1.0"
 description = "Read key-value pairs from a .env file and set them as environment variables"
 optional = false
-python-versions = ">=3.8"
+python-versions = ">=3.9"
 groups = ["main"]
 files = [
-    {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"},
-    {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"},
+    {file = "python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d"},
+    {file = "python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5"},
 ]

 [package.extras]
@@ -4705,14 +4699,14 @@ test = ["cloudpickle (>=1.3)", "coverage (==7.3.1)", "fastapi", "numpy (>=1.17.2

 [[package]]
 name = "pytz"
-version = "2025.1"
+version = "2025.2"
 description = "World timezone definitions, modern and historical"
 optional = false
 python-versions = "*"
 groups = ["main"]
 files = [
-    {file = "pytz-2025.1-py2.py3-none-any.whl", hash = "sha256:89dd22dca55b46eac6eda23b2d72721bf1bdfef212645d81513ef5d03038de57"},
-    {file = "pytz-2025.1.tar.gz", hash = "sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e"},
+    {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"},
+    {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"},
 ]

 [[package]]
@@ -6613,14 +6607,14 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,

 [[package]]
 name = "transformers"
-version = "4.50.0"
+version = "4.50.1"
 description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
 optional = false
 python-versions = ">=3.9.0"
 groups = ["main"]
 files = [
-    {file = "transformers-4.50.0-py3-none-any.whl", hash = "sha256:d75465d523a28bcfef0028c671f682edee29418ab9a5a15cf8a05171e7c54cb7"},
-    {file = "transformers-4.50.0.tar.gz", hash = "sha256:d4b0f587ec88825981103fee0a1e80230d956ecc8a7f3feeaafbe49a233c88b8"},
+    {file = "transformers-4.50.1-py3-none-any.whl", hash = "sha256:e9b9bd274518150528c1d745c7ebba72d27e4e52f2deffaa1fddebad6912da5d"},
+    {file = "transformers-4.50.1.tar.gz", hash = "sha256:6ee542d2cce7e1b6a06ae350599c27ddf2e6e45ec9d0cb42915b37fca3d6399a"},
 ]

 [package.dependencies]
pyproject.toml CHANGED
@@ -1,6 +1,6 @@
 [project]
 name = "tachygraphy-microtext-analysis-and-normalization"
-version = "2025.03.24.post1"
+version = "2025.03.25.post1"
 description = ""
 authors = [
     { name = "Archisman Karmakar", email = "[email protected]" },
pyprojectOLD.toml CHANGED
@@ -1,6 +1,7 @@
 [project]
 name = "tachygraphy-microtext-analysis-and-normalization"
-version = "2025.03.22.post1"
+version = "2025.03.24.post1"
+# version = "2025.03.22.post1"
 # version = "2025.03.21.post1"
 # version = "2025.03.18.post5"
 # version = "2025.03.18.post4_3"
requirements.txt CHANGED
@@ -45,7 +45,7 @@ entrypoints==0.4 ; python_version >= "3.12" and python_version < "4.0"
 et-xmlfile==2.0.0 ; python_version >= "3.12" and python_version < "4.0"
 evaluate==0.4.3 ; python_version >= "3.12" and python_version < "4.0"
 executing==2.2.0 ; python_version >= "3.12" and python_version < "4.0"
-faker==37.0.2 ; python_version >= "3.12" and python_version < "4.0"
+faker==37.1.0 ; python_version >= "3.12" and python_version < "4.0"
 fastjsonschema==2.21.1 ; python_version >= "3.12" and python_version < "4.0"
 favicon==0.7.0 ; python_version >= "3.12" and python_version < "4.0"
 filelock==3.18.0 ; python_version >= "3.12" and python_version < "4.0"
@@ -111,7 +111,7 @@ msgpack==1.1.0 ; python_version >= "3.12" and python_version < "4.0"
 multidict==6.2.0 ; python_version >= "3.12" and python_version < "4.0"
 multiprocess==0.70.16 ; python_version >= "3.12" and python_version < "4.0"
 namex==0.0.8 ; python_version >= "3.12" and python_version < "4.0"
-narwhals==1.31.0 ; python_version >= "3.12" and python_version < "4.0"
+narwhals==1.32.0 ; python_version >= "3.12" and python_version < "4.0"
 nest-asyncio==1.6.0 ; python_version >= "3.12" and python_version < "4.0"
 networkx==3.4.2 ; python_version >= "3.12" and python_version < "4.0"
 nltk==3.9.1 ; python_version >= "3.12" and python_version < "4.0"
@@ -164,12 +164,12 @@ pydantic==2.10.6 ; python_version >= "3.12" and python_version < "4.0"
 pydeck==0.9.1 ; python_version >= "3.12" and python_version < "4.0"
 pygments==2.19.1 ; python_version >= "3.12" and python_version < "4.0"
 pymdown-extensions==10.14.3 ; python_version >= "3.12" and python_version < "4.0"
-pyparsing==3.2.2 ; python_version >= "3.12" and python_version < "4.0"
+pyparsing==3.2.3 ; python_version >= "3.12" and python_version < "4.0"
 pyproject-hooks==1.2.0 ; python_version >= "3.12" and python_version < "4.0"
 python-dateutil==2.9.0.post0 ; python_version >= "3.12" and python_version < "4.0"
-python-dotenv==1.0.1 ; python_version >= "3.12" and python_version < "4.0"
+python-dotenv==1.1.0 ; python_version >= "3.12" and python_version < "4.0"
 pytorch-lightning==2.5.1 ; python_version >= "3.12" and python_version < "4.0"
-pytz==2025.1 ; python_version >= "3.12" and python_version < "4.0"
+pytz==2025.2 ; python_version >= "3.12" and python_version < "4.0"
 pywin32-ctypes==0.2.3 ; python_version >= "3.12" and python_version < "4.0" and sys_platform == "win32"
 pywin32==309 ; python_version >= "3.12" and python_version < "4.0" and (sys_platform == "win32" or platform_system == "Windows")
 pyyaml==6.0.2 ; python_version >= "3.12" and python_version < "4.0"
@@ -238,7 +238,7 @@ torchvision==0.21.0 ; python_version >= "3.12" and python_version < "4.0"
 tornado==6.4.2 ; python_version >= "3.12" and python_version < "4.0"
 tqdm==4.67.1 ; python_version >= "3.12" and python_version < "4.0"
 traitlets==5.14.3 ; python_version >= "3.12" and python_version < "4.0"
-transformers==4.50.0 ; python_version >= "3.12" and python_version < "4.0"
+transformers==4.50.1 ; python_version >= "3.12" and python_version < "4.0"
 triton==3.2.0 ; python_version >= "3.12" and python_version < "4.0" and platform_system == "Linux" and platform_machine == "x86_64"
 trove-classifiers==2025.3.19.19 ; python_version >= "3.12" and python_version < "4.0"
 typing-extensions==4.12.2 ; python_version >= "3.12" and python_version < "4.0"
sentimentPolarity_analysis/config/stage1_models.json CHANGED
@@ -3,7 +3,7 @@
     "name": "DeBERTa v3 Base for Sequence Classification",
     "type": "hf_automodel_finetuned_dbt3",
     "module_path": "hmv_cfg_base_stage1.model1",
-    "hf_location": "tachygraphy-microtext-normalization-iemk/DeBERTa-v3-seqClassfication-LV1-SentimentPolarities-Batch8",
+    "hf_location": "Tachygraphy-Microtext-Normalization-IEMK25/DeBERTa-v3-seqClassfication-LV1-SentimentPolarities-Batch8",
     "tokenizer_class": "DebertaV2Tokenizer",
     "model_class": "DebertaV2ForSequenceClassification",
     "problem_type": "multi_label_classification",
@@ -18,7 +18,7 @@
     "name": "DeBERTa v3 Base Custom Model with minimal Regularized Loss",
     "type": "db3_base_custom",
     "module_path": "hmv_cfg_base_stage1.model2",
-    "hf_location": "tachygraphy-microtext-normalization-iemk/DeBERTa-v3-Base-Cust-LV1-SentimentPolarities-minRegLoss",
+    "hf_location": "Tachygraphy-Microtext-Normalization-IEMK25/DeBERTa-v3-Base-Cust-LV1-SentimentPolarities-minRegLoss",
     "tokenizer_class": "DebertaV2Tokenizer",
     "model_class": "SentimentModel",
     "problem_type": "multi_label_classification",
@@ -33,7 +33,7 @@
     "name": "BERT Base Uncased Custom Model",
     "type": "bert_base_uncased_custom",
     "module_path": "hmv_cfg_base_stage1.model3",
-    "hf_location": "https://huggingface.co/tachygraphy-microtext-normalization-iemk/BERT-LV1-SentimentPolarities/resolve/main/saved_weights.pt",
+    "hf_location": "https://huggingface.co/Tachygraphy-Microtext-Normalization-IEMK25/BERT-LV1-SentimentPolarities/resolve/main/saved_weights.pt",
     "tokenizer_class": "AutoTokenizer",
     "model_class": "BERT_architecture",
     "problem_type": "multi_label_classification",
@@ -48,7 +48,7 @@
     "name": "LSTM Custom Model",
     "type": "lstm_uncased_custom",
     "module_path": "hmv_cfg_base_stage1.model4",
-    "hf_location": "tachygraphy-microtext-normalization-iemk/LSTM-LV1-SentimentPolarities",
+    "hf_location": "Tachygraphy-Microtext-Normalization-IEMK25/LSTM-LV1-SentimentPolarities",
     "tokenizer_class": "",
     "model_class": "",
     "problem_type": "multi_label_classification",
stacked_stacking_stages/__init__.py ADDED
File without changes
stacked_stacking_stages/hmv_cfg_base_stk_stg/__init__.py ADDED
File without changes
stacked_stacking_stages/hmv_cfg_base_stk_stg/imports.py ADDED
@@ -0,0 +1,25 @@
+import os
+import sys
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
+
+import streamlit as st
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel, AutoModelForSeq2SeqLM
+# import torch
+import numpy as np
+import matplotlib.pyplot as plt
+import plotly.express as px
+import pandas as pd
+import json
+import gc
+import psutil
+import importlib
+import importlib.util
+import asyncio
+# import pytorch_lightning as pl
+
+import safetensors
+from safetensors import load_file, save_file
+import json
+import huggingface_hub
+from huggingface_hub import hf_hub_download
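These imports pull in hf_hub_download, which the model loaders use to fetch weight files referenced by the hf_location entries above. A minimal sketch (repo id and filename come from the BERT entry in stage1_models.json earlier in this commit):

    # Sketch: fetch a weights file from the Hub, as the loaders that consume
    # these imports do.
    from huggingface_hub import hf_hub_download

    weights_path = hf_hub_download(
        repo_id="Tachygraphy-Microtext-Normalization-IEMK25/BERT-LV1-SentimentPolarities",
        filename="saved_weights.pt",
    )
    print(weights_path)  # local cache path to the downloaded file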
stacked_stacking_stages/stacking_stages.py ADDED
@@ -0,0 +1,774 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import shutil
2
+ from transformers.utils.hub import TRANSFORMERS_CACHE
3
+ import torch
4
+ import time
5
+ import joblib
6
+ import importlib.util
7
+ from imports import *
8
+ import os
9
+ import sys
10
+ import time
11
+ import uuid
12
+ import math
13
+
14
+ from dotenv import load_dotenv
15
+ # import psycopg2
16
+ from supabase import create_client, Client
17
+ from datetime import datetime, timezone
18
+ from collections import OrderedDict
19
+
20
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
21
+
22
+ env_path = os.path.join(os.path.dirname(__file__),
23
+ "..", ".devcontainer", ".env")
24
+
25
+ # from transformers.utils import move_cache_to_trash
26
+ # from huggingface_hub import delete_cache
27
+
28
+
29
+ # from hmv_cfg_base_stage1.model1 import load_model as load_model1
30
+ # from hmv_cfg_base_stage1.model1 import predict as predict1
31
+
32
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
33
+ CONFIG_STAGE1 = os.path.join(BASE_DIR, "..", "sentimentPolarity_analysis", "config", "stage1_models.json")
34
+ CONFIG_STAGE2 = os.path.join(BASE_DIR, "..", "emotionMoodtag_analysis", "config", "stage2_models.json")
35
+ CONFIG_STAGE3 = os.path.join(BASE_DIR, "..", "transformation_and_Normalization", "config", "stage3_models.json")
36
+ LOADERS_STAGE_COLLECTOR = os.path.join(BASE_DIR, "hmv_cfg_base_dlc")
37
+
38
+
39
+ EMOTION_MOODTAG_LABELS = [
40
+ "anger", "disgust", "fear", "joy", "neutral",
41
+ "sadness", "surprise"
42
+ ]
43
+
44
+ SENTIMENT_POLARITY_LABELS = [
45
+ "negative", "neutral", "positive"
46
+ ]
47
+
48
+
49
+ current_model = None
50
+ current_tokenizer = None
51
+
52
+
53
+ # Enabling Resource caching
54
+
55
+ # Load environment variables from .env
56
+ load_dotenv()
57
+
58
+ # @st.cache_resource
59
+ # DATABASE_URL = os.environ.get("DATABASE_URL")
60
+
61
+ # def get_connection():
62
+ # # """Establish a connection to the database."""
63
+ # # return psycopg2.connect(os.environ.get("DATABASE_URL"))
64
+ # supabase: Client = create_client(os.environ.get("SUPABASE_URL"), os.environ.get("anon_key"))
65
+ # return supabase
66
+
67
+ # @st.cache_resource
68
+
69
+
70
+ def load_model_config1():
71
+ with open(CONFIG_STAGE1, "r") as f:
72
+ model_data = json.load(f)
73
+ # Convert model_data values to a list and take only the first two entries
74
+ top2_data = list(model_data.values())[:2]
75
+ # Create a dictionary mapping from model name to its configuration for the top two models
76
+ model_options = {v["name"]: v for v in top2_data}
77
+ return top2_data, model_options
78
+
79
+
80
+
81
+ MODEL_DATA1, MODEL_OPTIONS1 = load_model_config1()
82
+
83
+ # MODEL_DATA1_1=MODEL_DATA1[0]
84
+ # MODEL_OPTIONS1_1=MODEL_OPTIONS1[0]
85
+
86
+
87
+ def load_model_config2():
88
+ with open(CONFIG_STAGE2, "r") as f:
89
+ model_data = json.load(f)
90
+ # Convert model_data values to a list and take only the first two entries
91
+ top2_data = list(model_data.values())[:2]
92
+ # Create a dictionary mapping from model name to its configuration for the top two models
93
+ model_options = {v["name"]: v for v in top2_data}
94
+ return top2_data, model_options
95
+
96
+
97
+ MODEL_DATA2, MODEL_OPTIONS2 = load_model_config2()
98
+
99
+ # MODEL_DATA2_1=MODEL_DATA2[0]
100
+ # MODEL_OPTIONS2_1=MODEL_OPTIONS2[0]
101
+
102
+
103
+ def load_model_config3():
104
+ with open(CONFIG_STAGE3, "r") as f:
105
+ model_data = json.load(f)
106
+ # Convert model_data values to a list and take only the first two entries
107
+ top2_data = list(model_data.values())[:2]
108
+ # Create a dictionary mapping from model name to its configuration for the top two models
109
+ model_options = {v["name"]: v for v in top2_data}
110
+ return top2_data, model_options
111
+
112
+
113
+
114
+ MODEL_DATA3, MODEL_OPTIONS3 = load_model_config3()
115
+
116
+ # MODEL_DATA3_1=MODEL_DATA3[0]
117
+ # MODEL_OPTIONS3_1=MODEL_OPTIONS3[0]
118
+
119
+
120
+ # ✅ Dynamically Import Model Functions
121
+ def import_from_module(module_name, function_name):
122
+ try:
123
+ module = importlib.import_module(module_name)
124
+ return getattr(module, function_name)
125
+ except (ModuleNotFoundError, AttributeError) as e:
126
+ st.error(f"❌ Import Error: {e}")
127
+ return None
128
+
129
+
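A usage sketch for `import_from_module`, assuming the stage-1 loader module named in the commented-out imports near the top of the file is on the import path:

```python
# Hypothetical usage; "hmv_cfg_base_stage1.model1" is taken from the
# commented-out imports above and may differ in the actual deployment.
load_model1 = import_from_module("hmv_cfg_base_stage1.model1", "load_model")
if load_model1 is not None:
    model, tokenizer = load_model1()
```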
130
+ def free_memory():
131
+ # """Free up CPU & GPU memory before loading a new model."""
132
+ global current_model, current_tokenizer
133
+
134
+ if current_model is not None:
135
+ del current_model # Delete the existing model
136
+ current_model = None # Reset reference
137
+
138
+ if current_tokenizer is not None:
139
+ del current_tokenizer # Delete the tokenizer
140
+ current_tokenizer = None
141
+
142
+ gc.collect() # Force garbage collection for CPU memory
143
+
144
+ if torch.cuda.is_available():
145
+ torch.cuda.empty_cache() # Free GPU memory
146
+ torch.cuda.ipc_collect() # Clean up PyTorch GPU cache
147
+
148
+ # On CPU-only runs, refresh memory stats (psutil only reports usage; it does not reclaim memory)
149
+ try:
150
+ if not torch.cuda.is_available():
151
+ psutil.virtual_memory() # Refresh memory stats
152
+ except Exception as e:
153
+ print(f"Memory cleanup error: {e}")
154
+
155
+ # Delete cached Hugging Face models
156
+ try:
157
+ cache_dir = TRANSFORMERS_CACHE
158
+ if os.path.exists(cache_dir):
159
+ shutil.rmtree(cache_dir)
160
+ print("Cache cleared!")
161
+ except Exception as e:
162
+ print(f"❌ Cache cleanup error: {e}")
163
+
164
+
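Deleting `TRANSFORMERS_CACHE` wholesale, as `free_memory` does above, forces every model to be re-downloaded on the next load. A gentler sketch using the `huggingface_hub` cache utilities, shown for comparison and not wired into the app:

```python
from huggingface_hub import scan_cache_dir

cache_info = scan_cache_dir()
# Collect every cached revision and delete them through the official API,
# which also reports how much space the deletion will free.
revisions = [rev.commit_hash
             for repo in cache_info.repos
             for rev in repo.revisions]
if revisions:
    strategy = cache_info.delete_revisions(*revisions)
    print(f"Will free {strategy.expected_freed_size_str}")
    strategy.execute()
```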
165
+ def load_selected_model1(model_name):
166
+ global current_model, current_tokenizer
167
+
168
+ # st.cache_resource.clear()
169
+
170
+ # free_memory()
171
+
172
+ # st.write("DEBUG: Available Models:", MODEL_OPTIONS.keys()) # ✅ See available models
173
+ # st.write("DEBUG: Selected Model:", MODEL_OPTIONS[model_name]) # ✅ Check selected model
174
+ # st.write("DEBUG: Model Name:", model_name) # ✅ Check selected model
175
+
176
+ if model_name not in MODEL_OPTIONS1:
177
+ st.error(f"⚠️ Model '{model_name}' not found in config!")
178
+ return None, None, None
179
+
180
+ model_info = MODEL_OPTIONS1[model_name]
181
+ hf_location = model_info["hf_location"]
182
+
183
+ model_module = model_info["module_path"]
184
+ load_function = model_info["load_function"]
185
+ predict_function = model_info["predict_function"]
186
+
187
+ load_model_func = import_from_module(model_module, load_function)
188
+ predict_func = import_from_module(model_module, predict_function)
189
+
190
+ if load_model_func is None or predict_func is None:
191
+ st.error("❌ Model functions could not be loaded!")
192
+ return None, None, None
193
+
194
+ model, tokenizer = load_model_func()
195
+
196
+ current_model, current_tokenizer = model, tokenizer
197
+ return model, tokenizer, predict_func
198
+
199
+ def load_selected_model2(model_name):
200
+ global current_model, current_tokenizer
201
+
202
+ # st.cache_resource.clear()
203
+
204
+ # free_memory()
205
+
206
+ # st.write("DEBUG: Available Models:", MODEL_OPTIONS.keys()) # ✅ See available models
207
+ # st.write("DEBUG: Selected Model:", MODEL_OPTIONS[model_name]) # ✅ Check selected model
208
+ # st.write("DEBUG: Model Name:", model_name) # ✅ Check selected model
209
+
210
+ if model_name not in MODEL_OPTIONS2:
211
+ st.error(f"⚠️ Model '{model_name}' not found in config!")
212
+ return None, None, None
213
+
214
+ model_info = MODEL_OPTIONS2[model_name]
215
+ hf_location = model_info["hf_location"]
216
+
217
+ model_module = model_info["module_path"]
218
+ load_function = model_info["load_function"]
219
+ predict_function = model_info["predict_function"]
220
+
221
+ load_model_func = import_from_module(model_module, load_function)
222
+ predict_func = import_from_module(model_module, predict_function)
223
+
224
+ if load_model_func is None or predict_func is None:
225
+ st.error("❌ Model functions could not be loaded!")
226
+ return None, None, None
227
+
228
+ model, tokenizer = load_model_func()
229
+
230
+ current_model, current_tokenizer = model, tokenizer
231
+ return model, tokenizer, predict_func
232
+
233
+ def load_selected_model3(model_name):
234
+ global current_model, current_tokenizer
235
+
236
+ # st.cache_resource.clear()
237
+
238
+ # free_memory()
239
+
240
+ # st.write("DEBUG: Available Models:", MODEL_OPTIONS.keys()) # ✅ See available models
241
+ # st.write("DEBUG: Selected Model:", MODEL_OPTIONS[model_name]) # ✅ Check selected model
242
+ # st.write("DEBUG: Model Name:", model_name) # ✅ Check selected model
243
+
244
+ if model_name not in MODEL_OPTIONS3:
245
+ st.error(f"⚠️ Model '{model_name}' not found in config!")
246
+ return None, None, None
247
+
248
+ model_info = MODEL_OPTIONS3[model_name]
249
+ hf_location = model_info["hf_location"]
250
+
251
+ model_module = model_info["module_path"]
252
+ load_function = model_info["load_function"]
253
+ predict_function = model_info["predict_function"]
254
+
255
+ load_model_func = import_from_module(model_module, load_function)
256
+ predict_func = import_from_module(model_module, predict_function)
257
+
258
+ if load_model_func is None or predict_func is None:
259
+ st.error("❌ Model functions could not be loaded!")
260
+ return None, None, None
261
+
262
+ model, tokenizer = load_model_func()
263
+
264
+ current_model, current_tokenizer = model, tokenizer
265
+ return model, tokenizer, predict_func
266
+
267
+
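`load_selected_model1/2/3` are identical apart from the options dict they consult, so a single parameterized loader could replace all three. A sketch (the generic function name is illustrative):

```python
def load_selected_model(model_name, model_options):
    """Generic version of the three stage-specific loaders above."""
    global current_model, current_tokenizer
    if model_name not in model_options:
        st.error(f"⚠️ Model '{model_name}' not found in config!")
        return None, None, None
    info = model_options[model_name]
    load_fn = import_from_module(info["module_path"], info["load_function"])
    predict_fn = import_from_module(info["module_path"], info["predict_function"])
    if load_fn is None or predict_fn is None:
        st.error("❌ Model functions could not be loaded!")
        return None, None, None
    model, tokenizer = load_fn()
    current_model, current_tokenizer = model, tokenizer
    return model, tokenizer, predict_fn

# e.g. model1, tok1, predict1 = load_selected_model(name, MODEL_OPTIONS1)
```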
268
+ def disable_ui():
269
+ st.components.v1.html(
270
+ """
271
+ <style>
272
+ #ui-disable-overlay {
273
+ position: fixed;
274
+ top: 0;
275
+ left: 0;
276
+ width: 100vw;
277
+ height: 100vh;
278
+ background-color: rgba(200, 200, 200, 0.5);
279
+ z-index: 9999;
280
+ }
281
+ </style>
282
+ <div id="ui-disable-overlay"></div>
283
+ """,
284
+ height=0,
285
+ scrolling=False
286
+ )
287
+
288
+
289
+ def enable_ui():
290
+ st.components.v1.html(
291
+ """
292
+ <script>
293
+ var overlay = document.getElementById("ui-disable-overlay");
294
+ if (overlay) {
295
+ overlay.parentNode.removeChild(overlay);
296
+ }
297
+ </script>
298
+ """,
299
+ height=0,
300
+ scrolling=False
301
+ )
302
+
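Note that each `st.components.v1.html` call renders inside its own sandboxed iframe, so the overlay injected by `disable_ui` only covers that zero-height iframe, and the removal script in `enable_ui` runs in a different iframe that cannot see the overlay node. The usual Streamlit-native pattern gates widgets on a session flag instead; a minimal sketch using the `disabled` key that is already initialized further down:

```python
# Sketch only: gate inputs on a session flag rather than injecting HTML.
st.session_state.disabled = True   # set while inference is running
st.text_input("Enter text:", disabled=st.session_state.get("disabled", False))
```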
303
+
304
+
305
+
306
+ def get_sentiment_emotion_graph_code(input_text, normalized_text, sentiment_array, emotion_array):
307
+ """
308
+ Returns a Graphviz code string representing:
309
+ - Input Text as the root
310
+ - Normalized Text as a child
311
+ - A Sentiment node with its probabilities as children (using SENTIMENT_POLARITY_LABELS)
312
+ - An Emotion node with its probabilities as children (using EMOTION_MOODTAG_LABELS)
313
+ - Arrows from each sentiment node to the Emotion node with fixed penwidths (5 for highest, 3 for middle, 1 for lowest)
314
+
315
+ Both sentiment_array and emotion_array are NumPy arrays (possibly nested, e.g. [[values]]),
316
+ so they are squeezed before use.
317
+ """
318
+ import numpy as np
319
+
320
+ # Flatten arrays in case they are nested
321
+ sentiment_flat = np.array(sentiment_array).squeeze()
322
+ emotion_flat = np.array(emotion_array).squeeze()
323
+
324
+ # Create pairs for each sentiment label with its probability
325
+ sentiment_pairs = list(zip(SENTIMENT_POLARITY_LABELS, sentiment_flat))
326
+ # Sort by probability (ascending)
327
+ sentiment_sorted = sorted(sentiment_pairs, key=lambda x: x[1])
328
+
329
+ # Create a penwidth map: label -> penwidth
330
+ penwidth_map = {}
331
+
332
+ # Collect all unique probabilities to handle ties
333
+ unique_probs = set(prob for _, prob in sentiment_sorted)
334
+
335
+ if len(unique_probs) == 1:
336
+ # All sentiments have the same probability; use mid-range width (e.g., 3) for all
337
+ for label, _ in sentiment_sorted:
338
+ penwidth_map[label] = 3
339
+ elif len(unique_probs) == 2:
340
+ # Two unique probabilities: assign min width 1 and max width 5 accordingly
341
+ min_prob = sentiment_sorted[0][1]
342
+ max_prob = sentiment_sorted[-1][1]
343
+ for label, prob in sentiment_sorted:
344
+ if prob == min_prob:
345
+ penwidth_map[label] = 1
346
+ else:
347
+ penwidth_map[label] = 5
348
+ else:
349
+ # For three distinct probabilities, assign 1 to the smallest, 3 to the middle, 5 to the largest.
350
+ penwidth_map[sentiment_sorted[0][0]] = 1
351
+ penwidth_map[sentiment_sorted[1][0]] = 3
352
+ penwidth_map[sentiment_sorted[2][0]] = 5
353
+
354
+ # Build the basic Graphviz structure
355
+ # Escape embedded double quotes up front; backslashes inside f-string
+ # expressions are a SyntaxError on Python versions below 3.12.
+ safe_input = input_text.replace('"', '\\"')
+ safe_normalized = normalized_text.replace('"', '\\"')
+ graph_code = f'''
356
+ digraph G {{
357
+ rankdir=TB;
358
+ node [shape=box, style="rounded,filled", fontname="Helvetica", fontsize=12];
359
+
360
+ Input [label="Input Text:\\n{safe_input}", fillcolor="#ffe6de", fontcolor="#000000"];
361
+ Normalized [label="Normalized Text:\\n{safe_normalized}", fillcolor="#ffe6de", fontcolor="#000000"];
362
+ Sentiment [label="Sentiment"];
363
+ Emotion [label="Emotion"];
364
+
365
+ Input -> Normalized;
366
+ Input -> Sentiment;
367
+ Sentiment -> Emotion;
368
+ '''
369
+
370
+ # Add sentiment nodes (displaying full values without truncation)
371
+ for label, prob in sentiment_pairs:
372
+ node_id = f"S_{label}"
373
+ graph_code += f'\n {node_id} [label="{label}: {prob}", fillcolor="#ecdeff", fontcolor="black"];'
374
+ graph_code += f'\n Sentiment -> {node_id};'
375
+
376
+ # Add emotion nodes (displaying full values)
377
+ for i, label in enumerate(EMOTION_MOODTAG_LABELS):
378
+ if i < len(emotion_flat):
379
+ prob = emotion_flat[i]
380
+ node_id = f"E_{label}"
381
+ graph_code += f'\n {node_id} [label="{label}: {prob}", fillcolor="#deffe1", fontcolor="black"];'
382
+ graph_code += f'\n Emotion -> {node_id};'
383
+
384
+ # Add arrows from each sentiment node to the Emotion node with fixed penwidth based on ranking
385
+ for label, prob in sentiment_pairs:
386
+ node_id = f"S_{label}"
387
+ pw = penwidth_map[label]
388
+ graph_code += f'\n {node_id} -> Emotion [penwidth={pw}];'
389
+
390
+ graph_code += "\n}"
391
+ return graph_code
392
+
393
+
394
+
395
+
396
+
397
+
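A usage sketch for `get_sentiment_emotion_graph_code` with made-up probabilities; the array shapes mirror the two label lists defined at the top of the file:

```python
import numpy as np

code = get_sentiment_emotion_graph_code(
    "gm hru",                                  # informal input (example)
    "Good morning, how are you?",              # normalized output (example)
    np.array([[0.10, 0.20, 0.70]]),            # negative / neutral / positive
    np.array([[0.02, 0.01, 0.02, 0.75, 0.10, 0.05, 0.05]]),  # 7 emotion labels
)
# st.graphviz_chart(code)
```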
398
+ def get_env_variable(var_name):
399
+ # Try os.environ first (this covers local development and HF Spaces)
400
+ value = os.environ.get(var_name)
401
+ if value is None:
402
+ # Fall back to st.secrets if available (e.g., on Streamlit Cloud)
403
+ try:
404
+ value = st.secrets[var_name]
405
+ except Exception:  # st.secrets may raise FileNotFoundError, not just KeyError, when no secrets file exists
406
+ value = None
407
+ return value
408
+
409
+
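`get_env_variable` lets the same code run on HF Spaces (plain environment variables) and on Streamlit Cloud (`st.secrets`). For example, resolving the Supabase URL referenced in the commented-out feedback code elsewhere in this commit:

```python
# Sketch; the variable name is taken from the commented-out feedback code.
db_url = get_env_variable("SUPABASE_DB_TACHYGRAPHY_DB_URL")
if db_url is None:
    st.warning("Database URL not configured; feedback storage disabled.")
```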
410
+ # Increment the progress bar dynamically, in 5% steps with a short delay
+ def update_progress(progress_bar, start, end, delay=0.1):
411
+ for i in range(start, end + 1, 5): # Increment in steps of 5%
412
+ progress_bar.progress(i)
413
+ time.sleep(delay) # Simulate processing time
414
+ # st.experimental_rerun() # Refresh the page
415
+
416
+
417
+ # Function to update session state when model changes
418
+ def on_model_change():
419
+ st.cache_data.clear()
420
+ st.cache_resource.clear()
421
+ free_memory()
422
+ st.session_state.model_changed = True # Mark model as changed
423
+
424
+ # Reset flags to trigger new prediction and show feedback form
425
+ st.session_state.prediction_generated = False
426
+ st.session_state.feedback_submitted = False
427
+ st.session_state.predictions = None
428
+ st.session_state.graphviz_code = None
429
+ st.session_state.last_processed_input = ""
430
+
431
+
432
+ # Function to update session state when text changes
433
+
434
+
435
+ def on_text_change():
436
+ st.session_state.text_changed = True # Mark text as changed
437
+
438
+ st.session_state.prediction_generated = False
439
+ st.session_state.feedback_submitted = False
440
+ st.session_state.predictions = None
441
+ st.session_state.graphviz_code = None
442
+ # st.session_state.last_processed_input = ""
443
+
444
+
445
+ def update_top_k_from_slider():
446
+ st.session_state.top_k = st.session_state.top_k_slider
447
+
448
+ st.session_state.prediction_generated = False
449
+ st.session_state.feedback_submitted = False
450
+ st.session_state.predictions = None
451
+ st.session_state.graphviz_code = None
452
+ # st.session_state.last_processed_input = ""
453
+
454
+
455
+ def update_top_k_from_input():
456
+ st.session_state.top_k = st.session_state.top_k_input
457
+
458
+ st.session_state.prediction_generated = False
459
+ st.session_state.feedback_submitted = False
460
+ st.session_state.predictions = None
461
+ st.session_state.graphviz_code = None
462
+ # st.session_state.last_processed_input = ""
463
+
464
+ def on_temperature_change():
465
+ st.session_state.prediction_generated = False
466
+ st.session_state.feedback_submitted = False
467
+ st.session_state.predictions = None
468
+ st.session_state.graphviz_code = None
469
+ # st.session_state.last_processed_input = ""
470
+
471
+ def on_top_p_change():
472
+ st.session_state.prediction_generated = False
473
+ st.session_state.feedback_submitted = False
474
+ st.session_state.predictions = None
475
+ st.session_state.graphviz_code = None
476
+ # st.session_state.last_processed_input = ""
477
+
478
+ def on_beam_checkbox_change():
479
+ st.session_state.prediction_generated = False
480
+ st.session_state.feedback_submitted = False
481
+ st.session_state.predictions = None
482
+ st.session_state.graphviz_code = None
483
+ # st.session_state.last_processed_input = ""
484
+
485
+ def on_enable_sampling_checkbox_change():
486
+ st.session_state.prediction_generated = False
487
+ st.session_state.feedback_submitted = False
488
+ st.session_state.predictions = None
489
+ st.session_state.graphviz_code = None
490
+ # st.session_state.last_processed_input = ""
491
+
492
+ def on_enable_earlyStopping_checkbox_change():
493
+ st.session_state.prediction_generated = False
494
+ st.session_state.feedback_submitted = False
495
+ st.session_state.predictions = None
496
+ st.session_state.graphviz_code = None
497
+ # st.session_state.last_processed_input = ""
498
+
499
+ def on_max_new_tokens_change():
500
+ st.session_state.prediction_generated = False
501
+ st.session_state.feedback_submitted = False
502
+ st.session_state.predictions = None
503
+ st.session_state.graphviz_code = None
504
+ # st.session_state.last_processed_input = ""
505
+
506
+ def on_num_return_sequences_change():
507
+ st.session_state.prediction_generated = False
508
+ st.session_state.feedback_submitted = False
509
+ st.session_state.predictions = None
510
+ st.session_state.graphviz_code = None
511
+ # st.session_state.last_processed_input = ""
512
+
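All of the `on_*_change` callbacks above share the same four-line reset body, so a single helper could back every widget's `on_change`. A sketch (the helper name is illustrative):

```python
def reset_prediction_state():
    """Shared body of the on_*_change callbacks above."""
    st.session_state.prediction_generated = False
    st.session_state.feedback_submitted = False
    st.session_state.predictions = None
    st.session_state.graphviz_code = None

# e.g. st.slider("Temperature:", 0.1, 2.0, 0.4, on_change=reset_prediction_state)
```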
513
+ # Initialize session state variables
514
+ if "selected_model1" not in st.session_state:
515
+ st.session_state.selected_model1 = list(MODEL_OPTIONS1.keys())[
516
+ 0] # Default model
517
+ if "selected_model2" not in st.session_state:
518
+ st.session_state.selected_model2 = list(MODEL_OPTIONS2.keys())[
519
+ 0]
520
+ if "selected_model3" not in st.session_state:
521
+ st.session_state.selected_model3 = list(MODEL_OPTIONS3.keys())[
522
+ 0]
523
+ if "user_input" not in st.session_state:
524
+ st.session_state.user_input = ""
525
+ if "last_processed_input" not in st.session_state:
526
+ st.session_state.last_processed_input = ""
527
+ if "model_changed" not in st.session_state:
528
+ st.session_state.model_changed = False
529
+ if "text_changed" not in st.session_state:
530
+ st.session_state.text_changed = False
531
+ if "disabled" not in st.session_state:
532
+ st.session_state.disabled = False
533
+
534
+ if "top_k" not in st.session_state:
535
+ st.session_state.top_k = 50
536
+
537
+
538
+ if "last_change" not in st.session_state:
539
+ st.session_state.last_change = time.time()
540
+ if "auto_predict_triggered" not in st.session_state:
541
+ st.session_state.auto_predict_triggered = False
542
+
543
+
544
+
545
+
546
+
547
+ def show_stacking_stages():
548
+ # No cache clearing here; caches are cleared only in the model-change callback.
549
+
550
+ # st.write(st.session_state)
551
+
552
+ if "last_change" not in st.session_state:
553
+ st.session_state.last_change = time.time()
554
+ if "auto_predict_triggered" not in st.session_state:
555
+ st.session_state.auto_predict_triggered = False
556
+
557
+
558
+ if "top_k" not in st.session_state:
559
+ st.session_state.top_k = 50
560
+
561
+ model_names1 = list(MODEL_OPTIONS1.keys())
562
+ model_names2 = list(MODEL_OPTIONS2.keys())
563
+ model_names3 = list(MODEL_OPTIONS3.keys())
564
+
565
+ st.title("Stacking all the best models together")
566
+
567
+ st.warning("If memory is low, this page may take a while to load or might fail too if memory overshoots or due to CUDA_Side_Device_Assertions.")
568
+
569
+ # Check if the stored selected model is valid; if not, reset it
570
+ if "selected_model1" in st.session_state:
571
+ if st.session_state.selected_model1 not in model_names1:
572
+ st.session_state.selected_model1 = model_names1[0]
573
+ else:
574
+ st.session_state.selected_model1 = model_names1[0]
575
+
576
+ if "selected_model2" in st.session_state:
577
+ if st.session_state.selected_model2 not in model_names2:
578
+ st.session_state.selected_model2 = model_names2[0]
579
+ else:
580
+ st.session_state.selected_model2 = model_names2[0]
581
+
582
+ if "selected_model3" in st.session_state:
583
+ if st.session_state.selected_model3 not in model_names3:
584
+ st.session_state.selected_model3 = model_names3[0]
585
+ else:
586
+ st.session_state.selected_model3 = model_names3[0]
587
+
588
+ # st.title("Stacking all the best models together")
589
+ st.write("This section handles the sentiment analysis and emotion analysis of informal text and then transformation and normalization of it into standard formal English.")
590
+
591
+ # Model selection with change detection; clearing cache happens in on_model_change()
592
+ col1, col2, col3 = st.columns(3)
593
+ with col1:
594
+ selected_model1 = st.selectbox(
595
+ "Choose a model:", model_names1, key="selected_model_stage1", on_change=on_model_change
596
+ )
597
+ with col2:
598
+ selected_model2 = st.selectbox(
599
+ "Choose a model:", model_names2, key="selected_model_stage2", on_change=on_model_change
600
+ )
601
+ with col3:
602
+ selected_model3 = st.selectbox(
603
+ "Choose a model:", model_names3, key="selected_model_stage3", on_change=on_model_change
604
+ )
605
+
606
+ # Text input with change detection
607
+ user_input = st.text_input(
608
+ "Enter text for emotions mood-tag analysis:", key="user_input_stage3", on_change=on_text_change
609
+ )
610
+
611
+ if st.session_state.get("last_processed_input", "") != user_input:
612
+ st.session_state.prediction_generated = False
613
+ st.session_state.feedback_submitted = False
614
+
615
+ st.markdown("#### Generation Parameters")
616
+ col1, col2 = st.columns(2)
617
+
618
+ with col1:
619
+ use_beam = st.checkbox("Use Beam Search", value=False, on_change=on_beam_checkbox_change)
620
+ if use_beam:
621
+ beams = st.number_input("Number of beams:", min_value=1, max_value=10, value=3, step=1, on_change=on_beam_checkbox_change)
622
+ do_sample = False
623
+ temp = None
624
+ top_p = None
625
+ top_k = None
626
+ else:
627
+ beams = None
628
+ do_sample = st.checkbox("Enable Sampling", value=True, on_change=on_enable_sampling_checkbox_change)
629
+ temp = st.slider("Temperature:", min_value=0.1, max_value=2.0, value=0.4, step=0.1, on_change=on_temperature_change) if do_sample else None
630
+
631
+ with col2:
632
+ top_p = st.slider("Top-p (nucleus sampling):", min_value=0.0, max_value=1.0, value=0.9, step=0.05, on_change=on_top_p_change) if (not use_beam and do_sample) else None
633
+ model_config = MODEL_OPTIONS3[selected_model3]
634
+ max_top_k = model_config.get("max_top_k", 50)
635
+ if not use_beam and do_sample:
636
+ col_slider, col_input = st.columns(2)
637
+ st.write("Top-K: Top K most probable tokens, recommended range: 10-60")
638
+ with col_slider:
639
+ top_k_slider = st.slider(
640
+ "Top-k (slider):",
641
+ min_value=0,
642
+ max_value=max_top_k,
643
+ value=st.session_state.top_k,
644
+ step=1,
645
+ key="top_k_slider",
646
+ on_change=update_top_k_from_slider
647
+ )
648
+ with col_input:
649
+ top_k_input = st.number_input(
650
+ "Top-k (number input):",
651
+ min_value=0,
652
+ max_value=max_top_k,
653
+ value=st.session_state.top_k,
654
+ step=1,
655
+ key="top_k_input",
656
+ on_change=update_top_k_from_input
657
+ )
658
+ final_top_k = st.session_state.top_k
659
+ else:
660
+ final_top_k = None
661
+
662
+ col_tokens, col_return = st.columns(2)
663
+ with col_tokens:
664
+ max_new_tokens = st.number_input("Max New Tokens:", min_value=1, value=1024, step=1, on_change=on_max_new_tokens_change)
665
+ early_stopping = st.checkbox("Early Stopping", value=True, on_change=on_enable_earlyStopping_checkbox_change)
666
+ with col_return:
667
+ if beams is not None:
668
+ num_return_sequences = st.number_input(
669
+ "Num Return Sequences:",
670
+ min_value=1,
671
+ max_value=beams,
672
+ value=1,
673
+ step=1,
674
+ on_change=on_num_return_sequences_change
675
+ )
676
+ else:
677
+ num_return_sequences = st.number_input(
678
+ "Num Return Sequences:",
679
+ min_value=1,
680
+ max_value=3,
681
+ value=1,
682
+ step=1,
683
+ on_change=on_num_return_sequences_change
684
+ )
685
+ user_input_copy = user_input
686
+
687
+ current_time = time.time()
688
+ if user_input.strip() and (current_time - st.session_state.last_change >= 1.25) and not st.session_state.get("prediction_generated", False):
689
+ st.session_state.last_processed_input = user_input
690
+
691
+ progress_bar = st.progress(0)
692
+ update_progress(progress_bar, 0, 10)
693
+ col_spinner, col_warning = st.columns(2)
694
+
695
+ with col_warning:
696
+ warning_placeholder = st.empty()
697
+ warning_placeholder.warning("Don't change the text data or any input parameters or switch models or pages while inference is loading...")
698
+
699
+ with col_spinner:
700
+ with st.spinner("Please wait, inference is loading..."):
701
+ model1, tokenizer1, predict_func1 = load_selected_model1(selected_model1)
702
+ model2, tokenizer2, predict_func2 = load_selected_model2(selected_model2)
703
+ model3, tokenizer3, predict_func3 = load_selected_model3(selected_model3)
704
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
705
+ if model1 is None:
706
+ st.error("⚠️ Error: Model 1 failed to load!")
707
+ st.stop()
708
+ if hasattr(model1, "to"):
709
+ model1.to(device)
710
+ if model2 is None:
711
+ st.error("⚠️ Error: Model 2 failed to load!")
712
+ st.stop()
713
+ if hasattr(model2, "to"):
714
+ model2.to(device)
715
+ if model3 is None:
716
+ st.error("⚠️ Error: Model 3 failed to load!")
717
+ st.stop()
718
+ if hasattr(model3, "to"):
719
+ model3.to(device)
720
+ predictions1 = predict_func1(user_input, model1, tokenizer1, device)
721
+ predictions2 = predict_func2(user_input, model2, tokenizer2, device)
722
+ predictions = predict_func3(
723
+ model3, tokenizer3, user_input, device,
724
+ num_return_sequences,
725
+ beams,
726
+ do_sample,
727
+ temp,
728
+ top_p,
729
+ final_top_k,
730
+ max_new_tokens,
731
+ early_stopping
732
+ )
733
+
734
+ update_progress(progress_bar, 10, 100)
735
+
736
+ warning_placeholder.empty()
737
+
738
+ st.session_state.predictions = predictions
739
+ st.session_state.predictions1 = predictions1
740
+ st.session_state.predictions2 = predictions2
741
+ print(predictions1)
742
+ print(predictions2)
743
+ if len(predictions) > 1:
744
+ st.write("### Most Probable Predictions:")
745
+ for i, pred in enumerate(predictions, start=1):
746
+ st.markdown(f"**Prediction Sequence {i}:** {pred}")
747
+ else:
748
+ st.write("### Predicted Sequence:")
749
+ st.write(predictions[0])
750
+
751
+ graph_code = get_sentiment_emotion_graph_code(user_input, predictions[0], predictions1, predictions2)
752
+ st.session_state.graphviz_code = graph_code
753
+
754
+ # Now display the graph from session state:
755
+ st.graphviz_chart(st.session_state.graphviz_code)
756
+ progress_bar.empty()
757
+ # else:
758
+ # st.info("Waiting for input to settle...")
759
+
760
+ # Mark that a prediction has been generated
761
+ st.session_state.prediction_generated = True
762
+
763
+ else:
764
+ # If predictions are already generated, display the stored ones
765
+ if st.session_state.get("predictions") and st.session_state.get("graphviz_code") and st.session_state.get("predictions2") and st.session_state.get("predictions1"):
766
+ predictions = st.session_state.predictions
767
+ if len(predictions) > 1:
768
+ st.write("### Most Probable Predictions:")
769
+ for i, pred in enumerate(predictions, start=1):
770
+ st.markdown(f"**Prediction Sequence {i}:** {pred}")
771
+ else:
772
+ st.write("### Predicted Sequence:")
773
+ st.write(predictions[0])
774
+ st.graphviz_chart(st.session_state.graphviz_code)
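The stage-3 `predict_func3` call above receives the generation knobs collected from the UI. A sketch of how they typically map onto `transformers`' `generate()`; this is a plausible implementation for illustration, not the actual code in `hmv_cfg_base_stage3`:

```python
# Hypothetical sketch of the stage-3 predict function's core.
inputs = tokenizer3(user_input, return_tensors="pt").to(device)
outputs = model3.generate(
    **inputs,
    num_beams=beams or 1,               # beam search when beams is set
    do_sample=bool(do_sample),          # sampling path otherwise
    temperature=temp,                   # None falls back to config defaults
    top_p=top_p,
    top_k=final_top_k,
    max_new_tokens=max_new_tokens,
    early_stopping=early_stopping,
    num_return_sequences=num_return_sequences,
)
predictions = tokenizer3.batch_decode(outputs, skip_special_tokens=True)
```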
transformation_and_Normalization/config/stage3_models.json CHANGED
@@ -3,7 +3,7 @@
3
  "name": "Facebook BART Base for Conditional Text Generation",
4
  "type": "hf_automodel_finetuned_fbtctg",
5
  "module_path": "hmv_cfg_base_stage3.model1",
6
- "hf_location": "tachygraphy-microtext-normalization-iemk/BART-base-HF-Seq2Seq-Trainer-Batch4",
7
  "tokenizer_class": "BartTokenizer",
8
  "model_class": "BartForConditionalGeneration",
9
  "problem_type": "text_transformamtion_and_normalization",
@@ -18,7 +18,7 @@
18
  "name": "Microsoft Prophet Net Uncased Large for Conditional Text Generation",
19
  "type": "hf_automodel_finetuned_mstctg",
20
  "module_path": "hmv_cfg_base_stage3.model2",
21
- "hf_location": "tachygraphy-microtext-normalization-iemk/ProphetNet_ForCondGen_Uncased_Large_HFTSeq2Seq_Batch4_ngram3",
22
  "tokenizer_class": "ProphetNetTokenizer",
23
  "model_class": "ProphetNetForConditionalGeneration",
24
  "problem_type": "text_transformamtion_and_normalization",
@@ -33,7 +33,7 @@
33
  "name": "Google T5 v1.1 Base for Conditional Text Generation",
34
  "type": "hf_automodel_finetuned_gt5tctg",
35
  "module_path": "hmv_cfg_base_stage3.model3",
36
- "hf_location": "tachygraphy-microtext-normalization-iemk/T5-1.1-HF-seq2seq-Trainer-Batch4",
37
  "tokenizer_class": "T5Tokenizer",
38
  "model_class": "T5ForConditionalGeneration",
39
  "problem_type": "text_transformamtion_and_normalization",
 
3
  "name": "Facebook BART Base for Conditional Text Generation",
4
  "type": "hf_automodel_finetuned_fbtctg",
5
  "module_path": "hmv_cfg_base_stage3.model1",
6
+ "hf_location": "Tachygraphy-Microtext-Normalization-IEMK25/BART-base-HF-Seq2Seq-Trainer-Batch4",
7
  "tokenizer_class": "BartTokenizer",
8
  "model_class": "BartForConditionalGeneration",
9
  "problem_type": "text_transformamtion_and_normalization",
 
18
  "name": "Microsoft Prophet Net Uncased Large for Conditional Text Generation",
19
  "type": "hf_automodel_finetuned_mstctg",
20
  "module_path": "hmv_cfg_base_stage3.model2",
21
+ "hf_location": "Tachygraphy-Microtext-Normalization-IEMK25/ProphetNet_ForCondGen_Uncased_Large_HFTSeq2Seq_Batch4_ngram3",
22
  "tokenizer_class": "ProphetNetTokenizer",
23
  "model_class": "ProphetNetForConditionalGeneration",
24
  "problem_type": "text_transformamtion_and_normalization",
 
33
  "name": "Google T5 v1.1 Base for Conditional Text Generation",
34
  "type": "hf_automodel_finetuned_gt5tctg",
35
  "module_path": "hmv_cfg_base_stage3.model3",
36
+ "hf_location": "Tachygraphy-Microtext-Normalization-IEMK25/T5-1.1-HF-seq2seq-Trainer-Batch4",
37
  "tokenizer_class": "T5Tokenizer",
38
  "model_class": "T5ForConditionalGeneration",
39
  "problem_type": "text_transformamtion_and_normalization",
transformation_and_Normalization/transformationNormalization_main.py CHANGED
@@ -36,6 +36,11 @@ EMOTION_MOODTAG_LABELS = [
36
  "sadness", "surprise"
37
  ]
38
 
 
 
 
 
 
39
  current_model = None
40
  current_tokenizer = None
41
 
@@ -490,54 +495,54 @@ def transform_and_normalize():
490
  st.write(predictions[0])
491
 
492
  # Only show the feedback form if a prediction has been generated
493
- if st.session_state.get("prediction_generated", False):
494
- if not st.session_state.get("feedback_submitted", False):
495
- with st.form("feedback_form", clear_on_submit=True, border=False):
496
- st.error("New API keys are coming in Q2 2025, May 1st, old API authentication will be deprecated and blocked by Postgrest.")
497
- st.warning("This form and database are running in test mode, please be careful with your data.")
498
- st.write("### Data Collection Form")
499
- st.write("#### If the predictions generated are wrong, please provide feedback to help improve the model.")
500
- col1, col2 = st.columns(2)
501
- with col1:
502
- feedback = st.text_input(
503
- "Enter the correct expanded standard formal English text:",
504
- key="feedback_input"
505
- )
506
- with col2:
507
- feedback2 = st.text_input(
508
- "Enter any one of the wrongly predicted text:",
509
- key="feedback_input2"
510
- )
511
- submit_feedback = st.form_submit_button("Submit Feedback")
512
- if submit_feedback and feedback.strip() and feedback2.strip():
513
- data_to_insert = {
514
- # "id" : str(uuid.uuid4()), # text
515
- # "created_at": datetime.now(timezone.utc).isoformat(), # timestamp
516
- "input_text": user_input, # text
517
- "correct_text_by_user": feedback, # text
518
- "model_used": selected_model, # text
519
- "wrong_pred_any": feedback2 if feedback2.strip() else ""
520
- }
521
- # Here we use the supabase client already created above
522
- # supabase = get_connection()
523
- # load_dotenv()
524
- # print("SUPABASE_URL:", os.environ.get("SUPABASE_URL"))
525
- # print("anon_key:", os.environ.get("anon_key"))
526
- # print("table3_name:", os.environ.get("table3_name"))
527
- # load_dotenv(dotenv_path=env_path)
528
- # load_dotenv()
529
- # supabase: Client = create_client(os.environ.get("SUPABASE_URL"), os.environ.get("anon_key"))
530
- # response = supabase.table(os.environ.get("table3_name")).insert(data_to_insert, returning="minimal").execute()
531
- try:
532
- supabase: Client = create_client(get_env_variable("SUPABASE_DB_TACHYGRAPHY_DB_URL"), get_env_variable("SUPABASE_DB_TACHYGRAPHY_ANON_API_KEY"))
533
- response = supabase.table(get_env_variable("SUPABASE_DB_TACHYGRAPHY_DB_STAGE3_TABLE")).insert(data_to_insert, returning="minimal").execute()
534
- st.success("Feedback submitted successfully!")
535
- st.session_state.feedback_submitted = True
536
- except Exception as e:
537
- st.error(f"Feedback submission failed: {e}")
538
 
539
- else:
540
- st.info("Feedback already submitted for this prediction.")
541
 
542
  if __name__ == "__main__":
543
  transform_and_normalize()
 
36
  "sadness", "surprise"
37
  ]
38
 
39
+ SENTIMENT_POLARITY_LABELS = [
40
+ "negative", "neutral", "positive"
41
+ ]
42
+
43
+
44
  current_model = None
45
  current_tokenizer = None
46
 
 
495
  st.write(predictions[0])
496
 
497
  # Only show the feedback form if a prediction has been generated
498
+ # if st.session_state.get("prediction_generated", False):
499
+ # if not st.session_state.get("feedback_submitted", False):
500
+ # with st.form("feedback_form", clear_on_submit=True, border=False):
501
+ # st.error("New API keys are coming in Q2 2025, May 1st, old API authentication will be deprecated and blocked by Postgrest.")
502
+ # st.warning("This form and database are running in test mode, please be careful with your data.")
503
+ # st.write("### Data Collection Form")
504
+ # st.write("#### If the predictions generated are wrong, please provide feedback to help improve the model.")
505
+ # col1, col2 = st.columns(2)
506
+ # with col1:
507
+ # feedback = st.text_input(
508
+ # "Enter the correct expanded standard formal English text:",
509
+ # key="feedback_input"
510
+ # )
511
+ # with col2:
512
+ # feedback2 = st.text_input(
513
+ # "Enter any one of the wrongly predicted text:",
514
+ # key="feedback_input2"
515
+ # )
516
+ # submit_feedback = st.form_submit_button("Submit Feedback")
517
+ # if submit_feedback and feedback.strip() and feedback2.strip():
518
+ # data_to_insert = {
519
+ # # "id" : str(uuid.uuid4()), # text
520
+ # # "created_at": datetime.now(timezone.utc).isoformat(), # timestamp
521
+ # "input_text": user_input, # text
522
+ # "correct_text_by_user": feedback, # text
523
+ # "model_used": selected_model, # text
524
+ # "wrong_pred_any": feedback2 if feedback2.strip() else ""
525
+ # }
526
+ # # Here we use the supabase client already created above
527
+ # # supabase = get_connection()
528
+ # # load_dotenv()
529
+ # # print("SUPABASE_URL:", os.environ.get("SUPABASE_URL"))
530
+ # # print("anon_key:", os.environ.get("anon_key"))
531
+ # # print("table3_name:", os.environ.get("table3_name"))
532
+ # # load_dotenv(dotenv_path=env_path)
533
+ # # load_dotenv()
534
+ # # supabase: Client = create_client(os.environ.get("SUPABASE_URL"), os.environ.get("anon_key"))
535
+ # # response = supabase.table(os.environ.get("table3_name")).insert(data_to_insert, returning="minimal").execute()
536
+ # try:
537
+ # supabase: Client = create_client(get_env_variable("SUPABASE_DB_TACHYGRAPHY_DB_URL"), get_env_variable("SUPABASE_DB_TACHYGRAPHY_ANON_API_KEY"))
538
+ # response = supabase.table(get_env_variable("SUPABASE_DB_TACHYGRAPHY_DB_STAGE3_TABLE")).insert(data_to_insert, returning="minimal").execute()
539
+ # st.success("Feedback submitted successfully!")
540
+ # st.session_state.feedback_submitted = True
541
+ # except Exception as e:
542
+ # st.error(f"Feedback submission failed: {e}")
543
 
544
+ # else:
545
+ # st.info("Feedback already submitted for this prediction.")
546
 
547
  if __name__ == "__main__":
548
  transform_and_normalize()