TUEN-YUE committed
Commit ef1c2c2 · verified · 1 Parent(s): ec11c59

Upload eval_py.ipynb

Files changed (1)
  1. eval_py.ipynb +424 -46
eval_py.ipynb CHANGED
@@ -8,9 +8,14 @@
8
  "base_uri": "https://localhost:8080/"
9
  },
10
  "id": "initial_id",
11
- "outputId": "85dee483-9370-4e16-ecb1-1c2d545ee1fb"
12
  },
13
  "source": [
14
  "!pip install geopy > delete.txt\n",
15
  "!pip install datasets > delete.txt\n",
16
  "!pip install torch torchvision datasets > delete.txt\n",
@@ -18,10 +23,21 @@
18
  "!pip install pyhocon > delete.txt\n",
19
  "!pip install transformers > delete.txt\n",
20
  "!pip install gensim > delete.txt\n",
21
- "!rm delete.txt"
22
  ],
23
- "outputs": [],
24
- "execution_count": null
25
  },
26
  {
27
  "metadata": {
@@ -29,15 +45,17 @@
29
  "base_uri": "https://localhost:8080/"
30
  },
31
  "id": "b0a77c981c32a0c8",
32
- "outputId": "fe03df52-1418-4034-8124-3bf9030ed5d7"
33
  },
34
  "cell_type": "code",
35
- "source": [
36
- "!huggingface-cli login"
37
- ],
38
  "id": "b0a77c981c32a0c8",
39
  "outputs": [],
40
- "execution_count": null
41
  },
42
  {
43
  "metadata": {
@@ -105,8 +123,8 @@
105
  "id": "a4aa3b759defc904",
106
  "outputId": "b1868c23-e675-41db-aa26-5eed9de60d9f",
107
  "ExecuteTime": {
108
- "end_time": "2024-12-16T08:26:09.513376Z",
109
- "start_time": "2024-12-16T08:26:05.978557Z"
110
  }
111
  },
112
  "cell_type": "code",
@@ -120,7 +138,7 @@
120
  ],
121
  "id": "a4aa3b759defc904",
122
  "outputs": [],
123
- "execution_count": 1
124
  },
125
  {
126
  "metadata": {
@@ -144,8 +162,8 @@
144
  "id": "ce6e6b982e22e9fe",
145
  "outputId": "f38ef6b3-35ac-41dc-a8ae-f0dd28b1f84d",
146
  "ExecuteTime": {
147
- "end_time": "2024-12-16T08:26:54.306779Z",
148
- "start_time": "2024-12-16T08:26:54.298397Z"
149
  }
150
  },
151
  "cell_type": "code",
@@ -362,8 +380,8 @@
362
  "id": "b605d3b4f5ff547a",
363
  "outputId": "f365a98e-c181-4754-9fac-77aa1e8639db",
364
  "ExecuteTime": {
365
- "end_time": "2024-12-16T08:27:16.788714Z",
366
- "start_time": "2024-12-16T08:27:01.757035Z"
367
  }
368
  },
369
  "cell_type": "code",
@@ -396,10 +414,331 @@
396
  "text": [
397
  "vectorizer fitted on training data.\n"
398
  ]
399
  }
400
  ],
401
  "execution_count": 5
402
  },
403
  {
404
  "metadata": {
405
  "colab": {
@@ -422,8 +761,8 @@
422
  "id": "b20d11caa1d25445",
423
  "outputId": "986c82fd-014b-432a-8174-857b2b866cb8",
424
  "ExecuteTime": {
425
- "end_time": "2024-12-16T08:27:32.874705Z",
426
- "start_time": "2024-12-16T08:27:32.787248Z"
427
  }
428
  },
429
  "cell_type": "code",
@@ -435,33 +774,51 @@
435
  "id": "b20d11caa1d25445",
436
  "outputs": [
437
  {
438
- "ename": "ValueError",
439
- "evalue": "The checkpoint you are trying to load has model type `headlineclassifier` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.",
440
- "output_type": "error",
441
- "traceback": [
442
- "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
443
- "\u001B[1;31mKeyError\u001B[0m Traceback (most recent call last)",
444
- "File \u001B[1;32m~\\anaconda3\\envs\\newsCLS\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:1038\u001B[0m, in \u001B[0;36mAutoConfig.from_pretrained\u001B[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001B[0m\n\u001B[0;32m 1037\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m-> 1038\u001B[0m config_class \u001B[38;5;241m=\u001B[39m CONFIG_MAPPING[config_dict[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mmodel_type\u001B[39m\u001B[38;5;124m\"\u001B[39m]]\n\u001B[0;32m 1039\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m:\n",
445
- "File \u001B[1;32m~\\anaconda3\\envs\\newsCLS\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:740\u001B[0m, in \u001B[0;36m_LazyConfigMapping.__getitem__\u001B[1;34m(self, key)\u001B[0m\n\u001B[0;32m 739\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m key \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_mapping:\n\u001B[1;32m--> 740\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m(key)\n\u001B[0;32m 741\u001B[0m value \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_mapping[key]\n",
446
- "\u001B[1;31mKeyError\u001B[0m: 'headlineclassifier'",
447
- "\nDuring handling of the above exception, another exception occurred:\n",
448
- "\u001B[1;31mValueError\u001B[0m Traceback (most recent call last)",
449
- "Cell \u001B[1;32mIn[15], line 2\u001B[0m\n\u001B[0;32m 1\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mtransformers\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m AutoModel, AutoConfig\n\u001B[1;32m----> 2\u001B[0m config \u001B[38;5;241m=\u001B[39m AutoConfig\u001B[38;5;241m.\u001B[39mfrom_pretrained(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mCISProject/News-Headline-Classifier-Notebook\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[0;32m 3\u001B[0m model \u001B[38;5;241m=\u001B[39m AutoModel\u001B[38;5;241m.\u001B[39mfrom_pretrained(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mCISProject/News-Headline-Classifier-Notebook\u001B[39m\u001B[38;5;124m\"\u001B[39m,config \u001B[38;5;241m=\u001B[39m config)\n",
450
- "File \u001B[1;32m~\\anaconda3\\envs\\newsCLS\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:1040\u001B[0m, in \u001B[0;36mAutoConfig.from_pretrained\u001B[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001B[0m\n\u001B[0;32m 1038\u001B[0m config_class \u001B[38;5;241m=\u001B[39m CONFIG_MAPPING[config_dict[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mmodel_type\u001B[39m\u001B[38;5;124m\"\u001B[39m]]\n\u001B[0;32m 1039\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m:\n\u001B[1;32m-> 1040\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\n\u001B[0;32m 1041\u001B[0m \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mThe checkpoint you are trying to load has model type `\u001B[39m\u001B[38;5;132;01m{\u001B[39;00mconfig_dict[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mmodel_type\u001B[39m\u001B[38;5;124m'\u001B[39m]\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m` \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m 1042\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mbut Transformers does not recognize this architecture. This could be because of an \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m 1043\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m 1044\u001B[0m )\n\u001B[0;32m 1045\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m config_class\u001B[38;5;241m.\u001B[39mfrom_dict(config_dict, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39munused_kwargs)\n\u001B[0;32m 1046\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m 1047\u001B[0m \u001B[38;5;66;03m# Fallback: use pattern matching on the string.\u001B[39;00m\n\u001B[0;32m 1048\u001B[0m \u001B[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001B[39;00m\n",
451
- "\u001B[1;31mValueError\u001B[0m: The checkpoint you are trying to load has model type `headlineclassifier` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date."
452
  ]
453
  }
454
  ],
455
- "execution_count": 15
456
  },
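The ValueError above is the problem this commit addresses: `AutoConfig.from_pretrained` cannot resolve the custom `headlineclassifier` model type until the matching config and model classes are registered with Transformers. The new cells later in this diff add that registration; a minimal sketch of the pattern, using the class names defined there:

```python
# Minimal sketch, assuming CustomConfig / CustomModel as defined in the updated cells:
# register the custom architecture before asking the Auto* classes to resolve it.
from transformers import AutoConfig, AutoModel

AutoConfig.register("headlineclassifier", CustomConfig)  # maps model_type -> config class
AutoModel.register(CustomConfig, CustomModel)            # maps config class -> model class

config = AutoConfig.from_pretrained("CISProject/News-Headline-Classifier-Notebook")
model = AutoModel.from_pretrained("CISProject/News-Headline-Classifier-Notebook", config=config)
```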
457
  {
458
  "metadata": {
459
- "id": "1d23cedfe1d79660"
460
  },
461
  "cell_type": "code",
462
  "source": [
463
  "from torch.utils.data import DataLoader\n",
464
  "from sklearn.metrics import accuracy_score, classification_report\n",
465
  "# Define a collate function to handle the batched data\n",
466
  "def collate_fn(batch):\n",
467
  " freq_inputs = torch.stack([torch.tensor(item[\"freq_inputs\"]) for item in batch])\n",
@@ -510,18 +867,39 @@
510
  "print(report)"
511
  ],
512
  "id": "1d23cedfe1d79660",
513
- "outputs": [],
514
- "execution_count": null
515
- },
516
- {
517
- "metadata": {
518
- "id": "549f3e0a004e80ab"
519
- },
520
- "cell_type": "code",
521
- "source": [],
522
- "id": "549f3e0a004e80ab",
523
- "outputs": [],
524
- "execution_count": null
525
  }
526
  ],
527
  "metadata": {
 
8
  "base_uri": "https://localhost:8080/"
9
  },
10
  "id": "initial_id",
11
+ "outputId": "85dee483-9370-4e16-ecb1-1c2d545ee1fb",
12
+ "ExecuteTime": {
13
+ "end_time": "2024-12-24T16:26:10.771980Z",
14
+ "start_time": "2024-12-24T16:26:10.767703Z"
15
+ }
16
  },
17
  "source": [
18
+ "\n",
19
  "!pip install geopy > delete.txt\n",
20
  "!pip install datasets > delete.txt\n",
21
  "!pip install torch torchvision datasets > delete.txt\n",
 
23
  "!pip install pyhocon > delete.txt\n",
24
  "!pip install transformers > delete.txt\n",
25
  "!pip install gensim > delete.txt\n",
26
+ "!rm delete.txt\n"
27
  ],
28
+ "outputs": [
29
+ {
30
+ "data": {
31
+ "text/plain": [
32
+ "'\\n!pip install geopy > delete.txt\\n!pip install datasets > delete.txt\\n!pip install torch torchvision datasets > delete.txt\\n!pip install huggingface_hub > delete.txt\\n!pip install pyhocon > delete.txt\\n!pip install transformers > delete.txt\\n!pip install gensim > delete.txt\\n!rm delete.txt\\n'"
33
+ ]
34
+ },
35
+ "execution_count": 1,
36
+ "metadata": {},
37
+ "output_type": "execute_result"
38
+ }
39
+ ],
40
+ "execution_count": 1
41
  },
42
  {
43
  "metadata": {
 
45
  "base_uri": "https://localhost:8080/"
46
  },
47
  "id": "b0a77c981c32a0c8",
48
+ "outputId": "fe03df52-1418-4034-8124-3bf9030ed5d7",
49
+ "ExecuteTime": {
50
+ "end_time": "2024-12-24T16:26:10.813215Z",
51
+ "start_time": "2024-12-24T16:26:10.810118Z"
52
+ }
53
  },
54
  "cell_type": "code",
55
+ "source": "!huggingface-cli login",
56
  "id": "b0a77c981c32a0c8",
57
  "outputs": [],
58
+ "execution_count": 2
59
  },
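The login cell shells out to `huggingface-cli login`. As an aside, and not what this notebook does, the same credential step can be handled in-notebook with `huggingface_hub`, which avoids the shell prompt:

```python
# Alternative sketch (not used by this notebook): interactive login via huggingface_hub.
from huggingface_hub import notebook_login

notebook_login()  # prompts for a Hugging Face access token inside the notebook
```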
60
  {
61
  "metadata": {
 
123
  "id": "a4aa3b759defc904",
124
  "outputId": "b1868c23-e675-41db-aa26-5eed9de60d9f",
125
  "ExecuteTime": {
126
+ "end_time": "2024-12-24T16:26:14.452521Z",
127
+ "start_time": "2024-12-24T16:26:10.866207Z"
128
  }
129
  },
130
  "cell_type": "code",
 
138
  ],
139
  "id": "a4aa3b759defc904",
140
  "outputs": [],
141
+ "execution_count": 3
142
  },
143
  {
144
  "metadata": {
 
162
  "id": "ce6e6b982e22e9fe",
163
  "outputId": "f38ef6b3-35ac-41dc-a8ae-f0dd28b1f84d",
164
  "ExecuteTime": {
165
+ "end_time": "2024-12-24T16:26:16.191425Z",
166
+ "start_time": "2024-12-24T16:26:14.463529Z"
167
  }
168
  },
169
  "cell_type": "code",
 
380
  "id": "b605d3b4f5ff547a",
381
  "outputId": "f365a98e-c181-4754-9fac-77aa1e8639db",
382
  "ExecuteTime": {
383
+ "end_time": "2024-12-24T16:27:18.269951Z",
384
+ "start_time": "2024-12-24T16:26:16.194928Z"
385
  }
386
  },
387
  "cell_type": "code",
 
414
  "text": [
415
  "vectorizer fitted on training data.\n"
416
  ]
417
+ },
418
+ {
419
+ "data": {
420
+ "text/plain": [
421
+ "Map (num_proc=4): 0%| | 0/3044 [00:00<?, ? examples/s]"
422
+ ],
423
+ "application/vnd.jupyter.widget-view+json": {
424
+ "version_major": 2,
425
+ "version_minor": 0,
426
+ "model_id": "0aec436603c54b82b962cb31750a5921"
427
+ }
428
+ },
429
+ "metadata": {},
430
+ "output_type": "display_data"
431
+ },
432
+ {
433
+ "data": {
434
+ "text/plain": [
435
+ "Map (num_proc=4): 0%| | 0/3044 [00:00<?, ? examples/s]"
436
+ ],
437
+ "application/vnd.jupyter.widget-view+json": {
438
+ "version_major": 2,
439
+ "version_minor": 0,
440
+ "model_id": "2061954a6f7e47e08803c4c54e852571"
441
+ }
442
+ },
443
+ "metadata": {},
444
+ "output_type": "display_data"
445
+ },
446
+ {
447
+ "data": {
448
+ "text/plain": [
449
+ "Map (num_proc=4): 0%| | 0/3044 [00:00<?, ? examples/s]"
450
+ ],
451
+ "application/vnd.jupyter.widget-view+json": {
452
+ "version_major": 2,
453
+ "version_minor": 0,
454
+ "model_id": "a3e3797234bb478c88fcf944fafc8570"
455
+ }
456
+ },
457
+ "metadata": {},
458
+ "output_type": "display_data"
459
+ },
460
+ {
461
+ "data": {
462
+ "text/plain": [
463
+ "Map (num_proc=4): 0%| | 0/3044 [00:00<?, ? examples/s]"
464
+ ],
465
+ "application/vnd.jupyter.widget-view+json": {
466
+ "version_major": 2,
467
+ "version_minor": 0,
468
+ "model_id": "2c8bc25d53984eee839a20da98f749d1"
469
+ }
470
+ },
471
+ "metadata": {},
472
+ "output_type": "display_data"
473
+ },
474
+ {
475
+ "data": {
476
+ "text/plain": [
477
+ "Map (num_proc=4): 0%| | 0/3044 [00:00<?, ? examples/s]"
478
+ ],
479
+ "application/vnd.jupyter.widget-view+json": {
480
+ "version_major": 2,
481
+ "version_minor": 0,
482
+ "model_id": "ab84a6ca73a945d58cc23643538c8a6c"
483
+ }
484
+ },
485
+ "metadata": {},
486
+ "output_type": "display_data"
487
+ },
488
+ {
489
+ "data": {
490
+ "text/plain": [
491
+ "Map (num_proc=4): 0%| | 0/3044 [00:00<?, ? examples/s]"
492
+ ],
493
+ "application/vnd.jupyter.widget-view+json": {
494
+ "version_major": 2,
495
+ "version_minor": 0,
496
+ "model_id": "4ebc99d9190c49bfae44fb7f9db88007"
497
+ }
498
+ },
499
+ "metadata": {},
500
+ "output_type": "display_data"
501
+ },
502
+ {
503
+ "data": {
504
+ "text/plain": [
505
+ "Map (num_proc=4): 0%| | 0/761 [00:00<?, ? examples/s]"
506
+ ],
507
+ "application/vnd.jupyter.widget-view+json": {
508
+ "version_major": 2,
509
+ "version_minor": 0,
510
+ "model_id": "b1a4da224f31484fa8946982954e74ff"
511
+ }
512
+ },
513
+ "metadata": {},
514
+ "output_type": "display_data"
515
+ },
516
+ {
517
+ "data": {
518
+ "text/plain": [
519
+ "Map (num_proc=4): 0%| | 0/761 [00:00<?, ? examples/s]"
520
+ ],
521
+ "application/vnd.jupyter.widget-view+json": {
522
+ "version_major": 2,
523
+ "version_minor": 0,
524
+ "model_id": "ab6cbe1f1b714017809ef32d010e452f"
525
+ }
526
+ },
527
+ "metadata": {},
528
+ "output_type": "display_data"
529
+ },
530
+ {
531
+ "data": {
532
+ "text/plain": [
533
+ "Map (num_proc=4): 0%| | 0/761 [00:00<?, ? examples/s]"
534
+ ],
535
+ "application/vnd.jupyter.widget-view+json": {
536
+ "version_major": 2,
537
+ "version_minor": 0,
538
+ "model_id": "fcd18d89aa5043b39fe0143d4a5ac681"
539
+ }
540
+ },
541
+ "metadata": {},
542
+ "output_type": "display_data"
543
+ },
544
+ {
545
+ "data": {
546
+ "text/plain": [
547
+ "Map (num_proc=4): 0%| | 0/761 [00:00<?, ? examples/s]"
548
+ ],
549
+ "application/vnd.jupyter.widget-view+json": {
550
+ "version_major": 2,
551
+ "version_minor": 0,
552
+ "model_id": "d1b2b18cedc94f338d4f5bf5dc4a5dec"
553
+ }
554
+ },
555
+ "metadata": {},
556
+ "output_type": "display_data"
557
+ },
558
+ {
559
+ "data": {
560
+ "text/plain": [
561
+ "Map (num_proc=4): 0%| | 0/761 [00:00<?, ? examples/s]"
562
+ ],
563
+ "application/vnd.jupyter.widget-view+json": {
564
+ "version_major": 2,
565
+ "version_minor": 0,
566
+ "model_id": "03f17c1c68e248a6bf71a6bd7d7bbae3"
567
+ }
568
+ },
569
+ "metadata": {},
570
+ "output_type": "display_data"
571
+ },
572
+ {
573
+ "data": {
574
+ "text/plain": [
575
+ "Map (num_proc=4): 0%| | 0/761 [00:00<?, ? examples/s]"
576
+ ],
577
+ "application/vnd.jupyter.widget-view+json": {
578
+ "version_major": 2,
579
+ "version_minor": 0,
580
+ "model_id": "1e4135f4a3974b149f50dd3dba35c02e"
581
+ }
582
+ },
583
+ "metadata": {},
584
+ "output_type": "display_data"
585
  }
586
  ],
587
  "execution_count": 5
588
  },
589
+ {
590
+ "metadata": {
591
+ "ExecuteTime": {
592
+ "end_time": "2024-12-24T16:28:32.064840Z",
593
+ "start_time": "2024-12-24T16:28:31.661013Z"
594
+ }
595
+ },
596
+ "cell_type": "code",
597
+ "source": [
598
+ "# TODO: import all packages necessary for your custom model\n",
599
+ "import pandas as pd\n",
600
+ "import os\n",
601
+ "from torch.utils.data import DataLoader\n",
602
+ "from transformers import PreTrainedModel, PretrainedConfig, AutoConfig, AutoModel\n",
603
+ "import torch\n",
604
+ "import torch.nn as nn\n",
605
+ "from transformers import RobertaModel, RobertaConfig,RobertaForSequenceClassification, BertModel\n",
606
+ "from model.network import Classifier\n",
607
+ "from model.frequential import FreqNetwork\n",
608
+ "from model.sequential import SeqNetwork\n",
609
+ "from model.positional import PosNetwork\n",
610
+ "\n",
611
+ "class CustomConfig(PretrainedConfig):\n",
612
+ " model_type = \"headlineclassifier\"\n",
613
+ "\n",
614
+ " def __init__(\n",
615
+ " self,\n",
616
+ " base_exp_dir=\"./exp/fox_nbc/\",\n",
617
+ " # dataset={\"data_dir\": \"./data/CASE_NAME/data.csv\", \"transform\": True},\n",
618
+ " train={\n",
619
+ " \"learning_rate\": 2e-5,\n",
620
+ " \"learning_rate_alpha\": 0.05,\n",
621
+ " \"end_iter\": 10,\n",
622
+ " \"batch_size\": 32,\n",
623
+ " \"warm_up_end\": 2,\n",
624
+ " \"anneal_end\": 5,\n",
625
+ " \"save_freq\": 1,\n",
626
+ " \"val_freq\": 1,\n",
627
+ " },\n",
628
+ " model={\n",
629
+ " \"freq\": {\n",
630
+ " \"tfidf_input_dim\": 8145,\n",
631
+ " \"tfidf_output_dim\": 128,\n",
632
+ " \"tfidf_hidden_dim\": 512,\n",
633
+ " \"n_layers\": 2,\n",
634
+ " \"skip_in\": [80],\n",
635
+ " \"weight_norm\": True,\n",
636
+ " },\n",
637
+ " \"pos\": {\n",
638
+ " \"input_dim\": 300,\n",
639
+ " \"output_dim\": 128,\n",
640
+ " \"hidden_dim\": 256,\n",
641
+ " \"n_layers\": 2,\n",
642
+ " \"skip_in\": [80],\n",
643
+ " \"weight_norm\": True,\n",
644
+ " },\n",
645
+ " \"cls\": {\n",
646
+ " \"combined_input\": 1024, #1024\n",
647
+ " \"combined_dim\": 128,\n",
648
+ " \"num_classes\": 2,\n",
649
+ " \"n_layers\": 2,\n",
650
+ " \"skip_in\": [80],\n",
651
+ " \"weight_norm\": True,\n",
652
+ " },\n",
653
+ " },\n",
654
+ " **kwargs,\n",
655
+ " ):\n",
656
+ " super().__init__(**kwargs)\n",
657
+ "\n",
658
+ " self.base_exp_dir = base_exp_dir\n",
659
+ " # self.dataset = dataset\n",
660
+ " self.train = train\n",
661
+ " self.model = model\n",
662
+ "\n",
663
+ "# TODO: define all parameters needed for your model, as well as calling the model itself\n",
664
+ "class CustomModel(PreTrainedModel):\n",
665
+ " config_class = CustomConfig\n",
666
+ "\n",
667
+ " def __init__(self, config):\n",
668
+ " super().__init__(config)\n",
669
+ " self.conf = config\n",
670
+ " self.freq = FreqNetwork(**self.conf.model[\"freq\"])\n",
671
+ " self.pos = PosNetwork(**self.conf.model[\"pos\"])\n",
672
+ " self.cls = Classifier(**self.conf.model[\"cls\"])\n",
673
+ " self.fc = nn.Linear(self.conf.model[\"cls\"][\"combined_input\"],2)\n",
674
+ " self.seq = RobertaModel.from_pretrained(\"roberta-base\")\n",
675
+ " # self.seq = BertModel.from_pretrained(\"bert-base-uncased\")\n",
676
+ " #for param in self.roberta.parameters():\n",
677
+ " # param.requires_grad = False\n",
678
+ " self.dropout = nn.Dropout(0.2)\n",
679
+ "\n",
680
+ " def forward(self, x):\n",
681
+ " freq_inputs = x[\"freq_inputs\"]\n",
682
+ " seq_inputs = x[\"seq_inputs\"]\n",
683
+ " pos_inputs = x[\"pos_inputs\"]\n",
684
+ " seq_feature = self.seq(\n",
685
+ " input_ids=seq_inputs[:,0,:],\n",
686
+ " attention_mask=seq_inputs[:,1,:]\n",
687
+ " ).pooler_output # last_hidden_state[:, 0, :]\n",
688
+ " freq_feature = self.freq(freq_inputs) # Shape: (batch_size, 128)\n",
689
+ "\n",
690
+ " pos_feature = self.pos(pos_inputs) #Shape: (batch_size, 128)\n",
691
+ " inputs = torch.cat((seq_feature, freq_feature, pos_feature), dim=1) # Shape: (batch_size, 384)\n",
692
+ " # inputs = torch.cat((seq_feature, freq_feature), dim=1) # Shape: (batch_size,256)\n",
693
+ " # inputs = seq_feature\n",
694
+ "\n",
695
+ " x = inputs\n",
696
+ " x = self.dropout(x)\n",
697
+ " outputs = self.fc(x)\n",
698
+ "\n",
699
+ " return outputs\n",
700
+ "\n",
701
+ " def save_model(self, save_path):\n",
702
+ " \"\"\"Save the model locally using the Hugging Face format.\"\"\"\n",
703
+ " self.save_pretrained(save_path)\n",
704
+ "\n",
705
+ " def push_model(self, repo_name):\n",
706
+ " \"\"\"Push the model to the Hugging Face Hub.\"\"\"\n",
707
+ " self.push_to_hub(repo_name)"
708
+ ],
709
+ "id": "9266d67887120863",
710
+ "outputs": [],
711
+ "execution_count": 7
712
+ },
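The cell above defines the custom architecture plus two thin helpers, `save_model` and `push_model`, that wrap `save_pretrained` and `push_to_hub`. A hypothetical usage sketch (the local path is an assumption; the repo id mirrors the one loaded elsewhere in this notebook):

```python
# Hypothetical usage of the helpers defined above.
config = CustomConfig()
model = CustomModel(config)

model.save_model("./exp/fox_nbc/hf_checkpoint")                   # save_pretrained under the hood
model.push_model("CISProject/News-Headline-Classifier-Notebook")  # push_to_hub under the hood
```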
713
+ {
714
+ "metadata": {
715
+ "ExecuteTime": {
716
+ "end_time": "2024-12-24T16:28:35.657792Z",
717
+ "start_time": "2024-12-24T16:28:35.392033Z"
718
+ }
719
+ },
720
+ "cell_type": "code",
721
+ "source": [
722
+ "AutoConfig.register(\"headlineclassifier\", CustomConfig)\n",
723
+ "AutoModel.register(CustomConfig, CustomModel)\n",
724
+ "config = CustomConfig()\n",
725
+ "model = CustomModel(config)"
726
+ ],
727
+ "id": "77b94c012f4fae3a",
728
+ "outputs": [
729
+ {
730
+ "name": "stderr",
731
+ "output_type": "stream",
732
+ "text": [
733
+ "C:\\Users\\swall\\anaconda3\\envs\\newsCLS\\Lib\\site-packages\\torch\\nn\\utils\\weight_norm.py:143: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`.\n",
734
+ " WeightNorm.apply(module, name, dim)\n",
735
+ "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
736
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
737
+ ]
738
+ }
739
+ ],
740
+ "execution_count": 8
741
+ },
742
  {
743
  "metadata": {
744
  "colab": {
 
761
  "id": "b20d11caa1d25445",
762
  "outputId": "986c82fd-014b-432a-8174-857b2b866cb8",
763
  "ExecuteTime": {
764
+ "end_time": "2024-12-24T16:28:50.195358Z",
765
+ "start_time": "2024-12-24T16:28:37.051697Z"
766
  }
767
  },
768
  "cell_type": "code",
 
774
  "id": "b20d11caa1d25445",
775
  "outputs": [
776
  {
777
+ "data": {
778
+ "text/plain": [
779
+ "model.safetensors: 0%| | 0.00/518M [00:00<?, ?B/s]"
780
+ ],
781
+ "application/vnd.jupyter.widget-view+json": {
782
+ "version_major": 2,
783
+ "version_minor": 0,
784
+ "model_id": "882ba9da828e4438bdcbc3cd60ce32a4"
785
+ }
786
+ },
787
+ "metadata": {},
788
+ "output_type": "display_data"
789
+ },
790
+ {
791
+ "name": "stderr",
792
+ "output_type": "stream",
793
+ "text": [
794
+ "C:\\Users\\swall\\anaconda3\\envs\\newsCLS\\Lib\\site-packages\\huggingface_hub\\file_download.py:139: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\swall\\.cache\\huggingface\\hub\\models--CISProject--News-Headline-Classifier-Notebook. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
795
+ "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
796
+ " warnings.warn(message)\n",
797
+ "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
798
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
799
+ "Some weights of the model checkpoint at CISProject/News-Headline-Classifier-Notebook were not used when initializing CustomModel: ['cls.lin0.parametrizations.weight.original0', 'cls.lin0.parametrizations.weight.original1', 'cls.lin1.parametrizations.weight.original0', 'cls.lin1.parametrizations.weight.original1', 'cls.lin2.parametrizations.weight.original0', 'cls.lin2.parametrizations.weight.original1', 'freq.lin0.parametrizations.weight.original0', 'freq.lin0.parametrizations.weight.original1', 'freq.lin1.parametrizations.weight.original0', 'freq.lin1.parametrizations.weight.original1', 'freq.lin2.parametrizations.weight.original0', 'freq.lin2.parametrizations.weight.original1', 'pos.lin0.parametrizations.weight.original0', 'pos.lin0.parametrizations.weight.original1', 'pos.lin1.parametrizations.weight.original0', 'pos.lin1.parametrizations.weight.original1', 'pos.lin2.parametrizations.weight.original0', 'pos.lin2.parametrizations.weight.original1']\n",
800
+ "- This IS expected if you are initializing CustomModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
801
+ "- This IS NOT expected if you are initializing CustomModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
802
+ "Some weights of CustomModel were not initialized from the model checkpoint at CISProject/News-Headline-Classifier-Notebook and are newly initialized: ['cls.lin0.weight_g', 'cls.lin0.weight_v', 'cls.lin1.weight_g', 'cls.lin1.weight_v', 'cls.lin2.weight_g', 'cls.lin2.weight_v', 'freq.lin0.weight_g', 'freq.lin0.weight_v', 'freq.lin1.weight_g', 'freq.lin1.weight_v', 'freq.lin2.weight_g', 'freq.lin2.weight_v', 'pos.lin0.weight_g', 'pos.lin0.weight_v', 'pos.lin1.weight_g', 'pos.lin1.weight_v', 'pos.lin2.weight_g', 'pos.lin2.weight_v']\n",
803
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
804
  ]
805
  }
806
  ],
807
+ "execution_count": 9
808
  },
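The warnings in this cell's output come from two different weight-norm implementations in PyTorch: the checkpoint on the Hub stores weight-normalized layers under `parametrizations.weight.original0/1`, while the locally constructed modules use the legacy `weight_g`/`weight_v` names, so those tensors cannot be matched and are reported as newly initialized. A small illustration of the naming difference (illustrative only, not from the notebook):

```python
# The legacy helper and the parametrization-based helper register parameters under
# different names, which is why checkpoints saved with one cannot populate modules
# built with the other.
import torch.nn as nn
from torch.nn.utils import weight_norm
from torch.nn.utils.parametrizations import weight_norm as weight_norm_param

legacy = weight_norm(nn.Linear(4, 4))
print(sorted(n for n, _ in legacy.named_parameters()))
# ['bias', 'weight_g', 'weight_v']

modern = weight_norm_param(nn.Linear(4, 4))
print(sorted(n for n, _ in modern.named_parameters()))
# ['bias', 'parametrizations.weight.original0', 'parametrizations.weight.original1']
```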
809
  {
810
  "metadata": {
811
+ "id": "1d23cedfe1d79660",
812
+ "ExecuteTime": {
813
+ "end_time": "2024-12-24T16:29:36.873566Z",
814
+ "start_time": "2024-12-24T16:29:23.549424Z"
815
+ }
816
  },
817
  "cell_type": "code",
818
  "source": [
819
  "from torch.utils.data import DataLoader\n",
820
  "from sklearn.metrics import accuracy_score, classification_report\n",
821
+ "from tqdm import tqdm\n",
822
  "# Define a collate function to handle the batched data\n",
823
  "def collate_fn(batch):\n",
824
  " freq_inputs = torch.stack([torch.tensor(item[\"freq_inputs\"]) for item in batch])\n",
 
867
  "print(report)"
868
  ],
869
  "id": "1d23cedfe1d79660",
870
+ "outputs": [
871
+ {
872
+ "name": "stderr",
873
+ "output_type": "stream",
874
+ "text": [
875
+ " "
876
+ ]
877
+ },
878
+ {
879
+ "name": "stdout",
880
+ "output_type": "stream",
881
+ "text": [
882
+ "Accuracy: 0.8988\n",
883
+ " precision recall f1-score support\n",
884
+ "\n",
885
+ " 0 0.90 0.88 0.89 356\n",
886
+ " 1 0.90 0.91 0.91 405\n",
887
+ "\n",
888
+ " accuracy 0.90 761\n",
889
+ " macro avg 0.90 0.90 0.90 761\n",
890
+ "weighted avg 0.90 0.90 0.90 761\n",
891
+ "\n"
892
+ ]
893
+ },
894
+ {
895
+ "name": "stderr",
896
+ "output_type": "stream",
897
+ "text": [
898
+ "\r"
899
+ ]
900
+ }
901
+ ],
902
+ "execution_count": 12
903
  }
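Only the head and tail of the evaluation cell are visible in this diff; the body in between is collapsed. As a rough, hypothetical sketch of how a `collate_fn` like the one defined above is typically wired into an evaluation loop (split name, batch size, and the `labels` field are assumptions, not taken from the hidden cell body):

```python
# Hypothetical evaluation wiring; the notebook's actual loop is collapsed above.
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, classification_report

test_loader = DataLoader(dataset["test"], batch_size=32, shuffle=False, collate_fn=collate_fn)

model.eval()
all_preds, all_labels = [], []
with torch.no_grad():
    for batch in test_loader:
        inputs = {k: batch[k] for k in ("freq_inputs", "seq_inputs", "pos_inputs")}
        logits = model(inputs)                      # CustomModel.forward expects this dict
        all_preds.extend(logits.argmax(dim=1).tolist())
        all_labels.extend(batch["labels"].tolist())

print(f"Accuracy: {accuracy_score(all_labels, all_preds):.4f}")
print(classification_report(all_labels, all_preds))
```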
904
  ],
905
  "metadata": {