TUEN-YUE committed
Commit e79e9a8 · verified · 1 Parent(s): f3e9a7f

Upload train&test.ipynb

Files changed (1)
  1. train&test.ipynb +1408 -0
train&test.ipynb ADDED
@@ -0,0 +1,1408 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "metadata": {},
5
+ "cell_type": "markdown",
6
+ "source": [
7
+ "# Installing dependencies\n",
8
+ "## Please make a copy of this notebook."
9
+ ],
10
+ "id": "13156d7ed48b282"
11
+ },
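+ {
+ "metadata": {},
+ "cell_type": "code",
+ "source": [
+ "# Sketch of an install cell (not in the original notebook): the dependency list below is\n",
+ "# inferred from the imports used later in this notebook; versions are intentionally left unpinned.\n",
+ "# The Word2Vec step also expects GoogleNews-vectors-negative300.bin in the working directory.\n",
+ "# !pip install datasets transformers torch gensim scikit-learn pandas numpy tqdm huggingface_hub\n"
+ ],
+ "id": "install-deps-sketch",
+ "outputs": [],
+ "execution_count": null
+ },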
12
+ {
13
+ "metadata": {},
14
+ "cell_type": "markdown",
15
+ "source": [
16
+ "# Hugging Face login\n",
17
+ "You will need your personal access token."
18
+ ],
19
+ "id": "432a756039e6399"
20
+ },
21
+ {
22
+ "metadata": {},
23
+ "cell_type": "code",
24
+ "source": "# !huggingface-cli login",
25
+ "id": "2e73da09a7c6171e",
26
+ "outputs": [],
27
+ "execution_count": null
28
+ },
29
+ {
30
+ "metadata": {},
31
+ "cell_type": "markdown",
32
+ "source": "# Part 1: Load Data",
33
+ "id": "c731d9c1ebb477dc"
34
+ },
35
+ {
36
+ "metadata": {},
37
+ "cell_type": "markdown",
38
+ "source": "## Downloading the train and test dataset",
39
+ "id": "14070f20b547688f"
40
+ },
41
+ {
42
+ "metadata": {},
43
+ "cell_type": "markdown",
44
+ "source": "",
45
+ "id": "b8920847b7cc378d"
46
+ },
47
+ {
48
+ "metadata": {},
49
+ "cell_type": "code",
50
+ "source": [
51
+ "from datasets import load_dataset\n",
52
+ "\n",
53
+ "dataset_train = load_dataset(\"CISProject/FOX_NBC\", split=\"train\")\n",
54
+ "dataset_test = load_dataset(\"CISProject/FOX_NBC\", split=\"test\")\n",
55
+ "# dataset_test = load_dataset(\"CISProject/FOX_NBC\", split=\"test_data_random_subset\")\n"
56
+ ],
57
+ "id": "877c90c978d62b7d",
58
+ "outputs": [],
59
+ "execution_count": 32
60
+ },
61
+ {
62
+ "metadata": {
63
+ "ExecuteTime": {
64
+ "end_time": "2024-12-16T08:10:32.645990Z",
65
+ "start_time": "2024-12-16T08:10:32.636717Z"
66
+ }
67
+ },
68
+ "cell_type": "code",
69
+ "source": [
70
+ "import numpy as np\n",
71
+ "import torch\n",
72
+ "import re\n",
73
+ "from transformers import BertTokenizer\n",
74
+ "from transformers import RobertaTokenizer\n",
75
+ "from sklearn.feature_extraction.text import CountVectorizer\n",
76
+ "from gensim.models import KeyedVectors\n",
77
+ "from sklearn.feature_extraction.text import TfidfVectorizer\n",
78
+ "\n",
79
+ "def preprocess_data(data,\n",
80
+ " mode=\"train\",\n",
81
+ " vectorizer=None,\n",
82
+ " w2v_model=None,\n",
83
+ " max_features=4096,\n",
84
+ " max_seq_length=128,\n",
85
+ " num_proc=4):\n",
86
+ " if w2v_model is None:\n",
87
+ " raise ValueError(\"w2v_model must be provided for Word2Vec embeddings.\")\n",
88
+ "\n",
89
+ " # tokenizer = BertTokenizer.from_pretrained(\"bert-base-uncased\")\n",
90
+ " tokenizer = RobertaTokenizer.from_pretrained(\"roberta-base\")\n",
91
+ " # 1. Clean text once\n",
92
+ " def clean_text(examples):\n",
93
+ " import re\n",
94
+ " cleaned = []\n",
95
+ " for text in examples[\"title\"]:\n",
96
+ " text = text.lower()\n",
97
+ " text = re.sub(r'[^\\w\\s]', '', text)\n",
98
+ " text = text.strip()\n",
99
+ " cleaned.append(text)\n",
100
+ " return {\"clean_title\": cleaned}\n",
101
+ "\n",
102
+ " data = data.map(clean_text, batched=True, num_proc=num_proc)\n",
103
+ "\n",
104
+ " # 2. Fit CountVectorizer on training data if needed\n",
105
+ " if mode == \"train\" and vectorizer is None:\n",
106
+ " # Collect all cleaned titles to fit\n",
107
+ " all_titles = data[\"clean_title\"]\n",
108
+ " #vectorizer = CountVectorizer(max_features=max_features, ngram_range=(1,2))\n",
109
+ " vectorizer = TfidfVectorizer(max_features=max_features)\n",
110
+ " vectorizer.fit(all_titles)\n",
111
+ " print(\"vectorizer fitted on training data.\")\n",
112
+ "\n",
113
+ " # 3. Transform titles with vectorizer once\n",
114
+ " def vectorize_batch(examples):\n",
115
+ " import numpy as np\n",
116
+ " freq = vectorizer.transform(examples[\"clean_title\"]).toarray().astype(np.float32)\n",
117
+ " return {\"freq_inputs\": freq}\n",
118
+ "\n",
119
+ " data = data.map(vectorize_batch, batched=True, num_proc=num_proc)\n",
120
+ "\n",
121
+ " # 4. Tokenize with BERT once\n",
122
+ " def tokenize_batch(examples):\n",
123
+ " tokenized = tokenizer(\n",
124
+ " examples[\"title\"],\n",
125
+ " padding=\"max_length\",\n",
126
+ " truncation=True,\n",
127
+ " max_length=max_seq_length\n",
128
+ " )\n",
129
+ " return {\n",
130
+ " \"input_ids\": tokenized[\"input_ids\"],\n",
131
+ " \"attention_mask\": tokenized[\"attention_mask\"]\n",
132
+ " }\n",
133
+ "\n",
134
+ " data = data.map(tokenize_batch, batched=True, num_proc=num_proc)\n",
135
+ "\n",
136
+ " # 5. Convert titles into tokens for W2V\n",
137
+ " def split_tokens(examples):\n",
138
+ " tokens_list = [t.split() for t in examples[\"clean_title\"]]\n",
139
+ " return {\"tokens\": tokens_list}\n",
140
+ "\n",
141
+ " data = data.map(split_tokens, batched=True, num_proc=num_proc)\n",
142
+ "\n",
143
+ " # Build an embedding dictionary for all unique tokens (do this once before embedding map)\n",
144
+ " unique_tokens = set()\n",
145
+ " for tokens in data[\"tokens\"]:\n",
146
+ " unique_tokens.update(tokens)\n",
147
+ "\n",
148
+ " embedding_dim = w2v_model.vector_size\n",
149
+ " embedding_dict = {}\n",
150
+ " for tk in unique_tokens:\n",
151
+ " if tk in w2v_model:\n",
152
+ " embedding_dict[tk] = w2v_model[tk].astype(np.float32)\n",
153
+ " else:\n",
154
+ " embedding_dict[tk] = np.zeros((embedding_dim,), dtype=np.float32)\n",
155
+ "\n",
156
+ " def w2v_embedding_batch(examples):\n",
157
+ " import numpy as np\n",
158
+ " batch_w2v = []\n",
159
+ " for tokens in examples[\"tokens\"]:\n",
160
+ " vectors = [embedding_dict[tk] for tk in tokens[:max_seq_length]]\n",
161
+ " if len(vectors) < max_seq_length:\n",
162
+ " vectors += [np.zeros((embedding_dim,), dtype=np.float32)] * (max_seq_length - len(vectors))\n",
163
+ " batch_w2v.append(vectors)\n",
164
+ " return {\"pos_inputs\": batch_w2v}\n",
165
+ "\n",
166
+ "\n",
167
+ " data = data.map(w2v_embedding_batch, batched=True, batch_size=32, num_proc=num_proc)\n",
168
+ "\n",
169
+ " # 7. Create labels\n",
170
+ " def make_labels(examples):\n",
171
+ " labels = [1.0 if agency == \"fox\" else 0.0 for agency in examples[\"news\"]]\n",
172
+ " return {\"labels\": labels}\n",
173
+ "\n",
174
+ " data = data.map(make_labels, batched=True, num_proc=num_proc)\n",
175
+ "\n",
176
+ " # Convert freq_inputs and pos_inputs to torch tensors in a final map step\n",
177
+ " def to_tensors(examples):\n",
178
+ " import torch\n",
179
+ "\n",
180
+ " freq_inputs = torch.tensor(examples[\"freq_inputs\"], dtype=torch.float32)\n",
181
+ " input_ids = torch.tensor(examples[\"input_ids\"])\n",
182
+ " attention_mask = torch.tensor(examples[\"attention_mask\"])\n",
183
+ " pos_inputs = torch.tensor(examples[\"pos_inputs\"], dtype=torch.float32)\n",
184
+ " labels = torch.tensor(examples[\"labels\"],dtype=torch.long)\n",
185
+ "\n",
186
+ " # seq_inputs shape: (batch_size, 2, seq_len)\n",
187
+ " seq_inputs = torch.stack([input_ids, attention_mask], dim=1)\n",
188
+ "\n",
189
+ " return {\n",
190
+ " \"freq_inputs\": freq_inputs,\n",
191
+ " \"seq_inputs\": seq_inputs,\n",
192
+ " \"pos_inputs\": pos_inputs,\n",
193
+ " \"labels\": labels\n",
194
+ " }\n",
195
+ "\n",
196
+ " # Apply final conversion to tensor\n",
197
+ " processed_data = data.map(to_tensors, batched=True, num_proc=num_proc)\n",
198
+ "\n",
199
+ " return processed_data, vectorizer\n"
200
+ ],
201
+ "id": "dc2ba675ce880d6d",
202
+ "outputs": [],
203
+ "execution_count": 33
204
+ },
205
+ {
206
+ "metadata": {
207
+ "ExecuteTime": {
208
+ "end_time": "2024-12-16T08:11:25.586667Z",
209
+ "start_time": "2024-12-16T08:10:32.651505Z"
210
+ }
211
+ },
212
+ "cell_type": "code",
213
+ "source": [
214
+ "from gensim.models import KeyedVectors\n",
215
+ "w2v_model = KeyedVectors.load_word2vec_format(\"./GoogleNews-vectors-negative300.bin\", binary=True)\n",
216
+ "\n",
217
+ "dataset_train,vectorizer = preprocess_data(\n",
218
+ " data=dataset_train,\n",
219
+ " mode=\"train\",\n",
220
+ " w2v_model=w2v_model,\n",
221
+ " max_features=8192,\n",
222
+ " max_seq_length=128\n",
223
+ ")\n",
224
+ "\n",
225
+ "dataset_test, _ = preprocess_data(\n",
226
+ " data=dataset_test,\n",
227
+ " mode=\"test\",\n",
228
+ " vectorizer=vectorizer,\n",
229
+ " w2v_model=w2v_model,\n",
230
+ " max_features=8192,\n",
231
+ " max_seq_length=128\n",
232
+ ")"
233
+ ],
234
+ "id": "158b99950fb22d1",
235
+ "outputs": [
236
+ {
237
+ "name": "stdout",
238
+ "output_type": "stream",
239
+ "text": [
240
+ "vectorizer fitted on training data.\n"
241
+ ]
242
+ },
243
+ {
244
+ "data": {
245
+ "text/plain": [
246
+ "Map (num_proc=4): 0%| | 0/3044 [00:00<?, ? examples/s]"
247
+ ],
248
+ "application/vnd.jupyter.widget-view+json": {
249
+ "version_major": 2,
250
+ "version_minor": 0,
251
+ "model_id": "bfd4d90535a94d6089d9713c39003468"
252
+ }
253
+ },
254
+ "metadata": {},
255
+ "output_type": "display_data"
256
+ },
257
+ {
258
+ "data": {
259
+ "text/plain": [
260
+ "Map (num_proc=4): 0%| | 0/3044 [00:00<?, ? examples/s]"
261
+ ],
262
+ "application/vnd.jupyter.widget-view+json": {
263
+ "version_major": 2,
264
+ "version_minor": 0,
265
+ "model_id": "88c8f4ba83ba48c1b047549b7086be66"
266
+ }
267
+ },
268
+ "metadata": {},
269
+ "output_type": "display_data"
270
+ },
271
+ {
272
+ "data": {
273
+ "text/plain": [
274
+ "Map (num_proc=4): 0%| | 0/761 [00:00<?, ? examples/s]"
275
+ ],
276
+ "application/vnd.jupyter.widget-view+json": {
277
+ "version_major": 2,
278
+ "version_minor": 0,
279
+ "model_id": "7b73903c642241bca21fabc4c37b9919"
280
+ }
281
+ },
282
+ "metadata": {},
283
+ "output_type": "display_data"
284
+ },
285
+ {
286
+ "data": {
287
+ "text/plain": [
288
+ "Map (num_proc=4): 0%| | 0/761 [00:00<?, ? examples/s]"
289
+ ],
290
+ "application/vnd.jupyter.widget-view+json": {
291
+ "version_major": 2,
292
+ "version_minor": 0,
293
+ "model_id": "fa333143b6344de8bbab7ddb27f618cf"
294
+ }
295
+ },
296
+ "metadata": {},
297
+ "output_type": "display_data"
298
+ },
299
+ {
300
+ "data": {
301
+ "text/plain": [
302
+ "Map (num_proc=4): 0%| | 0/761 [00:00<?, ? examples/s]"
303
+ ],
304
+ "application/vnd.jupyter.widget-view+json": {
305
+ "version_major": 2,
306
+ "version_minor": 0,
307
+ "model_id": "43ea10bbe58b43e4a630c05296f4a92d"
308
+ }
309
+ },
310
+ "metadata": {},
311
+ "output_type": "display_data"
312
+ },
313
+ {
314
+ "data": {
315
+ "text/plain": [
316
+ "Map (num_proc=4): 0%| | 0/761 [00:00<?, ? examples/s]"
317
+ ],
318
+ "application/vnd.jupyter.widget-view+json": {
319
+ "version_major": 2,
320
+ "version_minor": 0,
321
+ "model_id": "8917e01c927e43c2adcf3fd9b9d8a602"
322
+ }
323
+ },
324
+ "metadata": {},
325
+ "output_type": "display_data"
326
+ },
327
+ {
328
+ "data": {
329
+ "text/plain": [
330
+ "Map (num_proc=4): 0%| | 0/761 [00:00<?, ? examples/s]"
331
+ ],
332
+ "application/vnd.jupyter.widget-view+json": {
333
+ "version_major": 2,
334
+ "version_minor": 0,
335
+ "model_id": "b7ebd99939cf4bc48ba23029bdd1d651"
336
+ }
337
+ },
338
+ "metadata": {},
339
+ "output_type": "display_data"
340
+ },
341
+ {
342
+ "data": {
343
+ "text/plain": [
344
+ "Map (num_proc=4): 0%| | 0/761 [00:00<?, ? examples/s]"
345
+ ],
346
+ "application/vnd.jupyter.widget-view+json": {
347
+ "version_major": 2,
348
+ "version_minor": 0,
349
+ "model_id": "99f71c080bbe41018345656f102529f7"
350
+ }
351
+ },
352
+ "metadata": {},
353
+ "output_type": "display_data"
354
+ },
355
+ {
356
+ "data": {
357
+ "text/plain": [
358
+ "Map (num_proc=4): 0%| | 0/761 [00:00<?, ? examples/s]"
359
+ ],
360
+ "application/vnd.jupyter.widget-view+json": {
361
+ "version_major": 2,
362
+ "version_minor": 0,
363
+ "model_id": "27db7129c3b848ada119588691e6a602"
364
+ }
365
+ },
366
+ "metadata": {},
367
+ "output_type": "display_data"
368
+ }
369
+ ],
370
+ "execution_count": 34
371
+ },
372
+ {
373
+ "metadata": {
374
+ "ExecuteTime": {
375
+ "end_time": "2024-12-16T08:11:25.595796Z",
376
+ "start_time": "2024-12-16T08:11:25.593319Z"
377
+ }
378
+ },
379
+ "cell_type": "code",
380
+ "source": [
381
+ "print(dataset_train)\n",
382
+ "print(dataset_test)"
383
+ ],
384
+ "id": "edd80d33175c96a0",
385
+ "outputs": [
386
+ {
387
+ "name": "stdout",
388
+ "output_type": "stream",
389
+ "text": [
390
+ "Dataset({\n",
391
+ " features: ['title', 'news', 'index', 'url', 'clean_title', 'freq_inputs', 'input_ids', 'attention_mask', 'tokens', 'pos_inputs', 'labels', 'seq_inputs'],\n",
392
+ " num_rows: 3044\n",
393
+ "})\n",
394
+ "Dataset({\n",
395
+ " features: ['title', 'news', 'index', 'url', 'clean_title', 'freq_inputs', 'input_ids', 'attention_mask', 'tokens', 'pos_inputs', 'labels', 'seq_inputs'],\n",
396
+ " num_rows: 761\n",
397
+ "})\n"
398
+ ]
399
+ }
400
+ ],
401
+ "execution_count": 35
402
+ },
403
+ {
404
+ "metadata": {},
405
+ "cell_type": "markdown",
406
+ "source": "# Part 2: Model",
407
+ "id": "c9a49fc1fbca29d7"
408
+ },
409
+ {
410
+ "metadata": {},
411
+ "cell_type": "markdown",
412
+ "source": "## Defining the Custom Model",
413
+ "id": "aebe5e51f0e611cc"
414
+ },
415
+ {
416
+ "metadata": {},
417
+ "cell_type": "markdown",
418
+ "source": "",
419
+ "id": "f0eae08a025b6ed9"
420
+ },
421
+ {
422
+ "metadata": {
423
+ "ExecuteTime": {
424
+ "end_time": "2024-12-16T08:11:25.680033Z",
425
+ "start_time": "2024-12-16T08:11:25.672667Z"
426
+ }
427
+ },
428
+ "cell_type": "code",
429
+ "source": [
430
+ "# TODO: import all packages necessary for your custom model\n",
431
+ "import pandas as pd\n",
432
+ "import os\n",
433
+ "from torch.utils.data import DataLoader\n",
434
+ "from transformers import PreTrainedModel, PretrainedConfig, AutoConfig, AutoModel\n",
435
+ "import torch\n",
436
+ "import torch.nn as nn\n",
437
+ "from transformers import RobertaModel, RobertaConfig,RobertaForSequenceClassification, BertModel\n",
438
+ "from model.network import Classifier\n",
439
+ "from model.frequential import FreqNetwork\n",
440
+ "from model.sequential import SeqNetwork\n",
441
+ "from model.positional import PosNetwork\n",
442
+ "\n",
443
+ "class CustomConfig(PretrainedConfig):\n",
444
+ " model_type = \"headlineclassifier\"\n",
445
+ "\n",
446
+ " def __init__(\n",
447
+ " self,\n",
448
+ " base_exp_dir=\"./exp/fox_nbc/\",\n",
449
+ " # dataset={\"data_dir\": \"./data/CASE_NAME/data.csv\", \"transform\": True},\n",
450
+ " train={\n",
451
+ " \"learning_rate\": 2e-5,\n",
452
+ " \"learning_rate_alpha\": 0.05,\n",
453
+ " \"end_iter\": 10,\n",
454
+ " \"batch_size\": 32,\n",
455
+ " \"warm_up_end\": 2,\n",
456
+ " \"anneal_end\": 5,\n",
457
+ " \"save_freq\": 1,\n",
458
+ " \"val_freq\": 1,\n",
459
+ " },\n",
460
+ " model={\n",
461
+ " \"freq\": {\n",
462
+ " \"tfidf_input_dim\": 8145,\n",
463
+ " \"tfidf_output_dim\": 128,\n",
464
+ " \"tfidf_hidden_dim\": 512,\n",
465
+ " \"n_layers\": 2,\n",
466
+ " \"skip_in\": [80],\n",
467
+ " \"weight_norm\": True,\n",
468
+ " },\n",
469
+ " \"pos\": {\n",
470
+ " \"input_dim\": 300,\n",
471
+ " \"output_dim\": 128,\n",
472
+ " \"hidden_dim\": 256,\n",
473
+ " \"n_layers\": 2,\n",
474
+ " \"skip_in\": [80],\n",
475
+ " \"weight_norm\": True,\n",
476
+ " },\n",
477
+ " \"cls\": {\n",
478
+ " \"combined_input\": 1024, #1024\n",
479
+ " \"combined_dim\": 128,\n",
480
+ " \"num_classes\": 1,\n",
481
+ " \"n_layers\": 2,\n",
482
+ " \"skip_in\": [80],\n",
483
+ " \"weight_norm\": True,\n",
484
+ " },\n",
485
+ " },\n",
486
+ " **kwargs,\n",
487
+ " ):\n",
488
+ " super().__init__(**kwargs)\n",
489
+ "\n",
490
+ " self.base_exp_dir = base_exp_dir\n",
491
+ " # self.dataset = dataset\n",
492
+ " self.train = train\n",
493
+ " self.model = model\n",
494
+ "\n",
495
+ "# TODO: define all parameters needed for your model, as well as calling the model itself\n",
496
+ "class CustomModel(PreTrainedModel):\n",
497
+ " config_class = CustomConfig\n",
498
+ "\n",
499
+ " def __init__(self, config):\n",
500
+ " super().__init__(config)\n",
501
+ " self.conf = config\n",
502
+ " self.freq = FreqNetwork(**self.conf.model[\"freq\"])\n",
503
+ " self.pos = PosNetwork(**self.conf.model[\"pos\"])\n",
504
+ " self.fc = nn.Linear(self.conf.model[\"cls\"][\"combined_input\"],2)\n",
505
+ " self.seq = RobertaModel.from_pretrained(\"roberta-base\")\n",
506
+ " # self.seq = BertModel.from_pretrained(\"bert-base-uncased\")\n",
507
+ " #for param in self.roberta.parameters():\n",
508
+ " # param.requires_grad = False\n",
509
+ " self.dropout = nn.Dropout(0.1)\n",
510
+ "\n",
511
+ " def forward(self, x):\n",
512
+ " freq_inputs = x[\"freq_inputs\"]\n",
513
+ " seq_inputs = x[\"seq_inputs\"]\n",
514
+ " pos_inputs = x[\"pos_inputs\"]\n",
515
+ " seq_feature = self.seq(\n",
516
+ " input_ids=seq_inputs[:,0,:],\n",
517
+ " attention_mask=seq_inputs[:,1,:]\n",
518
+ " ).pooler_output # last_hidden_state[:, 0, :]\n",
519
+ " freq_feature = self.freq(freq_inputs) # Shape: (batch_size, 128)\n",
520
+ "\n",
521
+ " pos_feature = self.pos(pos_inputs) #Shape: (batch_size, 128)\n",
522
+ " inputs = torch.cat((seq_feature, freq_feature, pos_feature), dim=1) # Shape: (batch_size, 1024) = 768 + 128 + 128\n",
523
+ " # inputs = torch.cat((seq_feature, freq_feature), dim=1) # Shape: (batch_size,256)\n",
524
+ " # inputs = seq_feature\n",
525
+ "\n",
526
+ " x = inputs\n",
527
+ " x = self.dropout(x)\n",
528
+ " outputs = self.fc(x)\n",
529
+ "\n",
530
+ " return outputs\n",
531
+ "\n",
532
+ " def save_model(self, save_path):\n",
533
+ " \"\"\"Save the model locally using the Hugging Face format.\"\"\"\n",
534
+ " self.save_pretrained(save_path)\n",
535
+ "\n",
536
+ " def push_model(self, repo_name):\n",
537
+ " \"\"\"Push the model to the Hugging Face Hub.\"\"\"\n",
538
+ " self.push_to_hub(repo_name)"
539
+ ],
540
+ "id": "21f079d0c52d7d",
541
+ "outputs": [],
542
+ "execution_count": 36
543
+ },
544
+ {
545
+ "metadata": {
546
+ "ExecuteTime": {
547
+ "end_time": "2024-12-16T08:11:25.888720Z",
548
+ "start_time": "2024-12-16T08:11:25.683038Z"
549
+ }
550
+ },
551
+ "cell_type": "code",
552
+ "source": [
553
+ "from huggingface_hub import hf_hub_download\n",
554
+ "\n",
555
+ "AutoConfig.register(\"headlineclassifier\", CustomConfig)\n",
556
+ "AutoModel.register(CustomConfig, CustomModel)\n",
557
+ "config = CustomConfig()\n",
558
+ "model = CustomModel(config)\n",
559
+ "\n",
560
+ "REPO_NAME = \"CISProject/News-Headline-Classifier-Notebook\" # TODO: PROVIDE A STRING TO YOUR REPO ON HUGGINGFACE"
561
+ ],
562
+ "id": "b6ba3f96d3ce21",
563
+ "outputs": [
564
+ {
565
+ "name": "stderr",
566
+ "output_type": "stream",
567
+ "text": [
568
+ "C:\\Users\\swall\\anaconda3\\envs\\newsCLS\\Lib\\site-packages\\torch\\nn\\utils\\weight_norm.py:143: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`.\n",
569
+ " WeightNorm.apply(module, name, dim)\n",
570
+ "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
571
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
572
+ ]
573
+ }
574
+ ],
575
+ "execution_count": 37
576
+ },
577
+ {
578
+ "metadata": {
579
+ "ExecuteTime": {
580
+ "end_time": "2024-12-16T08:11:25.904773Z",
581
+ "start_time": "2024-12-16T08:11:25.895279Z"
582
+ }
583
+ },
584
+ "cell_type": "code",
585
+ "source": [
586
+ "import torch\n",
587
+ "from tqdm import tqdm\n",
588
+ "import os\n",
589
+ "\n",
590
+ "\n",
591
+ "class Trainer:\n",
592
+ " def __init__(self, model, train_loader, val_loader, config, device=\"cuda\"):\n",
593
+ " self.model = model.to(device)\n",
594
+ " self.train_loader = train_loader\n",
595
+ " self.val_loader = val_loader\n",
596
+ " self.device = device\n",
597
+ " self.conf = config\n",
598
+ "\n",
599
+ " self.end_iter = self.conf.train[\"end_iter\"]\n",
600
+ " self.save_freq = self.conf.train[\"save_freq\"]\n",
601
+ " self.val_freq = self.conf.train[\"val_freq\"]\n",
602
+ "\n",
603
+ " self.batch_size = self.conf.train['batch_size']\n",
604
+ " self.learning_rate = self.conf.train['learning_rate']\n",
605
+ " self.learning_rate_alpha = self.conf.train['learning_rate_alpha']\n",
606
+ " self.warm_up_end = self.conf.train['warm_up_end']\n",
607
+ " self.anneal_end = self.conf.train['anneal_end']\n",
608
+ "\n",
609
+ " self.optimizer = torch.optim.Adam(model.parameters(), lr=self.learning_rate)\n",
610
+ " #self.criterion = torch.nn.BCEWithLogitsLoss()\n",
611
+ " self.criterion = torch.nn.CrossEntropyLoss()\n",
612
+ " self.save_path = os.path.join(self.conf.base_exp_dir, \"checkpoints\")\n",
613
+ " os.makedirs(self.save_path, exist_ok=True)\n",
614
+ "\n",
615
+ " self.iter_step = 0\n",
616
+ "\n",
617
+ " self.val_loss = None\n",
618
+ "\n",
619
+ " def get_cos_anneal_ratio(self):\n",
620
+ " if self.anneal_end == 0.0:\n",
621
+ " return 1.0\n",
622
+ " else:\n",
623
+ " return np.min([1.0, self.iter_step / self.anneal_end])\n",
624
+ "\n",
625
+ " def update_learning_rate(self):\n",
626
+ " if self.iter_step < self.warm_up_end:\n",
627
+ " learning_factor = self.iter_step / self.warm_up_end\n",
628
+ " else:\n",
629
+ " alpha = self.learning_rate_alpha\n",
630
+ " progress = (self.iter_step - self.warm_up_end) / (self.end_iter - self.warm_up_end)\n",
631
+ " learning_factor = (np.cos(np.pi * progress) + 1.0) * 0.5 * (1 - alpha) + alpha\n",
632
+ "\n",
633
+ " for g in self.optimizer.param_groups:\n",
634
+ " g['lr'] = self.learning_rate * learning_factor\n",
635
+ "\n",
636
+ " def train(self):\n",
637
+ " for epoch in range(self.end_iter):\n",
638
+ " self.update_learning_rate()\n",
639
+ " self.model.train()\n",
640
+ " epoch_loss = 0.0\n",
641
+ " correct = 0\n",
642
+ " total = 0\n",
643
+ "\n",
644
+ " for batch_inputs, labels in tqdm(self.train_loader, desc=f\"Epoch {epoch + 1}/{self.end_iter}\"):\n",
645
+ " # Extract features\n",
646
+ "\n",
647
+ " freq_inputs = batch_inputs[\"freq_inputs\"].to(self.device)\n",
648
+ " seq_inputs = batch_inputs[\"seq_inputs\"].to(self.device)\n",
649
+ " pos_inputs = batch_inputs[\"pos_inputs\"].to(self.device)\n",
650
+ " # y_train = labels.to(self.device)[:,None]\n",
651
+ " y_train = labels.to(self.device)\n",
652
+ "\n",
653
+ " # Forward pass\n",
654
+ " preds = self.model({\"freq_inputs\": freq_inputs, \"seq_inputs\": seq_inputs, \"pos_inputs\": pos_inputs})\n",
655
+ " loss = self.criterion(preds, y_train)\n",
656
+ "\n",
657
+ " # preds = (torch.sigmoid(preds) > 0.5).int()\n",
658
+ " # Backward pass\n",
659
+ " self.optimizer.zero_grad()\n",
660
+ " loss.backward()\n",
661
+ " self.optimizer.step()\n",
662
+ " _, preds = torch.max(preds, dim=1)\n",
663
+ " # Metrics\n",
664
+ " epoch_loss += loss.item()\n",
665
+ " total += y_train.size(0)\n",
666
+ " # print(preds.shape)\n",
667
+ " correct += (preds == y_train).sum().item()\n",
668
+ "\n",
669
+ " # Log epoch metrics\n",
670
+ " print(f\"Train Loss: {epoch_loss / len(self.train_loader):.4f}\")\n",
671
+ " print(f\"Train Accuracy: {correct / total:.4f}\")\n",
672
+ "\n",
673
+ " # Validation and Save Checkpoints\n",
674
+ " if (epoch + 1) % self.val_freq == 0:\n",
675
+ " self.val()\n",
676
+ " if (epoch + 1) % self.save_freq == 0:\n",
677
+ " self.save_checkpoint(epoch + 1)\n",
678
+ "\n",
679
+ " # Update learning rate\n",
680
+ " self.iter_step += 1\n",
681
+ " self.update_learning_rate()\n",
682
+ "\n",
683
+ "\n",
684
+ " def val(self):\n",
685
+ " self.model.eval()\n",
686
+ " val_loss = 0.0\n",
687
+ " correct = 0\n",
688
+ " total = 0\n",
689
+ "\n",
690
+ " with torch.no_grad():\n",
691
+ " for batch_inputs, labels in tqdm(self.val_loader, desc=\"Validation\", leave=False):\n",
692
+ " freq_inputs = batch_inputs[\"freq_inputs\"].to(self.device)\n",
693
+ " seq_inputs = batch_inputs[\"seq_inputs\"].to(self.device)\n",
694
+ " pos_inputs = batch_inputs[\"pos_inputs\"].to(self.device)\n",
695
+ " y_val = labels.to(self.device)\n",
696
+ "\n",
697
+ " preds = self.model({\"freq_inputs\": freq_inputs, \"seq_inputs\": seq_inputs, \"pos_inputs\": pos_inputs})\n",
698
+ " loss = self.criterion(preds, y_val)\n",
699
+ " # preds = (torch.sigmoid(preds)>0.5).float()\n",
700
+ " _, preds = torch.max(preds, dim=1)\n",
701
+ " val_loss += loss.item()\n",
702
+ " total += y_val.size(0)\n",
703
+ " correct += (preds == y_val).sum().item()\n",
704
+ " if self.val_loss is None or val_loss < self.val_loss:\n",
705
+ " self.val_loss = val_loss\n",
706
+ " self.save_checkpoint(\"best\")\n",
707
+ " # Log validation metrics\n",
708
+ " print(f\"Validation Loss: {val_loss / len(self.val_loader):.4f}\")\n",
709
+ " print(f\"Validation Accuracy: {correct / total:.4f}\")\n",
710
+ "\n",
711
+ " def save_checkpoint(self, epoch):\n",
712
+ " \"\"\"Save model in Hugging Face format.\"\"\"\n",
713
+ " checkpoint_dir = os.path.join(self.save_path, f\"checkpoint_epoch_{epoch}\")\n",
714
+ " if epoch ==\"best\":\n",
715
+ " checkpoint_dir = os.path.join(self.save_path, \"best\")\n",
716
+ " self.model.save_pretrained(checkpoint_dir)\n",
717
+ " print(f\"Checkpoint saved at {checkpoint_dir}\")"
718
+ ],
719
+ "id": "7be377251b81a25d",
720
+ "outputs": [],
721
+ "execution_count": 38
722
+ },
723
+ {
724
+ "metadata": {
725
+ "ExecuteTime": {
726
+ "end_time": "2024-12-16T08:23:59.280460Z",
727
+ "start_time": "2024-12-16T08:11:25.911156Z"
728
+ }
729
+ },
730
+ "cell_type": "code",
731
+ "source": [
732
+ "from torch.utils.data import DataLoader\n",
733
+ "\n",
734
+ "# Define a collate function to handle the batched data\n",
735
+ "def collate_fn(batch):\n",
736
+ " freq_inputs = torch.stack([torch.tensor(item[\"freq_inputs\"]) for item in batch])\n",
737
+ " seq_inputs = torch.stack([torch.tensor(item[\"seq_inputs\"]) for item in batch])\n",
738
+ " pos_inputs = torch.stack([torch.tensor(item[\"pos_inputs\"]) for item in batch])\n",
739
+ " labels = torch.tensor([torch.tensor(item[\"labels\"],dtype=torch.long) for item in batch])\n",
740
+ " return {\"freq_inputs\": freq_inputs, \"seq_inputs\": seq_inputs, \"pos_inputs\": pos_inputs}, labels\n",
741
+ "\n",
742
+ "train_loader = DataLoader(dataset_train, batch_size=config.train[\"batch_size\"], shuffle=True,collate_fn=collate_fn)\n",
743
+ "test_loader = DataLoader(dataset_test, batch_size=config.train[\"batch_size\"], shuffle=False,collate_fn=collate_fn)\n",
744
+ "trainer = Trainer(model, train_loader, test_loader, config)\n",
745
+ "\n",
746
+ "# Train the model\n",
747
+ "trainer.train()\n",
748
+ "# Save the final model in Hugging Face format\n",
749
+ "final_save_path = os.path.join(config.base_exp_dir, \"checkpoints\")\n",
750
+ "model.save_pretrained(final_save_path)\n",
751
+ "print(f\"Final model saved at {final_save_path}\")\n"
752
+ ],
753
+ "id": "dd1749c306f148eb",
754
+ "outputs": [
755
+ {
756
+ "name": "stderr",
757
+ "output_type": "stream",
758
+ "text": [
759
+ "Epoch 1/10: 100%|██████████| 96/96 [01:00<00:00, 1.60it/s]\n"
760
+ ]
761
+ },
762
+ {
763
+ "name": "stdout",
764
+ "output_type": "stream",
765
+ "text": [
766
+ "Train Loss: 0.6920\n",
767
+ "Train Accuracy: 0.5217\n"
768
+ ]
769
+ },
770
+ {
771
+ "name": "stderr",
772
+ "output_type": "stream",
773
+ "text": [
774
+ " \r"
775
+ ]
776
+ },
777
+ {
778
+ "name": "stdout",
779
+ "output_type": "stream",
780
+ "text": [
781
+ "Checkpoint saved at ./exp/fox_nbc/checkpoints\\best\n",
782
+ "Validation Loss: 0.6914\n",
783
+ "Validation Accuracy: 0.5322\n"
784
+ ]
785
+ },
786
+ {
787
+ "name": "stderr",
788
+ "output_type": "stream",
789
+ "text": [
790
+ "Epoch 2/10: 100%|██████████| 96/96 [01:00<00:00, 1.59it/s]\n"
791
+ ]
792
+ },
793
+ {
794
+ "name": "stdout",
795
+ "output_type": "stream",
796
+ "text": [
797
+ "Train Loss: 0.6137\n",
798
+ "Train Accuracy: 0.6390\n"
799
+ ]
800
+ },
801
+ {
802
+ "name": "stderr",
803
+ "output_type": "stream",
804
+ "text": [
805
+ " \r"
806
+ ]
807
+ },
808
+ {
809
+ "name": "stdout",
810
+ "output_type": "stream",
811
+ "text": [
812
+ "Checkpoint saved at ./exp/fox_nbc/checkpoints\\best\n",
813
+ "Validation Loss: 0.4313\n",
814
+ "Validation Accuracy: 0.7937\n"
815
+ ]
816
+ },
817
+ {
818
+ "name": "stderr",
819
+ "output_type": "stream",
820
+ "text": [
821
+ "Epoch 3/10: 100%|██████████| 96/96 [01:00<00:00, 1.59it/s]\n"
822
+ ]
823
+ },
824
+ {
825
+ "name": "stdout",
826
+ "output_type": "stream",
827
+ "text": [
828
+ "Train Loss: 0.3599\n",
829
+ "Train Accuracy: 0.8466\n"
830
+ ]
831
+ },
832
+ {
833
+ "name": "stderr",
834
+ "output_type": "stream",
835
+ "text": [
836
+ " \r"
837
+ ]
838
+ },
839
+ {
840
+ "name": "stdout",
841
+ "output_type": "stream",
842
+ "text": [
843
+ "Validation Loss: 0.4837\n",
844
+ "Validation Accuracy: 0.7911\n"
845
+ ]
846
+ },
847
+ {
848
+ "name": "stderr",
849
+ "output_type": "stream",
850
+ "text": [
851
+ "Epoch 4/10: 100%|██████████| 96/96 [01:00<00:00, 1.59it/s]\n"
852
+ ]
853
+ },
854
+ {
855
+ "name": "stdout",
856
+ "output_type": "stream",
857
+ "text": [
858
+ "Train Loss: 0.2002\n",
859
+ "Train Accuracy: 0.9208\n"
860
+ ]
861
+ },
862
+ {
863
+ "name": "stderr",
864
+ "output_type": "stream",
865
+ "text": [
866
+ " \r"
867
+ ]
868
+ },
869
+ {
870
+ "name": "stdout",
871
+ "output_type": "stream",
872
+ "text": [
873
+ "Checkpoint saved at ./exp/fox_nbc/checkpoints\\best\n",
874
+ "Validation Loss: 0.3466\n",
875
+ "Validation Accuracy: 0.8647\n"
876
+ ]
877
+ },
878
+ {
879
+ "name": "stderr",
880
+ "output_type": "stream",
881
+ "text": [
882
+ "Epoch 5/10: 100%|██████████| 96/96 [01:00<00:00, 1.57it/s]\n"
883
+ ]
884
+ },
885
+ {
886
+ "name": "stdout",
887
+ "output_type": "stream",
888
+ "text": [
889
+ "Train Loss: 0.0939\n",
890
+ "Train Accuracy: 0.9655\n"
891
+ ]
892
+ },
893
+ {
894
+ "name": "stderr",
895
+ "output_type": "stream",
896
+ "text": [
897
+ " \r"
898
+ ]
899
+ },
900
+ {
901
+ "name": "stdout",
902
+ "output_type": "stream",
903
+ "text": [
904
+ "Validation Loss: 0.5384\n",
905
+ "Validation Accuracy: 0.8371\n",
906
+ "Checkpoint saved at ./exp/fox_nbc/checkpoints\\checkpoint_epoch_5\n"
907
+ ]
908
+ },
909
+ {
910
+ "name": "stderr",
911
+ "output_type": "stream",
912
+ "text": [
913
+ "Epoch 6/10: 100%|██████████| 96/96 [01:00<00:00, 1.58it/s]\n"
914
+ ]
915
+ },
916
+ {
917
+ "name": "stdout",
918
+ "output_type": "stream",
919
+ "text": [
920
+ "Train Loss: 0.0555\n",
921
+ "Train Accuracy: 0.9800\n"
922
+ ]
923
+ },
924
+ {
925
+ "name": "stderr",
926
+ "output_type": "stream",
927
+ "text": [
928
+ " \r"
929
+ ]
930
+ },
931
+ {
932
+ "name": "stdout",
933
+ "output_type": "stream",
934
+ "text": [
935
+ "Validation Loss: 0.4440\n",
936
+ "Validation Accuracy: 0.8857\n"
937
+ ]
938
+ },
939
+ {
940
+ "name": "stderr",
941
+ "output_type": "stream",
942
+ "text": [
943
+ "Epoch 7/10: 100%|██████████| 96/96 [01:00<00:00, 1.58it/s]\n"
944
+ ]
945
+ },
946
+ {
947
+ "name": "stdout",
948
+ "output_type": "stream",
949
+ "text": [
950
+ "Train Loss: 0.0422\n",
951
+ "Train Accuracy: 0.9855\n"
952
+ ]
953
+ },
954
+ {
955
+ "name": "stderr",
956
+ "output_type": "stream",
957
+ "text": [
958
+ " \r"
959
+ ]
960
+ },
961
+ {
962
+ "name": "stdout",
963
+ "output_type": "stream",
964
+ "text": [
965
+ "Validation Loss: 0.5519\n",
966
+ "Validation Accuracy: 0.8739\n"
967
+ ]
968
+ },
969
+ {
970
+ "name": "stderr",
971
+ "output_type": "stream",
972
+ "text": [
973
+ "Epoch 8/10: 100%|██████████| 96/96 [01:01<00:00, 1.56it/s]\n"
974
+ ]
975
+ },
976
+ {
977
+ "name": "stdout",
978
+ "output_type": "stream",
979
+ "text": [
980
+ "Train Loss: 0.0163\n",
981
+ "Train Accuracy: 0.9944\n"
982
+ ]
983
+ },
984
+ {
985
+ "name": "stderr",
986
+ "output_type": "stream",
987
+ "text": [
988
+ " \r"
989
+ ]
990
+ },
991
+ {
992
+ "name": "stdout",
993
+ "output_type": "stream",
994
+ "text": [
995
+ "Validation Loss: 0.6396\n",
996
+ "Validation Accuracy: 0.8581\n"
997
+ ]
998
+ },
999
+ {
1000
+ "name": "stderr",
1001
+ "output_type": "stream",
1002
+ "text": [
1003
+ "Epoch 9/10: 100%|██████████| 96/96 [01:01<00:00, 1.57it/s]\n"
1004
+ ]
1005
+ },
1006
+ {
1007
+ "name": "stdout",
1008
+ "output_type": "stream",
1009
+ "text": [
1010
+ "Train Loss: 0.0125\n",
1011
+ "Train Accuracy: 0.9974\n"
1012
+ ]
1013
+ },
1014
+ {
1015
+ "name": "stderr",
1016
+ "output_type": "stream",
1017
+ "text": [
1018
+ " \r"
1019
+ ]
1020
+ },
1021
+ {
1022
+ "name": "stdout",
1023
+ "output_type": "stream",
1024
+ "text": [
1025
+ "Validation Loss: 0.4890\n",
1026
+ "Validation Accuracy: 0.8857\n"
1027
+ ]
1028
+ },
1029
+ {
1030
+ "name": "stderr",
1031
+ "output_type": "stream",
1032
+ "text": [
1033
+ "Epoch 10/10: 100%|██████████| 96/96 [01:17<00:00, 1.24it/s]\n"
1034
+ ]
1035
+ },
1036
+ {
1037
+ "name": "stdout",
1038
+ "output_type": "stream",
1039
+ "text": [
1040
+ "Train Loss: 0.0039\n",
1041
+ "Train Accuracy: 0.9997\n"
1042
+ ]
1043
+ },
1044
+ {
1045
+ "name": "stderr",
1046
+ "output_type": "stream",
1047
+ "text": [
1048
+ " \r"
1049
+ ]
1050
+ },
1051
+ {
1052
+ "name": "stdout",
1053
+ "output_type": "stream",
1054
+ "text": [
1055
+ "Validation Loss: 0.5024\n",
1056
+ "Validation Accuracy: 0.8883\n",
1057
+ "Checkpoint saved at ./exp/fox_nbc/checkpoints\\checkpoint_epoch_10\n",
1058
+ "Final model saved at ./exp/fox_nbc/checkpoints\n"
1059
+ ]
1060
+ }
1061
+ ],
1062
+ "execution_count": 39
1063
+ },
1064
+ {
1065
+ "metadata": {},
1066
+ "cell_type": "markdown",
1067
+ "source": "## Evaluate Model",
1068
+ "id": "4af000263dd99bca"
1069
+ },
1070
+ {
1071
+ "metadata": {
1072
+ "ExecuteTime": {
1073
+ "end_time": "2024-12-16T08:24:40.129577Z",
1074
+ "start_time": "2024-12-16T08:24:26.080416Z"
1075
+ }
1076
+ },
1077
+ "cell_type": "code",
1078
+ "source": [
1079
+ "from transformers import AutoConfig, AutoModel\n",
1080
+ "from sklearn.metrics import accuracy_score, classification_report\n",
1081
+ "def load_last_checkpoint(checkpoint_dir):\n",
1082
+ " # Find all checkpoints in the directory\n",
1083
+ " checkpoints = [f for f in os.listdir(checkpoint_dir) if f.startswith(\"checkpoint_epoch_\")]\n",
1084
+ " if not checkpoints:\n",
1085
+ " raise FileNotFoundError(f\"No checkpoints found in {checkpoint_dir}!\")\n",
1086
+ " # Sort checkpoints by epoch number\n",
1087
+ " checkpoints.sort(key=lambda x: int(x.split(\"_\")[-1]))\n",
1088
+ "\n",
1089
+ " # Load the last checkpoint\n",
1090
+ " last_checkpoint = os.path.join(checkpoint_dir, checkpoints[-1])\n",
1091
+ " # print(f\"Loading checkpoint from {last_checkpoint}\")\n",
1092
+ " # Load the best checkpoint\n",
1093
+ " #if os.path.join(checkpoint_dir, \"best\") is not None:\n",
1094
+ " # last_checkpoint = os.path.join(checkpoint_dir, \"best\")\n",
1095
+ " print(f\"Loading checkpoint from {last_checkpoint}\")\n",
1096
+ " # Load model and config\n",
1097
+ " config = AutoConfig.from_pretrained(last_checkpoint)\n",
1098
+ " model = AutoModel.from_pretrained(last_checkpoint, config=config)\n",
1099
+ " return model\n",
1100
+ "\n",
1101
+ "# Step 1: Define paths and setup\n",
1102
+ "checkpoint_dir = os.path.join(config.base_exp_dir, \"checkpoints\") # Directory where checkpoints are stored\n",
1103
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
1104
+ "model = load_last_checkpoint(checkpoint_dir)\n",
1105
+ "model.to(device)\n",
1106
+ "\n",
1107
+ "# criterion = torch.nn.BCEWithLogitsLoss()\n",
1108
+ "\n",
1109
+ "criterion = torch.nn.CrossEntropyLoss()\n",
1110
+ "\n",
1111
+ "def evaluate_model(model, val_loader, criterion, device=\"cuda\"):\n",
1112
+ " model.eval()\n",
1113
+ " val_loss = 0.0\n",
1114
+ " correct = 0\n",
1115
+ " total = 0\n",
1116
+ " all_preds = []\n",
1117
+ " all_labels = []\n",
1118
+ " with torch.no_grad():\n",
1119
+ " for batch_inputs, labels in tqdm(val_loader, desc=\"Testing\", leave=False):\n",
1120
+ " freq_inputs = batch_inputs[\"freq_inputs\"].to(device)\n",
1121
+ " seq_inputs = batch_inputs[\"seq_inputs\"].to(device)\n",
1122
+ " pos_inputs = batch_inputs[\"pos_inputs\"].to(device)\n",
1123
+ " labels = labels.to(device)\n",
1124
+ "\n",
1125
+ " preds= model({\"freq_inputs\": freq_inputs, \"seq_inputs\": seq_inputs, \"pos_inputs\": pos_inputs})\n",
1126
+ " loss = criterion(preds, labels)\n",
1127
+ " _, preds = torch.max(preds, dim=1)\n",
1128
+ " # preds = (torch.sigmoid(preds) > 0.5).float()\n",
1129
+ " val_loss += loss.item()\n",
1130
+ " total += labels.size(0)\n",
1131
+ " # preds = (torch.sigmoid(preds) > 0.5).int()\n",
1132
+ " correct += (preds == labels).sum().item()\n",
1133
+ " all_preds.extend(preds.cpu().numpy())\n",
1134
+ " all_labels.extend(labels.cpu().numpy())\n",
1135
+ "\n",
1136
+ " return accuracy_score(all_labels, all_preds), classification_report(all_labels, all_preds)\n",
1137
+ "\n",
1138
+ "\n",
1139
+ "accuracy, report = evaluate_model(model, test_loader, criterion)\n",
1140
+ "print(f\"Accuracy: {accuracy:.4f}\")\n",
1141
+ "print(report)\n"
1142
+ ],
1143
+ "id": "b75d2dc8a300cdf6",
1144
+ "outputs": [
1145
+ {
1146
+ "name": "stderr",
1147
+ "output_type": "stream",
1148
+ "text": [
1149
+ "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
1150
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
1151
+ "Some weights of the model checkpoint at ./exp/fox_nbc/checkpoints\\checkpoint_epoch_10 were not used when initializing CustomModel: ['freq.lin0.parametrizations.weight.original0', 'freq.lin0.parametrizations.weight.original1', 'freq.lin1.parametrizations.weight.original0', 'freq.lin1.parametrizations.weight.original1', 'freq.lin2.parametrizations.weight.original0', 'freq.lin2.parametrizations.weight.original1', 'pos.lin0.parametrizations.weight.original0', 'pos.lin0.parametrizations.weight.original1', 'pos.lin1.parametrizations.weight.original0', 'pos.lin1.parametrizations.weight.original1', 'pos.lin2.parametrizations.weight.original0', 'pos.lin2.parametrizations.weight.original1']\n",
1152
+ "- This IS expected if you are initializing CustomModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
1153
+ "- This IS NOT expected if you are initializing CustomModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
1154
+ ]
1155
+ },
1156
+ {
1157
+ "name": "stdout",
1158
+ "output_type": "stream",
1159
+ "text": [
1160
+ "Loading checkpoint from ./exp/fox_nbc/checkpoints\\checkpoint_epoch_10\n"
1161
+ ]
1162
+ },
1163
+ {
1164
+ "name": "stderr",
1165
+ "output_type": "stream",
1166
+ "text": [
1167
+ "Some weights of CustomModel were not initialized from the model checkpoint at ./exp/fox_nbc/checkpoints\\checkpoint_epoch_10 and are newly initialized: ['freq.lin0.weight_g', 'freq.lin0.weight_v', 'freq.lin1.weight_g', 'freq.lin1.weight_v', 'freq.lin2.weight_g', 'freq.lin2.weight_v', 'pos.lin0.weight_g', 'pos.lin0.weight_v', 'pos.lin1.weight_g', 'pos.lin1.weight_v', 'pos.lin2.weight_g', 'pos.lin2.weight_v']\n",
1168
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
1169
+ " "
1170
+ ]
1171
+ },
1172
+ {
1173
+ "name": "stdout",
1174
+ "output_type": "stream",
1175
+ "text": [
1176
+ "Accuracy: 0.8883\n",
1177
+ " precision recall f1-score support\n",
1178
+ "\n",
1179
+ " 0 0.86 0.91 0.88 356\n",
1180
+ " 1 0.92 0.87 0.89 405\n",
1181
+ "\n",
1182
+ " accuracy 0.89 761\n",
1183
+ " macro avg 0.89 0.89 0.89 761\n",
1184
+ "weighted avg 0.89 0.89 0.89 761\n",
1185
+ "\n"
1186
+ ]
1187
+ },
1188
+ {
1189
+ "name": "stderr",
1190
+ "output_type": "stream",
1191
+ "text": [
1192
+ "\r"
1193
+ ]
1194
+ }
1195
+ ],
1196
+ "execution_count": 41
1197
+ },
1198
+ {
1199
+ "metadata": {},
1200
+ "cell_type": "markdown",
1201
+ "source": "# Part 3: Pushing the Model to the Hugging Face Hub",
1202
+ "id": "d2ffeb383ea00beb"
1203
+ },
1204
+ {
1205
+ "metadata": {
1206
+ "ExecuteTime": {
1207
+ "end_time": "2024-12-16T08:25:19.716888Z",
1208
+ "start_time": "2024-12-16T08:24:56.833580Z"
1209
+ }
1210
+ },
1211
+ "cell_type": "code",
1212
+ "source": "model.push_model(REPO_NAME)",
1213
+ "id": "f55c22b0a1b2a66b",
1214
+ "outputs": [
1215
+ {
1216
+ "data": {
1217
+ "text/plain": [
1218
+ "README.md: 0%| | 0.00/326 [00:00<?, ?B/s]"
1219
+ ],
1220
+ "application/vnd.jupyter.widget-view+json": {
1221
+ "version_major": 2,
1222
+ "version_minor": 0,
1223
+ "model_id": "7595a65d4b2f4332a7655b4270bb6f5b"
1224
+ }
1225
+ },
1226
+ "metadata": {},
1227
+ "output_type": "display_data"
1228
+ },
1229
+ {
1230
+ "data": {
1231
+ "text/plain": [
1232
+ "model.safetensors: 0%| | 0.00/517M [00:00<?, ?B/s]"
1233
+ ],
1234
+ "application/vnd.jupyter.widget-view+json": {
1235
+ "version_major": 2,
1236
+ "version_minor": 0,
1237
+ "model_id": "6ad5e72579ed463b9d6bbb1c3ef4d2a9"
1238
+ }
1239
+ },
1240
+ "metadata": {},
1241
+ "output_type": "display_data"
1242
+ }
1243
+ ],
1244
+ "execution_count": 42
1245
+ },
1246
+ {
1247
+ "metadata": {},
1248
+ "cell_type": "markdown",
1249
+ "source": "### NOTE: You need to ensure that your Hugging Face token has both read and write access to your repository and Hugging Face organization.",
1250
+ "id": "3826c0b6195a8fd5"
1251
+ },
1252
+ {
1253
+ "metadata": {
1254
+ "ExecuteTime": {
1255
+ "end_time": "2024-12-16T08:25:45.224733Z",
1256
+ "start_time": "2024-12-16T08:25:21.297050Z"
1257
+ }
1258
+ },
1259
+ "cell_type": "code",
1260
+ "source": [
1261
+ "# Load model directly\n",
1262
+ "from transformers import AutoModel, AutoConfig\n",
1263
+ "config = AutoConfig.from_pretrained(\"CISProject/News-Headline-Classifier-Notebook\")\n",
1264
+ "model = AutoModel.from_pretrained(\"CISProject/News-Headline-Classifier-Notebook\",config = config)"
1265
+ ],
1266
+ "id": "33a0ca269c24d700",
1267
+ "outputs": [
1268
+ {
1269
+ "data": {
1270
+ "text/plain": [
1271
+ "config.json: 0%| | 0.00/1.09k [00:00<?, ?B/s]"
1272
+ ],
1273
+ "application/vnd.jupyter.widget-view+json": {
1274
+ "version_major": 2,
1275
+ "version_minor": 0,
1276
+ "model_id": "11974267c2544c6885478d2f63a8c41c"
1277
+ }
1278
+ },
1279
+ "metadata": {},
1280
+ "output_type": "display_data"
1281
+ },
1282
+ {
1283
+ "data": {
1284
+ "text/plain": [
1285
+ "model.safetensors: 0%| | 0.00/517M [00:00<?, ?B/s]"
1286
+ ],
1287
+ "application/vnd.jupyter.widget-view+json": {
1288
+ "version_major": 2,
1289
+ "version_minor": 0,
1290
+ "model_id": "140a2fb1413a409782bb4322ec70e4d4"
1291
+ }
1292
+ },
1293
+ "metadata": {},
1294
+ "output_type": "display_data"
1295
+ },
1296
+ {
1297
+ "name": "stderr",
1298
+ "output_type": "stream",
1299
+ "text": [
1300
+ "C:\\Users\\swall\\anaconda3\\envs\\newsCLS\\Lib\\site-packages\\torch\\nn\\utils\\weight_norm.py:143: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`.\n",
1301
+ " WeightNorm.apply(module, name, dim)\n",
1302
+ "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
1303
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
1304
+ "Some weights of the model checkpoint at CISProject/News-Headline-Classifier-Notebook were not used when initializing CustomModel: ['freq.lin0.parametrizations.weight.original0', 'freq.lin0.parametrizations.weight.original1', 'freq.lin1.parametrizations.weight.original0', 'freq.lin1.parametrizations.weight.original1', 'freq.lin2.parametrizations.weight.original0', 'freq.lin2.parametrizations.weight.original1', 'pos.lin0.parametrizations.weight.original0', 'pos.lin0.parametrizations.weight.original1', 'pos.lin1.parametrizations.weight.original0', 'pos.lin1.parametrizations.weight.original1', 'pos.lin2.parametrizations.weight.original0', 'pos.lin2.parametrizations.weight.original1']\n",
1305
+ "- This IS expected if you are initializing CustomModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
1306
+ "- This IS NOT expected if you are initializing CustomModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
1307
+ "Some weights of CustomModel were not initialized from the model checkpoint at CISProject/News-Headline-Classifier-Notebook and are newly initialized: ['freq.lin0.weight_g', 'freq.lin0.weight_v', 'freq.lin1.weight_g', 'freq.lin1.weight_v', 'freq.lin2.weight_g', 'freq.lin2.weight_v', 'pos.lin0.weight_g', 'pos.lin0.weight_v', 'pos.lin1.weight_g', 'pos.lin1.weight_v', 'pos.lin2.weight_g', 'pos.lin2.weight_v']\n",
1308
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
1309
+ ]
1310
+ }
1311
+ ],
1312
+ "execution_count": 43
1313
+ },
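+ {
+ "metadata": {},
+ "cell_type": "code",
+ "source": [
+ "# Hypothetical single-headline inference sketch (added; not part of the original notebook).\n",
+ "# It assumes `vectorizer`, `w2v_model`, and the `model` just loaded from the Hub are still in memory,\n",
+ "# and mirrors the preprocessing in preprocess_data(); the example headline is made up.\n",
+ "headline = \"Senate passes sweeping budget bill after overnight session\"\n",
+ "tok = RobertaTokenizer.from_pretrained(\"roberta-base\")\n",
+ "clean = re.sub(r'[^\\w\\s]', '', headline.lower()).strip()\n",
+ "freq = torch.tensor(vectorizer.transform([clean]).toarray(), dtype=torch.float32)\n",
+ "enc = tok(headline, padding=\"max_length\", truncation=True, max_length=128, return_tensors=\"pt\")\n",
+ "seq = torch.stack([enc[\"input_ids\"], enc[\"attention_mask\"]], dim=1)\n",
+ "vecs = [w2v_model[t] if t in w2v_model else np.zeros(300, dtype=np.float32) for t in clean.split()[:128]]\n",
+ "vecs += [np.zeros(300, dtype=np.float32)] * (128 - len(vecs))\n",
+ "pos = torch.tensor(np.array(vecs), dtype=torch.float32).unsqueeze(0)\n",
+ "model.eval()\n",
+ "with torch.no_grad():\n",
+ "    logits = model({\"freq_inputs\": freq, \"seq_inputs\": seq, \"pos_inputs\": pos})\n",
+ "print(\"fox\" if logits.argmax(dim=1).item() == 1 else \"nbc\") # label 1 = fox in make_labels\n"
+ ],
+ "id": "single-headline-inference-sketch",
+ "outputs": [],
+ "execution_count": null
+ },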
1314
+ {
1315
+ "metadata": {
1316
+ "ExecuteTime": {
1317
+ "end_time": "2024-12-16T08:26:00.658712Z",
1318
+ "start_time": "2024-12-16T08:25:47.304227Z"
1319
+ }
1320
+ },
1321
+ "cell_type": "code",
1322
+ "source": [
1323
+ "from transformers import AutoConfig, AutoModel\n",
1324
+ "from sklearn.metrics import accuracy_score, classification_report\n",
1325
+ "\n",
1326
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
1327
+ "model.to(device)\n",
1328
+ "\n",
1329
+ "#criterion = torch.nn.BCEWithLogitsLoss()\n",
1330
+ "\n",
1331
+ "criterion = torch.nn.CrossEntropyLoss()\n",
1332
+ "def evaluate_model(model, val_loader, criterion, device=\"cuda\"):\n",
1333
+ " model.eval()\n",
1334
+ " val_loss = 0.0\n",
1335
+ " correct = 0\n",
1336
+ " total = 0\n",
1337
+ " all_preds = []\n",
1338
+ " all_labels = []\n",
1339
+ " with torch.no_grad():\n",
1340
+ " for batch_inputs, labels in tqdm(val_loader, desc=\"Testing\", leave=False):\n",
1341
+ " freq_inputs = batch_inputs[\"freq_inputs\"].to(device)\n",
1342
+ " seq_inputs = batch_inputs[\"seq_inputs\"].to(device)\n",
1343
+ " pos_inputs = batch_inputs[\"pos_inputs\"].to(device)\n",
1344
+ " labels = labels.to(device)\n",
1345
+ "\n",
1346
+ " preds = model({\"freq_inputs\": freq_inputs, \"seq_inputs\": seq_inputs, \"pos_inputs\": pos_inputs})\n",
1347
+ " loss = criterion(preds, labels)\n",
1348
+ " _, preds = torch.max(preds, dim=1)\n",
1349
+ " # preds = (torch.sigmoid(preds) > 0.5).float()\n",
1350
+ " val_loss += loss.item()\n",
1351
+ " total += labels.size(0)\n",
1352
+ " correct += (preds == labels).sum().item()\n",
1353
+ " all_preds.extend(preds.cpu().numpy())\n",
1354
+ " all_labels.extend(labels.cpu().numpy())\n",
1355
+ "\n",
1356
+ " return accuracy_score(all_labels, all_preds), classification_report(all_labels, all_preds)\n",
1357
+ "\n",
1358
+ "\n",
1359
+ "accuracy, report = evaluate_model(model, test_loader, criterion)\n",
1360
+ "print(f\"Accuracy: {accuracy:.4f}\")\n",
1361
+ "print(report)\n"
1362
+ ],
1363
+ "id": "cc313b4396f87690",
1364
+ "outputs": [
1365
+ {
1366
+ "name": "stderr",
1367
+ "output_type": "stream",
1368
+ "text": [
1369
+ " "
1370
+ ]
1371
+ },
1372
+ {
1373
+ "name": "stdout",
1374
+ "output_type": "stream",
1375
+ "text": [
1376
+ "Accuracy: 0.8883\n",
1377
+ " precision recall f1-score support\n",
1378
+ "\n",
1379
+ " 0 0.86 0.91 0.88 356\n",
1380
+ " 1 0.92 0.87 0.89 405\n",
1381
+ "\n",
1382
+ " accuracy 0.89 761\n",
1383
+ " macro avg 0.89 0.89 0.89 761\n",
1384
+ "weighted avg 0.89 0.89 0.89 761\n",
1385
+ "\n"
1386
+ ]
1387
+ },
1388
+ {
1389
+ "name": "stderr",
1390
+ "output_type": "stream",
1391
+ "text": [
1392
+ "\r"
1393
+ ]
1394
+ }
1395
+ ],
1396
+ "execution_count": 44
1397
+ }
1398
+ ],
1399
+ "metadata": {
1400
+ "kernelspec": {
1401
+ "name": "python3",
1402
+ "language": "python",
1403
+ "display_name": "Python 3 (ipykernel)"
1404
+ }
1405
+ },
1406
+ "nbformat": 4,
1407
+ "nbformat_minor": 5
1408
+ }