TUEN-YUE committed on
Commit 8e86e93 · verified
1 Parent(s): d6cf153

Delete train&test.ipynb

Files changed (1)
  1. train&test.ipynb +0 -1309
train&test.ipynb DELETED
@@ -1,1309 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "metadata": {},
5
- "cell_type": "markdown",
6
- "source": [
7
- "# Installing dependencies\n",
8
- "## Please make a copy of this notebook."
9
- ],
10
- "id": "13156d7ed48b282"
11
- },
12
- {
13
- "metadata": {},
14
- "cell_type": "markdown",
15
- "source": [
16
- "# Huggingface login\n",
17
- "You will require your personal token."
18
- ],
19
- "id": "432a756039e6399"
20
- },
21
- {
22
- "metadata": {},
23
- "cell_type": "code",
24
- "source": "source": [
25
- "!pip install geopy > delete.txt\n",
26
- "!pip install datasets > delete.txt\n",
27
- "!pip install torch torchvision datasets > delete.txt\n",
28
- "!pip install huggingface_hub > delete.txt\n",
29
- "!pip install pyhocon > delete.txt\n",
30
- "!pip install transformers > delete.txt\n",
31
- "!pip install gensim > delete.txt\n",
32
- "!rm delete.txt"
33
- ],
34
- "id": "2e73da09a7c6171e",
35
- "outputs": [],
36
- "execution_count": null
37
- },
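Note: the "Huggingface login" cell above says a personal token is required, but no login call appears anywhere in this listing. A minimal sketch, assuming the standard `huggingface_hub` login helpers, might look like:

```python
# Hypothetical login cell (not part of the original notebook).
from huggingface_hub import notebook_login

# Prompts interactively for a personal access token with write access;
# huggingface_hub.login(token=...) can be used in non-interactive environments.
notebook_login()
```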
38
- {
39
- "metadata": {},
40
- "cell_type": "markdown",
41
- "source": "# Part 1: Load Data",
42
- "id": "c731d9c1ebb477dc"
43
- },
44
- {
45
- "metadata": {},
46
- "cell_type": "markdown",
47
- "source": "## Downloading the train and test dataset",
48
- "id": "14070f20b547688f"
49
- },
50
- {
51
- "metadata": {},
52
- "cell_type": "markdown",
53
- "source": "",
54
- "id": "b8920847b7cc378d"
55
- },
56
- {
57
- "metadata": {},
58
- "cell_type": "code",
59
- "source": [
60
- "from datasets import load_dataset\n",
61
- "\n",
62
- "dataset_train = load_dataset(\"CISProject/FOX_NBC\", split=\"train\")\n",
63
- "dataset_test = load_dataset(\"CISProject/FOX_NBC\", split=\"test\")\n",
64
- "# dataset_test = load_dataset(\"CISProject/FOX_NBC\", split=\"test_data_random_subset\")\n"
65
- ],
66
- "id": "877c90c978d62b7d",
67
- "outputs": [],
68
- "execution_count": 12
69
- },
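As a quick sanity check of the loaded splits, one could peek at a single example; the `title` and `labels` columns assumed here are the ones shown in the dataset printout later in this notebook.

```python
# Illustrative only: inspect one training example and the dataset schema.
print(dataset_train[0]["title"], dataset_train[0]["labels"])
print(dataset_train.features)
```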
70
- {
71
- "metadata": {
72
- "ExecuteTime": {
73
- "end_time": "2024-12-16T18:33:00.318956Z",
74
- "start_time": "2024-12-16T18:33:00.310428Z"
75
- }
76
- },
77
- "cell_type": "code",
78
- "source": [
79
- "import numpy as np\n",
80
- "import torch\n",
81
- "import re\n",
82
- "from transformers import BertTokenizer\n",
83
- "from transformers import RobertaTokenizer\n",
84
- "from sklearn.feature_extraction.text import CountVectorizer\n",
85
- "from gensim.models import KeyedVectors\n",
86
- "from sklearn.feature_extraction.text import TfidfVectorizer\n",
87
- "\n",
88
- "def preprocess_data(data,\n",
89
- " mode=\"train\",\n",
90
- " vectorizer=None,\n",
91
- " w2v_model=None,\n",
92
- " max_features=4096,\n",
93
- " max_seq_length=128,\n",
94
- " num_proc=4):\n",
95
- " if w2v_model is None:\n",
96
- " raise ValueError(\"w2v_model must be provided for Word2Vec embeddings.\")\n",
97
- "\n",
98
- " # tokenizer = BertTokenizer.from_pretrained(\"bert-base-uncased\")\n",
99
- " tokenizer = RobertaTokenizer.from_pretrained(\"roberta-base\")\n",
100
- " # 1. Clean text once\n",
101
- " def clean_text(examples):\n",
102
- " import re\n",
103
- " cleaned = []\n",
104
- " for text in examples[\"title\"]:\n",
105
- " text = text.lower()\n",
106
- " text = re.sub(r'[^\\w\\s]', '', text)\n",
107
- " text = text.strip()\n",
108
- " cleaned.append(text)\n",
109
- " return {\"clean_title\": cleaned}\n",
110
- "\n",
111
- " data = data.map(clean_text, batched=True, num_proc=num_proc)\n",
112
- "\n",
113
- " # 2. Fit CountVectorizer on training data if needed\n",
114
- " if mode == \"train\" and vectorizer is None:\n",
115
- " # Collect all cleaned titles to fit\n",
116
- " all_titles = data[\"clean_title\"]\n",
117
- " #vectorizer = CountVectorizer(max_features=max_features, ngram_range=(1,2))\n",
118
- " vectorizer = TfidfVectorizer(max_features=max_features)\n",
119
- " vectorizer.fit(all_titles)\n",
120
- " print(\"vectorizer fitted on training data.\")\n",
121
- "\n",
122
- " # 3. Transform titles with vectorizer once\n",
123
- " def vectorize_batch(examples):\n",
124
- " import numpy as np\n",
125
- " freq = vectorizer.transform(examples[\"clean_title\"]).toarray().astype(np.float32)\n",
126
- " return {\"freq_inputs\": freq}\n",
127
- "\n",
128
- " data = data.map(vectorize_batch, batched=True, num_proc=num_proc)\n",
129
- "\n",
130
- " # 4. Tokenize with BERT once\n",
131
- " def tokenize_batch(examples):\n",
132
- " tokenized = tokenizer(\n",
133
- " examples[\"title\"],\n",
134
- " padding=\"max_length\",\n",
135
- " truncation=True,\n",
136
- " max_length=max_seq_length\n",
137
- " )\n",
138
- " return {\n",
139
- " \"input_ids\": tokenized[\"input_ids\"],\n",
140
- " \"attention_mask\": tokenized[\"attention_mask\"]\n",
141
- " }\n",
142
- "\n",
143
- " data = data.map(tokenize_batch, batched=True, num_proc=num_proc)\n",
144
- "\n",
145
- " # 5. Convert titles into tokens for W2V\n",
146
- " def split_tokens(examples):\n",
147
- " tokens_list = [t.split() for t in examples[\"clean_title\"]]\n",
148
- " return {\"tokens\": tokens_list}\n",
149
- "\n",
150
- " data = data.map(split_tokens, batched=True, num_proc=num_proc)\n",
151
- "\n",
152
- " # Build an embedding dictionary for all unique tokens (do this once before embedding map)\n",
153
- " unique_tokens = set()\n",
154
- " for tokens in data[\"tokens\"]:\n",
155
- " unique_tokens.update(tokens)\n",
156
- "\n",
157
- " embedding_dim = w2v_model.vector_size\n",
158
- " embedding_dict = {}\n",
159
- " for tk in unique_tokens:\n",
160
- " if tk in w2v_model:\n",
161
- " embedding_dict[tk] = w2v_model[tk].astype(np.float32)\n",
162
- " else:\n",
163
- " embedding_dict[tk] = np.zeros((embedding_dim,), dtype=np.float32)\n",
164
- "\n",
165
- " def w2v_embedding_batch(examples):\n",
166
- " import numpy as np\n",
167
- " batch_w2v = []\n",
168
- " for tokens in examples[\"tokens\"]:\n",
169
- " vectors = [embedding_dict[tk] for tk in tokens[:max_seq_length]]\n",
170
- " if len(vectors) < max_seq_length:\n",
171
- " vectors += [np.zeros((embedding_dim,), dtype=np.float32)] * (max_seq_length - len(vectors))\n",
172
- " batch_w2v.append(vectors)\n",
173
- " return {\"pos_inputs\": batch_w2v}\n",
174
- "\n",
175
- "\n",
176
- " data = data.map(w2v_embedding_batch, batched=True, batch_size=32, num_proc=num_proc)\n",
177
- "\n",
178
- " # 7. Create labels\n",
179
- " def make_labels(examples):\n",
180
- " labels = examples[\"labels\"]\n",
181
- " return {\"labels\": labels}\n",
182
- "\n",
183
- " data = data.map(make_labels, batched=True, num_proc=num_proc)\n",
184
- "\n",
185
- " # Convert freq_inputs and pos_inputs to torch tensors in a final map step\n",
186
- " def to_tensors(examples):\n",
187
- " import torch\n",
188
- "\n",
189
- " freq_inputs = torch.tensor(examples[\"freq_inputs\"], dtype=torch.float32)\n",
190
- " input_ids = torch.tensor(examples[\"input_ids\"])\n",
191
- " attention_mask = torch.tensor(examples[\"attention_mask\"])\n",
192
- " pos_inputs = torch.tensor(examples[\"pos_inputs\"], dtype=torch.float32)\n",
193
- " labels = torch.tensor(examples[\"labels\"],dtype=torch.long)\n",
194
- "\n",
195
- " # seq_inputs shape: (batch_size, 2, seq_len)\n",
196
- " seq_inputs = torch.stack([input_ids, attention_mask], dim=1)\n",
197
- "\n",
198
- " return {\n",
199
- " \"freq_inputs\": freq_inputs,\n",
200
- " \"seq_inputs\": seq_inputs,\n",
201
- " \"pos_inputs\": pos_inputs,\n",
202
- " \"labels\": labels\n",
203
- " }\n",
204
- "\n",
205
- " # Apply final conversion to tensor\n",
206
- " processed_data = data.map(to_tensors, batched=True, num_proc=num_proc)\n",
207
- "\n",
208
- " return processed_data, vectorizer\n"
209
- ],
210
- "id": "dc2ba675ce880d6d",
211
- "outputs": [],
212
- "execution_count": 13
213
- },
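The next cell loads `./GoogleNews-vectors-negative300.bin` from disk, but the notebook never downloads it. A possible alternative, assuming gensim's downloader mirror of the GoogleNews vectors is an acceptable substitute for the local `.bin` file, is to fetch them programmatically:

```python
# Assumption: gensim's hosted copy of the GoogleNews vectors (~1.6 GB download)
# can stand in for the local GoogleNews-vectors-negative300.bin used below.
import gensim.downloader as api

w2v_model = api.load("word2vec-google-news-300")  # returns a KeyedVectors instance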
214
- {
215
- "metadata": {
216
- "ExecuteTime": {
217
- "end_time": "2024-12-16T18:33:26.890102Z",
218
- "start_time": "2024-12-16T18:33:00.323837Z"
219
- }
220
- },
221
- "cell_type": "code",
222
- "source": [
223
- "from gensim.models import KeyedVectors\n",
224
- "w2v_model = KeyedVectors.load_word2vec_format(\"./GoogleNews-vectors-negative300.bin\", binary=True)\n",
225
- "\n",
226
- "dataset_train,vectorizer = preprocess_data(\n",
227
- " data=dataset_train,\n",
228
- " mode=\"train\",\n",
229
- " w2v_model=w2v_model,\n",
230
- " max_features=8192,\n",
231
- " max_seq_length=128\n",
232
- ")\n",
233
- "\n",
234
- "dataset_test, _ = preprocess_data(\n",
235
- " data=dataset_test,\n",
236
- " mode=\"test\",\n",
237
- " vectorizer=vectorizer,\n",
238
- " w2v_model=w2v_model,\n",
239
- " max_features=8192,\n",
240
- " max_seq_length=128\n",
241
- ")"
242
- ],
243
- "id": "158b99950fb22d1",
244
- "outputs": [
245
- {
246
- "name": "stdout",
247
- "output_type": "stream",
248
- "text": [
249
- "vectorizer fitted on training data.\n"
250
- ]
251
- }
252
- ],
253
- "execution_count": 14
254
- },
255
- {
256
- "metadata": {
257
- "ExecuteTime": {
258
- "end_time": "2024-12-16T18:33:26.904401Z",
259
- "start_time": "2024-12-16T18:33:26.899278Z"
260
- }
261
- },
262
- "cell_type": "code",
263
- "source": [
264
- "print(dataset_train)\n",
265
- "print(dataset_test)"
266
- ],
267
- "id": "edd80d33175c96a0",
268
- "outputs": [
269
- {
270
- "name": "stdout",
271
- "output_type": "stream",
272
- "text": [
273
- "Dataset({\n",
274
- " features: ['title', 'outlet', 'index', 'url', 'labels', 'clean_title', 'freq_inputs', 'input_ids', 'attention_mask', 'tokens', 'pos_inputs', 'seq_inputs'],\n",
275
- " num_rows: 3044\n",
276
- "})\n",
277
- "Dataset({\n",
278
- " features: ['title', 'outlet', 'index', 'url', 'labels', 'clean_title', 'freq_inputs', 'input_ids', 'attention_mask', 'tokens', 'pos_inputs', 'seq_inputs'],\n",
279
- " num_rows: 761\n",
280
- "})\n"
281
- ]
282
- }
283
- ],
284
- "execution_count": 15
285
- },
286
- {
287
- "metadata": {},
288
- "cell_type": "markdown",
289
- "source": "# Part 2: Model",
290
- "id": "c9a49fc1fbca29d7"
291
- },
292
- {
293
- "metadata": {},
294
- "cell_type": "markdown",
295
- "source": "## Defining the Custom Model",
296
- "id": "aebe5e51f0e611cc"
297
- },
298
- {
299
- "metadata": {},
300
- "cell_type": "markdown",
301
- "source": "",
302
- "id": "f0eae08a025b6ed9"
303
- },
304
- {
305
- "metadata": {
306
- "ExecuteTime": {
307
- "end_time": "2024-12-16T18:33:26.937874Z",
308
- "start_time": "2024-12-16T18:33:26.926248Z"
309
- }
310
- },
311
- "cell_type": "code",
312
- "source": [
313
- "# TODO: import all packages necessary for your custom model\n",
314
- "import pandas as pd\n",
315
- "import os\n",
316
- "from torch.utils.data import DataLoader\n",
317
- "from transformers import PreTrainedModel, PretrainedConfig, AutoConfig, AutoModel\n",
318
- "import torch\n",
319
- "import torch.nn as nn\n",
320
- "from transformers import RobertaModel, RobertaConfig,RobertaForSequenceClassification, BertModel\n",
321
- "from model.network import Classifier\n",
322
- "from model.frequential import FreqNetwork\n",
323
- "from model.sequential import SeqNetwork\n",
324
- "from model.positional import PosNetwork\n",
325
- "\n",
326
- "class CustomConfig(PretrainedConfig):\n",
327
- " model_type = \"headlineclassifier\"\n",
328
- "\n",
329
- " def __init__(\n",
330
- " self,\n",
331
- " base_exp_dir=\"./exp/fox_nbc/\",\n",
332
- " # dataset={\"data_dir\": \"./data/CASE_NAME/data.csv\", \"transform\": True},\n",
333
- " train={\n",
334
- " \"learning_rate\": 2e-5,\n",
335
- " \"learning_rate_alpha\": 0.05,\n",
336
- " \"end_iter\": 10,\n",
337
- " \"batch_size\": 32,\n",
338
- " \"warm_up_end\": 2,\n",
339
- " \"anneal_end\": 5,\n",
340
- " \"save_freq\": 1,\n",
341
- " \"val_freq\": 1,\n",
342
- " },\n",
343
- " model={\n",
344
- " \"freq\": {\n",
345
- " \"tfidf_input_dim\": 8145,\n",
346
- " \"tfidf_output_dim\": 128,\n",
347
- " \"tfidf_hidden_dim\": 512,\n",
348
- " \"n_layers\": 2,\n",
349
- " \"skip_in\": [80],\n",
350
- " \"weight_norm\": True,\n",
351
- " },\n",
352
- " \"pos\": {\n",
353
- " \"input_dim\": 300,\n",
354
- " \"output_dim\": 128,\n",
355
- " \"hidden_dim\": 256,\n",
356
- " \"n_layers\": 2,\n",
357
- " \"skip_in\": [80],\n",
358
- " \"weight_norm\": True,\n",
359
- " },\n",
360
- " \"cls\": {\n",
361
- " \"combined_input\": 1024, #1024\n",
362
- " \"combined_dim\": 128,\n",
363
- " \"num_classes\": 2,\n",
364
- " \"n_layers\": 2,\n",
365
- " \"skip_in\": [80],\n",
366
- " \"weight_norm\": True,\n",
367
- " },\n",
368
- " },\n",
369
- " **kwargs,\n",
370
- " ):\n",
371
- " super().__init__(**kwargs)\n",
372
- "\n",
373
- " self.base_exp_dir = base_exp_dir\n",
374
- " # self.dataset = dataset\n",
375
- " self.train = train\n",
376
- " self.model = model\n",
377
- "\n",
378
- "# TODO: define all parameters needed for your model, as well as calling the model itself\n",
379
- "class CustomModel(PreTrainedModel):\n",
380
- " config_class = CustomConfig\n",
381
- "\n",
382
- " def __init__(self, config):\n",
383
- " super().__init__(config)\n",
384
- " self.conf = config\n",
385
- " self.freq = FreqNetwork(**self.conf.model[\"freq\"])\n",
386
- " self.pos = PosNetwork(**self.conf.model[\"pos\"])\n",
387
- " self.cls = Classifier(**self.conf.model[\"cls\"])\n",
388
- " self.fc = nn.Linear(self.conf.model[\"cls\"][\"combined_input\"],2)\n",
389
- " self.seq = RobertaModel.from_pretrained(\"roberta-base\")\n",
390
- " # self.seq = BertModel.from_pretrained(\"bert-base-uncased\")\n",
391
- " #for param in self.roberta.parameters():\n",
392
- " # param.requires_grad = False\n",
393
- " self.dropout = nn.Dropout(0.2)\n",
394
- "\n",
395
- " def forward(self, x):\n",
396
- " freq_inputs = x[\"freq_inputs\"]\n",
397
- " seq_inputs = x[\"seq_inputs\"]\n",
398
- " pos_inputs = x[\"pos_inputs\"]\n",
399
- " seq_feature = self.seq(\n",
400
- " input_ids=seq_inputs[:,0,:],\n",
401
- " attention_mask=seq_inputs[:,1,:]\n",
402
- " ).pooler_output # last_hidden_state[:, 0, :]\n",
403
- " freq_feature = self.freq(freq_inputs) # Shape: (batch_size, 128)\n",
404
- "\n",
405
- " pos_feature = self.pos(pos_inputs) #Shape: (batch_size, 128)\n",
406
- " inputs = torch.cat((seq_feature, freq_feature, pos_feature), dim=1) # Shape: (batch_size, 384)\n",
407
- " # inputs = torch.cat((seq_feature, freq_feature), dim=1) # Shape: (batch_size,256)\n",
408
- " # inputs = seq_feature\n",
409
- "\n",
410
- " x = inputs\n",
411
- " x = self.dropout(x)\n",
412
- " outputs = self.fc(x)\n",
413
- "\n",
414
- " return outputs\n",
415
- "\n",
416
- " def save_model(self, save_path):\n",
417
- " \"\"\"Save the model locally using the Hugging Face format.\"\"\"\n",
418
- " self.save_pretrained(save_path)\n",
419
- "\n",
420
- " def push_model(self, repo_name):\n",
421
- " \"\"\"Push the model to the Hugging Face Hub.\"\"\"\n",
422
- " self.push_to_hub(repo_name)"
423
- ],
424
- "id": "21f079d0c52d7d",
425
- "outputs": [],
426
- "execution_count": 16
427
- },
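As a quick check on the concatenation in `forward`, assuming roberta-base's 768-dimensional `pooler_output`, the three feature streams should add up to `combined_input`:

```python
# Sketch, not from the original notebook: 768 (RoBERTa) + 128 (freq) + 128 (pos) == 1024.
cfg = CustomConfig()
total = 768 + cfg.model["freq"]["tfidf_output_dim"] + cfg.model["pos"]["output_dim"]
assert total == cfg.model["cls"]["combined_input"], total
```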
428
- {
429
- "metadata": {
430
- "ExecuteTime": {
431
- "end_time": "2024-12-16T18:33:27.235482Z",
432
- "start_time": "2024-12-16T18:33:26.951564Z"
433
- }
434
- },
435
- "cell_type": "code",
436
- "source": [
437
- "from huggingface_hub import hf_hub_download\n",
438
- "\n",
439
- "AutoConfig.register(\"headlineclassifier\", CustomConfig)\n",
440
- "AutoModel.register(CustomConfig, CustomModel)\n",
441
- "config = CustomConfig()\n",
442
- "model = CustomModel(config)\n",
443
- "\n",
444
- "REPO_NAME = \"CISProject/News-Headline-Classifier-Notebook\" # TODO: PROVIDE A STRING TO YOUR REPO ON HUGGINGFACE"
445
- ],
446
- "id": "b6ba3f96d3ce21",
447
- "outputs": [
448
- {
449
- "name": "stderr",
450
- "output_type": "stream",
451
- "text": [
452
- "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
453
- "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
454
- ]
455
- }
456
- ],
457
- "execution_count": 17
458
- },
459
- {
460
- "metadata": {
461
- "ExecuteTime": {
462
- "end_time": "2024-12-16T18:33:27.279248Z",
463
- "start_time": "2024-12-16T18:33:27.261675Z"
464
- }
465
- },
466
- "cell_type": "code",
467
- "source": [
468
- "import torch\n",
469
- "from tqdm import tqdm\n",
470
- "import os\n",
471
- "\n",
472
- "\n",
473
- "class Trainer:\n",
474
- " def __init__(self, model, train_loader, val_loader, config, device=\"cuda\"):\n",
475
- " self.model = model.to(device)\n",
476
- " self.train_loader = train_loader\n",
477
- " self.val_loader = val_loader\n",
478
- " self.device = device\n",
479
- " self.conf = config\n",
480
- "\n",
481
- " self.end_iter = self.conf.train[\"end_iter\"]\n",
482
- " self.save_freq = self.conf.train[\"save_freq\"]\n",
483
- " self.val_freq = self.conf.train[\"val_freq\"]\n",
484
- "\n",
485
- " self.batch_size = self.conf.train['batch_size']\n",
486
- " self.learning_rate = self.conf.train['learning_rate']\n",
487
- " self.learning_rate_alpha = self.conf.train['learning_rate_alpha']\n",
488
- " self.warm_up_end = self.conf.train['warm_up_end']\n",
489
- " self.anneal_end = self.conf.train['anneal_end']\n",
490
- "\n",
491
- " self.optimizer = torch.optim.Adam(model.parameters(), lr=self.learning_rate)\n",
492
- " #self.criterion = torch.nn.BCEWithLogitsLoss()\n",
493
- " self.criterion = torch.nn.CrossEntropyLoss()\n",
494
- " self.save_path = os.path.join(self.conf.base_exp_dir, \"checkpoints\")\n",
495
- " os.makedirs(self.save_path, exist_ok=True)\n",
496
- "\n",
497
- " self.iter_step = 0\n",
498
- "\n",
499
- " self.val_loss = None\n",
500
- "\n",
501
- " def get_cos_anneal_ratio(self):\n",
502
- " if self.anneal_end == 0.0:\n",
503
- " return 1.0\n",
504
- " else:\n",
505
- " return np.min([1.0, self.iter_step / self.anneal_end])\n",
506
- "\n",
507
- " def update_learning_rate(self):\n",
508
- " if self.iter_step < self.warm_up_end:\n",
509
- " learning_factor = self.iter_step / self.warm_up_end\n",
510
- " else:\n",
511
- " alpha = self.learning_rate_alpha\n",
512
- " progress = (self.iter_step - self.warm_up_end) / (self.end_iter - self.warm_up_end)\n",
513
- " learning_factor = (np.cos(np.pi * progress) + 1.0) * 0.5 * (1 - alpha) + alpha\n",
514
- "\n",
515
- " for g in self.optimizer.param_groups:\n",
516
- " g['lr'] = self.learning_rate * learning_factor\n",
517
- "\n",
518
- " def train(self):\n",
519
- " for epoch in range(self.end_iter):\n",
520
- " self.update_learning_rate()\n",
521
- " self.model.train()\n",
522
- " epoch_loss = 0.0\n",
523
- " correct = 0\n",
524
- " total = 0\n",
525
- "\n",
526
- " for batch_inputs, labels in tqdm(self.train_loader, desc=f\"Epoch {epoch + 1}/{self.end_iter}\"):\n",
527
- " # Extract features\n",
528
- "\n",
529
- " freq_inputs = batch_inputs[\"freq_inputs\"].to(self.device)\n",
530
- " seq_inputs = batch_inputs[\"seq_inputs\"].to(self.device)\n",
531
- " pos_inputs = batch_inputs[\"pos_inputs\"].to(self.device)\n",
532
- " # y_train = labels.to(self.device)[:,None]\n",
533
- " y_train = labels.to(self.device)\n",
534
- "\n",
535
- " # Forward pass\n",
536
- " preds = self.model({\"freq_inputs\": freq_inputs, \"seq_inputs\": seq_inputs, \"pos_inputs\": pos_inputs})\n",
537
- " loss = self.criterion(preds, y_train)\n",
538
- "\n",
539
- " # preds = (torch.sigmoid(preds) > 0.5).int()\n",
540
- " # Backward pass\n",
541
- " self.optimizer.zero_grad()\n",
542
- " loss.backward()\n",
543
- " self.optimizer.step()\n",
544
- " _, preds = torch.max(preds, dim=1)\n",
545
- " # Metrics\n",
546
- " epoch_loss += loss.item()\n",
547
- " total += y_train.size(0)\n",
548
- " # print(preds.shape)\n",
549
- " correct += (preds == y_train).sum().item()\n",
550
- "\n",
551
- " # Log epoch metrics\n",
552
- " print(f\"Train Loss: {epoch_loss / len(self.train_loader):.4f}\")\n",
553
- " print(f\"Train Accuracy: {correct / total:.4f}\")\n",
554
- "\n",
555
- " # Validation and Save Checkpoints\n",
556
- " if (epoch + 1) % self.val_freq == 0:\n",
557
- " self.val()\n",
558
- " if (epoch + 1) % self.save_freq == 0:\n",
559
- " self.save_checkpoint(epoch + 1)\n",
560
- "\n",
561
- " # Update learning rate\n",
562
- " self.iter_step += 1\n",
563
- " self.update_learning_rate()\n",
564
- "\n",
565
- "\n",
566
- " def val(self):\n",
567
- " self.model.eval()\n",
568
- " val_loss = 0.0\n",
569
- " correct = 0\n",
570
- " total = 0\n",
571
- "\n",
572
- " with torch.no_grad():\n",
573
- " for batch_inputs, labels in tqdm(self.val_loader, desc=\"Validation\", leave=False):\n",
574
- " freq_inputs = batch_inputs[\"freq_inputs\"].to(self.device)\n",
575
- " seq_inputs = batch_inputs[\"seq_inputs\"].to(self.device)\n",
576
- " pos_inputs = batch_inputs[\"pos_inputs\"].to(self.device)\n",
577
- " y_val = labels.to(self.device)\n",
578
- "\n",
579
- " preds = self.model({\"freq_inputs\": freq_inputs, \"seq_inputs\": seq_inputs, \"pos_inputs\": pos_inputs})\n",
580
- " loss = self.criterion(preds, y_val)\n",
581
- " # preds = (torch.sigmoid(preds)>0.5).float()\n",
582
- " _, preds = torch.max(preds, dim=1)\n",
583
- " val_loss += loss.item()\n",
584
- " total += y_val.size(0)\n",
585
- " correct += (preds == y_val).sum().item()\n",
586
- " if self.val_loss is None or val_loss < self.val_loss:\n",
587
- " self.val_loss = val_loss\n",
588
- " self.save_checkpoint(\"best\")\n",
589
- " # Log validation metrics\n",
590
- " print(f\"Validation Loss: {val_loss / len(self.val_loader):.4f}\")\n",
591
- " print(f\"Validation Accuracy: {correct / total:.4f}\")\n",
592
- "\n",
593
- " def save_checkpoint(self, epoch):\n",
594
- " \"\"\"Save model in Hugging Face format.\"\"\"\n",
595
- " checkpoint_dir = os.path.join(self.save_path, f\"checkpoint_epoch_{epoch}\")\n",
596
- " if epoch ==\"best\":\n",
597
- " checkpoint_dir = os.path.join(self.save_path, \"best\")\n",
598
- " self.model.save_pretrained(checkpoint_dir)\n",
599
- " print(f\"Checkpoint saved at {checkpoint_dir}\")"
600
- ],
601
- "id": "7be377251b81a25d",
602
- "outputs": [],
603
- "execution_count": 18
604
- },
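To make the schedule in `update_learning_rate` concrete, here is a standalone sketch using the same hyperparameters as `CustomConfig` (`warm_up_end=2`, `end_iter=10`, `learning_rate_alpha=0.05`); it prints the multiplier applied to the base learning rate at each epoch:

```python
import numpy as np

def lr_factor(step, warm_up_end=2, end_iter=10, alpha=0.05):
    # Linear warm-up for the first warm_up_end epochs...
    if step < warm_up_end:
        return step / warm_up_end
    # ...then cosine decay from 1.0 down to the floor alpha.
    progress = (step - warm_up_end) / (end_iter - warm_up_end)
    return (np.cos(np.pi * progress) + 1.0) * 0.5 * (1 - alpha) + alpha

print([round(lr_factor(s), 3) for s in range(10)])  # roughly [0.0, 0.5, 1.0, 0.96, 0.86, ...]
```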
605
- {
606
- "metadata": {
607
- "ExecuteTime": {
608
- "end_time": "2024-12-16T18:49:49.983176Z",
609
- "start_time": "2024-12-16T18:33:27.283252Z"
610
- }
611
- },
612
- "cell_type": "code",
613
- "source": [
614
- "from torch.utils.data import DataLoader\n",
615
- "\n",
616
- "# Define a collate function to handle the batched data\n",
617
- "def collate_fn(batch):\n",
618
- " freq_inputs = torch.stack([torch.tensor(item[\"freq_inputs\"]) for item in batch])\n",
619
- " seq_inputs = torch.stack([torch.tensor(item[\"seq_inputs\"]) for item in batch])\n",
620
- " pos_inputs = torch.stack([torch.tensor(item[\"pos_inputs\"]) for item in batch])\n",
621
- " labels = torch.tensor([torch.tensor(item[\"labels\"],dtype=torch.long) for item in batch])\n",
622
- " return {\"freq_inputs\": freq_inputs, \"seq_inputs\": seq_inputs, \"pos_inputs\": pos_inputs}, labels\n",
623
- "\n",
624
- "train_loader = DataLoader(dataset_train, batch_size=config.train[\"batch_size\"], shuffle=True,collate_fn=collate_fn)\n",
625
- "test_loader = DataLoader(dataset_test, batch_size=config.train[\"batch_size\"], shuffle=False,collate_fn=collate_fn)\n",
626
- "trainer = Trainer(model, train_loader, test_loader, config)\n",
627
- "\n",
628
- "# Train the model\n",
629
- "trainer.train()\n",
630
- "# Save the final model in Hugging Face format\n",
631
- "final_save_path = os.path.join(config.base_exp_dir, \"checkpoints\")\n",
632
- "model.save_pretrained(final_save_path)\n",
633
- "print(f\"Final model saved at {final_save_path}\")\n"
634
- ],
635
- "id": "dd1749c306f148eb",
636
- "outputs": [
637
- {
638
- "name": "stderr",
639
- "output_type": "stream",
640
- "text": [
641
- "Epoch 1/10: 100%|██████████| 96/96 [02:28<00:00, 1.55s/it]\n"
642
- ]
643
- },
644
- {
645
- "name": "stdout",
646
- "output_type": "stream",
647
- "text": [
648
- "Train Loss: 0.6943\n",
649
- "Train Accuracy: 0.4947\n"
650
- ]
651
- },
652
- {
653
- "name": "stderr",
654
- "output_type": "stream",
655
- "text": [
656
- " \r"
657
- ]
658
- },
659
- {
660
- "name": "stdout",
661
- "output_type": "stream",
662
- "text": [
663
- "Checkpoint saved at ./exp/fox_nbc/checkpoints\\best\n",
664
- "Validation Loss: 0.6931\n",
665
- "Validation Accuracy: 0.4980\n",
666
- "Checkpoint saved at ./exp/fox_nbc/checkpoints\\checkpoint_epoch_1\n"
667
- ]
668
- },
669
- {
670
- "name": "stderr",
671
- "output_type": "stream",
672
- "text": [
673
- "Epoch 2/10: 100%|██████████| 96/96 [01:34<00:00, 1.01it/s]\n"
674
- ]
675
- },
676
- {
677
- "name": "stdout",
678
- "output_type": "stream",
679
- "text": [
680
- "Train Loss: 0.6006\n",
681
- "Train Accuracy: 0.6597\n"
682
- ]
683
- },
684
- {
685
- "name": "stderr",
686
- "output_type": "stream",
687
- "text": [
688
- " \r"
689
- ]
690
- },
691
- {
692
- "name": "stdout",
693
- "output_type": "stream",
694
- "text": [
695
- "Checkpoint saved at ./exp/fox_nbc/checkpoints\\best\n",
696
- "Validation Loss: 0.4140\n",
697
- "Validation Accuracy: 0.8252\n",
698
- "Checkpoint saved at ./exp/fox_nbc/checkpoints\\checkpoint_epoch_2\n"
699
- ]
700
- },
701
- {
702
- "name": "stderr",
703
- "output_type": "stream",
704
- "text": [
705
- "Epoch 3/10: 100%|██████████| 96/96 [01:31<00:00, 1.05it/s]\n"
706
- ]
707
- },
708
- {
709
- "name": "stdout",
710
- "output_type": "stream",
711
- "text": [
712
- "Train Loss: 0.3597\n",
713
- "Train Accuracy: 0.8469\n"
714
- ]
715
- },
716
- {
717
- "name": "stderr",
718
- "output_type": "stream",
719
- "text": [
720
- " \r"
721
- ]
722
- },
723
- {
724
- "name": "stdout",
725
- "output_type": "stream",
726
- "text": [
727
- "Checkpoint saved at ./exp/fox_nbc/checkpoints\\best\n",
728
- "Validation Loss: 0.3259\n",
729
- "Validation Accuracy: 0.8541\n",
730
- "Checkpoint saved at ./exp/fox_nbc/checkpoints\\checkpoint_epoch_3\n"
731
- ]
732
- },
733
- {
734
- "name": "stderr",
735
- "output_type": "stream",
736
- "text": [
737
- "Epoch 4/10: 100%|██████████| 96/96 [01:00<00:00, 1.58it/s]\n"
738
- ]
739
- },
740
- {
741
- "name": "stdout",
742
- "output_type": "stream",
743
- "text": [
744
- "Train Loss: 0.2143\n",
745
- "Train Accuracy: 0.9205\n"
746
- ]
747
- },
748
- {
749
- "name": "stderr",
750
- "output_type": "stream",
751
- "text": [
752
- " \r"
753
- ]
754
- },
755
- {
756
- "name": "stdout",
757
- "output_type": "stream",
758
- "text": [
759
- "Checkpoint saved at ./exp/fox_nbc/checkpoints\\best\n",
760
- "Validation Loss: 0.2619\n",
761
- "Validation Accuracy: 0.8988\n",
762
- "Checkpoint saved at ./exp/fox_nbc/checkpoints\\checkpoint_epoch_4\n"
763
- ]
764
- },
765
- {
766
- "name": "stderr",
767
- "output_type": "stream",
768
- "text": [
769
- "Epoch 5/10: 100%|██████████| 96/96 [01:24<00:00, 1.13it/s]\n"
770
- ]
771
- },
772
- {
773
- "name": "stdout",
774
- "output_type": "stream",
775
- "text": [
776
- "Train Loss: 0.1113\n",
777
- "Train Accuracy: 0.9573\n"
778
- ]
779
- },
780
- {
781
- "name": "stderr",
782
- "output_type": "stream",
783
- "text": [
784
- " \r"
785
- ]
786
- },
787
- {
788
- "name": "stdout",
789
- "output_type": "stream",
790
- "text": [
791
- "Validation Loss: 0.4198\n",
792
- "Validation Accuracy: 0.8555\n",
793
- "Checkpoint saved at ./exp/fox_nbc/checkpoints\\checkpoint_epoch_5\n"
794
- ]
795
- },
796
- {
797
- "name": "stderr",
798
- "output_type": "stream",
799
- "text": [
800
- "Epoch 6/10: 100%|██████████| 96/96 [01:01<00:00, 1.56it/s]\n"
801
- ]
802
- },
803
- {
804
- "name": "stdout",
805
- "output_type": "stream",
806
- "text": [
807
- "Train Loss: 0.0643\n",
808
- "Train Accuracy: 0.9770\n"
809
- ]
810
- },
811
- {
812
- "name": "stderr",
813
- "output_type": "stream",
814
- "text": [
815
- " \r"
816
- ]
817
- },
818
- {
819
- "name": "stdout",
820
- "output_type": "stream",
821
- "text": [
822
- "Validation Loss: 0.3937\n",
823
- "Validation Accuracy: 0.8725\n",
824
- "Checkpoint saved at ./exp/fox_nbc/checkpoints\\checkpoint_epoch_6\n"
825
- ]
826
- },
827
- {
828
- "name": "stderr",
829
- "output_type": "stream",
830
- "text": [
831
- "Epoch 7/10: 100%|██████████| 96/96 [01:01<00:00, 1.57it/s]\n"
832
- ]
833
- },
834
- {
835
- "name": "stdout",
836
- "output_type": "stream",
837
- "text": [
838
- "Train Loss: 0.0294\n",
839
- "Train Accuracy: 0.9915\n"
840
- ]
841
- },
842
- {
843
- "name": "stderr",
844
- "output_type": "stream",
845
- "text": [
846
- " \r"
847
- ]
848
- },
849
- {
850
- "name": "stdout",
851
- "output_type": "stream",
852
- "text": [
853
- "Validation Loss: 0.4704\n",
854
- "Validation Accuracy: 0.8725\n",
855
- "Checkpoint saved at ./exp/fox_nbc/checkpoints\\checkpoint_epoch_7\n"
856
- ]
857
- },
858
- {
859
- "name": "stderr",
860
- "output_type": "stream",
861
- "text": [
862
- "Epoch 8/10: 100%|██████████| 96/96 [01:01<00:00, 1.56it/s]\n"
863
- ]
864
- },
865
- {
866
- "name": "stdout",
867
- "output_type": "stream",
868
- "text": [
869
- "Train Loss: 0.0128\n",
870
- "Train Accuracy: 0.9970\n"
871
- ]
872
- },
873
- {
874
- "name": "stderr",
875
- "output_type": "stream",
876
- "text": [
877
- " \r"
878
- ]
879
- },
880
- {
881
- "name": "stdout",
882
- "output_type": "stream",
883
- "text": [
884
- "Validation Loss: 0.5717\n",
885
- "Validation Accuracy: 0.8633\n",
886
- "Checkpoint saved at ./exp/fox_nbc/checkpoints\\checkpoint_epoch_8\n"
887
- ]
888
- },
889
- {
890
- "name": "stderr",
891
- "output_type": "stream",
892
- "text": [
893
- "Epoch 9/10: 100%|██████████| 96/96 [01:02<00:00, 1.54it/s]\n"
894
- ]
895
- },
896
- {
897
- "name": "stdout",
898
- "output_type": "stream",
899
- "text": [
900
- "Train Loss: 0.0088\n",
901
- "Train Accuracy: 0.9970\n"
902
- ]
903
- },
904
- {
905
- "name": "stderr",
906
- "output_type": "stream",
907
- "text": [
908
- " \r"
909
- ]
910
- },
911
- {
912
- "name": "stdout",
913
- "output_type": "stream",
914
- "text": [
915
- "Validation Loss: 0.5458\n",
916
- "Validation Accuracy: 0.8739\n",
917
- "Checkpoint saved at ./exp/fox_nbc/checkpoints\\checkpoint_epoch_9\n"
918
- ]
919
- },
920
- {
921
- "name": "stderr",
922
- "output_type": "stream",
923
- "text": [
924
- "Epoch 10/10: 100%|██████████| 96/96 [01:06<00:00, 1.45it/s]\n"
925
- ]
926
- },
927
- {
928
- "name": "stdout",
929
- "output_type": "stream",
930
- "text": [
931
- "Train Loss: 0.0056\n",
932
- "Train Accuracy: 0.9984\n"
933
- ]
934
- },
935
- {
936
- "name": "stderr",
937
- "output_type": "stream",
938
- "text": [
939
- " \r"
940
- ]
941
- },
942
- {
943
- "name": "stdout",
944
- "output_type": "stream",
945
- "text": [
946
- "Validation Loss: 0.4930\n",
947
- "Validation Accuracy: 0.8804\n",
948
- "Checkpoint saved at ./exp/fox_nbc/checkpoints\\checkpoint_epoch_10\n",
949
- "Final model saved at ./exp/fox_nbc/checkpoints\n"
950
- ]
951
- }
952
- ],
953
- "execution_count": 19
954
- },
955
- {
956
- "metadata": {},
957
- "cell_type": "markdown",
958
- "source": "## Evaluate Model",
959
- "id": "4af000263dd99bca"
960
- },
961
- {
962
- "metadata": {
963
- "ExecuteTime": {
964
- "end_time": "2024-12-16T18:50:16.035455Z",
965
- "start_time": "2024-12-16T18:50:02.434452Z"
966
- }
967
- },
968
- "cell_type": "code",
969
- "source": [
970
- "from transformers import AutoConfig, AutoModel\n",
971
- "from sklearn.metrics import accuracy_score, classification_report\n",
972
- "def load_last_checkpoint(checkpoint_dir):\n",
973
- " # Find all checkpoints in the directory\n",
974
- " checkpoints = [f for f in os.listdir(checkpoint_dir) if f.startswith(\"checkpoint_epoch_\")]\n",
975
- " if not checkpoints:\n",
976
- " raise FileNotFoundError(f\"No checkpoints found in {checkpoint_dir}!\")\n",
977
- " # Sort checkpoints by epoch number\n",
978
- " checkpoints.sort(key=lambda x: int(x.split(\"_\")[-1]))\n",
979
- "\n",
980
- " # Load the last checkpoint\n",
981
- " last_checkpoint = os.path.join(checkpoint_dir, checkpoints[-1])\n",
982
- " # print(f\"Loading checkpoint from {last_checkpoint}\")\n",
983
- " # Load the best checkpoint\n",
984
- " if os.path.join(checkpoint_dir, \"best\") is not None:\n",
985
- " last_checkpoint = os.path.join(checkpoint_dir, \"best\")\n",
986
- " print(f\"Loading checkpoint from {last_checkpoint}\")\n",
987
- " # Load model and config\n",
988
- " config = AutoConfig.from_pretrained(last_checkpoint)\n",
989
- " model = AutoModel.from_pretrained(last_checkpoint, config=config)\n",
990
- " return model\n",
991
- "\n",
992
- "# Step 1: Define paths and setup\n",
993
- "checkpoint_dir = os.path.join(config.base_exp_dir, \"checkpoints\") # Directory where checkpoints are stored\n",
994
- "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
995
- "model = load_last_checkpoint(checkpoint_dir)\n",
996
- "model.to(device)\n",
997
- "\n",
998
- "# criterion = torch.nn.BCEWithLogitsLoss()\n",
999
- "\n",
1000
- "criterion = torch.nn.CrossEntropyLoss()\n",
1001
- "\n",
1002
- "def evaluate_model(model, val_loader, criterion, device=\"cuda\"):\n",
1003
- " model.eval()\n",
1004
- " val_loss = 0.0\n",
1005
- " correct = 0\n",
1006
- " total = 0\n",
1007
- " all_preds = []\n",
1008
- " all_labels = []\n",
1009
- " with torch.no_grad():\n",
1010
- " for batch_inputs, labels in tqdm(val_loader, desc=\"Testing\", leave=False):\n",
1011
- " freq_inputs = batch_inputs[\"freq_inputs\"].to(device)\n",
1012
- " seq_inputs = batch_inputs[\"seq_inputs\"].to(device)\n",
1013
- " pos_inputs = batch_inputs[\"pos_inputs\"].to(device)\n",
1014
- " labels = labels.to(device)\n",
1015
- "\n",
1016
- " preds= model({\"freq_inputs\": freq_inputs, \"seq_inputs\": seq_inputs, \"pos_inputs\": pos_inputs})\n",
1017
- " loss = criterion(preds, labels)\n",
1018
- " _, preds = torch.max(preds, dim=1)\n",
1019
- " # preds = (torch.sigmoid(preds) > 0.5).float()\n",
1020
- " val_loss += loss.item()\n",
1021
- " total += labels.size(0)\n",
1022
- " # preds = (torch.sigmoid(preds) > 0.5).int()\n",
1023
- " correct += (preds == labels).sum().item()\n",
1024
- " all_preds.extend(preds.cpu().numpy())\n",
1025
- " all_labels.extend(labels.cpu().numpy())\n",
1026
- "\n",
1027
- " return accuracy_score(all_labels, all_preds), classification_report(all_labels, all_preds)\n",
1028
- "\n",
1029
- "\n",
1030
- "accuracy, report = evaluate_model(model, test_loader, criterion)\n",
1031
- "print(f\"Accuracy: {accuracy:.4f}\")\n",
1032
- "print(report)\n"
1033
- ],
1034
- "id": "b75d2dc8a300cdf6",
1035
- "outputs": [
1036
- {
1037
- "name": "stderr",
1038
- "output_type": "stream",
1039
- "text": [
1040
- "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
1041
- "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
1042
- ]
1043
- },
1044
- {
1045
- "name": "stdout",
1046
- "output_type": "stream",
1047
- "text": [
1048
- "Loading checkpoint from ./exp/fox_nbc/checkpoints\\best\n"
1049
- ]
1050
- },
1051
- {
1052
- "name": "stderr",
1053
- "output_type": "stream",
1054
- "text": [
1055
- "Some weights of the model checkpoint at ./exp/fox_nbc/checkpoints\\best were not used when initializing CustomModel: ['cls.lin0.parametrizations.weight.original0', 'cls.lin0.parametrizations.weight.original1', 'cls.lin1.parametrizations.weight.original0', 'cls.lin1.parametrizations.weight.original1', 'cls.lin2.parametrizations.weight.original0', 'cls.lin2.parametrizations.weight.original1', 'freq.lin0.parametrizations.weight.original0', 'freq.lin0.parametrizations.weight.original1', 'freq.lin1.parametrizations.weight.original0', 'freq.lin1.parametrizations.weight.original1', 'freq.lin2.parametrizations.weight.original0', 'freq.lin2.parametrizations.weight.original1', 'pos.lin0.parametrizations.weight.original0', 'pos.lin0.parametrizations.weight.original1', 'pos.lin1.parametrizations.weight.original0', 'pos.lin1.parametrizations.weight.original1', 'pos.lin2.parametrizations.weight.original0', 'pos.lin2.parametrizations.weight.original1']\n",
1056
- "- This IS expected if you are initializing CustomModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
1057
- "- This IS NOT expected if you are initializing CustomModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
1058
- "Some weights of CustomModel were not initialized from the model checkpoint at ./exp/fox_nbc/checkpoints\\best and are newly initialized: ['cls.lin0.weight_g', 'cls.lin0.weight_v', 'cls.lin1.weight_g', 'cls.lin1.weight_v', 'cls.lin2.weight_g', 'cls.lin2.weight_v', 'freq.lin0.weight_g', 'freq.lin0.weight_v', 'freq.lin1.weight_g', 'freq.lin1.weight_v', 'freq.lin2.weight_g', 'freq.lin2.weight_v', 'pos.lin0.weight_g', 'pos.lin0.weight_v', 'pos.lin1.weight_g', 'pos.lin1.weight_v', 'pos.lin2.weight_g', 'pos.lin2.weight_v']\n",
1059
- "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
1060
- " "
1061
- ]
1062
- },
1063
- {
1064
- "name": "stdout",
1065
- "output_type": "stream",
1066
- "text": [
1067
- "Accuracy: 0.8988\n",
1068
- " precision recall f1-score support\n",
1069
- "\n",
1070
- " 0 0.90 0.88 0.89 356\n",
1071
- " 1 0.90 0.91 0.91 405\n",
1072
- "\n",
1073
- " accuracy 0.90 761\n",
1074
- " macro avg 0.90 0.90 0.90 761\n",
1075
- "weighted avg 0.90 0.90 0.90 761\n",
1076
- "\n"
1077
- ]
1078
- },
1079
- {
1080
- "name": "stderr",
1081
- "output_type": "stream",
1082
- "text": [
1083
- "\r"
1084
- ]
1085
- }
1086
- ],
1087
- "execution_count": 21
1088
- },
1089
- {
1090
- "metadata": {},
1091
- "cell_type": "markdown",
1092
- "source": "# Part 3. Pushing the Model to the Hugging Face",
1093
- "id": "d2ffeb383ea00beb"
1094
- },
1095
- {
1096
- "metadata": {
1097
- "ExecuteTime": {
1098
- "end_time": "2024-12-16T18:50:47.965853Z",
1099
- "start_time": "2024-12-16T18:50:23.635567Z"
1100
- }
1101
- },
1102
- "cell_type": "code",
1103
- "source": "model.push_model(REPO_NAME)",
1104
- "id": "f55c22b0a1b2a66b",
1105
- "outputs": [
1106
- {
1107
- "data": {
1108
- "text/plain": [
1109
- "README.md: 0%| | 0.00/839 [00:00<?, ?B/s]"
1110
- ],
1111
- "application/vnd.jupyter.widget-view+json": {
1112
- "version_major": 2,
1113
- "version_minor": 0,
1114
- "model_id": "3258d736d65a4c36b524011271415c56"
1115
- }
1116
- },
1117
- "metadata": {},
1118
- "output_type": "display_data"
1119
- },
1120
- {
1121
- "name": "stderr",
1122
- "output_type": "stream",
1123
- "text": [
1124
- "C:\\Users\\swall\\anaconda3\\envs\\newsCLS\\Lib\\site-packages\\huggingface_hub\\file_download.py:139: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\swall\\.cache\\huggingface\\hub\\models--CISProject--News-Headline-Classifier-Notebook. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
1125
- "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
1126
- " warnings.warn(message)\n",
1127
- "Repo card metadata block was not found. Setting CardData to empty.\n"
1128
- ]
1129
- },
1130
- {
1131
- "data": {
1132
- "text/plain": [
1133
- "model.safetensors: 0%| | 0.00/518M [00:00<?, ?B/s]"
1134
- ],
1135
- "application/vnd.jupyter.widget-view+json": {
1136
- "version_major": 2,
1137
- "version_minor": 0,
1138
- "model_id": "bf9fd6651886433489d5059f9a83b831"
1139
- }
1140
- },
1141
- "metadata": {},
1142
- "output_type": "display_data"
1143
- }
1144
- ],
1145
- "execution_count": 22
1146
- },
1147
- {
1148
- "metadata": {},
1149
- "cell_type": "markdown",
1150
- "source": "### NOTE: You need to ensure that your Hugging Face token has both read and write access to your repository and Hugging Face organization.",
1151
- "id": "3826c0b6195a8fd5"
1152
- },
1153
- {
1154
- "metadata": {
1155
- "ExecuteTime": {
1156
- "end_time": "2024-12-16T18:51:38.723144Z",
1157
- "start_time": "2024-12-16T18:51:24.496422Z"
1158
- }
1159
- },
1160
- "cell_type": "code",
1161
- "source": [
1162
- "# Load model directly\n",
1163
- "from transformers import AutoModel, AutoConfig\n",
1164
- "config = AutoConfig.from_pretrained(\"CISProject/News-Headline-Classifier-Notebook\")\n",
1165
- "model = AutoModel.from_pretrained(\"CISProject/News-Headline-Classifier-Notebook\",config = config)"
1166
- ],
1167
- "id": "33a0ca269c24d700",
1168
- "outputs": [
1169
- {
1170
- "data": {
1171
- "text/plain": [
1172
- "config.json: 0%| | 0.00/1.08k [00:00<?, ?B/s]"
1173
- ],
1174
- "application/vnd.jupyter.widget-view+json": {
1175
- "version_major": 2,
1176
- "version_minor": 0,
1177
- "model_id": "ee3167049b5942acacc9eaab7cbb0a35"
1178
- }
1179
- },
1180
- "metadata": {},
1181
- "output_type": "display_data"
1182
- },
1183
- {
1184
- "data": {
1185
- "text/plain": [
1186
- "model.safetensors: 0%| | 0.00/518M [00:00<?, ?B/s]"
1187
- ],
1188
- "application/vnd.jupyter.widget-view+json": {
1189
- "version_major": 2,
1190
- "version_minor": 0,
1191
- "model_id": "456b7f100f9342c49fd9f08d2b24e1d8"
1192
- }
1193
- },
1194
- "metadata": {},
1195
- "output_type": "display_data"
1196
- },
1197
- {
1198
- "name": "stderr",
1199
- "output_type": "stream",
1200
- "text": [
1201
- "C:\\Users\\swall\\anaconda3\\envs\\newsCLS\\Lib\\site-packages\\torch\\nn\\utils\\weight_norm.py:143: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`.\n",
1202
- " WeightNorm.apply(module, name, dim)\n",
1203
- "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
1204
- "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
1205
- "Some weights of the model checkpoint at CISProject/News-Headline-Classifier-Notebook were not used when initializing CustomModel: ['cls.lin0.parametrizations.weight.original0', 'cls.lin0.parametrizations.weight.original1', 'cls.lin1.parametrizations.weight.original0', 'cls.lin1.parametrizations.weight.original1', 'cls.lin2.parametrizations.weight.original0', 'cls.lin2.parametrizations.weight.original1', 'freq.lin0.parametrizations.weight.original0', 'freq.lin0.parametrizations.weight.original1', 'freq.lin1.parametrizations.weight.original0', 'freq.lin1.parametrizations.weight.original1', 'freq.lin2.parametrizations.weight.original0', 'freq.lin2.parametrizations.weight.original1', 'pos.lin0.parametrizations.weight.original0', 'pos.lin0.parametrizations.weight.original1', 'pos.lin1.parametrizations.weight.original0', 'pos.lin1.parametrizations.weight.original1', 'pos.lin2.parametrizations.weight.original0', 'pos.lin2.parametrizations.weight.original1']\n",
1206
- "- This IS expected if you are initializing CustomModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
1207
- "- This IS NOT expected if you are initializing CustomModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
1208
- "Some weights of CustomModel were not initialized from the model checkpoint at CISProject/News-Headline-Classifier-Notebook and are newly initialized: ['cls.lin0.weight_g', 'cls.lin0.weight_v', 'cls.lin1.weight_g', 'cls.lin1.weight_v', 'cls.lin2.weight_g', 'cls.lin2.weight_v', 'freq.lin0.weight_g', 'freq.lin0.weight_v', 'freq.lin1.weight_g', 'freq.lin1.weight_v', 'freq.lin2.weight_g', 'freq.lin2.weight_v', 'pos.lin0.weight_g', 'pos.lin0.weight_v', 'pos.lin1.weight_g', 'pos.lin1.weight_v', 'pos.lin2.weight_g', 'pos.lin2.weight_v']\n",
1209
- "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
1210
- ]
1211
- }
1212
- ],
1213
- "execution_count": 23
1214
- },
1215
- {
1216
- "metadata": {
1217
- "ExecuteTime": {
1218
- "end_time": "2024-12-16T18:51:53.997442Z",
1219
- "start_time": "2024-12-16T18:51:40.978026Z"
1220
- }
1221
- },
1222
- "cell_type": "code",
1223
- "source": [
1224
- "from transformers import AutoConfig, AutoModel\n",
1225
- "from sklearn.metrics import accuracy_score, classification_report\n",
1226
- "\n",
1227
- "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
1228
- "model.to(device)\n",
1229
- "\n",
1230
- "#criterion = torch.nn.BCEWithLogitsLoss()\n",
1231
- "\n",
1232
- "criterion = torch.nn.CrossEntropyLoss()\n",
1233
- "def evaluate_model(model, val_loader, criterion, device=\"cuda\"):\n",
1234
- " model.eval()\n",
1235
- " val_loss = 0.0\n",
1236
- " correct = 0\n",
1237
- " total = 0\n",
1238
- " all_preds = []\n",
1239
- " all_labels = []\n",
1240
- " with torch.no_grad():\n",
1241
- " for batch_inputs, labels in tqdm(val_loader, desc=\"Testing\", leave=False):\n",
1242
- " freq_inputs = batch_inputs[\"freq_inputs\"].to(device)\n",
1243
- " seq_inputs = batch_inputs[\"seq_inputs\"].to(device)\n",
1244
- " pos_inputs = batch_inputs[\"pos_inputs\"].to(device)\n",
1245
- " labels = labels.to(device)\n",
1246
- "\n",
1247
- " preds = model({\"freq_inputs\": freq_inputs, \"seq_inputs\": seq_inputs, \"pos_inputs\": pos_inputs})\n",
1248
- " loss = criterion(preds, labels)\n",
1249
- " _, preds = torch.max(preds, dim=1)\n",
1250
- " # preds = (torch.sigmoid(preds) > 0.5).float()\n",
1251
- " val_loss += loss.item()\n",
1252
- " total += labels.size(0)\n",
1253
- " correct += (preds == labels).sum().item()\n",
1254
- " all_preds.extend(preds.cpu().numpy())\n",
1255
- " all_labels.extend(labels.cpu().numpy())\n",
1256
- "\n",
1257
- " return accuracy_score(all_labels, all_preds), classification_report(all_labels, all_preds)\n",
1258
- "\n",
1259
- "\n",
1260
- "accuracy, report = evaluate_model(model, test_loader, criterion)\n",
1261
- "print(f\"Accuracy: {accuracy:.4f}\")\n",
1262
- "print(report)\n"
1263
- ],
1264
- "id": "cc313b4396f87690",
1265
- "outputs": [
1266
- {
1267
- "name": "stderr",
1268
- "output_type": "stream",
1269
- "text": [
1270
- " "
1271
- ]
1272
- },
1273
- {
1274
- "name": "stdout",
1275
- "output_type": "stream",
1276
- "text": [
1277
- "Accuracy: 0.8988\n",
1278
- " precision recall f1-score support\n",
1279
- "\n",
1280
- " 0 0.90 0.88 0.89 356\n",
1281
- " 1 0.90 0.91 0.91 405\n",
1282
- "\n",
1283
- " accuracy 0.90 761\n",
1284
- " macro avg 0.90 0.90 0.90 761\n",
1285
- "weighted avg 0.90 0.90 0.90 761\n",
1286
- "\n"
1287
- ]
1288
- },
1289
- {
1290
- "name": "stderr",
1291
- "output_type": "stream",
1292
- "text": [
1293
- "\r"
1294
- ]
1295
- }
1296
- ],
1297
- "execution_count": 24
1298
- }
1299
- ],
1300
- "metadata": {
1301
- "kernelspec": {
1302
- "name": "python3",
1303
- "language": "python",
1304
- "display_name": "Python 3 (ipykernel)"
1305
- }
1306
- },
1307
- "nbformat": 5,
1308
- "nbformat_minor": 9
1309
- }