CIS5190ml
/

bert4

Text Classification

Transformers

Safetensors

English

bert

Model card Files Files and versions Community

jyw3 committed on Dec 16, 2024

Commit

2dd2c6f

verified ·

1 Parent(s): fed03d0

Delete evaluation.ipynb

Browse files

Files changed (1) hide show

evaluation.ipynb +0 -206

evaluation.ipynb DELETED Viewed

@@ -1,206 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "id": "initial_id",
-   "metadata": {
-    "collapsed": true,
-    "ExecuteTime": {
-     "end_time": "2024-12-16T00:57:30.435870Z",
-     "start_time": "2024-12-16T00:57:30.239832Z"
-    }
-   },
-   "source": [
-    "import pandas as pd\n",
-    "from datasets import Dataset\n",
-    "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n",
-    "from torch.utils.data import DataLoader\n",
-    "import torch\n",
-    "import evaluate\n",
-    "from tqdm import tqdm\n",
-    "\n",
-    "\n",
-    "# 1. Load the model and its tokenizer from the Hugging Face Hub\n",
-    "MODEL_ID = \"CIS5190ml/bert4\"\n",
-    "tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)\n",
-    "model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)\n",
-    "\n",
-    "# 2. Load the evaluation data as a pandas frame, then wrap it in a datasets.Dataset\n",
-    "DATA_ROOT = \"hf://datasets/CIS5190ml/NewData/\"\n",
-    "splits = {'train': 'train_dataset.csv', 'test': 'test_dataset.csv'}\n",
-    "# NOTE(review): the frame is named test_df but is read from the 'train' file.\n",
-    "# Confirm this is intentional; otherwise set EVAL_SPLIT to 'test'.\n",
-    "EVAL_SPLIT = \"train\"\n",
-    "test_df = pd.read_csv(DATA_ROOT + splits[EVAL_SPLIT])\n",
-    "ds = Dataset.from_pandas(test_df)\n",
-    "\n"
-   ],
-   "outputs": [],
-   "execution_count": 32
-  },
-  {
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2024-12-16T00:57:33.327397Z",
-     "start_time": "2024-12-16T00:57:32.596149Z"
-    }
-   },
-   "cell_type": "code",
-   "source": [
-    "def preprocess_function(examples):\n",
-    "    \"\"\"Tokenize the batch's ``title`` column with truncation and max_length padding.\"\"\"\n",
-    "    titles = examples[\"title\"]\n",
-    "    return tokenizer(titles, truncation=True, padding=\"max_length\")\n",
-    "\n",
-    "# Tokenize the dataset in batches.\n",
-    "encoded_ds = ds.map(preprocess_function, batched=True)\n",
-    "\n",
-    "# Drop every column other than the ones listed below, then return torch tensors.\n",
-    "desired_cols = [\"input_ids\", \"attention_mask\", \"labels\"]\n",
-    "extra_cols = [name for name in encoded_ds.column_names if name not in desired_cols]\n",
-    "encoded_ds = encoded_ds.remove_columns(extra_cols)\n",
-    "encoded_ds.set_format(\"torch\")\n",
-    "\n",
-    "# Batches of 8 examples, iterated in dataset order.\n",
-    "test_loader = DataLoader(encoded_ds, batch_size=8)\n",
-    "\n",
-    "# Metric object that accumulates predictions across batches.\n",
-    "accuracy = evaluate.load(\"accuracy\")\n",
-    "\n",
-    "# Use the GPU when one is available, otherwise fall back to the CPU.\n",
-    "if torch.cuda.is_available():\n",
-    "    device = torch.device(\"cuda\")\n",
-    "else:\n",
-    "    device = torch.device(\"cpu\")\n",
-    "model.to(device)\n"
-   ],
-   "id": "dfefbe70a4ff8696",
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Map: 100%|██████████| 758/758 [00:00<00:00, 7183.73 examples/s]\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "BertForSequenceClassification(\n",
-       "  (bert): BertModel(\n",
-       "    (embeddings): BertEmbeddings(\n",
-       "      (word_embeddings): Embedding(30522, 768, padding_idx=0)\n",
-       "      (position_embeddings): Embedding(512, 768)\n",
-       "      (token_type_embeddings): Embedding(2, 768)\n",
-       "      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-       "      (dropout): Dropout(p=0.1, inplace=False)\n",
-       "    )\n",
-       "    (encoder): BertEncoder(\n",
-       "      (layer): ModuleList(\n",
-       "        (0-11): 12 x BertLayer(\n",
-       "          (attention): BertAttention(\n",
-       "            (self): BertSdpaSelfAttention(\n",
-       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
-       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
-       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
-       "              (dropout): Dropout(p=0.1, inplace=False)\n",
-       "            )\n",
-       "            (output): BertSelfOutput(\n",
-       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
-       "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-       "              (dropout): Dropout(p=0.1, inplace=False)\n",
-       "            )\n",
-       "          )\n",
-       "          (intermediate): BertIntermediate(\n",
-       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
-       "            (intermediate_act_fn): GELUActivation()\n",
-       "          )\n",
-       "          (output): BertOutput(\n",
-       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
-       "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-       "            (dropout): Dropout(p=0.1, inplace=False)\n",
-       "          )\n",
-       "        )\n",
-       "      )\n",
-       "    )\n",
-       "    (pooler): BertPooler(\n",
-       "      (dense): Linear(in_features=768, out_features=768, bias=True)\n",
-       "      (activation): Tanh()\n",
-       "    )\n",
-       "  )\n",
-       "  (dropout): Dropout(p=0.1, inplace=False)\n",
-       "  (classifier): Linear(in_features=768, out_features=2, bias=True)\n",
-       ")"
-      ]
-     },
-     "execution_count": 33,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "execution_count": 33
-  },
-  {
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2024-12-16T00:58:18.207058Z",
-     "start_time": "2024-12-16T00:58:08.007420Z"
-    }
-   },
-   "cell_type": "code",
-   "source": [
-    "# Score every batch with gradients disabled and accumulate accuracy.\n",
-    "model.eval()\n",
-    "with torch.no_grad():\n",
-    "    for batch in tqdm(test_loader, desc=\"Evaluating\"):\n",
-    "        # Move the model inputs and the reference labels to the model's device.\n",
-    "        features = {name: batch[name].to(device) for name in (\"input_ids\", \"attention_mask\")}\n",
-    "        labels = batch[\"labels\"].to(device)\n",
-    "\n",
-    "        # Predicted class = index of the largest logit along the last axis.\n",
-    "        logits = model(**features).logits\n",
-    "        preds = logits.argmax(dim=-1)\n",
-    "        accuracy.add_batch(predictions=preds, references=labels)\n",
-    "\n",
-    "final_accuracy = accuracy.compute()\n",
-    "print(\"Accuracy:\", final_accuracy[\"accuracy\"])"
-   ],
-   "id": "c6e4fd03bd73664f",
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Evaluating: 100%|██████████| 95/95 [00:10<00:00,  9.33it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Accuracy: 0.7823218997361477\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    }
-   ],
-   "execution_count": 35
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 2
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.6"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}