{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Dataset from hugging face" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " id place label \\\n", "0 2401 Borderlands Positive \n", "1 2401 Borderlands Positive \n", "2 2401 Borderlands Positive \n", "3 2401 Borderlands Positive \n", "4 2401 Borderlands Positive \n", "\n", " text \n", "0 im getting on borderlands and i will murder yo... \n", "1 I am coming to the borders and I will kill you... \n", "2 im getting on borderlands and i will kill you ... \n", "3 im coming on borderlands and i will murder you... \n", "4 im getting on borderlands 2 and i will murder ... \n" ] } ], "source": [ "import pandas as pd\n", "\n", "# Column labels applied to both CSV files; header=None below makes pandas\n", "# treat the first line of each file as data rather than as a header.\n", "column_names = [\"id\", \"place\", \"label\", \"text\"]\n", "\n", "# Load the training file first, then the validation file (kept as the test set).\n", "train_df, test_df = (\n", "    pd.read_csv(csv_path, names=column_names, header=None)\n", "    for csv_path in (\"twitter_training.csv\", \"twitter_validation.csv\")\n", ")\n", "\n", "# Plain-text preview of the first five training rows.\n", "print(train_df.head(n=5))\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[nltk_data] Downloading package stopwords to C:\\Users\\Regino Balogo\n", "[nltk_data] Jr\\AppData\\Roaming\\nltk_data...\n", "[nltk_data] Package stopwords is already up-to-date!\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Sample cleaned text:\n" ] }, { "data": { "text/html": [ "
\n", " | text | \n", "clean_text | \n", "
---|---|---|
0 | \n", "im getting on borderlands and i will murder yo... | \n", "im getting borderlands murder | \n", "
1 | \n", "I am coming to the borders and I will kill you... | \n", "coming borders kill | \n", "
2 | \n", "im getting on borderlands and i will kill you ... | \n", "im getting borderlands kill | \n", "
3 | \n", "im coming on borderlands and i will murder you... | \n", "im coming borderlands murder | \n", "
4 | \n", "im getting on borderlands 2 and i will murder ... | \n", "im getting borderlands 2 murder | \n", "