{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using custom data configuration tappyness1--causion-800e18f416d7678b\n",
      "Found cached dataset parquet (C:/Users/neoce/.cache/huggingface/datasets/tappyness1___parquet/tappyness1--causion-800e18f416d7678b/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n",
      "100%|██████████| 1/1 [00:00<00:00, 937.90it/s]\n"
     ]
    }
   ],
   "source": [
    "from datasets import load_dataset\n",
    "import pandas as pd\n",
    "import os\n",
    "import yaml\n",
    "\n",
    "# Read the Hugging Face access token from a local secrets file. The context\n",
    "# manager closes the file handle as soon as parsing is done (the previous\n",
    "# bare open() left it open for the lifetime of the kernel).\n",
    "with open(\"token_secret.yaml\") as token_file:\n",
    "    # NOTE(review): switched from yaml.load(..., Loader=yaml.FullLoader) to\n",
    "    # yaml.safe_load, which only constructs plain YAML types. That is enough\n",
    "    # for the mapping with a 'TOKEN' key that is read below.\n",
    "    token_obj = yaml.safe_load(token_file)\n",
    "\n",
    "# Load the dataset from the Hugging Face Hub, authenticating with the token.\n",
    "dataset = load_dataset(\"tappyness1/causion\", use_auth_token=token_obj['TOKEN'])\n",
    "\n",
    "# Materialise the 'train' split as a DataFrame for the cells that follow.\n",
    "counts_df = pd.DataFrame(dataset['train'])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\neoce\\AppData\\Local\\Temp\\ipykernel_18912\\643665856.py:1: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
      "  date_view_group = counts_df_filter_views.groupby(by=['view', 'day_of_week']).mean()\n"
     ]
    }
   ],
   "source": [
    "# Views to keep; each location appears once with a `_to_sg` suffix and once\n",
    "# with a `_to_jh` suffix.\n",
    "filtered_views_list = ['View_from_Second_Link_at_Tuas_to_sg',\n",
    "                        'View_from_Second_Link_at_Tuas_to_jh',\n",
    "                        'View_from_Tuas_Checkpoint_to_sg',\n",
    "                        'View_from_Tuas_Checkpoint_to_jh',\n",
    "                        'View_from_Woodlands_Causeway_Towards_Johor_to_sg',\n",
    "                        'View_from_Woodlands_Causeway_Towards_Johor_to_jh',\n",
    "                        'View_from_Woodlands_Checkpoint_Towards_BKE_to_sg',\n",
    "                        'View_from_Woodlands_Checkpoint_Towards_BKE_to_jh']\n",
    "\n",
    "# .copy() yields an independent frame, so the column assignments below never\n",
    "# write into a slice of counts_df (this is what raises SettingWithCopyWarning)\n",
    "# and counts_df itself stays untouched when the cell is re-run.\n",
    "counts_df_filter_views = counts_df[counts_df['view'].isin(filtered_views_list)].copy()\n",
    "counts_df_filter_views['date'] = pd.to_datetime(counts_df_filter_views['date'])\n",
    "# pandas encodes day_of_week as Monday=0 ... Sunday=6.\n",
    "counts_df_filter_views['day_of_week'] = counts_df_filter_views['date'].dt.day_of_week\n",
    "\n",
    "# Mean of every numeric column per (view, day_of_week). numeric_only=True is\n",
    "# passed explicitly: it matches the current implicit behaviour, silences the\n",
    "# FutureWarning, and keeps the result stable once the pandas default changes.\n",
    "date_view_group = (\n",
    "    counts_df_filter_views\n",
    "    .groupby(by=['view', 'day_of_week'])\n",
    "    .mean(numeric_only=True)\n",
    "    .reset_index()\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "6242_hw1_q1",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}