{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the dataset\n",
    "\n",
    "Reads the Hugging Face access token from `token_secret.yaml` and loads the `train` split of `tappyness1/causion` into a DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datasets import load_dataset\n",
    "import pandas as pd\n",
    "import os\n",
    "import yaml\n",
    "\n",
    "# Read the access token from a local YAML file (expects a top-level `TOKEN` key).\n",
    "# The context manager closes the file handle as soon as parsing is done.\n",
    "# NOTE: safe_load replaces yaml.load(..., Loader=yaml.FullLoader); it is sufficient\n",
    "# for a plain key/value file and never constructs arbitrary Python objects.\n",
    "with open(\"token_secret.yaml\") as token_file:\n",
    "    token_obj = yaml.safe_load(token_file)\n",
    "\n",
    "# Fetch the dataset (or reuse the local cache) and materialise its `train` split\n",
    "# as a DataFrame.\n",
    "dataset = load_dataset(\"tappyness1/causion\", use_auth_token=token_obj['TOKEN'])\n",
    "counts_df = pd.DataFrame(dataset['train'])\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Mean per view and day of week\n",
    "\n",
    "Keeps only the listed views, derives the day of the week from `date`, and averages every numeric column per (`view`, `day_of_week`) pair."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Views to keep; rows with any other `view` value are dropped.\n",
    "filtered_views_list = ['View_from_Second_Link_at_Tuas_to_sg',\n",
    "                       'View_from_Second_Link_at_Tuas_to_jh',\n",
    "                       'View_from_Tuas_Checkpoint_to_sg',\n",
    "                       'View_from_Tuas_Checkpoint_to_jh',\n",
    "                       'View_from_Woodlands_Causeway_Towards_Johor_to_sg',\n",
    "                       'View_from_Woodlands_Causeway_Towards_Johor_to_jh',\n",
    "                       'View_from_Woodlands_Checkpoint_Towards_BKE_to_sg',\n",
    "                       'View_from_Woodlands_Checkpoint_Towards_BKE_to_jh']\n",
    "\n",
    "# Take an explicit copy of the filtered rows: assigning new columns to the result\n",
    "# of a boolean-mask selection is what triggers pandas' SettingWithCopyWarning.\n",
    "counts_df_filter_views = counts_df[counts_df['view'].isin(filtered_views_list)].copy()\n",
    "counts_df_filter_views['date'] = pd.to_datetime(counts_df_filter_views['date'])\n",
    "# pandas encodes day_of_week as Monday=0 ... Sunday=6.\n",
    "counts_df_filter_views['day_of_week'] = counts_df_filter_views['date'].dt.day_of_week\n",
    "\n",
    "# numeric_only=True makes the previously implicit behaviour explicit (non-numeric\n",
    "# columns are left out of the mean) and silences the FutureWarning about the\n",
    "# changing default.\n",
    "date_view_group = (\n",
    "    counts_df_filter_views\n",
    "    .groupby(by=['view', 'day_of_week'])\n",
    "    .mean(numeric_only=True)\n",
    "    .reset_index()\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "6242_hw1_q1",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}