{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "import requests\n",
    "\n",
    "# URL pattern of the per-season JSON files; filled in with the season number\n",
    "SEASON_URL = 'https://raw.githubusercontent.com/emorynlp/character-mining/master/json/friends_season_%02d.json'\n",
    "\n",
    "\n",
    "def load_season(season_number, timeout=30):\n",
    "    \"\"\"Read the JSON file of one season from the web.\n",
    "\n",
    "    Args:\n",
    "        season_number: season to load, e.g. 1 for `friends_season_01.json`.\n",
    "        timeout: seconds to wait for the server before giving up.\n",
    "\n",
    "    Returns:\n",
    "        The parsed JSON content of the season file.\n",
    "\n",
    "    Raises:\n",
    "        requests.RequestException: if the download fails or the server\n",
    "            answers with an error status.\n",
    "    \"\"\"\n",
    "    r = requests.get(SEASON_URL % season_number, timeout=timeout)\n",
    "    # fail here on e.g. a 404 instead of trying to parse an error page as JSON\n",
    "    r.raise_for_status()\n",
    "    return json.loads(r.text)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "## Season"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "s01\n"
     ]
    }
   ],
   "source": [
    "# load season 1\n",
    "season = load_season(1)\n",
    "season_id = season['season_id']\n",
    "print(season_id)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "## Episodes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "s01_e01\n",
      "s01_e02\n",
      "s01_e03\n",
      "s01_e04\n",
      "s01_e05\n",
      "s01_e06\n",
      "s01_e07\n",
      "s01_e08\n",
      "s01_e09\n",
      "s01_e10\n",
      "s01_e11\n",
      "s01_e12\n",
      "s01_e13\n",
      "s01_e14\n",
      "s01_e15\n",
      "s01_e16\n",
      "s01_e17\n",
      "s01_e18\n",
      "s01_e19\n",
      "s01_e20\n",
      "s01_e21\n",
      "s01_e22\n",
      "s01_e23\n",
      "s01_e24\n"
     ]
    }
   ],
   "source": [
    "# retrieve episodes\n",
    "episodes = season['episodes']\n",
    "\n",
    "# iterate through the episodes\n",
    "for episode in episodes:\n",
    "    episode_id = episode['episode_id']\n",
    "    print(episode_id)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "## Scenes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "s01_e18_c01\n",
      "s01_e18_c02\n",
      "s01_e18_c03\n",
      "s01_e18_c04\n",
      "s01_e18_c05\n",
      "s01_e18_c06\n",
      "s01_e18_c07\n",
      "s01_e18_c08\n"
     ]
    }
   ],
   "source": [
    "# retrieve scenes from the 18th episode\n",
    "episode = episodes[17]\n",
    "scenes = episode['scenes']\n",
    "\n",
    "# iterate through the scenes\n",
    "for scene in scenes:\n",
    "    scene_id = scene['scene_id']\n",
    "    print(scene_id)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "## Utterances"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "s01_e18_c05_u001\n",
      "s01_e18_c05_u002\n",
      "s01_e18_c05_u003\n",
      "s01_e18_c05_u004\n",
      "s01_e18_c05_u005\n",
      "s01_e18_c05_u006\n",
      "s01_e18_c05_u007\n",
      "s01_e18_c05_u008\n",
      "s01_e18_c05_u009\n",
      "s01_e18_c05_u010\n",
      "s01_e18_c05_u011\n",
      "s01_e18_c05_u012\n",
      "s01_e18_c05_u013\n",
      "s01_e18_c05_u014\n",
      "s01_e18_c05_u015\n",
      "s01_e18_c05_u016\n",
      "s01_e18_c05_u017\n",
      "s01_e18_c05_u018\n",
      "s01_e18_c05_u019\n",
      "s01_e18_c05_u020\n",
      "s01_e18_c05_u021\n",
      "s01_e18_c05_u022\n"
     ]
    }
   ],
   "source": [
    "# retrieve utterances from the 5th scene\n",
    "scene = scenes[4]\n",
    "utterances = scene['utterances']\n",
    "\n",
    "# iterate through the utterances\n",
    "for utterance in utterances:\n",
    "    utterance_id = utterance['utterance_id']\n",
    "    print(utterance_id)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "## Utterance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Phoebe Buffay', 'Rachel Green']\n",
      "Yes, we should. I think we should.\n",
      "[['Yes', ',', 'we', 'should', '.'], ['I', 'think', 'we', 'should', '.']]\n"
     ]
    }
   ],
   "source": [
    "# retrieve fields from the 18th utterance\n",
    "utterance = utterances[17]\n",
    "\n",
    "# list of speakers\n",
    "speakers = utterance['speakers']\n",
    "print(speakers)\n",
    "\n",
    "# the original transcript\n",
    "transcript = utterance['transcript']\n",
    "print(transcript)\n",
    "\n",
    "# list of sentences, where each sentence is a list of tokens\n",
    "tokens = utterance['tokens']\n",
    "print(tokens)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Caption\n",
    "\n",
    "For seasons 6-9, caption information is available."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Begin time in milliseconds: 6923\n",
      "End time in milliseconds: 8382\n",
      "Text: you sure you wanna do this\n"
     ]
    }
   ],
   "source": [
    "# load season 6 under its own name so that `season` above keeps referring to season 1\n",
    "season_06 = load_season(6)\n",
    "\n",
    "# 1st episode, 1st scene, 3rd utterance\n",
    "caption_utterance = season_06['episodes'][0]['scenes'][0]['utterances'][2]\n",
    "caption = caption_utterance['caption']\n",
    "\n",
    "# the code below reads the caption as a sequence: [begin time, end time, text]\n",
    "print('Begin time in milliseconds: %d' % caption[0])\n",
    "print('End time in milliseconds: %d' % caption[1])\n",
    "print('Text: %s' % caption[2])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}