File size: 4,633 Bytes
0648f19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fda69e1
 
0648f19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fda69e1
 
 
 
 
 
 
0648f19
 
 
fda69e1
0648f19
 
 
 
 
 
 
 
 
 
fda69e1
 
 
 
 
 
0648f19
 
 
 
 
 
fda69e1
 
 
 
 
 
0648f19
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import streamlit as st
import pandas as pd

import plotly.graph_objects as go
import plotly.express as px

from ast import literal_eval
from lxml import etree as ET


def _add_time_points(frames: pd.DataFrame) -> pd.DataFrame:
    """Return *frames* with a ``time_point`` column locating each row in the article.

    The value is ``(sentence_idx + frame_idx / max_frame_idx_in_sentence)
    / (max_sentence_idx + 1)``, i.e. the sentence index plus the frame's
    relative position inside its sentence, scaled by the number of sentences.
    """
    sentence_idx = frames["sentence_idx"]
    last_sentence = sentence_idx.max()

    # Highest frame index of the sentence each row belongs to, aligned row by row.
    last_frame = frames.groupby("sentence_idx")["frame_idx"].transform("max")
    # A sentence whose highest frame index is 0 would otherwise divide by
    # zero; such frames are placed at the start of their sentence instead.
    divisor = last_frame.mask(last_frame == 0, 1)
    within_sentence = frames["frame_idx"] / divisor

    return frames.assign(
        time_point=(sentence_idx + within_sentence) / (last_sentence + 1)
    )


def prepare_data() -> pd.DataFrame:
    """Load the mapped-roles CSV and derive the columns used by the charts.

    Reads ``data/2002829_mapped_roles.csv``; the ``frame``, ``changed_roles``,
    ``unchanged_roles`` and ``roles`` columns are parsed with ``literal_eval``.
    Each ``frame`` value is indexed as
    ``(sentence_idx, frame_idx, frame_name, frame_target)``.

    Returns:
        A DataFrame with the columns ``sentence_idx``, ``frame_idx``,
        ``frame_name``, ``frame_scenario``, ``frame_super_scenario``,
        ``frame_target``, ``changed_roles``, ``roles``, ``time_point`` and
        ``first_role`` (first entry of ``changed_roles``, or ``None`` when
        that entry is empty).

    Raises:
        KeyError: if a frame name is missing from the mappings returned by
            ``load_kicktionary_info``.
    """
    data = pd.read_csv(
        "data/2002829_mapped_roles.csv",
        index_col=0,
        converters={
            "frame": literal_eval,
            "changed_roles": literal_eval,
            "unchanged_roles": literal_eval,
            "roles": literal_eval,
        },
    )

    frame_to_scenario, frame_to_super_scenario = load_kicktionary_info()

    # Unpack the "frame" tuples into separate columns, then keep only the
    # columns the app uses, in a fixed order ("frame" and "unchanged_roles"
    # are dropped by the selection).
    frame = data["frame"]
    frame_name = frame.map(lambda f: f[2])
    frames = data.assign(
        sentence_idx=frame.map(lambda f: f[0]),
        frame_idx=frame.map(lambda f: f[1]),
        frame_name=frame_name,
        frame_scenario=frame_name.map(lambda name: frame_to_scenario[name]),
        frame_super_scenario=frame_name.map(
            lambda name: frame_to_super_scenario[name]
        ),
        frame_target=frame.map(lambda f: f[3]),
    )[
        [
            "sentence_idx",
            "frame_idx",
            "frame_name",
            "frame_scenario",
            "frame_super_scenario",
            "frame_target",
            "changed_roles",
            "roles",
        ]
    ]

    frames_with_time = _add_time_points(frames)

    return frames_with_time.assign(
        first_role=frames_with_time["changed_roles"].apply(
            lambda roles: roles[0] if roles else None
        )
    )


def load_kicktionary_info():
    """Build frame -> scenario and frame -> super-scenario lookups.

    Parses ``kicktionary_lu_info.xml`` and reads the ``frame``, ``scenario``
    and ``super-scenario`` attributes of every ``LEXICAL-UNIT`` element.
    Elements whose ``frame`` attribute is missing or empty are skipped. When
    several elements share a frame, the last one in document order wins.

    Returns:
        A ``(frame_to_scenario, frame_to_super_scenario)`` tuple of dicts
        keyed by frame name.

    Raises:
        KeyError: if an element with a frame lacks the ``scenario`` or
            ``super-scenario`` attribute.
    """
    kicktionary = ET.parse("kicktionary_lu_info.xml")

    frame_to_scenario = {}
    frame_to_super_scenario = {}
    # One pass over the lexical units fills both mappings.
    for lu in kicktionary.xpath(".//LEXICAL-UNIT"):
        frame = lu.get("frame")
        if not frame:
            continue
        frame_to_scenario[frame] = lu.attrib["scenario"]
        frame_to_super_scenario[frame] = lu.attrib["super-scenario"]

    return frame_to_scenario, frame_to_super_scenario


def _count_bar(values, label):
    """Return a plotly bar chart of how often each distinct value occurs.

    The counts are put into an explicit two-column frame (``label`` and
    ``"count"``) so the chart does not depend on how ``value_counts`` names
    its result, which differs between pandas 1.x and 2.x.
    """
    counts = values.value_counts().rename_axis(label).reset_index(name="count")
    return px.bar(counts, x=label, y="count")


def explore_timeline(teams=("Man. United", "Rangers")):
    """Render the Streamlit page: frame selection, timeline and focus charts.

    Args:
        teams: ``first_role`` values that each get their own chart in the
            "Focus by frame" section.
    """
    data = prepare_data()

    with st.container():

        st.title("Football Perspective Chains")

        frame_label_map = {
            "frame_name": "frames",
            "frame_scenario": "scenarios (groups of related frames)",
            "frame_super_scenario": "super scenarios (groups of related scenarios)",
        }
        frame_column = st.selectbox(
            label="Display frames as: ",
            options=tuple(frame_label_map),
            format_func=lambda label: frame_label_map[label],
        )

        frame_options = sorted(data[frame_column].dropna().unique())
        selected_frames = st.multiselect(
            label="frame subset selection",
            options=frame_options,
            default=frame_options,
        )

        # Every chart shows the same rows: the selected frames that have a
        # first role. Compute that subset once.
        visible = data[data[frame_column].isin(selected_frames)].dropna(
            axis=0, subset=["first_role"]
        )
        if visible.empty:
            # Nothing to plot; avoid handing empty frames to plotly.
            st.info("No frame instances match the current selection.")
            return

        st.header("Timeline")
        time_scatter = px.scatter(
            visible, x="first_role", y="time_point", color=frame_column
        )
        time_scatter.update_traces(marker_size=20)
        time_scatter.update_layout(height=1000)
        st.plotly_chart(time_scatter)

        st.header("Overall focus")
        st.plotly_chart(_count_bar(visible["first_role"], "first_role"))

        st.header("Focus by frame")

        for team in teams:
            st.subheader(team)
            team_frames = visible.loc[visible["first_role"] == team, frame_column]
            if team_frames.empty:
                st.info("No frame instances for this team in the current selection.")
                continue
            st.plotly_chart(_count_bar(team_frames, frame_column))




# Render the page when this file is executed as a script.
if __name__ == "__main__":
    explore_timeline()