Spaces:

responsibility-framing
/

football-perspective-chains

Build error

File size: 4,633 Bytes

import streamlit as st
import pandas as pd

import plotly.graph_objects as go
import plotly.express as px

from ast import literal_eval
from lxml import etree as ET


def prepare_data():
    """Load the role-mapped frame annotations and derive the plotting columns.

    Reads ``data/2002829_mapped_roles.csv`` (first column is the index; the
    ``frame``, ``changed_roles``, ``unchanged_roles`` and ``roles`` columns
    hold Python literals and are parsed with ``literal_eval``), then unpacks
    each ``frame`` tuple as ``(sentence_idx, frame_idx, frame_name,
    frame_target)`` and looks up the frame's scenario and super scenario via
    ``load_kicktionary_info()``.

    Returns:
        A DataFrame with the columns ``sentence_idx``, ``frame_idx``,
        ``frame_name``, ``frame_scenario``, ``frame_super_scenario``,
        ``frame_target``, ``changed_roles``, ``roles``, plus

        * ``time_point`` -- position of the frame instance in the article,
          ``(sentence_idx + frame_idx / max frame_idx of that sentence)
          / (max sentence_idx + 1)``;
        * ``first_role`` -- first element of ``changed_roles``, or ``None``
          when ``changed_roles`` is empty.

    Raises:
        KeyError: if a frame name has no entry in the Kicktionary mappings.
    """
    literal_columns = ("frame", "changed_roles", "unchanged_roles", "roles")
    data = pd.read_csv(
        "data/2002829_mapped_roles.csv",
        index_col=0,
        converters={column: literal_eval for column in literal_columns},
    )

    frame_to_scenario, frame_to_super_scenario = load_kicktionary_info()

    # extract information from "frame" tuples, filter & reorder columns
    frames = data["frame"]
    data_ = data.assign(
        sentence_idx=frames.apply(lambda frame: frame[0]),
        frame_idx=frames.apply(lambda frame: frame[1]),
        frame_name=frames.apply(lambda frame: frame[2]),
        frame_scenario=frames.apply(lambda frame: frame_to_scenario[frame[2]]),
        frame_super_scenario=frames.apply(lambda frame: frame_to_super_scenario[frame[2]]),
        frame_target=frames.apply(lambda frame: frame[3]),
    )[
        [
            "sentence_idx",
            "frame_idx",
            "frame_name",
            "frame_scenario",
            "frame_super_scenario",
            "frame_target",
            "changed_roles",
            "roles",
        ]
    ]

    # assign a value to represent each frame instance's "time" point in the article:
    # the sentence index plus the frame's relative position inside that sentence,
    # scaled by the number of sentence slots
    max_sent = data_["sentence_idx"].max()
    max_frame_in_sentence = data_.groupby("sentence_idx")["frame_idx"].transform("max")
    # A sentence whose highest frame index is 0 would divide by zero; mask the
    # zero denominators and place those frames at the start of their sentence.
    offset_in_sentence = (
        data_["frame_idx"] / max_frame_in_sentence.where(max_frame_in_sentence != 0)
    ).fillna(0.0)

    return data_.assign(
        time_point=(data_["sentence_idx"] + offset_in_sentence) / (max_sent + 1),
        first_role=data_["changed_roles"].apply(lambda roles: roles[0] if roles else None),
    )


def load_kicktionary_info():
    """Build the frame lookups from the Kicktionary lexical-unit file.

    Parses ``kicktionary_lu_info.xml`` and reads the ``frame``, ``scenario``
    and ``super-scenario`` attributes of every ``LEXICAL-UNIT`` element.
    Units whose ``frame`` attribute is empty are ignored. When several units
    share a frame, the one visited last determines the stored value.

    Returns:
        A ``(frame_to_scenario, frame_to_super_scenario)`` tuple of dicts,
        both keyed by frame name.
    """
    tree = ET.parse("kicktionary_lu_info.xml")

    # query the document once and keep only units that name a frame
    units_with_frame = [
        unit for unit in tree.xpath(".//LEXICAL-UNIT") if unit.attrib["frame"]
    ]

    frame_to_scenario = {}
    for unit in units_with_frame:
        frame_to_scenario[unit.attrib["frame"]] = unit.attrib["scenario"]

    frame_to_super_scenario = {}
    for unit in units_with_frame:
        frame_to_super_scenario[unit.attrib["frame"]] = unit.attrib["super-scenario"]

    return frame_to_scenario, frame_to_super_scenario


def explore_timeline():
    """Render the "Football Perspective Chains" Streamlit page.

    Loads the prepared frame data, lets the user pick the frame granularity
    (frame, scenario or super scenario) and a subset of its values, and then
    draws three sections for the selected rows:

    * "Timeline" -- scatter plot of ``time_point`` against ``first_role``,
      coloured by the chosen frame column;
    * "Overall focus" -- bar chart of ``first_role`` counts;
    * "Focus by frame" -- one bar chart per team with the counts of the
      chosen frame column among rows whose ``first_role`` is that team.
    """
    data = prepare_data()

    # captions shown in the select box for each frame granularity column
    granularity_captions = {
        "frame_name": "frames",
        "frame_scenario": "scenarios (groups of related frames)",
        "frame_super_scenario": "super scenarios (groups of related scenarios)",
    }

    with st.container():

        st.title("Football Perspective Chains")

        frame_column = st.selectbox(
            label="Display frames as: ",
            options=("frame_name", "frame_scenario", "frame_super_scenario"),
            format_func=lambda option: granularity_captions[option],
        )

        available_frames = sorted(data[frame_column].value_counts().index)
        selected_frames = st.multiselect(
            label="frame subset selection",
            options=available_frames,
            default=available_frames,
        )

        # rows matching the selection; the first two charts additionally
        # need a known first role
        in_selection = data[data[frame_column].isin(selected_frames)]
        with_first_role = in_selection.dropna(axis=0, subset=["first_role"])

        st.header("Timeline")
        time_scatter = with_first_role.plot.scatter(
            x="first_role", y="time_point", backend="plotly", color=frame_column
        )
        time_scatter.update_traces(marker_size=20)
        time_scatter.update_layout(height=1000)
        st.plotly_chart(time_scatter)

        st.header("Overall focus")
        role_counts = with_first_role["first_role"].value_counts()
        focus_bar = role_counts.plot.bar(y="first_role", backend="plotly")
        st.plotly_chart(focus_bar)

        st.header("Focus by frame")

        for team in ["Man. United", "Rangers"]:
            st.subheader(team)
            team_rows = in_selection[in_selection["first_role"] == team]
            frame_counts = team_rows[frame_column].value_counts()
            frame_bar = frame_counts.plot.bar(y=frame_column, backend="plotly")
            st.plotly_chart(frame_bar)




# Build and render the page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    explore_timeline()