Spaces:

valory
/

olas-prediction-live-dashboard

Running

File size: 8,958 Bytes

import pandas as pd
import gradio as gr
from typing import List
from tabs.metrics import tool_metric_choices
import plotly.express as px


# Shared chart dimensions: the plotting functions below pass these as the
# height/width of every figure they build (presumably pixels — confirm against
# the gradio/plotly versions in use).
HEIGHT = 600
WIDTH = 1000


def prepare_tools(tools: pd.DataFrame) -> pd.DataFrame:
    """Prepare the raw tools data for the weekly graphs.

    Parses ``request_time``, labels every row with a
    ``request_month_year_week`` string derived from its weekly period
    (formatted with ``"%b-%d"``), and appends a duplicate of every row whose
    ``market_creator`` is set to ``"all"`` so that the combined total can be
    shown next to the individual market creators.

    The input frame is not modified; all work happens on a copy.

    Args:
        tools: Tool requests with at least a ``request_time`` column that
            ``pd.to_datetime`` can parse.

    Returns:
        A new DataFrame with twice as many rows as ``tools``, sorted by
        ``request_time`` in ascending order.
    """
    # Copy first: assigning the parsed column below would otherwise write
    # into the caller's DataFrame.
    tools = tools.copy()
    tools["request_time"] = pd.to_datetime(tools["request_time"])
    tools = tools.sort_values(by="request_time", ascending=True)

    # request_time is already datetime at this point, so it can be used
    # directly without a second conversion.
    tools["request_month_year_week"] = (
        tools["request_time"].dt.to_period("W").dt.strftime("%b-%d")
    )

    # Duplicate every row under the synthetic "all" market creator.
    tools_all = tools.copy(deep=True)
    tools_all["market_creator"] = "all"

    # Concatenation puts the "all" rows after the originals, so re-sort.
    combined = pd.concat([tools, tools_all], ignore_index=True)
    return combined.sort_values(by="request_time", ascending=True)


def get_tool_winning_rate(tools_df: pd.DataFrame, inc_tools: List[str]) -> pd.DataFrame:
    """Compute the weekly winning rate of each selected tool.

    A request counts as a win when its ``vote`` equals the market's
    ``currentAnswer``. Rows flagged with ``error == 1`` and rows whose vote or
    current answer is not ``"Yes"``/``"No"`` (after normalising lower-case
    ``"yes"``/``"no"`` answers) are ignored.

    Args:
        tools_df: Tool requests with the columns ``tool``, ``error``,
            ``currentAnswer``, ``vote`` and ``request_month_year_week``.
        inc_tools: Names of the tools to include.

    Returns:
        One row per (tool, week) with the columns ``tool``,
        ``request_month_year_week``, ``"0"`` (losses), ``"1"`` (wins),
        ``win_perc`` (wins as a percentage of all counted requests) and
        ``total_request``. An empty frame with these columns is returned when
        no request survives the filters.
    """
    group_cols = ["tool", "request_month_year_week"]

    selected = tools_df["tool"].isin(inc_tools) & (tools_df["error"] != 1)
    # Explicit copy: the frame is modified below and must not alias tools_df.
    resolved = tools_df[selected].copy()
    resolved["currentAnswer"] = resolved["currentAnswer"].replace(
        {"no": "No", "yes": "Yes"}
    )
    binary_outcome = resolved["currentAnswer"].isin(["Yes", "No"]) & resolved[
        "vote"
    ].isin(["Yes", "No"])
    resolved = resolved[binary_outcome].copy()
    resolved["win"] = (resolved["currentAnswer"] == resolved["vote"]).astype(int)
    resolved.columns = resolved.columns.astype(str)

    if resolved.empty:
        # Nothing to aggregate: return the expected schema instead of failing
        # on the missing 0/1 columns.
        empty = pd.DataFrame(
            columns=[*group_cols, "0", "1", "win_perc", "total_request"]
        )
        return empty.astype(
            {"0": float, "1": float, "win_perc": float, "total_request": float}
        )

    wins = (
        resolved.groupby([*group_cols, "win"])
        .size()
        .unstack()
        .fillna(0)
        # unstack only creates columns for outcomes that actually occur, so
        # guarantee both the loss (0) and win (1) columns exist.
        .reindex(columns=[0, 1], fill_value=0)
    )
    wins["win_perc"] = (wins[1] / (wins[0] + wins[1])) * 100
    wins = wins.reset_index()
    wins["total_request"] = wins[0] + wins[1]
    wins.columns = wins.columns.astype(str)
    # Make sure the week label is a plain string for the plotting layer.
    wins["request_month_year_week"] = wins["request_month_year_week"].astype(str)
    return wins


def get_tool_winning_rate_by_market(
    tools_df: pd.DataFrame, inc_tools: List[str]
) -> pd.DataFrame:
    """Compute the weekly winning rate of each selected tool per market creator.

    A request counts as a win when its ``vote`` equals the market's
    ``currentAnswer``. Rows flagged with ``error == 1`` and rows whose vote or
    current answer is not ``"Yes"``/``"No"`` (after normalising lower-case
    ``"yes"``/``"no"`` answers) are ignored.

    Args:
        tools_df: Tool requests with the columns ``tool``, ``error``,
            ``currentAnswer``, ``vote``, ``request_month_year_week`` and
            ``market_creator``.
        inc_tools: Names of the tools to include.

    Returns:
        One row per (tool, week, market creator) with the columns ``tool``,
        ``request_month_year_week``, ``market_creator``, ``"0"`` (losses),
        ``"1"`` (wins), ``win_perc`` (wins as a percentage of all counted
        requests) and ``total_request``. An empty frame with these columns is
        returned when no request survives the filters.
    """
    group_cols = ["tool", "request_month_year_week", "market_creator"]

    selected = tools_df["tool"].isin(inc_tools) & (tools_df["error"] != 1)
    # Explicit copy: the frame is modified below and must not alias tools_df.
    resolved = tools_df[selected].copy()
    resolved["currentAnswer"] = resolved["currentAnswer"].replace(
        {"no": "No", "yes": "Yes"}
    )
    binary_outcome = resolved["currentAnswer"].isin(["Yes", "No"]) & resolved[
        "vote"
    ].isin(["Yes", "No"])
    resolved = resolved[binary_outcome].copy()
    resolved["win"] = (resolved["currentAnswer"] == resolved["vote"]).astype(int)
    resolved.columns = resolved.columns.astype(str)

    if resolved.empty:
        # Nothing to aggregate: return the expected schema instead of failing
        # on the missing 0/1 columns.
        empty = pd.DataFrame(
            columns=[*group_cols, "0", "1", "win_perc", "total_request"]
        )
        return empty.astype(
            {"0": float, "1": float, "win_perc": float, "total_request": float}
        )

    wins = (
        resolved.groupby([*group_cols, "win"], sort=False)
        .size()
        .unstack()
        .fillna(0)
        # unstack only creates columns for outcomes that actually occur, so
        # guarantee both the loss (0) and win (1) columns exist.
        .reindex(columns=[0, 1], fill_value=0)
    )
    wins["win_perc"] = (wins[1] / (wins[0] + wins[1])) * 100
    wins = wins.reset_index()
    wins["total_request"] = wins[0] + wins[1]
    wins.columns = wins.columns.astype(str)
    return wins


def get_overall_winning_rate(wins_df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate per-tool winning data into one row per week.

    For every ``request_month_year_week`` the ``"0"`` and ``"1"`` counts and
    ``total_request`` are summed, while ``win_perc`` is averaged across the
    rows of that week. The count columns are returned as ``losses`` and
    ``wins``.
    """
    aggregations = {
        "0": "sum",
        "1": "sum",
        "win_perc": "mean",
        "total_request": "sum",
    }
    readable_names = {"0": "losses", "1": "wins"}

    per_week = wins_df.groupby("request_month_year_week").agg(aggregations)
    per_week = per_week.rename(columns=readable_names)
    return per_week.reset_index()


def get_overall_winning_rate_by_market(wins_df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate per-tool winning data into one row per week and market creator.

    For every (``request_month_year_week``, ``market_creator``) pair the
    ``"0"`` and ``"1"`` counts and ``total_request`` are summed, while
    ``win_perc`` is averaged across the rows of that pair. The count columns
    are returned as ``losses`` and ``wins``. Groups keep the order in which
    they first appear in ``wins_df``.
    """
    group_keys = ["request_month_year_week", "market_creator"]
    aggregations = {
        "0": "sum",
        "1": "sum",
        "win_perc": "mean",
        "total_request": "sum",
    }
    readable_names = {"0": "losses", "1": "wins"}

    per_week_market = wins_df.groupby(group_keys, sort=False).agg(aggregations)
    per_week_market = per_week_market.rename(columns=readable_names)
    return per_week_market.reset_index()


def plot_tool_winnings_overall(
    wins_df: pd.DataFrame, winning_selector: str = "win_perc"
) -> gr.BarPlot:
    """Build a bar plot of one winning metric per week.

    Args:
        wins_df: Data to plot; must contain ``request_month_year_week`` and
            the column named by ``winning_selector``.
        winning_selector: Column shown on the y axis, also used as the y-axis
            title.

    Returns:
        A ``gr.BarPlot`` sized ``WIDTH`` x ``HEIGHT``.
    """
    week_column = "request_month_year_week"
    plot_options = {
        "value": wins_df,
        "x": week_column,
        "y": winning_selector,
        "title": "Winning Rate",
        "x_title": "Date",
        "y_title": winning_selector,
        "tooltip": [week_column, winning_selector],
        "show_label": True,
        "interactive": True,
        "show_actions_button": True,
        "height": HEIGHT,
        "width": WIDTH,
    }
    return gr.BarPlot(**plot_options)


def sort_key(date_str):
    """Return a sortable tuple for a ``"<Mon>-<number>"`` label.

    The label is split at its hyphen into an English month abbreviation and an
    integer. The result is ``(number // 100, month_number, number % 100)``
    with ``month_number`` in 1..12, so labels order by the leading digits of
    the number first, then by month, then by the last two digits. For numbers
    below 100 (e.g. ``"Jan-05"``) the first component is always 0.

    Raises:
        ValueError: If the label does not contain exactly one hyphen, the
            month abbreviation is unknown, or the number is not an integer.
    """
    month_abbreviations = (
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    )
    month_label, number_label = date_str.split("-")
    month_number = month_abbreviations.index(month_label) + 1
    leading, trailing = divmod(int(number_label), 100)
    return (leading, month_number, trailing)


def integrated_plot_tool_winnings_overall_per_market_by_week(
    winning_df: pd.DataFrame,
    winning_selector: str = "Weekly Mean Mech Tool Accuracy as (Accurate Responses/All) %",
) -> gr.Plot:
    """Plot a weekly metric as grouped bars, one bar per market creator.

    Args:
        winning_df: Per-tool winning data, aggregated here with
            ``get_overall_winning_rate_by_market``.
        winning_selector: Metric name; it is looked up in
            ``tool_metric_choices`` to find the column to plot and is also
            used as the y-axis title.

    Returns:
        A ``gr.Plot`` wrapping the plotly bar figure.
    """
    week_column = "request_month_year_week"

    # Translate the human-readable metric name into its column name.
    metric_column = tool_metric_choices.get(winning_selector)

    weekly = get_overall_winning_rate_by_market(winning_df)

    # Week labels are strings, so order them with sort_key and keep that
    # order through an ordered categorical.
    week_order = sorted(weekly[week_column].unique(), key=sort_key)
    weekly[week_column] = pd.Categorical(
        weekly[week_column], categories=week_order, ordered=True
    )
    weekly = weekly.sort_values(week_column)

    creator_order = ["pearl", "quickstart", "all"]
    creator_colors = ["purple", "goldenrod", "darkgreen"]
    fig = px.bar(
        weekly,
        x=week_column,
        y=metric_column,
        color="market_creator",
        barmode="group",
        color_discrete_sequence=creator_colors,
        category_orders={
            "market_creator": creator_order,
            week_column: week_order,
        },
    )
    fig.update_layout(
        xaxis_title="Week",
        yaxis_title=winning_selector,
        legend={"yanchor": "top", "y": 0.5},
        width=WIDTH,
        height=HEIGHT,
    )
    fig.update_xaxes(tickformat="%b %d\n%Y")
    return gr.Plot(value=fig)


def plot_tool_winnings_by_tool(wins_df: pd.DataFrame, tool: str) -> gr.BarPlot:
    """Build a bar plot of the weekly ``win_perc`` of a single tool.

    Args:
        wins_df: Winning data with ``tool``, ``request_month_year_week`` and
            ``win_perc`` columns.
        tool: Only rows whose ``tool`` equals this value are plotted.

    Returns:
        A ``gr.BarPlot`` sized ``WIDTH`` x ``HEIGHT``.
    """
    week_column = "request_month_year_week"
    metric_column = "win_perc"
    rows_for_tool = wins_df[wins_df["tool"] == tool]

    plot_options = {
        "value": rows_for_tool,
        "x": week_column,
        "y": metric_column,
        "title": "Winning Rate",
        "x_title": "Week",
        "y_title": "Winning Rate",
        "tooltip": [week_column, metric_column],
        "show_label": True,
        "interactive": True,
        "show_actions_button": True,
        "height": HEIGHT,
        "width": WIDTH,
    }
    return gr.BarPlot(**plot_options)


def integrated_tool_winnings_by_tool_per_market(
    wins_df: pd.DataFrame, tool: str
) -> gr.Plot:
    """Plot the weekly ``win_perc`` of one tool as grouped bars per market creator.

    Args:
        wins_df: Winning data with ``tool``, ``request_month_year_week``,
            ``market_creator`` and ``win_perc`` columns. It is not modified.
        tool: Only rows whose ``tool`` equals this value are plotted.

    Returns:
        A ``gr.Plot`` wrapping the plotly bar figure.
    """
    week_column = "request_month_year_week"

    # Copy the selection so the categorical column assigned below is written
    # to an independent frame rather than to a slice of wins_df.
    tool_wins_df = wins_df[wins_df["tool"] == tool].copy()

    # Week labels are strings, so order them with sort_key and keep that
    # order through an ordered categorical.
    sorted_categories = sorted(tool_wins_df[week_column].unique(), key=sort_key)
    tool_wins_df[week_column] = pd.Categorical(
        tool_wins_df[week_column],
        categories=sorted_categories,
        ordered=True,
    )

    # Sort the frame that is actually plotted (previously the unfiltered
    # wins_df was sorted and the result was never used).
    tool_wins_df = tool_wins_df.sort_values(week_column)

    fig = px.bar(
        tool_wins_df,
        x=week_column,
        y="win_perc",
        color="market_creator",
        barmode="group",
        color_discrete_sequence=["purple", "goldenrod", "darkgreen"],
        category_orders={
            "market_creator": ["pearl", "quickstart", "all"],
            week_column: sorted_categories,
        },
    )

    fig.update_layout(
        xaxis_title="Week",
        yaxis_title="Weekly Mean Mech Tool Accuracy as (Accurate Responses/All) %",
        legend=dict(yanchor="top", y=0.5),
    )
    fig.update_layout(width=WIDTH, height=HEIGHT)
    fig.update_xaxes(tickformat="%b %d\n%Y")
    return gr.Plot(value=fig)