|
import argparse |
|
import code |
|
import datetime |
|
import json |
|
import os |
|
from pytz import timezone |
|
import time |
|
|
|
import pandas as pd |
|
import plotly.express as px |
|
import plotly.graph_objects as go |
|
from tqdm import tqdm |
|
|
|
|
|
NUM_SERVERS = 14 |
|
LOG_ROOT_DIR = "~/fastchat_logs" |
|
|
|
|
|
def get_log_files(max_num_files=None):
    """Collect conversation log files from all server directories.

    Scans ``server0`` .. ``server{NUM_SERVERS-1}`` under LOG_ROOT_DIR for
    files ending in "-conv.json".

    Args:
        max_num_files: if given, keep only this many of the most recently
            modified files; otherwise keep all of them.

    Returns:
        List of file paths sorted by modification time, oldest first.
    """
    log_root = os.path.expanduser(LOG_ROOT_DIR)
    filenames = []
    for i in range(NUM_SERVERS):
        for filename in os.listdir(f"{log_root}/server{i}"):
            if filename.endswith("-conv.json"):
                # BUG FIX: the path previously used a literal "(unknown)"
                # instead of the actual filename, so getmtime always failed.
                filepath = f"{log_root}/server{i}/{filename}"
                name_tstamp_tuple = (filepath, os.path.getmtime(filepath))
                filenames.append(name_tstamp_tuple)

    # Sort oldest-first so slicing from the end keeps the newest files.
    filenames = sorted(filenames, key=lambda x: x[1])
    filenames = [x[0] for x in filenames]

    max_num_files = max_num_files or len(filenames)
    filenames = filenames[-max_num_files:]
    return filenames
|
|
|
|
|
def load_log_files(filename):
    """Parse one conversation log file (JSON lines) into a list of row dicts.

    Reading is retried a few times because log files may be briefly
    unavailable while the serving processes rotate them.

    Args:
        filename: path to a "-conv.json" log file.

    Returns:
        List of dicts with keys: type, tstamp, model, models.

    Raises:
        FileNotFoundError: if the file is still missing after all retries.
    """
    for _retry in range(5):
        try:
            with open(filename) as f:
                lines = f.readlines()
            break
        except FileNotFoundError:
            time.sleep(2)
    else:
        # BUG FIX: previously the loop fell through and `lines` was unbound,
        # producing a confusing NameError. Raise the real error instead.
        raise FileNotFoundError(filename)

    data = []
    for line in lines:
        row = json.loads(line)
        data.append(
            dict(
                type=row["type"],
                tstamp=row["tstamp"],
                model=row.get("model", ""),
                models=row.get("models", ["", ""]),
            )
        )
    return data
|
|
|
|
|
def load_log_files_parallel(log_files, num_threads=16):
    """Load many log files concurrently and concatenate all of their rows.

    Args:
        log_files: iterable of log file paths.
        num_threads: size of the worker process pool.

    Returns:
        Flat list of row dicts from every file, in file order.
    """
    from multiprocessing import Pool

    with Pool(num_threads) as pool:
        # imap preserves input order; tqdm shows loading progress.
        per_file_rows = list(
            tqdm(pool.imap(load_log_files, log_files), total=len(log_files))
        )

    # Flatten the per-file lists into a single list of rows.
    return [row for rows in per_file_rows for row in rows]
|
|
|
|
|
def get_anony_vote_df(df):
    """Filter a log DataFrame down to anonymous-battle vote rows.

    A row is kept when its type is one of the four vote events and the
    first entry of its "models" list is empty (anonymous battles log
    empty model names at vote time).
    """
    vote_types = ["leftvote", "rightvote", "tievote", "bothbad_vote"]
    votes = df[df["type"].isin(vote_types)]
    is_anony = votes["models"].apply(lambda models: models[0] == "")
    return votes[is_anony]
|
|
|
|
|
def merge_counts(series, on, names):
    """Join several count tables on a shared key and rename the count columns.

    Args:
        series: list (length >= 2) of count objects, e.g. value_counts results.
        on: key to join on.
        names: replacement names for the merged count columns, aligned
            positionally with `series`.

    Returns:
        Merged DataFrame (index reset) with count columns renamed to `names`.
    """
    merged = pd.merge(series[0], series[1], on=on)
    for extra in series[2:]:
        merged = pd.merge(merged, extra, on=on)
    merged = merged.reset_index()

    # The trailing len(series) columns are the count columns, one per input.
    count_cols = list(merged.columns)[-len(series):]
    return merged.rename(columns=dict(zip(count_cols, names)))
|
|
|
|
|
def report_basic_stats(log_files):
    """Aggregate the given log files into summary figures and markdown tables.

    Args:
        log_files: paths of "-conv.json" files to load and aggregate.

    Returns:
        Dict with:
            chat_dates_bar: plotly stacked bar of daily chats vs. anony votes.
            model_hist_md / action_hist_md / anony_vote_hist_md: markdown
                count tables (all-time / last day / last hour where present).
            num_chats_last_24_hours: markdown table of hourly chat counts.
            last_updated_datetime: timestamp string of the newest log entry.
    """
    df_all = load_log_files_parallel(log_files)
    df_all = pd.DataFrame(df_all)
    # "Now" is the newest logged timestamp, not wall-clock time.
    now_t = df_all["tstamp"].max()
    df_1_hour = df_all[df_all["tstamp"] > (now_t - 3600)]
    df_1_day = df_all[df_all["tstamp"] > (now_t - 3600 * 24)]
    anony_vote_df_all = get_anony_vote_df(df_all)

    # Daily chat and anony-vote counts, bucketed by US/Pacific calendar day.
    chat_dates = [
        datetime.datetime.fromtimestamp(x, tz=timezone("US/Pacific")).strftime(
            "%Y-%m-%d"
        )
        for x in df_all[df_all["type"] == "chat"]["tstamp"]
    ]
    # NOTE(review): pd.value_counts on a plain list is deprecated in pandas 2.x;
    # consider pd.Series(chat_dates).value_counts() when upgrading.
    chat_dates_counts = pd.value_counts(chat_dates)
    vote_dates = [
        datetime.datetime.fromtimestamp(x, tz=timezone("US/Pacific")).strftime(
            "%Y-%m-%d"
        )
        for x in anony_vote_df_all["tstamp"]
    ]
    vote_dates_counts = pd.value_counts(vote_dates)
    # Stacked bar chart: anony votes and chats per day.
    chat_dates_bar = go.Figure(
        data=[
            go.Bar(
                name="Anony. Vote",
                x=vote_dates_counts.index,
                y=vote_dates_counts,
                text=[f"{val:.0f}" for val in vote_dates_counts],
                textposition="auto",
            ),
            go.Bar(
                name="Chat",
                x=chat_dates_counts.index,
                y=chat_dates_counts,
                text=[f"{val:.0f}" for val in chat_dates_counts],
                textposition="auto",
            ),
        ]
    )
    chat_dates_bar.update_layout(
        barmode="stack",
        xaxis_title="Dates",
        yaxis_title="Count",
        height=300,
        width=1200,
    )

    # Per-model chat counts over three windows, merged into one table.
    model_hist_all = df_all[df_all["type"] == "chat"]["model"].value_counts()
    model_hist_1_day = df_1_day[df_1_day["type"] == "chat"]["model"].value_counts()
    model_hist_1_hour = df_1_hour[df_1_hour["type"] == "chat"]["model"].value_counts()
    model_hist = merge_counts(
        [model_hist_all, model_hist_1_day, model_hist_1_hour],
        on="model",
        names=["All", "Last Day", "Last Hour"],
    )
    model_hist_md = model_hist.to_markdown(index=False, tablefmt="github")

    # Per-event-type counts over the same three windows.
    action_hist_all = df_all["type"].value_counts()
    action_hist_1_day = df_1_day["type"].value_counts()
    action_hist_1_hour = df_1_hour["type"].value_counts()
    action_hist = merge_counts(
        [action_hist_all, action_hist_1_day, action_hist_1_hour],
        on="type",
        names=["All", "Last Day", "Last Hour"],
    )
    action_hist_md = action_hist.to_markdown(index=False, tablefmt="github")

    # Anonymous vote counts: all-time vs. last day only.
    anony_vote_hist_all = anony_vote_df_all["type"].value_counts()
    anony_vote_df_1_day = get_anony_vote_df(df_1_day)
    anony_vote_hist_1_day = anony_vote_df_1_day["type"].value_counts()

    anony_vote_hist = merge_counts(
        [anony_vote_hist_all, anony_vote_hist_1_day],
        on="type",
        names=["All", "Last Day"],
    )
    anony_vote_hist_md = anony_vote_hist.to_markdown(index=False, tablefmt="github")

    # Hourly chat counts over the last day. Buckets are anchored at the
    # oldest last-day timestamp and emitted newest-first (i counts down).
    chat_1_day = df_1_day[df_1_day["type"] == "chat"]
    num_chats_last_24_hours = []
    base = df_1_day["tstamp"].min()
    for i in range(24, 0, -1):
        left = base + (i - 1) * 3600
        right = base + i * 3600
        num = ((chat_1_day["tstamp"] >= left) & (chat_1_day["tstamp"] < right)).sum()
        num_chats_last_24_hours.append(num)
    # Labels use each bucket's right edge, same newest-first order as counts.
    times = [
        datetime.datetime.fromtimestamp(
            base + i * 3600, tz=timezone("US/Pacific")
        ).strftime("%Y-%m-%d %H:%M:%S %Z")
        for i in range(24, 0, -1)
    ]
    last_24_hours_df = pd.DataFrame({"time": times, "value": num_chats_last_24_hours})
    last_24_hours_md = last_24_hours_df.to_markdown(index=False, tablefmt="github")

    # Human-readable timestamp of the newest log entry.
    last_updated_tstamp = now_t
    last_updated_datetime = datetime.datetime.fromtimestamp(
        last_updated_tstamp, tz=timezone("US/Pacific")
    ).strftime("%Y-%m-%d %H:%M:%S %Z")

    return {
        "chat_dates_bar": chat_dates_bar,
        "model_hist_md": model_hist_md,
        "action_hist_md": action_hist_md,
        "anony_vote_hist_md": anony_vote_hist_md,
        "num_chats_last_24_hours": last_24_hours_md,
        "last_updated_datetime": last_updated_datetime,
    }
|
|
|
|
|
if __name__ == "__main__":
    # CLI entry point: aggregate logs and print the markdown stat tables.
    parser = argparse.ArgumentParser()
    parser.add_argument("--max-num-files", type=int)
    args = parser.parse_args()

    log_files = get_log_files(args.max_num_files)
    basic_stats = report_basic_stats(log_files)

    # Print each table followed by a blank separator line.
    for table_key in (
        "action_hist_md",
        "model_hist_md",
        "anony_vote_hist_md",
        "num_chats_last_24_hours",
    ):
        print(basic_stats[table_key] + "\n")
|