import json

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Script configuration.
# `infile` is the JSON-lines file that is read; `date` is interpolated into
# the plot title and the output image filename.
infile = "output.jsonl"
date = "2024-03"

# One "time to vote" sample per input record: the record's "timestamp"
# minus the later of the left/right "finish" values.
durations = []

with open(infile, encoding="utf-8") as f:
    for line in f:
        # Skip blank lines (e.g. a trailing newline at the end of the file)
        # instead of letting json.loads fail on an empty string.
        if not line.strip():
            continue
        data = json.loads(line)
        left_finish = data["left"]["finish"]
        right_finish = data["right"]["finish"]
        vote_time = data["timestamp"]
        durations.append(vote_time - max(left_finish, right_finish))

# np.max raises ValueError on an empty sequence, so stop early with a
# readable message when the input contained no records.
if not durations:
    raise SystemExit(f"No records found in {infile}; nothing to plot.")

# Summary statistics of the raw durations, before any overflow clipping.
print(
    f"Avg: {np.mean(durations)}, Median: {np.median(durations)}, Max: {np.max(durations)}"
)

# Histogram layout: `num_bins_inside_cutoff` equal-width bins span
# [0, cutoff]; anything larger is collapsed into one extra overflow bin.
cutoff = 200.0
num_bins_inside_cutoff = 20

# Every duration above the cutoff is replaced by a single value half a bin
# width past the cutoff. The list is updated in place.
overflow_value = cutoff + 0.5 * cutoff / num_bins_inside_cutoff
durations[:] = [
    overflow_value if duration > cutoff else duration for duration in durations
]

# Evenly spaced edges from 0 to the cutoff (num_bins_inside_cutoff bins).
bin_edges = np.linspace(0, cutoff, num_bins_inside_cutoff + 1)

# One extra edge, a single bin width beyond the cutoff, closes the overflow
# bin that holds the clipped values.
overflow_cap = cutoff + cutoff / num_bins_inside_cutoff
bin_edges = np.append(bin_edges, overflow_cap)

# Draw the histogram on the explicit bin edges, without a KDE overlay.
sns.histplot(durations, bins=bin_edges, kde=False)
plt.title(f'Distribution of "time to vote" {date}')
plt.xlabel("Duration (seconds)")
plt.ylabel("Frequency")

# Mark where the regular bins end and the overflow bin begins.
plt.axvline(x=cutoff, color="red", linestyle="--")
# Place the caption just right of the line, at 90% of the current upper
# y-limit.
plt.text(cutoff + 1, plt.ylim()[1] * 0.9, "Overflow", color="red", ha="left")

# Hide the x tick label at the overflow bin's right edge, if there is one.
# That edge is not a real duration, since the overflow bin holds every
# value above the cutoff.
ax = plt.gca()

# Tick label text may be empty until the figure has been rendered, so draw
# once before reading it.
ax.figure.canvas.draw()

# Derive the label from the configured bins rather than hard-coding it, so
# it stays correct when cutoff / num_bins_inside_cutoff change.
overflow_label = f"{overflow_cap:g}"
labels = [item.get_text() for item in ax.get_xticklabels()]
if overflow_label in labels:
    labels[labels.index(overflow_label)] = ""
    # set_xticklabels is only reliable once the tick positions are fixed.
    # Fixing them may widen the view limits, so restore the limits after.
    x_limits = ax.get_xlim()
    ax.set_xticks(ax.get_xticks())
    ax.set_xticklabels(labels)
    ax.set_xlim(x_limits)

# Tighten the layout so titles and axis labels are not clipped.
plt.tight_layout()

# Write the figure to a PNG named after the configured date.
plt.savefig(f"duration_distribution_time_to_vote_{date}.png", dpi=300)