Spaces:
Runtime error
Runtime error
File size: 3,912 Bytes
b3509ba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
import os
import threading
import json
import uuid
from pathlib import Path
import datetime
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg') # need a different backend for multithreading
import numpy as np
class DictSharedMemory():
"""The simplest most stupid shared memory implementation that uses json to store the entries.
"""
def __init__(self, file_loc=None):
"""Initialize the shared memory. In the current architecture the memory always consists of a set of soltuions or evaluations.
Moreover, the project is designed around LLMs for the proof of concepts, so we treat all entry content as a string.
"""
if file_loc is not None:
self.file_loc = Path(file_loc)
if not self.file_loc.exists():
self.file_loc.touch()
self.lock = threading.Lock()
def add_entry(self, score, agent_id, agent_cycle, entry):
"""Add an entry to the internal memory.
"""
with self.lock:
entry_id = str(uuid.uuid4())
data = {}
epoch = datetime.datetime.utcfromtimestamp(0)
epoch = (datetime.datetime.utcnow() - epoch).total_seconds()
data[entry_id] = {"agent":agent_id, "epoch": epoch, "score": score, "cycle": agent_cycle, "content": entry}
status = self.write_to_file(data)
self.plot_performance()
return status
def get_top_n(self, n):
"""Get the top n entries from the internal memory.
"""
raise NotImplementedError
def write_to_file(self, data):
"""Write the internal memory to a file.
"""
if self.file_loc is not None:
with open(self.file_loc, "r") as f:
try:
file_data = json.load(f)
except:
file_data = {}
file_data = file_data | data
with open(self.file_loc, "w") as f:
json.dump(file_data, f, indent=4)
f.flush()
os.fsync(f.fileno())
return True
def plot_performance(self):
"""Plot the performance of the swarm.
TODO: move it to the logger
"""
with open(self.file_loc, "r") as f:
shared_memory = json.load(f)
# f.flush()
# os.fsync(f.fileno())
df = pd.DataFrame.from_dict(shared_memory, orient="index")
df["agent"] = df["agent"].astype(int)
df["epoch"] = df["epoch"].astype(float)
df["score"] = df["score"].astype(float)
df["cycle"] = df["cycle"].astype(int)
df["content"] = df["content"].astype(str)
fig = plt.figure(figsize=(20, 5))
df = df.sort_values(by="epoch")
df = df.sort_values(by="epoch")
x = df["epoch"].values - df["epoch"].min()
y = df["score"].values
# apply moving average
if len(y) < 20:
window_size = len(y)
else:
window_size = len(y)//10
try:
y_padded = np.pad(y, (window_size//2, window_size//2), mode="reflect")
y_ma = np.convolve(y_padded, np.ones(window_size)/window_size, mode="same")
y_ma = y_ma[window_size//2:-window_size//2]
#moving max
y_max_t = [np.max(y[:i]) for i in range(1, len(y)+1)]
plt.plot(x, y_ma, label="Average score of recently submitted solutions")
plt.plot(x, y_max_t, label="Best at time t")
plt.plot()
plt.ylim([0, 1.02])
plt.xlabel("Time (s)")
plt.ylabel("Score")
plt.legend()
plt.title("Average score of recently submitted solutions")
plt.tight_layout()
plt.savefig(self.file_loc.parent / "performance.png")
except:
pass
plt.close(fig)
|