File size: 3,912 Bytes
b3509ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import datetime
import json
import logging
import os
import threading
import uuid
from pathlib import Path

import matplotlib
matplotlib.use('Agg') # need a different backend for multithreading
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

class DictSharedMemory:
    """Minimal shared memory that persists entries as one JSON object on disk.

    Every entry is stored under a random UUID key and holds the submitting
    agent, a UTC epoch timestamp, a score, the agent cycle and the content.
    A ``threading.Lock`` serialises ``add_entry`` calls made through the same
    instance.
    """

    _log = logging.getLogger(__name__)

    def __init__(self, file_loc=None):
        """Initialize the shared memory.

        In the current architecture the memory always consists of a set of
        solutions or evaluations. The project is designed around LLMs for the
        proof of concept, so all entry content is treated as a string.

        Args:
            file_loc: Path of the JSON file backing the memory. The file is
                created (empty) when it does not exist yet. When ``None`` the
                memory has no backing file and nothing is persisted.
        """
        # Always define the attribute so later ``is None`` checks cannot raise
        # AttributeError when no location was given.
        self.file_loc = None
        if file_loc is not None:
            self.file_loc = Path(file_loc)
            if not self.file_loc.exists():
                self.file_loc.touch()

        self.lock = threading.Lock()

    def add_entry(self, score, agent_id, agent_cycle, entry):
        """Add an entry to the memory file and refresh the performance plot.

        Args:
            score: Score of the submitted entry.
            agent_id: Identifier of the submitting agent.
            agent_cycle: Cycle of the agent in which the entry was produced.
            entry: Content of the entry.

        Returns:
            ``True`` when the entry was written to the backing file, ``False``
            when the memory has no backing file.
        """
        with self.lock:
            entry_id = str(uuid.uuid4())
            # Seconds since the Unix epoch (UTC); timezone-aware replacement
            # for the deprecated utcnow()/utcfromtimestamp() pair.
            epoch = datetime.datetime.now(datetime.timezone.utc).timestamp()
            data = {
                entry_id: {
                    "agent": agent_id,
                    "epoch": epoch,
                    "score": score,
                    "cycle": agent_cycle,
                    "content": entry,
                }
            }
            status = self.write_to_file(data)
            if status:
                self.plot_performance()
            return status

    def get_top_n(self, n):
        """Get the top n entries from the internal memory.

        Raises:
            NotImplementedError: Always; this class does not implement it.
        """
        raise NotImplementedError

    def write_to_file(self, data):
        """Merge ``data`` into the JSON file backing the memory.

        Existing keys are overwritten by keys in ``data``. The file is flushed
        and fsync'ed before returning.

        Args:
            data: Mapping of entry id to entry record.

        Returns:
            ``True`` when the file was written, ``False`` when the memory has
            no backing file.
        """
        if self.file_loc is None:
            self._log.warning("No file location configured; entry was not stored.")
            return False

        with open(self.file_loc, "r", encoding="utf-8") as f:
            try:
                file_data = json.load(f)
            except ValueError:
                # json.JSONDecodeError is a ValueError; a freshly touched file
                # is empty and therefore not valid JSON. Start from scratch.
                file_data = {}

        file_data = file_data | data
        with open(self.file_loc, "w", encoding="utf-8") as f:
            json.dump(file_data, f, indent=4)
            f.flush()
            os.fsync(f.fileno())

        return True

    @staticmethod
    def _moving_average(y, window_size):
        """Return the centred moving average of ``y``, same length as ``y``.

        The signal is reflect-padded by ``window_size // 2`` on both sides so
        the average is defined at the borders, then the padding is trimmed.
        """
        window_size = max(1, int(window_size))
        half = window_size // 2
        y_padded = np.pad(y, (half, half), mode="reflect")
        kernel = np.ones(window_size) / window_size
        smoothed = np.convolve(y_padded, kernel, mode="same")
        # Slice by explicit length: ``[half:-half]`` is wrong for half == 0 and
        # ``-window_size//2`` floors towards -inf for odd windows, both of
        # which yield an array shorter than ``y``.
        return smoothed[half:half + len(y)]

    def plot_performance(self):
        """Plot the performance of the swarm.

        Reads all entries from the backing file and saves ``performance.png``
        next to it, showing a moving average of the scores and the best score
        reached so far over time. Plotting is best effort: failures are logged
        and never propagated to the caller.

        TODO: move it to the logger
        """
        if self.file_loc is None:
            return

        try:
            with open(self.file_loc, "r", encoding="utf-8") as f:
                shared_memory = json.load(f)
        except (OSError, ValueError):
            # Missing, empty or invalid file: nothing to plot.
            return

        if not shared_memory:
            return

        try:
            self._render_performance(shared_memory)
        except Exception:
            self._log.exception("Could not render the performance plot.")

    def _render_performance(self, shared_memory):
        """Build the performance figure from ``shared_memory`` and save it."""
        df = pd.DataFrame.from_dict(shared_memory, orient="index")
        # Only the columns that are plotted are converted, so unrelated fields
        # (e.g. non-integer agent ids) cannot make the plot fail.
        df["epoch"] = df["epoch"].astype(float)
        df["score"] = df["score"].astype(float)
        df = df.sort_values(by="epoch")

        x = df["epoch"].values - df["epoch"].min()
        y = df["score"].values

        # Use the whole history as window while it is short, 10% of it later.
        if len(y) < 20:
            window_size = len(y)
        else:
            window_size = len(y) // 10
        y_ma = self._moving_average(y, window_size)

        # Best score seen up to each point in time.
        y_max_t = np.maximum.accumulate(y)

        # Draw on explicit figure/axes objects rather than pyplot's implicit
        # "current figure" state.
        fig, ax = plt.subplots(figsize=(20, 5))
        try:
            ax.plot(x, y_ma, label="Average score of recently submitted solutions")
            ax.plot(x, y_max_t, label="Best at time t")
            ax.set_ylim([0, 1.02])
            ax.set_xlabel("Time (s)")
            ax.set_ylabel("Score")
            ax.legend()
            ax.set_title("Average score of recently submitted solutions")
            fig.tight_layout()
            fig.savefig(self.file_loc.parent / "performance.png")
        finally:
            plt.close(fig)