File size: 1,527 Bytes
d26280a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import importlib

from typing import Annotated, Optional

import typer

from langchain.cache import SQLiteCache
from langchain.globals import set_llm_cache

from gpt_engineer.benchmark.benchmarks.load import get_benchmark
from gpt_engineer.benchmark.run import print_results, run


def get_agent(path):
    """
    Import the agent module found at ``path`` and build its default agent.

    The path is converted to a dotted module name: a trailing ``.py``
    extension is dropped and every ``/`` becomes ``.``, so
    ``agents/my_agent.py`` is imported as ``agents.my_agent``. A name that is
    already dotted (no ``/``, no ``.py`` suffix) is passed through unchanged.

    Parameters
    ----------
    path : str
        Path to (or dotted name of) a python module that defines a function
        called ``default_config_agent``.

    Returns
    -------
    object
        Whatever the module's ``default_config_agent()`` returns.

    Raises
    ------
    ModuleNotFoundError
        If ``importlib`` cannot import the derived module name.
    AttributeError
        If the module has no ``default_config_agent`` attribute.
    """
    # Strip only the trailing extension. Replacing every ".py" occurrence would
    # also eat the start of names such as "python_agent" (once "/" has become
    # ".") or a whole package directory named "py".
    module_name = path.removesuffix(".py").replace("/", ".")
    agent_module = importlib.import_module(module_name)
    return agent_module.default_config_agent()


def main(
    path_to_agent: Annotated[
        str,
        typer.Argument(
            help="python file that contains a function called 'default_config_agent'"
        ),
    ],
    benchmarks: Annotated[
        str, typer.Argument(help="benchmark name(s) separated by ','")
    ],
    task_name: Annotated[
        Optional[str], typer.Argument(help="optional task name in benchmark")
    ] = None,
    verbose: Annotated[
        bool, typer.Option(help="print results for each task", show_default=False)
    ] = False,
):
    """
    Run one or more benchmarks against an agent and print the results.
    """
    # NOTE: typer uses the docstring above as the command's help text, so it is
    # kept short; the details live in these comments instead.
    #
    # path_to_agent: handed to get_agent() to build the agent.
    # benchmarks:    comma-separated benchmark names; whitespace around each
    #                name is ignored and empty entries (e.g. from a trailing
    #                comma) are skipped.
    # task_name:     forwarded to run(); None when not given on the command line.
    # verbose:       forwarded to run() as its `verbose` keyword.

    # Install an on-disk SQLite cache for LLM calls before any benchmark runs.
    set_llm_cache(SQLiteCache(database_path=".langchain.db"))

    # Keep the `benchmarks` argument untouched and normalise into a new list, so
    # "a, b" and "a,b," resolve to the same benchmark names as "a,b".
    benchmark_names = [name.strip() for name in benchmarks.split(",")]
    for benchmark_name in benchmark_names:
        if not benchmark_name:
            continue

        benchmark = get_benchmark(benchmark_name)
        # A new agent is built for every benchmark, exactly as before.
        # NOTE(review): presumably so no agent state carries over between
        # benchmarks; confirm before hoisting this out of the loop.
        agent = get_agent(path_to_agent)

        results = run(agent, benchmark, task_name, verbose=verbose)
        print(
            f"\n--- Results for agent {path_to_agent}, benchmark: {benchmark_name} ---"
        )
        print_results(results)
        print()


# Script entry point: when the file is executed directly, typer builds the
# command-line interface from main's annotated signature and calls main with
# the parsed arguments.
if __name__ == "__main__":
    typer.run(main)