Spaces:

NahFam13
/

webbyuu

Running

File size: 1,527 Bytes

d26280a

import importlib

from typing import Annotated, Optional

import typer

from langchain.cache import SQLiteCache
from langchain.globals import set_llm_cache

from gpt_engineer.benchmark.benchmarks.load import get_benchmark
from gpt_engineer.benchmark.run import print_results, run


def get_agent(path):
    # Dynamically import the python module at path
    agent_module = importlib.import_module(path.replace("/", ".").replace(".py", ""))
    return agent_module.default_config_agent()


def main(
    path_to_agent: Annotated[
        str,
        typer.Argument(
            help="python file that contains a function called 'default_config_agent'"
        ),
    ],
    benchmarks: Annotated[
        str, typer.Argument(help="benchmark name(s) separated by ','")
    ],
    task_name: Annotated[
        Optional[str], typer.Argument(help="optional task name in benchmark")
    ] = None,
    verbose: Annotated[
        bool, typer.Option(help="print results for each task", show_default=False)
    ] = False,
):
    set_llm_cache(SQLiteCache(database_path=".langchain.db"))

    benchmarks = benchmarks.split(",")
    for benchmark_name in benchmarks:
        benchmark = get_benchmark(benchmark_name)
        agent = get_agent(path_to_agent)

        results = run(agent, benchmark, task_name, verbose=verbose)
        print(
            f"\n--- Results for agent {path_to_agent}, benchmark: {benchmark_name} ---"
        )
        print_results(results)
        print()


if __name__ == "__main__":
    typer.run(main)