File size: 7,750 Bytes
60e3a80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
from typing import Optional

from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn
import typer.rich_utils
from typing_extensions import Annotated
import typer
import uvicorn
import os
import webbrowser

from chromadb.api.client import Client
from chromadb.cli.utils import get_directory_size, set_log_file_path, sizeof_fmt
from chromadb.config import Settings, System
from chromadb.db.impl.sqlite import SqliteDB
from chromadb.ingest.impl.utils import trigger_vector_segments_max_seq_id_migration
from chromadb.segment import SegmentManager

app = typer.Typer()
utils_app = typer.Typer(short_help="Use maintenance utilities")
app.add_typer(utils_app, name="utils")

_logo = """
                \033[38;5;069m(((((((((    \033[38;5;203m(((((\033[38;5;220m####
             \033[38;5;069m(((((((((((((\033[38;5;203m(((((((((\033[38;5;220m#########
           \033[38;5;069m(((((((((((((\033[38;5;203m(((((((((((\033[38;5;220m###########
         \033[38;5;069m((((((((((((((\033[38;5;203m((((((((((((\033[38;5;220m############
        \033[38;5;069m(((((((((((((\033[38;5;203m((((((((((((((\033[38;5;220m#############
        \033[38;5;069m(((((((((((((\033[38;5;203m((((((((((((((\033[38;5;220m#############
         \033[38;5;069m((((((((((((\033[38;5;203m(((((((((((((\033[38;5;220m##############
         \033[38;5;069m((((((((((((\033[38;5;203m((((((((((((\033[38;5;220m##############
           \033[38;5;069m((((((((((\033[38;5;203m(((((((((((\033[38;5;220m#############
             \033[38;5;069m((((((((\033[38;5;203m((((((((\033[38;5;220m##############
                \033[38;5;069m(((((\033[38;5;203m((((    \033[38;5;220m#########\033[0m

    """


@app.command()  # type: ignore
def run(
    path: str = typer.Option(
        "./chroma_data", help="The path to the file or directory."
    ),
    host: Annotated[
        Optional[str], typer.Option(help="The host to listen to. Default: localhost")
    ] = "localhost",
    log_path: Annotated[
        Optional[str], typer.Option(help="The path to the log file.")
    ] = "chroma.log",
    port: int = typer.Option(8000, help="The port to run the server on."),
    test: bool = typer.Option(False, help="Test mode.", show_envvar=False, hidden=True),
) -> None:
    """Run a chroma server"""
    console = Console()

    print("\033[1m")  # Bold logo
    print(_logo)
    print("\033[1m")  # Bold
    print("Running Chroma")
    print("\033[0m")  # Reset

    console.print(f"[bold]Saving data to:[/bold] [green]{path}[/green]")
    console.print(
        f"[bold]Connect to chroma at:[/bold] [green]http://{host}:{port}[/green]"
    )
    console.print(
        "[bold]Getting started guide[/bold]: [blue]https://docs.trychroma.com/getting-started[/blue]\n\n"
    )

    # set ENV variable for PERSIST_DIRECTORY to path
    os.environ["IS_PERSISTENT"] = "True"
    os.environ["PERSIST_DIRECTORY"] = path
    os.environ["CHROMA_SERVER_NOFILE"] = "65535"
    os.environ["CHROMA_CLI"] = "True"

    # get the path where chromadb is installed
    chromadb_path = os.path.dirname(os.path.realpath(__file__))

    # this is the path of the CLI, we want to move up one directory
    chromadb_path = os.path.dirname(chromadb_path)
    log_config = set_log_file_path(f"{chromadb_path}/log_config.yml", f"{log_path}")
    config = {
        "app": "chromadb.app:app",
        "host": host,
        "port": port,
        "workers": 1,
        "log_config": log_config,  # Pass the modified log_config dictionary
        "timeout_keep_alive": 30,
    }

    if test:
        return

    uvicorn.run(**config)


@utils_app.command()  # type: ignore
def vacuum(
    path: str = typer.Option(
        help="The path to a Chroma data directory.",
    ),
    force: bool = typer.Option(False, help="Force vacuuming without confirmation."),
) -> None:
    """
    Vacuum the database. This may result in a small increase in performance.

    If you recently upgraded Chroma from a version below 0.6 to 0.6 or above, you should run this command once to greatly reduce the size of your database and enable continuous database pruning. In most other cases, vacuuming will save very little disk space.

    The execution time of this command scales with the size of your database. It block both reads and writes to the database while it is running.
    """
    console = Console(
        highlight=False
    )  # by default, rich highlights numbers which makes the output look weird when we try to color numbers ourselves

    if not os.path.exists(path):
        console.print(f"[bold red]Path {path} does not exist.[/bold red]")
        raise typer.Exit(code=1)

    if not os.path.exists(f"{path}/chroma.sqlite3"):
        console.print(
            f"[bold red]Path {path} is not a Chroma data directory.[/bold red]"
        )
        raise typer.Exit(code=1)

    if not force and not typer.confirm(
        "Are you sure you want to vacuum the database? This will block both reads and writes to the database and may take a while. We recommend shutting down the server before running this command. Continue?",
    ):
        console.print("Vacuum cancelled.")
        raise typer.Exit(code=0)

    settings = Settings()
    settings.is_persistent = True
    settings.persist_directory = path
    system = System(settings=settings)
    system.start()
    client = Client.from_system(system)
    sqlite = system.instance(SqliteDB)

    directory_size_before_vacuum = get_directory_size(path)

    console.print()  # Add a newline before the progress bar

    with Progress(
        SpinnerColumn(finished_text="[bold green]:heavy_check_mark:[/bold green]"),
        TextColumn("[progress.description]{task.description}"),
        transient=True,
    ) as progress:
        collections = client.list_collections()
        task = progress.add_task("Purging the log...", total=len(collections))
        try:
            # Cleaning the log after upgrading to >=0.6 is dependent on vector segments migrating their max_seq_id from the pickled metadata file to SQLite.
            # Vector segments migrate this field automatically on init, but at this point the segment has not been loaded yet.
            trigger_vector_segments_max_seq_id_migration(
                sqlite, system.instance(SegmentManager)
            )

            for collection in collections:
                sqlite.purge_log(collection_id=collection.id)
                progress.update(task, advance=1)
        except Exception as e:
            console.print(f"[bold red]Error purging the log:[/bold red] {e}")
            raise typer.Exit(code=1)

        task = progress.add_task("Vacuuming (this may take a while)...")
        try:
            sqlite.vacuum()
            config = sqlite.config
            config.set_parameter("automatically_purge", True)
            sqlite.set_config(config)
        except Exception as e:
            console.print(f"[bold red]Error vacuuming database:[/bold red] {e}")
            raise typer.Exit(code=1)

        progress.update(task, advance=100)

    directory_size_after_vacuum = get_directory_size(path)
    size_diff = directory_size_before_vacuum - directory_size_after_vacuum

    console.print(
        f":soap: [bold]vacuum complete![/bold] Database size reduced by [green]{sizeof_fmt(size_diff)}[/green] (:arrow_down: [bold green]{(size_diff * 100 / directory_size_before_vacuum):.1f}%[/bold green])."
    )


@app.command()  # type: ignore
def help() -> None:
    """Opens help url in your browser"""

    webbrowser.open("https://discord.gg/MMeYNTmh3x")


@app.command()  # type: ignore
def docs() -> None:
    """Opens docs url in your browser"""

    webbrowser.open("https://docs.trychroma.com")


if __name__ == "__main__":
    app()