Spaces:
Build error
Build error
File size: 7,750 Bytes
60e3a80 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 |
from typing import Optional
from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn
import typer.rich_utils
from typing_extensions import Annotated
import typer
import uvicorn
import os
import webbrowser
from chromadb.api.client import Client
from chromadb.cli.utils import get_directory_size, set_log_file_path, sizeof_fmt
from chromadb.config import Settings, System
from chromadb.db.impl.sqlite import SqliteDB
from chromadb.ingest.impl.utils import trigger_vector_segments_max_seq_id_migration
from chromadb.segment import SegmentManager
# Sub-application that groups the maintenance commands (see `vacuum`).
utils_app = typer.Typer(short_help="Use maintenance utilities")

# Root CLI application; top-level commands attach to it via @app.command().
app = typer.Typer()
app.add_typer(utils_app, name="utils")  # reachable as the `utils` command group
# Startup banner printed by `run`. The art is colored with ANSI 256-color
# foreground escapes (\033[38;5;<n>m) and ends with \033[0m to reset styling.
# The string is emitted verbatim, so its contents must not be reformatted.
_logo = """
\033[38;5;069m((((((((( \033[38;5;203m(((((\033[38;5;220m####
\033[38;5;069m(((((((((((((\033[38;5;203m(((((((((\033[38;5;220m#########
\033[38;5;069m(((((((((((((\033[38;5;203m(((((((((((\033[38;5;220m###########
\033[38;5;069m((((((((((((((\033[38;5;203m((((((((((((\033[38;5;220m############
\033[38;5;069m(((((((((((((\033[38;5;203m((((((((((((((\033[38;5;220m#############
\033[38;5;069m(((((((((((((\033[38;5;203m((((((((((((((\033[38;5;220m#############
\033[38;5;069m((((((((((((\033[38;5;203m(((((((((((((\033[38;5;220m##############
\033[38;5;069m((((((((((((\033[38;5;203m((((((((((((\033[38;5;220m##############
\033[38;5;069m((((((((((\033[38;5;203m(((((((((((\033[38;5;220m#############
\033[38;5;069m((((((((\033[38;5;203m((((((((\033[38;5;220m##############
\033[38;5;069m(((((\033[38;5;203m(((( \033[38;5;220m#########\033[0m
"""
# `run` command: prints the banner and connection hints, exports the settings
# the server process reads from the environment, prepares the logging config
# and finally hands control to uvicorn.
#
#   path     -- directory where Chroma persists its data
#   host     -- interface the server binds to
#   log_path -- file the server log is written to
#   port     -- TCP port the server listens on
#   test     -- hidden flag; when set, everything except the actual server
#               start is performed and the function returns
#
# The docstring is left as a single line because Typer displays it as the
# command's help text.
@app.command()  # type: ignore
def run(
    path: str = typer.Option(
        "./chroma_data", help="The path to the file or directory."
    ),
    host: Annotated[
        Optional[str], typer.Option(help="The host to listen to. Default: localhost")
    ] = "localhost",
    log_path: Annotated[
        Optional[str], typer.Option(help="The path to the log file.")
    ] = "chroma.log",
    port: int = typer.Option(8000, help="The port to run the server on."),
    test: bool = typer.Option(False, help="Test mode.", show_envvar=False, hidden=True),
) -> None:
    """Run a chroma server"""
    rich_out = Console()

    # Banner sequence: bold on, logo, bold on, title, reset. Each piece is
    # printed on its own line, exactly as separate print() calls would do.
    for banner_piece in ("\033[1m", _logo, "\033[1m", "Running Chroma", "\033[0m"):
        print(banner_piece)

    startup_hints = (
        f"[bold]Saving data to:[/bold] [green]{path}[/green]",
        f"[bold]Connect to chroma at:[/bold] [green]http://{host}:{port}[/green]",
        "[bold]Getting started guide[/bold]: [blue]https://docs.trychroma.com/getting-started[/blue]\n\n",
    )
    for hint in startup_hints:
        rich_out.print(hint)

    # Settings are handed to the server through environment variables.
    os.environ.update(
        {
            "IS_PERSISTENT": "True",
            "PERSIST_DIRECTORY": path,
            "CHROMA_SERVER_NOFILE": "65535",
            "CHROMA_CLI": "True",
        }
    )

    # This file lives in the CLI package; log_config.yml sits one directory up,
    # at the root of the installed chromadb package.
    cli_dir = os.path.dirname(os.path.realpath(__file__))
    package_dir = os.path.dirname(cli_dir)
    log_config = set_log_file_path(f"{package_dir}/log_config.yml", f"{log_path}")

    # In test mode all the setup above still runs, only the server is skipped.
    if not test:
        uvicorn.run(
            app="chromadb.app:app",
            host=host,
            port=port,
            workers=1,
            log_config=log_config,  # the modified log_config dictionary
            timeout_keep_alive=30,
        )
# `utils vacuum` command.
#
#   path  -- Chroma data directory; must exist and contain chroma.sqlite3
#   force -- skip the interactive confirmation prompt
#
# Exits with code 1 when the path is missing, is not a Chroma data directory,
# or when purging/vacuuming fails; exits with code 0 when the user declines
# the confirmation. Parameter documentation lives in this comment rather than
# in the docstring because Typer displays the docstring as the command's help.
@utils_app.command()  # type: ignore
def vacuum(
    path: str = typer.Option(
        help="The path to a Chroma data directory.",
    ),
    force: bool = typer.Option(False, help="Force vacuuming without confirmation."),
) -> None:
    """
    Vacuum the database. This may result in a small increase in performance.
    If you recently upgraded Chroma from a version below 0.6 to 0.6 or above, you should run this command once to greatly reduce the size of your database and enable continuous database pruning. In most other cases, vacuuming will save very little disk space.
    The execution time of this command scales with the size of your database. It blocks both reads and writes to the database while it is running.
    """
    # By default, rich highlights numbers, which makes the output look weird
    # when we try to color numbers ourselves.
    console = Console(highlight=False)

    if not os.path.exists(path):
        console.print(f"[bold red]Path {path} does not exist.[/bold red]")
        raise typer.Exit(code=1)

    if not os.path.exists(os.path.join(path, "chroma.sqlite3")):
        console.print(
            f"[bold red]Path {path} is not a Chroma data directory.[/bold red]"
        )
        raise typer.Exit(code=1)

    if not force and not typer.confirm(
        "Are you sure you want to vacuum the database? This will block both reads and writes to the database and may take a while. We recommend shutting down the server before running this command. Continue?",
    ):
        console.print("Vacuum cancelled.")
        raise typer.Exit(code=0)

    settings = Settings()
    settings.is_persistent = True
    settings.persist_directory = path
    system = System(settings=settings)
    system.start()
    client = Client.from_system(system)
    sqlite = system.instance(SqliteDB)

    directory_size_before_vacuum = get_directory_size(path)

    console.print()  # Add a newline before the progress bar

    # The progress display shares `console` so that error messages printed
    # while it is live are rendered above it instead of fighting with a
    # second, independent console over the same terminal lines.
    with Progress(
        SpinnerColumn(finished_text="[bold green]:heavy_check_mark:[/bold green]"),
        TextColumn("[progress.description]{task.description}"),
        console=console,
        transient=True,
    ) as progress:
        collections = client.list_collections()
        purge_task = progress.add_task(
            "Purging the log...", total=len(collections)
        )
        try:
            # Cleaning the log after upgrading to >=0.6 is dependent on vector
            # segments migrating their max_seq_id from the pickled metadata
            # file to SQLite. Vector segments migrate this field automatically
            # on init, but at this point the segment has not been loaded yet.
            trigger_vector_segments_max_seq_id_migration(
                sqlite, system.instance(SegmentManager)
            )
            for collection in collections:
                sqlite.purge_log(collection_id=collection.id)
                progress.update(purge_task, advance=1)
        except Exception as e:
            console.print(f"[bold red]Error purging the log:[/bold red] {e}")
            raise typer.Exit(code=1)

        vacuum_task = progress.add_task("Vacuuming (this may take a while)...")
        try:
            sqlite.vacuum()
            # Turn on automatic purging so the log keeps being pruned from now on.
            config = sqlite.config
            config.set_parameter("automatically_purge", True)
            sqlite.set_config(config)
        except Exception as e:
            console.print(f"[bold red]Error vacuuming database:[/bold red] {e}")
            raise typer.Exit(code=1)
        progress.update(vacuum_task, advance=100)

    directory_size_after_vacuum = get_directory_size(path)
    size_diff = directory_size_before_vacuum - directory_size_after_vacuum
    # Guard the percentage against a zero-sized starting directory so the
    # summary line can never fail with ZeroDivisionError.
    if directory_size_before_vacuum:
        percent_saved = size_diff * 100 / directory_size_before_vacuum
    else:
        percent_saved = 0.0

    console.print(
        f":soap: [bold]vacuum complete![/bold] Database size reduced by [green]{sizeof_fmt(size_diff)}[/green] (:arrow_down: [bold green]{percent_saved:.1f}%[/bold green])."
    )
# The function name doubles as the CLI command name (Typer's default), so it
# stays `help` even though that shadows the builtin inside this module.
@app.command()  # type: ignore
def help() -> None:
    """Opens help url in your browser"""
    support_url = "https://discord.gg/MMeYNTmh3x"
    webbrowser.open(support_url)
# `docs` command: hands the documentation site URL to the default web browser.
@app.command()  # type: ignore
def docs() -> None:
    """Opens docs url in your browser"""
    docs_url = "https://docs.trychroma.com"
    webbrowser.open(docs_url)
# Script entry point: run the Typer application only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    app()
|