configurable directory
This commit is contained in:
@@ -89,6 +89,30 @@ You can also run it from the source tree without installing the tool:
|
||||
uv run python -m chromy.main --help
|
||||
```
|
||||
|
||||
## Chroma storage location
|
||||
|
||||
By default, Chromy relies on Chroma's default persistence location: a local
|
||||
`chroma/` directory created in the working directory from which you run the
|
||||
command.
|
||||
|
||||
You can override this with `CHROMA_FOLDER`.
|
||||
|
||||
- `CHROMA_FOLDER` must point to a **parent directory**.
|
||||
- Chromy will store data in `<CHROMA_FOLDER>/chroma`.
|
||||
- Relative paths are supported and are resolved from the current working directory.
|
||||
- If `CHROMA_FOLDER` is set, it takes precedence over the default behavior.
|
||||
- If the configured location is invalid or not writable, the command fails with an explicit error (no fallback to the default location).
|
||||
|
||||
Examples:
|
||||
|
||||
```bash
|
||||
# absolute parent path
|
||||
CHROMA_FOLDER=/tmp/chromy-data chromy list-collections
|
||||
|
||||
# relative parent path (resolved from current directory)
|
||||
CHROMA_FOLDER=.local-data chromy create-collection notes
|
||||
```
|
||||
|
||||
## Running Tests
|
||||
|
||||
Run the test suite with pytest:
|
||||
@@ -188,7 +212,8 @@ Query results include the stored document chunk, its id, distance, and file name
|
||||
|
||||
## Notes
|
||||
|
||||
- collections are stored in a local persistent Chroma database in the current directory
|
||||
- by default, collections are stored in a local persistent Chroma database in the current directory
|
||||
- set `CHROMA_FOLDER` to override the parent location; Chromy will use `<CHROMA_FOLDER>/chroma`
|
||||
- `import` requires the target collection to already exist
|
||||
- `import` accepts one or more file paths
|
||||
- unquoted glob patterns such as `*.md` are expanded by the shell before `chromy` starts
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from collections.abc import Sequence
|
||||
from pathlib import Path
|
||||
from tempfile import NamedTemporaryFile
|
||||
from typing import cast
|
||||
from uuid import uuid4
|
||||
|
||||
@@ -10,12 +13,71 @@ from chromadb.api.types import QueryResult, Where
|
||||
from chromadb.errors import NotFoundError
|
||||
|
||||
from chromy.embedding import EmbeddingRecord
|
||||
from chromy.errors import ChromaPathError
|
||||
|
||||
# Name of the environment variable that selects the parent directory for Chroma data.
CHROMA_FOLDER_ENV_VAR = "CHROMA_FOLDER"
|
||||
# Name of the directory, joined onto the configured parent, that holds the data.
CHROMA_SUBDIRECTORY = "chroma"
|
||||
|
||||
|
||||
def _resolve_persistence_path() -> Path | None:
    """Return the Chroma data directory selected by ``CHROMA_FOLDER``.

    The environment variable names a *parent* directory. The returned path is
    that parent (with ``~`` expanded and made absolute relative to the current
    working directory) joined with ``CHROMA_SUBDIRECTORY``. Nothing is created
    on disk here.

    Returns:
        ``None`` when the variable is unset, otherwise the absolute path of
        the data directory.

    Raises:
        ChromaPathError: If the variable is set but blank, or if its value
            cannot be expanded or resolved into an absolute path.
    """
    configured_parent = os.getenv(CHROMA_FOLDER_ENV_VAR)
    if configured_parent is None:
        return None

    trimmed_parent = configured_parent.strip()
    if not trimmed_parent:
        raise ChromaPathError(
            f"{CHROMA_FOLDER_ENV_VAR} is set but empty. Please set a valid parent "
            "directory path."
        )

    try:
        parent_path = Path(trimmed_parent).expanduser().resolve()
    except (OSError, RuntimeError) as exc:
        # expanduser() raises RuntimeError when a "~user" home cannot be
        # determined, and resolve() can fail on unusual paths. Report these
        # as ChromaPathError so callers see one explicit error type.
        raise ChromaPathError(
            f"Could not resolve {CHROMA_FOLDER_ENV_VAR}='{configured_parent}': {exc}"
        ) from exc

    return parent_path / CHROMA_SUBDIRECTORY
|
||||
|
||||
|
||||
def _ensure_persistence_path_is_usable(path: Path, configured_parent: str) -> None:
    """Create *path* if needed and verify that it is a writable directory.

    Args:
        path: Directory that should hold the Chroma data.
        configured_parent: Raw ``CHROMA_FOLDER`` value, used only in error
            messages.

    Raises:
        ChromaPathError: If the directory cannot be created or accessed, is
            not a directory, or a temporary file cannot be created inside it.
    """
    try:
        path.mkdir(parents=True, exist_ok=True)
        is_directory = path.is_dir()
        if is_directory:
            # Creating (and automatically deleting) a scratch file is the
            # write probe; the file itself is never used.
            probe = NamedTemporaryFile(
                dir=path, prefix=".chromy-write-test-", delete=True
            )
            with probe:
                pass
    except OSError as exc:
        raise ChromaPathError(
            f"Could not create or access Chroma directory '{path}' from "
            f"{CHROMA_FOLDER_ENV_VAR}='{configured_parent}': {exc}"
        ) from exc

    if not is_directory:
        raise ChromaPathError(
            f"Configured Chroma directory '{path}' is not a directory."
        )
|
||||
|
||||
|
||||
def get_client() -> ClientAPI:
    """Build the persistent Chroma client, honouring ``CHROMA_FOLDER``.

    Returns:
        A ``chromadb.PersistentClient`` created with no arguments when no
        override is configured, otherwise one created with the resolved
        override directory as ``path``.

    Raises:
        ChromaPathError: If the configured directory is invalid or unusable,
            or if the client cannot be initialised at that directory.
    """
    target_dir = _resolve_persistence_path()
    if target_dir is None:
        return chromadb.PersistentClient()

    # The raw (untrimmed) setting is re-read purely for error messages.
    raw_setting = os.getenv(CHROMA_FOLDER_ENV_VAR, "")
    _ensure_persistence_path_is_usable(target_dir, raw_setting)

    try:
        client = chromadb.PersistentClient(path=str(target_dir))
    except Exception as exc:  # pragma: no cover - defensive wrapper
        raise ChromaPathError(
            f"Could not initialize Chroma client at '{target_dir}' from "
            f"{CHROMA_FOLDER_ENV_VAR}='{raw_setting}': {exc}"
        ) from exc
    return client
|
||||
|
||||
|
||||
def _get_client_and_collection(
|
||||
collection_name: str,
|
||||
) -> tuple[ClientAPI, chromadb.Collection]:
|
||||
client = chromadb.PersistentClient()
|
||||
client = get_client()
|
||||
|
||||
try:
|
||||
collection = client.get_collection(name=collection_name)
|
||||
@@ -26,7 +88,7 @@ def _get_client_and_collection(
|
||||
|
||||
|
||||
def list_collections() -> list[str]:
|
||||
client = chromadb.PersistentClient()
|
||||
client = get_client()
|
||||
collections = client.list_collections()
|
||||
|
||||
if not collections:
|
||||
@@ -36,14 +98,14 @@ def list_collections() -> list[str]:
|
||||
|
||||
|
||||
def create_collection(name: str) -> str:
    """Create a collection called *name* and return its name.

    The client comes from ``get_client()`` only; no default-location client
    is constructed first.

    Args:
        name: Name of the collection to create.

    Returns:
        The ``name`` attribute of the created collection, or *name* itself if
        the returned object has no such attribute.
    """
    client = get_client()
    collection = client.create_collection(name=name)

    return getattr(collection, "name", name)
|
||||
|
||||
|
||||
def delete_collection(name: str) -> None:
    """Delete the collection called *name*.

    The client comes from ``get_client()`` only; no default-location client
    is constructed first.

    Args:
        name: Name of the collection to delete.
    """
    client = get_client()
    client.delete_collection(name=name)
|
||||
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ import typer
|
||||
from chromadb.errors import InternalError, NotFoundError
|
||||
from rich import print
|
||||
|
||||
from chromy.errors import ChromaPathError
|
||||
from chromy.handlers.count_collection import handle_count_collection
|
||||
from chromy.handlers.create_collection import handle_create_collection
|
||||
from chromy.handlers.delete_collection import (
|
||||
@@ -22,7 +23,11 @@ ExitCodeHandler = Callable[[], int]
|
||||
|
||||
|
||||
def _run(handler: ExitCodeHandler) -> None:
    """Invoke *handler* and turn its outcome into a CLI exit.

    Args:
        handler: Zero-argument callable returning an integer exit code.

    Raises:
        typer.Exit: With the handler's exit code when that code is not ``0``.
    """
    try:
        status = handler()
    except ChromaPathError as path_error:
        # NOTE(review): _fail is presumably non-returning (it is defined
        # outside this view); if it returned, `status` would be unbound below.
        _fail(str(path_error))

    if status != 0:
        raise typer.Exit(status)
|
||||
|
||||
|
||||
@@ -3,3 +3,7 @@ from __future__ import annotations
|
||||
|
||||
class UnsupportedTextFileError(Exception):
    """Signal that a file's content does not look like supported text."""
|
||||
|
||||
|
||||
class ChromaPathError(Exception):
    """Signal that the configured Chroma persistence path cannot be used."""
|
||||
|
||||
@@ -61,7 +61,7 @@ def _truncate_file_name(file_name: str, max_length: int = 20) -> str:
|
||||
if len(file_name) <= max_length:
|
||||
return file_name
|
||||
|
||||
return f"{file_name[: max_length - 3]}"
|
||||
return f"{file_name[: max_length - 3]}..."
|
||||
|
||||
|
||||
def handle_import(collection: str, files: list[str]) -> int:
|
||||
|
||||
@@ -0,0 +1,92 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
from chromy.chroma_functions import get_client
|
||||
from chromy.errors import ChromaPathError
|
||||
|
||||
|
||||
class ChromaFunctionsTests(unittest.TestCase):
    """Check how ``get_client`` chooses and validates the Chroma directory."""

    def test_get_client_uses_default_when_env_is_unset(self) -> None:
        """With an empty environment the client is built without a path."""
        with (
            patch.dict(os.environ, {}, clear=True),
            patch("chromy.chroma_functions.chromadb.PersistentClient") as factory,
        ):
            get_client()

        factory.assert_called_once_with()

    def test_get_client_uses_chroma_folder_override(self) -> None:
        """An absolute ``CHROMA_FOLDER`` becomes the parent of ``chroma``."""
        with tempfile.TemporaryDirectory() as scratch:
            parent = Path(scratch) / "data"
            environment = {"CHROMA_FOLDER": str(parent)}

            with (
                patch.dict(os.environ, environment),
                patch(
                    "chromy.chroma_functions.chromadb.PersistentClient"
                ) as factory,
            ):
                get_client()

            data_dir = parent.resolve() / "chroma"
            factory.assert_called_once_with(path=str(data_dir))
            # The directory is created even though the client is mocked.
            self.assertTrue(data_dir.is_dir())

    def test_get_client_resolves_relative_chroma_folder_from_cwd(self) -> None:
        """A relative ``CHROMA_FOLDER`` is resolved against the working dir."""
        with tempfile.TemporaryDirectory() as scratch:
            cwd_for_test = Path(scratch)
            original_cwd = Path.cwd()

            try:
                os.chdir(cwd_for_test)
                with (
                    patch.dict(os.environ, {"CHROMA_FOLDER": "relative-parent"}),
                    patch(
                        "chromy.chroma_functions.chromadb.PersistentClient"
                    ) as factory,
                ):
                    get_client()
            finally:
                # Always restore the working directory for later tests.
                os.chdir(original_cwd)

            data_dir = (cwd_for_test / "relative-parent").resolve() / "chroma"
            factory.assert_called_once_with(path=str(data_dir))

    def test_get_client_fails_when_configured_path_is_not_usable(self) -> None:
        """A regular file as the parent is reported as ``ChromaPathError``."""
        with tempfile.TemporaryDirectory() as scratch:
            file_as_parent = Path(scratch) / "not-a-directory"
            file_as_parent.write_text("x", encoding="utf-8")
            environment = {"CHROMA_FOLDER": str(file_as_parent)}

            with (
                patch.dict(os.environ, environment),
                self.assertRaisesRegex(
                    ChromaPathError,
                    "Could not create or access Chroma directory",
                ),
            ):
                get_client()

    def test_get_client_wraps_client_initialization_failures(self) -> None:
        """Errors raised by the client constructor become ``ChromaPathError``."""
        with tempfile.TemporaryDirectory() as scratch:
            parent = Path(scratch)
            environment = {"CHROMA_FOLDER": str(parent)}

            with (
                patch.dict(os.environ, environment),
                patch(
                    "chromy.chroma_functions.chromadb.PersistentClient",
                    side_effect=RuntimeError("boom"),
                ),
                self.assertRaisesRegex(
                    ChromaPathError,
                    "Could not initialize Chroma client",
                ),
            ):
                get_client()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -10,6 +10,7 @@ from click.testing import Result
|
||||
from typer.testing import CliRunner
|
||||
|
||||
from chromy.cli import app
|
||||
from chromy.errors import ChromaPathError
|
||||
|
||||
|
||||
class CliTests(unittest.TestCase):
|
||||
@@ -251,6 +252,19 @@ class CliTests(unittest.TestCase):
|
||||
self.assertNotEqual(result.exit_code, 0)
|
||||
self.assertIn("Missing option", result.output)
|
||||
|
||||
def test_cli_surfaces_chroma_path_errors(self) -> None:
    """A ``ChromaPathError`` from the handler gives exit code 1 and a message."""
    failure = ChromaPathError("configured path is not writable")
    patched_listing = patch(
        "chromy.handlers.list_collections.list_collections",
        side_effect=failure,
    )

    with patched_listing:
        result = _invoke(["list-collections"])

    self.assertEqual(result.exit_code, 1)
    self.assertEqual(result.stdout, "Error: configured path is not writable\n")
|
||||
|
||||
|
||||
def _invoke(arguments: Sequence[str]) -> Result:
    """Run the CLI app in-process with *arguments* and return the result."""
    runner = CliRunner()
    return runner.invoke(app, list(arguments))
|
||||
|
||||
Reference in New Issue
Block a user