From 96ccf0396d611917d15b4f2f6b8d4fdf32c7a4cb Mon Sep 17 00:00:00 2001 From: Matteo Rosati Date: Wed, 6 May 2026 21:23:37 +0200 Subject: [PATCH] configurable directory --- README.md | 27 +++++++++- chromy/chroma_functions.py | 70 ++++++++++++++++++++++++-- chromy/cli.py | 7 ++- chromy/errors.py | 4 ++ chromy/handlers/import_data.py | 2 +- tests/test_chroma_functions.py | 92 ++++++++++++++++++++++++++++++++++ tests/test_cli.py | 14 ++++++ 7 files changed, 209 insertions(+), 7 deletions(-) create mode 100644 tests/test_chroma_functions.py diff --git a/README.md b/README.md index 45f21dd..ce6890a 100644 --- a/README.md +++ b/README.md @@ -89,6 +89,30 @@ You can also run it from the source tree without installing the tool: uv run python -m chromy.main --help ``` +## Chroma storage location + +By default, Chromy uses Chroma's default persistent location behavior (a local +`chroma/` directory based on your current working directory when you run the +command). + +You can override this with `CHROMA_FOLDER`. + +- `CHROMA_FOLDER` must point to a **parent directory**. +- Chromy will store data in `$CHROMA_FOLDER/chroma`. +- Relative paths are supported and are resolved from the current working directory. +- If `CHROMA_FOLDER` is set, it takes precedence over the default behavior. +- If the configured location is invalid or not writable, the command fails with an explicit error (no fallback to the default location). 
+ +Examples: + +```bash +# absolute parent path +CHROMA_FOLDER=/tmp/chromy-data chromy list-collections + +# relative parent path (resolved from current directory) +CHROMA_FOLDER=.local-data chromy create-collection notes +``` + ## Running Tests Run the test suite with pytest: @@ -188,7 +212,8 @@ Query results include the stored document chunk, its id, distance, and file name ## Notes -- collections are stored in a local persistent Chroma database in the current directory +- by default, collections are stored in a local persistent Chroma database in the current directory +- set `CHROMA_FOLDER` to override the parent location; Chromy will use `$CHROMA_FOLDER/chroma` - `import` requires the target collection to already exist - `import` accepts one or more file paths - unquoted glob patterns such as `*.md` are expanded by the shell before `chromy` starts diff --git a/chromy/chroma_functions.py b/chromy/chroma_functions.py index cdf78ad..3eaf1d5 100644 --- a/chromy/chroma_functions.py +++ b/chromy/chroma_functions.py @@ -1,6 +1,9 @@ from __future__ import annotations +import os from collections.abc import Sequence +from pathlib import Path +from tempfile import NamedTemporaryFile from typing import cast from uuid import uuid4 @@ -10,12 +13,71 @@ from chromadb.api.types import QueryResult, Where from chromadb.errors import NotFoundError from chromy.embedding import EmbeddingRecord +from chromy.errors import ChromaPathError + +CHROMA_FOLDER_ENV_VAR = "CHROMA_FOLDER" +CHROMA_SUBDIRECTORY = "chroma" + + +def _resolve_persistence_path() -> Path | None: + configured_parent = os.getenv(CHROMA_FOLDER_ENV_VAR) + + if configured_parent is None: + return None + + trimmed_parent = configured_parent.strip() + if not trimmed_parent: + raise ChromaPathError( + f"{CHROMA_FOLDER_ENV_VAR} is set but empty. Please set a valid parent " + "directory path." 
+ ) + + parent_path = Path(trimmed_parent).expanduser().resolve() + return parent_path / CHROMA_SUBDIRECTORY + + +def _ensure_persistence_path_is_usable(path: Path, configured_parent: str) -> None: + try: + path.mkdir(parents=True, exist_ok=True) + + if not path.is_dir(): + raise ChromaPathError( + f"Configured Chroma directory '{path}' is not a directory." + ) + + with NamedTemporaryFile(dir=path, prefix=".chromy-write-test-", delete=True): + pass + except ChromaPathError: + raise + except OSError as exc: + raise ChromaPathError( + f"Could not create or access Chroma directory '{path}' from " + f"{CHROMA_FOLDER_ENV_VAR}='{configured_parent}': {exc}" + ) from exc + + +def get_client() -> ClientAPI: + persistence_path = _resolve_persistence_path() + + if persistence_path is None: + return chromadb.PersistentClient() + + configured_parent = os.getenv(CHROMA_FOLDER_ENV_VAR, "") + _ensure_persistence_path_is_usable(persistence_path, configured_parent) + + try: + return chromadb.PersistentClient(path=str(persistence_path)) + except Exception as exc: # pragma: no cover - defensive wrapper + raise ChromaPathError( + f"Could not initialize Chroma client at '{persistence_path}' from " + f"{CHROMA_FOLDER_ENV_VAR}='{configured_parent}': {exc}" + ) from exc def _get_client_and_collection( collection_name: str, ) -> tuple[ClientAPI, chromadb.Collection]: - client = chromadb.PersistentClient() + client = get_client() try: collection = client.get_collection(name=collection_name) @@ -26,7 +88,7 @@ def _get_client_and_collection( def list_collections() -> list[str]: - client = chromadb.PersistentClient() + client = get_client() collections = client.list_collections() if not collections: @@ -36,14 +98,14 @@ def list_collections() -> list[str]: def create_collection(name: str) -> str: - client = chromadb.PersistentClient() + client = get_client() collection = client.create_collection(name=name) return getattr(collection, "name", name) def delete_collection(name: str) -> None: - 
client = chromadb.PersistentClient() + client = get_client() client.delete_collection(name=name) diff --git a/chromy/cli.py b/chromy/cli.py index b8ec012..402be66 100644 --- a/chromy/cli.py +++ b/chromy/cli.py @@ -6,6 +6,7 @@ import typer from chromadb.errors import InternalError, NotFoundError from rich import print +from chromy.errors import ChromaPathError from chromy.handlers.count_collection import handle_count_collection from chromy.handlers.create_collection import handle_create_collection from chromy.handlers.delete_collection import ( @@ -22,7 +23,11 @@ ExitCodeHandler = Callable[[], int] def _run(handler: ExitCodeHandler) -> None: - exit_code = handler() + try: + exit_code = handler() + except ChromaPathError as exc: + _fail(str(exc)) + if exit_code != 0: raise typer.Exit(exit_code) diff --git a/chromy/errors.py b/chromy/errors.py index 2cb3fcb..699148b 100644 --- a/chromy/errors.py +++ b/chromy/errors.py @@ -3,3 +3,7 @@ from __future__ import annotations class UnsupportedTextFileError(Exception): """Raised when a file does not appear to contain supported text content.""" + + +class ChromaPathError(Exception): + """Raised when the configured Chroma persistence path is invalid or unusable.""" diff --git a/chromy/handlers/import_data.py b/chromy/handlers/import_data.py index 9a4b319..c720b1d 100644 --- a/chromy/handlers/import_data.py +++ b/chromy/handlers/import_data.py @@ -61,7 +61,7 @@ def _truncate_file_name(file_name: str, max_length: int = 20) -> str: if len(file_name) <= max_length: return file_name - return f"{file_name[: max_length - 3]}" + return f"{file_name[: max_length - 3]}..." 
def handle_import(collection: str, files: list[str]) -> int: diff --git a/tests/test_chroma_functions.py b/tests/test_chroma_functions.py new file mode 100644 index 0000000..d44c92c --- /dev/null +++ b/tests/test_chroma_functions.py @@ -0,0 +1,92 @@ +from __future__ import annotations + +import os +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +from chromy.chroma_functions import get_client +from chromy.errors import ChromaPathError + + +class ChromaFunctionsTests(unittest.TestCase): + def test_get_client_uses_default_when_env_is_unset(self) -> None: + with ( + patch.dict(os.environ, {}, clear=True), + patch("chromy.chroma_functions.chromadb.PersistentClient") as persistent, + ): + get_client() + + persistent.assert_called_once_with() + + def test_get_client_uses_chroma_folder_override(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + configured_parent = Path(temp_dir) / "data" + + with ( + patch.dict(os.environ, {"CHROMA_FOLDER": str(configured_parent)}), + patch( + "chromy.chroma_functions.chromadb.PersistentClient" + ) as persistent, + ): + get_client() + + expected_path = configured_parent.resolve() / "chroma" + persistent.assert_called_once_with(path=str(expected_path)) + self.assertTrue(expected_path.is_dir()) + + def test_get_client_resolves_relative_chroma_folder_from_cwd(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + working_dir = Path(temp_dir) + previous_cwd = Path.cwd() + + try: + os.chdir(working_dir) + with ( + patch.dict(os.environ, {"CHROMA_FOLDER": "relative-parent"}), + patch( + "chromy.chroma_functions.chromadb.PersistentClient" + ) as persistent, + ): + get_client() + finally: + os.chdir(previous_cwd) + + expected_path = (working_dir / "relative-parent").resolve() / "chroma" + persistent.assert_called_once_with(path=str(expected_path)) + + def test_get_client_fails_when_configured_path_is_not_usable(self) -> None: + with tempfile.TemporaryDirectory() as 
temp_dir: + invalid_parent = Path(temp_dir) / "not-a-directory" + invalid_parent.write_text("x", encoding="utf-8") + + with ( + patch.dict(os.environ, {"CHROMA_FOLDER": str(invalid_parent)}), + self.assertRaisesRegex( + ChromaPathError, + "Could not create or access Chroma directory", + ), + ): + get_client() + + def test_get_client_wraps_client_initialization_failures(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + configured_parent = Path(temp_dir) + + with ( + patch.dict(os.environ, {"CHROMA_FOLDER": str(configured_parent)}), + patch( + "chromy.chroma_functions.chromadb.PersistentClient", + side_effect=RuntimeError("boom"), + ), + self.assertRaisesRegex( + ChromaPathError, + "Could not initialize Chroma client", + ), + ): + get_client() + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_cli.py b/tests/test_cli.py index ec90ebd..9642ec3 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -10,6 +10,7 @@ from click.testing import Result from typer.testing import CliRunner from chromy.cli import app +from chromy.errors import ChromaPathError class CliTests(unittest.TestCase): @@ -251,6 +252,19 @@ class CliTests(unittest.TestCase): self.assertNotEqual(result.exit_code, 0) self.assertIn("Missing option", result.output) + def test_cli_surfaces_chroma_path_errors(self) -> None: + with patch( + "chromy.handlers.list_collections.list_collections", + side_effect=ChromaPathError("configured path is not writable"), + ): + result = _invoke(["list-collections"]) + + self.assertEqual(result.exit_code, 1) + self.assertEqual( + result.stdout, + "Error: configured path is not writable\n", + ) + def _invoke(arguments: Sequence[str]) -> Result: return CliRunner().invoke(app, list(arguments))