configurable directory
build / build (push) Successful in 47s
pytest / pytest (push) Successful in 35s

This commit is contained in:
Matteo Rosati
2026-05-06 21:23:37 +02:00
parent 28ec29f8af
commit 96ccf0396d
7 changed files with 209 additions and 7 deletions
+26 -1
View File
@@ -89,6 +89,30 @@ You can also run it from the source tree without installing the tool:
uv run python -m chromy.main --help
```
## Chroma storage location
By default, Chromy does not pass a path to Chroma, so Chroma uses its own default
persistent location: a local `chroma/` directory under the working directory from
which you run the command.
You can override this with `CHROMA_FOLDER`.
- `CHROMA_FOLDER` must point to a **parent directory**.
- Chromy will store data in `<CHROMA_FOLDER>/chroma`.
- Relative paths are supported and are resolved from the current working directory.
- If `CHROMA_FOLDER` is set, it takes precedence over the default behavior.
- If the configured location is invalid or not writable, the command fails with an explicit error (no fallback to the default location).
Examples:
```bash
# absolute parent path
CHROMA_FOLDER=/tmp/chromy-data chromy list-collections
# relative parent path (resolved from current directory)
CHROMA_FOLDER=.local-data chromy create-collection notes
```
## Running Tests
Run the test suite with pytest:
@@ -188,7 +212,8 @@ Query results include the stored document chunk, its id, distance, and file name
## Notes
- collections are stored in a local persistent Chroma database in the current directory
- by default, collections are stored in a local persistent Chroma database in the current directory
- set `CHROMA_FOLDER` to override the parent location; Chromy will use `<CHROMA_FOLDER>/chroma`
- `import` requires the target collection to already exist
- `import` accepts one or more file paths
- unquoted glob patterns such as `*.md` are expanded by the shell before `chromy` starts
+66 -4
View File
@@ -1,6 +1,9 @@
from __future__ import annotations
import os
from collections.abc import Sequence
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import cast
from uuid import uuid4
@@ -10,12 +13,71 @@ from chromadb.api.types import QueryResult, Where
from chromadb.errors import NotFoundError
from chromy.embedding import EmbeddingRecord
from chromy.errors import ChromaPathError
# Name of the environment variable that holds the parent directory for Chroma data.
CHROMA_FOLDER_ENV_VAR = "CHROMA_FOLDER"
# Name of the subdirectory appended to the configured parent; the resulting path
# is what gets passed to the persistent Chroma client.
CHROMA_SUBDIRECTORY = "chroma"
def _resolve_persistence_path() -> Path | None:
    """Return the Chroma storage directory configured via ``CHROMA_FOLDER``.

    Returns:
        ``None`` when the environment variable is unset, which tells the caller
        to fall back to Chroma's default location. Otherwise the absolute path
        ``<parent>/chroma``, where ``<parent>`` is the configured value with
        surrounding whitespace stripped, ``~`` expanded, and relative segments
        resolved against the current working directory.

    Raises:
        ChromaPathError: If the variable is set but blank, or if its value
            cannot be expanded or resolved to an absolute path.
    """
    configured_parent = os.getenv(CHROMA_FOLDER_ENV_VAR)
    if configured_parent is None:
        return None

    trimmed_parent = configured_parent.strip()
    if not trimmed_parent:
        raise ChromaPathError(
            f"{CHROMA_FOLDER_ENV_VAR} is set but empty. Please set a valid parent "
            "directory path."
        )

    try:
        parent_path = Path(trimmed_parent).expanduser().resolve()
    except (OSError, RuntimeError) as exc:
        # expanduser() raises RuntimeError when a home directory cannot be
        # determined (e.g. "~unknown-user"), and resolve() can raise OSError
        # (e.g. a relative path while the working directory no longer exists).
        # Convert both so callers only ever have to handle ChromaPathError.
        raise ChromaPathError(
            f"Could not resolve {CHROMA_FOLDER_ENV_VAR}='{configured_parent}' to a "
            f"directory path: {exc}"
        ) from exc

    return parent_path / CHROMA_SUBDIRECTORY
def _ensure_persistence_path_is_usable(path: Path, configured_parent: str) -> None:
    """Create ``path`` if needed and verify that files can be written into it.

    Args:
        path: Directory that is about to be used as the Chroma storage path.
        configured_parent: Raw ``CHROMA_FOLDER`` value; only used in error text.

    Raises:
        ChromaPathError: If ``path`` cannot be created, is not a directory, or
            a temporary file cannot be created inside it.
    """
    try:
        path.mkdir(parents=True, exist_ok=True)
        if path.is_dir():
            # Write probe: creating a temp file (removed again on close) proves
            # the directory accepts new files.
            with NamedTemporaryFile(
                dir=path, prefix=".chromy-write-test-", delete=True
            ):
                pass
        else:
            raise ChromaPathError(
                f"Configured Chroma directory '{path}' is not a directory."
            )
    except ChromaPathError:
        # Our own error already carries the final message; pass it through
        # untouched rather than letting a later handler re-wrap it.
        raise
    except OSError as os_error:
        raise ChromaPathError(
            f"Could not create or access Chroma directory '{path}' from "
            f"{CHROMA_FOLDER_ENV_VAR}='{configured_parent}': {os_error}"
        ) from os_error
def get_client() -> ClientAPI:
    """Build a persistent Chroma client, honouring ``CHROMA_FOLDER`` when set.

    Returns:
        A client created without a path when ``CHROMA_FOLDER`` is unset, or a
        client rooted at ``<CHROMA_FOLDER>/chroma`` otherwise.

    Raises:
        ChromaPathError: If the configured location is blank, cannot be
            prepared for writing, or the client fails to initialize there.
    """
    storage_dir = _resolve_persistence_path()
    if storage_dir is not None:
        # The raw (unstripped) setting is kept only for error messages.
        raw_setting = os.getenv(CHROMA_FOLDER_ENV_VAR, "")
        _ensure_persistence_path_is_usable(storage_dir, raw_setting)

        try:
            return chromadb.PersistentClient(path=str(storage_dir))
        except Exception as exc:  # pragma: no cover - defensive wrapper
            raise ChromaPathError(
                f"Could not initialize Chroma client at '{storage_dir}' from "
                f"{CHROMA_FOLDER_ENV_VAR}='{raw_setting}': {exc}"
            ) from exc

    # No override configured: let Chroma choose its default location.
    return chromadb.PersistentClient()
def _get_client_and_collection(
collection_name: str,
) -> tuple[ClientAPI, chromadb.Collection]:
client = chromadb.PersistentClient()
client = get_client()
try:
collection = client.get_collection(name=collection_name)
@@ -26,7 +88,7 @@ def _get_client_and_collection(
def list_collections() -> list[str]:
client = chromadb.PersistentClient()
client = get_client()
collections = client.list_collections()
if not collections:
@@ -36,14 +98,14 @@ def list_collections() -> list[str]:
def create_collection(name: str) -> str:
    """Create a collection called ``name`` and return the created name.

    Args:
        name: Name of the collection to create.

    Returns:
        The ``name`` attribute of the created collection, or the requested
        ``name`` if the returned object has no such attribute.
    """
    # Obtain the client only through get_client(): constructing a bare
    # chromadb.PersistentClient() first would open the default store even when
    # CHROMA_FOLDER points somewhere else.
    client = get_client()
    collection = client.create_collection(name=name)
    return getattr(collection, "name", name)
def delete_collection(name: str) -> None:
    """Delete the collection called ``name``.

    Args:
        name: Name of the collection to delete.
    """
    # Obtain the client only through get_client(): constructing a bare
    # chromadb.PersistentClient() first would open the default store even when
    # CHROMA_FOLDER points somewhere else.
    client = get_client()
    client.delete_collection(name=name)
+6 -1
View File
@@ -6,6 +6,7 @@ import typer
from chromadb.errors import InternalError, NotFoundError
from rich import print
from chromy.errors import ChromaPathError
from chromy.handlers.count_collection import handle_count_collection
from chromy.handlers.create_collection import handle_create_collection
from chromy.handlers.delete_collection import (
@@ -22,7 +23,11 @@ ExitCodeHandler = Callable[[], int]
def _run(handler: ExitCodeHandler) -> None:
    """Run a command handler once and translate its outcome into an exit.

    Args:
        handler: Zero-argument callable that returns the command's exit code.

    Raises:
        typer.Exit: With the handler's exit code when that code is non-zero,
            or after a ``ChromaPathError`` has been reported.
    """
    try:
        # The handler must be invoked exactly once, and inside the try block:
        # an extra call beforehand would execute the command twice and let a
        # ChromaPathError escape without being reported.
        exit_code = handler()
    except ChromaPathError as exc:
        _fail(str(exc))
        # NOTE(review): _fail is defined outside this view and presumably
        # exits on its own; this fallback only guarantees a non-zero exit
        # (instead of an unbound exit_code) if it ever returns.
        raise typer.Exit(1) from exc

    if exit_code != 0:
        raise typer.Exit(exit_code)
+4
View File
@@ -3,3 +3,7 @@ from __future__ import annotations
class UnsupportedTextFileError(Exception):
    """Raised when a file does not appear to contain supported text content.

    Derives directly from ``Exception``, so it is not caught by handlers that
    target ``OSError`` or ``ValueError``.
    """
class ChromaPathError(Exception):
    """Raised when the configured Chroma persistence path is invalid or unusable.

    Derives directly from ``Exception``; the message is intended to be shown to
    the user as-is.
    """
+1 -1
View File
@@ -61,7 +61,7 @@ def _truncate_file_name(file_name: str, max_length: int = 20) -> str:
if len(file_name) <= max_length:
return file_name
return f"{file_name[: max_length - 3]}"
return f"{file_name[: max_length - 3]}..."
def handle_import(collection: str, files: list[str]) -> int:
+92
View File
@@ -0,0 +1,92 @@
from __future__ import annotations
import os
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
from chromy.chroma_functions import get_client
from chromy.errors import ChromaPathError
class ChromaFunctionsTests(unittest.TestCase):
    """Tests for how ``get_client`` selects and validates the storage path."""

    def test_get_client_uses_default_when_env_is_unset(self) -> None:
        """Without ``CHROMA_FOLDER`` the client is created with no arguments."""
        # clear=True empties the environment so an ambient CHROMA_FOLDER cannot
        # influence the test.
        with (
            patch.dict(os.environ, {}, clear=True),
            patch("chromy.chroma_functions.chromadb.PersistentClient") as persistent,
        ):
            get_client()

        persistent.assert_called_once_with()

    def test_get_client_uses_chroma_folder_override(self) -> None:
        """An absolute ``CHROMA_FOLDER`` yields ``<parent>/chroma``, created on disk."""
        with tempfile.TemporaryDirectory() as temp_dir:
            # The parent itself does not exist yet; get_client must create it.
            configured_parent = Path(temp_dir) / "data"
            with (
                patch.dict(os.environ, {"CHROMA_FOLDER": str(configured_parent)}),
                patch(
                    "chromy.chroma_functions.chromadb.PersistentClient"
                ) as persistent,
            ):
                get_client()

            expected_path = configured_parent.resolve() / "chroma"
            persistent.assert_called_once_with(path=str(expected_path))
            self.assertTrue(expected_path.is_dir())

    def test_get_client_resolves_relative_chroma_folder_from_cwd(self) -> None:
        """A relative ``CHROMA_FOLDER`` is resolved against the working directory."""
        with tempfile.TemporaryDirectory() as temp_dir:
            working_dir = Path(temp_dir)
            previous_cwd = Path.cwd()
            try:
                os.chdir(working_dir)
                with (
                    patch.dict(os.environ, {"CHROMA_FOLDER": "relative-parent"}),
                    patch(
                        "chromy.chroma_functions.chromadb.PersistentClient"
                    ) as persistent,
                ):
                    get_client()
            finally:
                # Always restore the working directory, even if get_client fails.
                os.chdir(previous_cwd)

            expected_path = (working_dir / "relative-parent").resolve() / "chroma"
            persistent.assert_called_once_with(path=str(expected_path))

    def test_get_client_fails_when_configured_path_is_not_usable(self) -> None:
        """A ``CHROMA_FOLDER`` that is a regular file raises ``ChromaPathError``."""
        with tempfile.TemporaryDirectory() as temp_dir:
            # Point CHROMA_FOLDER at a file so that "<file>/chroma" cannot be
            # created as a directory.
            invalid_parent = Path(temp_dir) / "not-a-directory"
            invalid_parent.write_text("x", encoding="utf-8")
            with (
                patch.dict(os.environ, {"CHROMA_FOLDER": str(invalid_parent)}),
                self.assertRaisesRegex(
                    ChromaPathError,
                    "Could not create or access Chroma directory",
                ),
            ):
                get_client()

    def test_get_client_wraps_client_initialization_failures(self) -> None:
        """Errors raised while constructing the client surface as ``ChromaPathError``."""
        with tempfile.TemporaryDirectory() as temp_dir:
            configured_parent = Path(temp_dir)
            with (
                patch.dict(os.environ, {"CHROMA_FOLDER": str(configured_parent)}),
                # The directory is valid, so the failure can only come from the
                # patched client constructor.
                patch(
                    "chromy.chroma_functions.chromadb.PersistentClient",
                    side_effect=RuntimeError("boom"),
                ),
                self.assertRaisesRegex(
                    ChromaPathError,
                    "Could not initialize Chroma client",
                ),
            ):
                get_client()
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
+14
View File
@@ -10,6 +10,7 @@ from click.testing import Result
from typer.testing import CliRunner
from chromy.cli import app
from chromy.errors import ChromaPathError
class CliTests(unittest.TestCase):
@@ -251,6 +252,19 @@ class CliTests(unittest.TestCase):
self.assertNotEqual(result.exit_code, 0)
self.assertIn("Missing option", result.output)
def test_cli_surfaces_chroma_path_errors(self) -> None:
    """A ``ChromaPathError`` is reported as ``Error: <message>`` with exit code 1."""
    # Make the list-collections operation fail with a path error so the CLI's
    # error reporting is exercised.
    with patch(
        "chromy.handlers.list_collections.list_collections",
        side_effect=ChromaPathError("configured path is not writable"),
    ):
        result = _invoke(["list-collections"])

    self.assertEqual(result.exit_code, 1)
    self.assertEqual(
        result.stdout,
        "Error: configured path is not writable\n",
    )
def _invoke(arguments: Sequence[str]) -> Result:
    """Run the CLI app with ``arguments`` and return the captured result."""
    runner = CliRunner()
    argv = list(arguments)
    return runner.invoke(app, argv)