configurable directory
build / build (push) Successful in 47s
pytest / pytest (push) Successful in 35s

This commit is contained in:
Matteo Rosati
2026-05-06 21:23:37 +02:00
parent 28ec29f8af
commit 96ccf0396d
7 changed files with 209 additions and 7 deletions
+26 -1
View File
@@ -89,6 +89,30 @@ You can also run it from the source tree without installing the tool:
uv run python -m chromy.main --help uv run python -m chromy.main --help
``` ```
## Chroma storage location
By default, Chromy relies on Chroma's default persistent storage location: a
local `chroma/` directory in the working directory from which you run the
command.
You can override this with `CHROMA_FOLDER`.
- `CHROMA_FOLDER` must point to a **parent directory**.
- Chromy will store data in `<CHROMA_FOLDER>/chroma`.
- Relative paths are supported and are resolved from the current working directory.
- If `CHROMA_FOLDER` is set, it takes precedence over the default behavior.
- If the configured location is invalid or not writable, the command fails with an explicit error (no fallback to the default location).
Examples:
```bash
# absolute parent path
CHROMA_FOLDER=/tmp/chromy-data chromy list-collections
# relative parent path (resolved from current directory)
CHROMA_FOLDER=.local-data chromy create-collection notes
```
## Running Tests ## Running Tests
Run the test suite with pytest: Run the test suite with pytest:
@@ -188,7 +212,8 @@ Query results include the stored document chunk, its id, distance, and file name
## Notes ## Notes
- collections are stored in a local persistent Chroma database in the current directory - by default, collections are stored in a local persistent Chroma database in the current directory
- set `CHROMA_FOLDER` to override the parent location; Chromy will use `<CHROMA_FOLDER>/chroma`
- `import` requires the target collection to already exist - `import` requires the target collection to already exist
- `import` accepts one or more file paths - `import` accepts one or more file paths
- unquoted glob patterns such as `*.md` are expanded by the shell before `chromy` starts - unquoted glob patterns such as `*.md` are expanded by the shell before `chromy` starts
+66 -4
View File
@@ -1,6 +1,9 @@
from __future__ import annotations from __future__ import annotations
import os
from collections.abc import Sequence from collections.abc import Sequence
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import cast from typing import cast
from uuid import uuid4 from uuid import uuid4
@@ -10,12 +13,71 @@ from chromadb.api.types import QueryResult, Where
from chromadb.errors import NotFoundError from chromadb.errors import NotFoundError
from chromy.embedding import EmbeddingRecord from chromy.embedding import EmbeddingRecord
from chromy.errors import ChromaPathError
# Name of the environment variable that holds the parent directory override.
CHROMA_FOLDER_ENV_VAR = "CHROMA_FOLDER"
# Name of the subdirectory appended to the configured parent directory.
CHROMA_SUBDIRECTORY = "chroma"
def _resolve_persistence_path() -> Path | None:
    """Return the Chroma data directory selected via ``CHROMA_FOLDER``, if any.

    Returns:
        ``None`` when the environment variable is not set, so the caller can
        fall back to Chroma's default location. Otherwise the absolute path
        ``<parent>/chroma``, where ``<parent>`` is the configured value with
        surrounding whitespace stripped, ``~`` expanded, and relative paths
        resolved against the current working directory.

    Raises:
        ChromaPathError: If the variable is set but blank, or if its value
            cannot be expanded or resolved into an absolute path.
    """
    configured_parent = os.getenv(CHROMA_FOLDER_ENV_VAR)
    if configured_parent is None:
        return None

    trimmed_parent = configured_parent.strip()
    if not trimmed_parent:
        raise ChromaPathError(
            f"{CHROMA_FOLDER_ENV_VAR} is set but empty. Please set a valid parent "
            "directory path."
        )

    try:
        parent_path = Path(trimmed_parent).expanduser().resolve()
    except (OSError, RuntimeError) as exc:
        # expanduser() raises RuntimeError when the home directory cannot be
        # determined (e.g. "~unknownuser/..."), and resolve() can fail on
        # symlink loops or OS-level errors. Convert these so callers that only
        # handle ChromaPathError still report an explicit message.
        raise ChromaPathError(
            f"Could not resolve {CHROMA_FOLDER_ENV_VAR}='{configured_parent}' "
            f"to a directory path: {exc}"
        ) from exc

    return parent_path / CHROMA_SUBDIRECTORY
def _ensure_persistence_path_is_usable(path: Path, configured_parent: str) -> None:
    """Create ``path`` if needed and verify that it is a writable directory.

    Args:
        path: Directory that Chroma should persist its data in.
        configured_parent: Raw ``CHROMA_FOLDER`` value, used only to make the
            error message traceable to the user's configuration.

    Raises:
        ChromaPathError: If the directory cannot be created or accessed, is
            not a directory, or a temporary file cannot be created inside it.
    """
    try:
        path.mkdir(parents=True, exist_ok=True)
        is_directory = path.is_dir()
        if is_directory:
            # Creating a self-deleting scratch file proves write access.
            with NamedTemporaryFile(
                dir=path, prefix=".chromy-write-test-", delete=True
            ):
                pass
    except OSError as exc:
        raise ChromaPathError(
            f"Could not create or access Chroma directory '{path}' from "
            f"{CHROMA_FOLDER_ENV_VAR}='{configured_parent}': {exc}"
        ) from exc

    if not is_directory:
        raise ChromaPathError(
            f"Configured Chroma directory '{path}' is not a directory."
        )
def get_client() -> ClientAPI:
    """Build the persistent Chroma client used by every collection operation.

    When ``CHROMA_FOLDER`` is not set, the client is created with Chroma's
    default persistence location. Otherwise the configured directory is
    created and checked for write access first, then passed to the client as
    its storage path.

    Raises:
        ChromaPathError: If the configured location is invalid or unusable,
            or if the client fails to initialize at that location.
    """
    target_directory = _resolve_persistence_path()
    if target_directory is not None:
        # Keep the raw environment value for error messages.
        raw_setting = os.getenv(CHROMA_FOLDER_ENV_VAR, "")
        _ensure_persistence_path_is_usable(target_directory, raw_setting)

        try:
            client = chromadb.PersistentClient(path=str(target_directory))
        except Exception as exc:  # pragma: no cover - defensive wrapper
            raise ChromaPathError(
                f"Could not initialize Chroma client at '{target_directory}' from "
                f"{CHROMA_FOLDER_ENV_VAR}='{raw_setting}': {exc}"
            ) from exc
        return client

    return chromadb.PersistentClient()
def _get_client_and_collection( def _get_client_and_collection(
collection_name: str, collection_name: str,
) -> tuple[ClientAPI, chromadb.Collection]: ) -> tuple[ClientAPI, chromadb.Collection]:
client = chromadb.PersistentClient() client = get_client()
try: try:
collection = client.get_collection(name=collection_name) collection = client.get_collection(name=collection_name)
@@ -26,7 +88,7 @@ def _get_client_and_collection(
def list_collections() -> list[str]: def list_collections() -> list[str]:
client = chromadb.PersistentClient() client = get_client()
collections = client.list_collections() collections = client.list_collections()
if not collections: if not collections:
@@ -36,14 +98,14 @@ def list_collections() -> list[str]:
def create_collection(name: str) -> str: def create_collection(name: str) -> str:
client = chromadb.PersistentClient() client = get_client()
collection = client.create_collection(name=name) collection = client.create_collection(name=name)
return getattr(collection, "name", name) return getattr(collection, "name", name)
def delete_collection(name: str) -> None: def delete_collection(name: str) -> None:
client = chromadb.PersistentClient() client = get_client()
client.delete_collection(name=name) client.delete_collection(name=name)
+6 -1
View File
@@ -6,6 +6,7 @@ import typer
from chromadb.errors import InternalError, NotFoundError from chromadb.errors import InternalError, NotFoundError
from rich import print from rich import print
from chromy.errors import ChromaPathError
from chromy.handlers.count_collection import handle_count_collection from chromy.handlers.count_collection import handle_count_collection
from chromy.handlers.create_collection import handle_create_collection from chromy.handlers.create_collection import handle_create_collection
from chromy.handlers.delete_collection import ( from chromy.handlers.delete_collection import (
@@ -22,7 +23,11 @@ ExitCodeHandler = Callable[[], int]
def _run(handler: ExitCodeHandler) -> None: def _run(handler: ExitCodeHandler) -> None:
exit_code = handler() try:
exit_code = handler()
except ChromaPathError as exc:
_fail(str(exc))
if exit_code != 0: if exit_code != 0:
raise typer.Exit(exit_code) raise typer.Exit(exit_code)
+4
View File
@@ -3,3 +3,7 @@ from __future__ import annotations
class UnsupportedTextFileError(Exception): class UnsupportedTextFileError(Exception):
"""Raised when a file does not appear to contain supported text content.""" """Raised when a file does not appear to contain supported text content."""
class ChromaPathError(Exception):
    """Signal that the configured Chroma persistence path cannot be used.

    The exception message is intended to be shown to the user as-is.
    """
+1 -1
View File
@@ -61,7 +61,7 @@ def _truncate_file_name(file_name: str, max_length: int = 20) -> str:
if len(file_name) <= max_length: if len(file_name) <= max_length:
return file_name return file_name
return f"{file_name[: max_length - 3]}" return f"{file_name[: max_length - 3]}..."
def handle_import(collection: str, files: list[str]) -> int: def handle_import(collection: str, files: list[str]) -> int:
+92
View File
@@ -0,0 +1,92 @@
from __future__ import annotations
import os
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
from chromy.chroma_functions import get_client
from chromy.errors import ChromaPathError
class ChromaFunctionsTests(unittest.TestCase):
    """Check how ``get_client`` selects and validates the Chroma storage path."""

    def test_get_client_uses_default_when_env_is_unset(self) -> None:
        """With an empty environment, the client is built without arguments."""
        empty_env = patch.dict(os.environ, {}, clear=True)
        client_factory = patch("chromy.chroma_functions.chromadb.PersistentClient")

        with empty_env, client_factory as persistent:
            get_client()

        persistent.assert_called_once_with()

    def test_get_client_uses_chroma_folder_override(self) -> None:
        """An absolute ``CHROMA_FOLDER`` yields ``<parent>/chroma`` on disk."""
        with tempfile.TemporaryDirectory() as scratch:
            parent = Path(scratch) / "data"
            env = patch.dict(os.environ, {"CHROMA_FOLDER": str(parent)})
            client_factory = patch(
                "chromy.chroma_functions.chromadb.PersistentClient"
            )

            with env, client_factory as persistent:
                get_client()

            expected_path = parent.resolve() / "chroma"
            persistent.assert_called_once_with(path=str(expected_path))
            self.assertTrue(expected_path.is_dir())

    def test_get_client_resolves_relative_chroma_folder_from_cwd(self) -> None:
        """A relative ``CHROMA_FOLDER`` is resolved against the working directory."""
        with tempfile.TemporaryDirectory() as scratch:
            working_dir = Path(scratch)
            env = patch.dict(os.environ, {"CHROMA_FOLDER": "relative-parent"})
            client_factory = patch(
                "chromy.chroma_functions.chromadb.PersistentClient"
            )

            original_cwd = Path.cwd()
            os.chdir(working_dir)
            try:
                with env, client_factory as persistent:
                    get_client()
            finally:
                # Always restore the working directory for later tests.
                os.chdir(original_cwd)

            expected_path = (working_dir / "relative-parent").resolve() / "chroma"
            persistent.assert_called_once_with(path=str(expected_path))

    def test_get_client_fails_when_configured_path_is_not_usable(self) -> None:
        """A regular file used as the parent directory raises ChromaPathError."""
        with tempfile.TemporaryDirectory() as scratch:
            file_as_parent = Path(scratch) / "not-a-directory"
            file_as_parent.write_text("x", encoding="utf-8")

            env = patch.dict(os.environ, {"CHROMA_FOLDER": str(file_as_parent)})
            expect_failure = self.assertRaisesRegex(
                ChromaPathError,
                "Could not create or access Chroma directory",
            )

            with env, expect_failure:
                get_client()

    def test_get_client_wraps_client_initialization_failures(self) -> None:
        """Errors raised while constructing the client become ChromaPathError."""
        with tempfile.TemporaryDirectory() as scratch:
            parent = Path(scratch)

            env = patch.dict(os.environ, {"CHROMA_FOLDER": str(parent)})
            failing_factory = patch(
                "chromy.chroma_functions.chromadb.PersistentClient",
                side_effect=RuntimeError("boom"),
            )
            expect_failure = self.assertRaisesRegex(
                ChromaPathError,
                "Could not initialize Chroma client",
            )

            with env, failing_factory, expect_failure:
                get_client()
if __name__ == "__main__":
unittest.main()
+14
View File
@@ -10,6 +10,7 @@ from click.testing import Result
from typer.testing import CliRunner from typer.testing import CliRunner
from chromy.cli import app from chromy.cli import app
from chromy.errors import ChromaPathError
class CliTests(unittest.TestCase): class CliTests(unittest.TestCase):
@@ -251,6 +252,19 @@ class CliTests(unittest.TestCase):
self.assertNotEqual(result.exit_code, 0) self.assertNotEqual(result.exit_code, 0)
self.assertIn("Missing option", result.output) self.assertIn("Missing option", result.output)
def test_cli_surfaces_chroma_path_errors(self) -> None:
    """A ChromaPathError from list-collections gives exit code 1 and an error line."""
    failing_backend = patch(
        "chromy.handlers.list_collections.list_collections",
        side_effect=ChromaPathError("configured path is not writable"),
    )

    with failing_backend:
        result = _invoke(["list-collections"])

    expected_output = "Error: configured path is not writable\n"
    self.assertEqual(result.exit_code, 1)
    self.assertEqual(result.stdout, expected_output)
def _invoke(arguments: Sequence[str]) -> Result: def _invoke(arguments: Sequence[str]) -> Result:
return CliRunner().invoke(app, list(arguments)) return CliRunner().invoke(app, list(arguments))