move top-level modules into a real package
This commit is contained in:
@@ -82,7 +82,7 @@ chromy --help
|
|||||||
You can also run it from the source tree without installing the tool:
|
You can also run it from the source tree without installing the tool:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
uv run python main.py --help
|
uv run python -m chromy.main --help
|
||||||
```
|
```
|
||||||
|
|
||||||
## Commands
|
## Commands
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
"""Chromy package."""
|
||||||
@@ -6,7 +6,7 @@ from chromadb.api import ClientAPI
|
|||||||
from chromadb.api.types import QueryResult
|
from chromadb.api.types import QueryResult
|
||||||
from chromadb.errors import NotFoundError
|
from chromadb.errors import NotFoundError
|
||||||
|
|
||||||
from embed import EmbeddingRecord
|
from chromy.embed import EmbeddingRecord
|
||||||
|
|
||||||
|
|
||||||
def _get_client_and_collection(
|
def _get_client_and_collection(
|
||||||
@@ -6,15 +6,15 @@ from dataclasses import dataclass
|
|||||||
|
|
||||||
from chromadb.errors import InternalError, NotFoundError
|
from chromadb.errors import InternalError, NotFoundError
|
||||||
|
|
||||||
from handlers.add_data import handle_add_data
|
from chromy.handlers.add_data import handle_add_data
|
||||||
from handlers.count_collection import handle_count_collection
|
from chromy.handlers.count_collection import handle_count_collection
|
||||||
from handlers.create_collection import handle_create_collection
|
from chromy.handlers.create_collection import handle_create_collection
|
||||||
from handlers.delete_collection import (
|
from chromy.handlers.delete_collection import (
|
||||||
handle_delete_collection,
|
handle_delete_collection,
|
||||||
handle_delete_records,
|
handle_delete_records,
|
||||||
)
|
)
|
||||||
from handlers.list_collections import handle_list_collections
|
from chromy.handlers.list_collections import handle_list_collections
|
||||||
from handlers.query import handle_query
|
from chromy.handlers.query import handle_query
|
||||||
|
|
||||||
|
|
||||||
CommandHandler = Callable[[Namespace], int]
|
CommandHandler = Callable[[Namespace], int]
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
from argparse import Namespace
|
from argparse import Namespace
|
||||||
|
|
||||||
from utilities import ingest_file
|
from chromy.utilities import ingest_file
|
||||||
|
|
||||||
|
|
||||||
def handle_add_data(args: Namespace) -> int:
|
def handle_add_data(args: Namespace) -> int:
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
from argparse import Namespace
|
from argparse import Namespace
|
||||||
|
|
||||||
from chroma_functions import count_collection
|
from chromy.chroma_functions import count_collection
|
||||||
|
|
||||||
|
|
||||||
def handle_count_collection(args: Namespace) -> int:
|
def handle_count_collection(args: Namespace) -> int:
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
from argparse import Namespace
|
from argparse import Namespace
|
||||||
|
|
||||||
from chroma_functions import create_collection
|
from chromy.chroma_functions import create_collection
|
||||||
|
|
||||||
|
|
||||||
def handle_create_collection(args: Namespace) -> int:
|
def handle_create_collection(args: Namespace) -> int:
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
from argparse import Namespace
|
from argparse import Namespace
|
||||||
|
|
||||||
from chroma_functions import delete_collection, delete_data
|
from chromy.chroma_functions import delete_collection, delete_data
|
||||||
|
|
||||||
|
|
||||||
def _parse_where_clause(where_clause: str) -> dict[str, str]:
|
def _parse_where_clause(where_clause: str) -> dict[str, str]:
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
from argparse import Namespace
|
from argparse import Namespace
|
||||||
|
|
||||||
from chroma_functions import list_collections
|
from chromy.chroma_functions import list_collections
|
||||||
from utilities import print_lines
|
from chromy.utilities import print_lines
|
||||||
|
|
||||||
|
|
||||||
def handle_list_collections(_: Namespace) -> int:
|
def handle_list_collections(_: Namespace) -> int:
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
from argparse import Namespace
|
from argparse import Namespace
|
||||||
|
|
||||||
from utilities import format_query_result, print_lines, run_query
|
from chromy.utilities import format_query_result, print_lines, run_query
|
||||||
|
|
||||||
|
|
||||||
def handle_query(args: Namespace) -> int:
|
def handle_query(args: Namespace) -> int:
|
||||||
@@ -2,8 +2,8 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
from cli_app import execute_command
|
from chromy.cli_app import execute_command
|
||||||
from cli_parser import build_parser
|
from chromy.cli_parser import build_parser
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
|
def main() -> int:
|
||||||
@@ -1,9 +1,9 @@
|
|||||||
from chromadb import QueryResult
|
from chromadb import QueryResult
|
||||||
from collections.abc import Mapping
|
from collections.abc import Mapping
|
||||||
|
|
||||||
from chroma_functions import add_data, query_data
|
from chromy.chroma_functions import add_data, query_data
|
||||||
from chunk_functions import chunk_file
|
from chromy.chunk_functions import chunk_file
|
||||||
from embed import embed
|
from chromy.embed import embed
|
||||||
|
|
||||||
|
|
||||||
def print_lines(lines: list[str]) -> None:
|
def print_lines(lines: list[str]) -> None:
|
||||||
@@ -0,0 +1,37 @@
|
|||||||
|
# 1. Move Top-Level Modules Into a Real Package [DONE]
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Move the current flat module layout into a proper `chromy/` package so imports, packaging, and future subpackages are easier to manage.
|
||||||
|
|
||||||
|
## Implementation Steps
|
||||||
|
|
||||||
|
- Create a `chromy/` package with `__init__.py`.
|
||||||
|
- Move `main.py`, `cli_app.py`, `cli_parser.py`, `chroma_functions.py`, `chunk_functions.py`, `embed.py`, and `utilities.py` into `chromy/`.
|
||||||
|
- Move `handlers/` into `chromy/handlers/`.
|
||||||
|
- Update imports to absolute package imports such as `from chromy.cli_app import execute_command` and `from chromy.handlers.add_data import handle_add_data`.
|
||||||
|
- Update `[project.scripts]` in `pyproject.toml` from `main:main` to `chromy.main:main`.
|
||||||
|
- Update setuptools configuration to package `chromy` and `chromy.handlers` instead of using top-level `py-modules`.
|
||||||
|
- Update README development commands from `uv run python main.py --help` to `uv run python -m chromy.main --help`.
|
||||||
|
|
||||||
|
## Public Interface Changes
|
||||||
|
|
||||||
|
- The installed CLI command remains `chromy`.
|
||||||
|
- Programmatic imports move from top-level modules to `chromy.*`.
|
||||||
|
- Running from source should use `python -m chromy.main`.
|
||||||
|
|
||||||
|
## Test Plan
|
||||||
|
|
||||||
|
- Run `uv run python -m chromy.main --help`.
|
||||||
|
- Run `uv run python -m chromy.main list-collections`.
|
||||||
|
- Build the package with `uv build`.
|
||||||
|
- Install locally in editable mode and confirm `chromy --help` resolves the packaged entrypoint.
|
||||||
|
- Test all commands to verify they still work:
|
||||||
|
- [creating, listing, deleting] collections
|
||||||
|
- [adding, deleting] documents in a collection (use [romeo_and_juliet.txt](romeo_and_juliet.txt) as the sample input)
|
||||||
|
- querying
|
||||||
|
|
||||||
|
## Assumptions
|
||||||
|
|
||||||
|
- Backward-compatible top-level imports are not required.
|
||||||
|
- The package refactor should preserve behavior before deeper service or architecture changes are made.
|
||||||
@@ -0,0 +1,31 @@
|
|||||||
|
# 2. Replace `argparse.Namespace` Plumbing With Typed Command Inputs
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Stop passing mutable `argparse.Namespace` objects into handlers. Convert parsed CLI arguments into typed command dataclasses before dispatch.
|
||||||
|
|
||||||
|
## Implementation Steps
|
||||||
|
|
||||||
|
- Add frozen dataclasses for command inputs, such as list collections, create collection, delete collection, count, add data, query, and delete records.
|
||||||
|
- Keep `argparse` isolated in the CLI adapter layer.
|
||||||
|
- Convert `Namespace` into the correct command dataclass immediately before dispatch.
|
||||||
|
- Change handler signatures from `Callable[[Namespace], int]` to typed command-specific callables.
|
||||||
|
- Remove mutations such as `args.error_message = ...` in `cli_app.py` and `handlers/delete_collection.py`.
|
||||||
|
- Return or raise explicit structured results/errors rather than writing temporary state back into the parsed args object.
|
||||||
|
|
||||||
|
## Public Interface Changes
|
||||||
|
|
||||||
|
- CLI command syntax stays the same.
|
||||||
|
- Internal handler APIs change to typed dataclass inputs.
|
||||||
|
- Error message builders should receive typed command inputs or exceptions instead of raw `Namespace`.
|
||||||
|
|
||||||
|
## Test Plan
|
||||||
|
|
||||||
|
- Add parser-to-command conversion tests for every command and alias.
|
||||||
|
- Add handler unit tests that construct command dataclasses directly.
|
||||||
|
- Verify invalid delete filters still produce the same user-facing error.
|
||||||
|
|
||||||
|
## Assumptions
|
||||||
|
|
||||||
|
- Command dataclasses should live near CLI application code until a broader package refactor introduces clearer subpackages.
|
||||||
|
- The first pass should preserve the existing command names, arguments, aliases, and output.
|
||||||
@@ -0,0 +1,31 @@
|
|||||||
|
# 3. Split CLI, Service, Repository, and Formatting Responsibilities
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Separate command handling, business workflows, Chroma persistence, and output formatting so each layer has a single responsibility.
|
||||||
|
|
||||||
|
## Implementation Steps
|
||||||
|
|
||||||
|
- Keep handlers thin: receive typed command input, call a service, and print formatted output.
|
||||||
|
- Move ingestion orchestration from `utilities.ingest_file` into an `IngestionService`.
|
||||||
|
- Move query orchestration from `utilities.run_query` into a `QueryService`.
|
||||||
|
- Move Chroma collection operations from `chroma_functions.py` into a `ChromaRepository` or `ChromaStore`.
|
||||||
|
- Move output-only functions such as `format_query_result` and `print_lines` into a formatting module.
|
||||||
|
- Keep Chroma-specific result parsing out of CLI handlers.
|
||||||
|
|
||||||
|
## Public Interface Changes
|
||||||
|
|
||||||
|
- CLI behavior remains unchanged.
|
||||||
|
- Internal APIs become service and repository methods instead of free functions.
|
||||||
|
- Formatting functions should accept internal domain objects rather than raw Chroma response dictionaries where possible.
|
||||||
|
|
||||||
|
## Test Plan
|
||||||
|
|
||||||
|
- Unit test services with mocked repository, chunker, and embedder dependencies.
|
||||||
|
- Unit test formatter output without requiring Chroma.
|
||||||
|
- Run a small manual CLI smoke test for create, add, query, count, list, and delete.
|
||||||
|
|
||||||
|
## Assumptions
|
||||||
|
|
||||||
|
- This should be done after typed command inputs and package layout changes, or coordinated carefully with them.
|
||||||
|
- No JSON output mode or web API is added in this refactor; the goal is to make those future changes easier.
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
# 4. Centralize Chroma Client Configuration
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Create Chroma client configuration once per command and inject it into repository or service objects instead of repeatedly calling `chromadb.PersistentClient()`.
|
||||||
|
|
||||||
|
## Implementation Steps
|
||||||
|
|
||||||
|
- Add a small configuration object for Chroma settings, including persistence path.
|
||||||
|
- Resolve the persistence path from a CLI option or an environment variable, falling back to Chroma's default behavior when neither is set.
|
||||||
|
- Create a Chroma client factory that returns one client per command execution.
|
||||||
|
- Inject the client into the Chroma repository instead of constructing it inside each function.
|
||||||
|
- Remove repeated `chromadb.PersistentClient()` calls from collection operations.
|
||||||
|
- Ensure tests can pass an in-memory or temporary Chroma client.
|
||||||
|
|
||||||
|
## Public Interface Changes
|
||||||
|
|
||||||
|
- Add a documented way to configure the Chroma persistence path.
|
||||||
|
- Existing commands should continue to work with the current implicit default when no path is provided.
|
||||||
|
|
||||||
|
## Test Plan
|
||||||
|
|
||||||
|
- Unit test repository operations with an injected test client.
|
||||||
|
- Verify two commands using the same configured persistence directory can see the same collections.
|
||||||
|
- Verify default behavior still works when no path is configured.
|
||||||
|
|
||||||
|
## Assumptions
|
||||||
|
|
||||||
|
- The initial implementation should preserve Chroma's default persistence behavior unless a path is explicitly configured.
|
||||||
|
- Configuration should be introduced before larger repository and service tests depend on isolated storage.
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
# 6. Add Ruff and Mypy Configuration
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Add linting, formatting, and static type checking configuration to `pyproject.toml`. Do not add pytest configuration for this improvement.
|
||||||
|
|
||||||
|
## Implementation Steps
|
||||||
|
|
||||||
|
- Add `ruff` and `mypy` to the development dependency group.
|
||||||
|
- Configure Ruff formatting and lint rules in `pyproject.toml`.
|
||||||
|
- Configure mypy in strict or near-strict mode suitable for the current Chroma and third-party boundaries.
|
||||||
|
- Add README development commands for `uv run ruff check .`, `uv run ruff format .`, and `uv run mypy .`.
|
||||||
|
- Fix only minimal configuration blockers in this plan; broad type modernization belongs to the type-hints plan.
|
||||||
|
|
||||||
|
## Public Interface Changes
|
||||||
|
|
||||||
|
- No runtime CLI behavior changes.
|
||||||
|
- Development workflow gains standard lint, format, and type-check commands.
|
||||||
|
|
||||||
|
## Test Plan
|
||||||
|
|
||||||
|
- Run `uv run ruff check .`.
|
||||||
|
- Run `uv run ruff format --check .`.
|
||||||
|
- Run `uv run mypy .`.
|
||||||
|
- Confirm no pytest configuration is added as part of this plan.
|
||||||
|
|
||||||
|
## Assumptions
|
||||||
|
|
||||||
|
- Pytest is intentionally out of scope for this improvement.
|
||||||
|
- Strict mypy may need targeted ignores for third-party packages that do not expose complete typing.
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
# 7. Modernize Type Hints and Add Missing Future Imports
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Make type annotations consistent across the codebase by using modern Python 3.12 typing syntax and adding `from __future__ import annotations` to modules that do not already have it.
|
||||||
|
|
||||||
|
## Implementation Steps
|
||||||
|
|
||||||
|
- Add `from __future__ import annotations` consistently to Python modules that do not already have it.
|
||||||
|
- Replace `typing.List` with built-in generic syntax such as `list[str]` and `list[EmbeddingRecord]`.
|
||||||
|
- Use `collections.abc` input interfaces such as `Sequence[str]` where mutation is not required.
|
||||||
|
- Introduce type aliases or dataclasses for internal Chroma result shapes only where they reduce ambiguity.
|
||||||
|
- Keep runtime behavior unchanged.
|
||||||
|
|
||||||
|
## Public Interface Changes
|
||||||
|
|
||||||
|
- No CLI behavior changes.
|
||||||
|
- Public Python annotations become more precise and consistent.
|
||||||
|
|
||||||
|
## Test Plan
|
||||||
|
|
||||||
|
- Run `uv run ruff check .`.
|
||||||
|
- Run `uv run mypy .` after mypy configuration exists.
|
||||||
|
- Manually smoke test the CLI commands whose code paths cross the changed type boundaries.
|
||||||
|
|
||||||
|
## Assumptions
|
||||||
|
|
||||||
|
- The project remains Python 3.12+, so built-in generic syntax is acceptable everywhere.
|
||||||
|
- Larger domain model changes should be handled in the service/repository and query-formatting plans.
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
# 8. Avoid Catching `BaseException` in CLI Dispatch
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Change CLI dispatch error handling so process-control exceptions such as `KeyboardInterrupt` and `SystemExit` are not swallowed.
|
||||||
|
|
||||||
|
## Implementation Steps
|
||||||
|
|
||||||
|
- Change `execute_command` to catch `Exception` instead of `BaseException`.
|
||||||
|
- Keep mapped, expected errors handled through the existing command error mapping or its replacement.
|
||||||
|
- Print handled user-facing errors to `stderr` instead of `stdout`.
|
||||||
|
- Allow unmapped exceptions, `KeyboardInterrupt`, and `SystemExit` to propagate normally.
|
||||||
|
- Consider adding debug logging for unexpected exceptions after the logging plan exists.
|
||||||
|
|
||||||
|
## Public Interface Changes
|
||||||
|
|
||||||
|
- Expected command errors still return a non-zero exit code.
|
||||||
|
- Handled error messages move from stdout to stderr.
|
||||||
|
- Interrupt and process-exit behavior becomes conventional.
|
||||||
|
|
||||||
|
## Test Plan
|
||||||
|
|
||||||
|
- Test that a mapped exception returns `1` and writes to stderr.
|
||||||
|
- Test that `KeyboardInterrupt` is not caught by `execute_command`.
|
||||||
|
- Test that unmapped exceptions still propagate.
|
||||||
|
|
||||||
|
## Assumptions
|
||||||
|
|
||||||
|
- Returning `1` for handled user errors remains acceptable until the exit-code conventions plan is implemented.
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
# 9. Use Domain-Specific Exceptions Instead of Chroma Exceptions in CLI Mapping
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Hide Chroma-specific exceptions behind application-level exceptions so the CLI does not depend on Chroma's exception model.
|
||||||
|
|
||||||
|
## Implementation Steps
|
||||||
|
|
||||||
|
- Define app-level exceptions such as `CollectionNotFoundError`, `CollectionAlreadyExistsError`, and `StorageOperationError`.
|
||||||
|
- Convert Chroma exceptions inside the repository layer.
|
||||||
|
- Update CLI command error mappings to handle app-level exceptions only.
|
||||||
|
- Preserve existing user-facing messages for missing and duplicate collections.
|
||||||
|
- Avoid importing `chromadb.errors` in CLI modules after the repository layer owns that boundary.
|
||||||
|
|
||||||
|
## Public Interface Changes
|
||||||
|
|
||||||
|
- CLI behavior and messages should remain the same.
|
||||||
|
- Internal error contracts change from Chroma exceptions to app-level exceptions.
|
||||||
|
|
||||||
|
## Test Plan
|
||||||
|
|
||||||
|
- Unit test repository exception translation.
|
||||||
|
- Unit test CLI mappings for app-level exceptions.
|
||||||
|
- Smoke test missing collection, duplicate collection, and successful operations.
|
||||||
|
|
||||||
|
## Assumptions
|
||||||
|
|
||||||
|
- Chroma remains the only storage backend for now.
|
||||||
|
- The exception layer is still useful because it prevents storage details from leaking upward.
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
# 10. Make Ingestion More Configurable
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Move chunking and embedding choices into configuration and expose chunk size as an `add-data` CLI option.
|
||||||
|
|
||||||
|
## Implementation Steps
|
||||||
|
|
||||||
|
- Add ingestion configuration for chunk size, tokenizer/model name, and embedding function provider.
|
||||||
|
- Change chunking code to receive chunk size and tokenizer/model name instead of hard-coding `800` and `"gpt-4"`.
|
||||||
|
- Reuse the embedding function through dependency injection instead of constructing it for every embed call.
|
||||||
|
- Add `--chunk-size` to `add-data`, defaulting to the current value of `800`.
|
||||||
|
- Keep the default tokenizer/model behavior equivalent to the current `"gpt-4"` setting.
|
||||||
|
|
||||||
|
## Public Interface Changes
|
||||||
|
|
||||||
|
- `add-data` gains `--chunk-size`.
|
||||||
|
- Default ingestion behavior remains unchanged when no option is provided.
|
||||||
|
|
||||||
|
## Test Plan
|
||||||
|
|
||||||
|
- Test chunking with default and custom chunk sizes.
|
||||||
|
- Test `add-data --chunk-size` parser behavior.
|
||||||
|
- Test ingestion service with an injected fake embedder.
|
||||||
|
- Smoke test adding a file with and without `--chunk-size`.
|
||||||
|
|
||||||
|
## Assumptions
|
||||||
|
|
||||||
|
- Only chunk size is exposed in the CLI initially.
|
||||||
|
- Tokenizer/model and embedding provider configuration can remain internal or environment-backed until there is a concrete user-facing need.
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
# 11. Improve File Handling
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Make file ingestion boundaries clearer by using `Path`, explicit UTF-8 decoding, and validation before reading.
|
||||||
|
|
||||||
|
## Implementation Steps
|
||||||
|
|
||||||
|
- Change internal file ingestion APIs to accept `Path` instead of raw `str`.
|
||||||
|
- Convert CLI string paths to `Path` in the command adapter or handler.
|
||||||
|
- Validate that the path exists and is a regular file before reading.
|
||||||
|
- Read text with `encoding="utf-8"`.
|
||||||
|
- Raise a clear app-level file error for missing paths, directories, and decoding failures.
|
||||||
|
- Leave PDF and future file loaders out of scope for now.
|
||||||
|
|
||||||
|
## Public Interface Changes
|
||||||
|
|
||||||
|
- CLI argument remains a file path string.
|
||||||
|
- Error messages for missing or invalid files become clearer.
|
||||||
|
|
||||||
|
## Test Plan
|
||||||
|
|
||||||
|
- Test successful text-file loading.
|
||||||
|
- Test missing file, directory path, and invalid UTF-8 handling.
|
||||||
|
- Smoke test `add-data` with a valid UTF-8 file.
|
||||||
|
|
||||||
|
## Assumptions
|
||||||
|
|
||||||
|
- Only plain text ingestion is supported in this plan.
|
||||||
|
- Existing metadata can continue storing the original path string as `file_name` unless a later plan changes metadata shape.
|
||||||
@@ -0,0 +1,31 @@
|
|||||||
|
# 12. Review Dependencies and Remove Unused Ones
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Audit runtime dependencies and move unused or optional packages out of the base install where appropriate.
|
||||||
|
|
||||||
|
## Implementation Steps
|
||||||
|
|
||||||
|
- Compare imports in source code against dependencies declared in `pyproject.toml`.
|
||||||
|
- Keep packages that are directly imported by current runtime code.
|
||||||
|
- Move packages used only for optional or future features behind extras, such as `pdf` or `openai`.
|
||||||
|
- Remove dependencies that are neither imported nor needed transitively by active code.
|
||||||
|
- Update `uv.lock` after dependency changes.
|
||||||
|
- Update README installation notes if extras are introduced.
|
||||||
|
|
||||||
|
## Public Interface Changes
|
||||||
|
|
||||||
|
- Base installation may become smaller.
|
||||||
|
- Optional feature dependencies should be installed through extras if introduced.
|
||||||
|
|
||||||
|
## Test Plan
|
||||||
|
|
||||||
|
- Run `uv sync` after dependency edits.
|
||||||
|
- Run `uv run python -m chromy.main --help`.
|
||||||
|
- Smoke test commands that use Chroma, semchunk, dotenv loading, and embedding.
|
||||||
|
- Build the package with `uv build`.
|
||||||
|
|
||||||
|
## Assumptions
|
||||||
|
|
||||||
|
- Dependency removal should be conservative and based on actual import usage.
|
||||||
|
- No new optional features are implemented as part of this cleanup.
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
# 13. Make Query Result Formatting More Robust
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Convert raw Chroma query results into typed internal matches before formatting them for terminal output.
|
||||||
|
|
||||||
|
## Implementation Steps
|
||||||
|
|
||||||
|
- Add a `QueryMatch` dataclass with fields for id, document, distance, and metadata.
|
||||||
|
- Add a parser that converts Chroma `QueryResult` data into `list[QueryMatch]`.
|
||||||
|
- Handle empty results, missing documents, missing metadata, missing distances, and unexpected metadata shapes defensively.
|
||||||
|
- Change terminal formatting to accept `list[QueryMatch]`.
|
||||||
|
- Keep current text output as stable as practical.
|
||||||
|
|
||||||
|
## Public Interface Changes
|
||||||
|
|
||||||
|
- CLI output should remain effectively the same for normal query results.
|
||||||
|
- Internal formatter APIs change from raw Chroma result dictionaries to typed match objects.
|
||||||
|
|
||||||
|
## Test Plan
|
||||||
|
|
||||||
|
- Test empty query results.
|
||||||
|
- Test populated results with ids, documents, distances, and metadata.
|
||||||
|
- Test missing documents, missing metadata, multiple query result groups, and non-mapping metadata values.
|
||||||
|
- Smoke test `query` against a real collection.
|
||||||
|
|
||||||
|
## Assumptions
|
||||||
|
|
||||||
|
- The first implementation can format the first query group only, matching current behavior.
|
||||||
|
- Support for alternate output formats is not added in this plan.
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
# 14. Clarify Delete Filter Semantics
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Make it clear that record deletion supports a simple metadata equality filter in `key=value` form.
|
||||||
|
|
||||||
|
## Implementation Steps
|
||||||
|
|
||||||
|
- Rename internal parser variables from `condition` to `key` where equality-only semantics are intended.
|
||||||
|
- Update CLI help for `--where` to say `Metadata equality filter in the format <key>=<value>`.
|
||||||
|
- Update error messages to use `<key>=<value>`.
|
||||||
|
- Update README command documentation to include the `delete` command and its filter syntax.
|
||||||
|
- Reserve richer Chroma filters, such as JSON filters, for a later feature.
|
||||||
|
|
||||||
|
## Public Interface Changes
|
||||||
|
|
||||||
|
- CLI option remains `--where`.
|
||||||
|
- Help text and error messages become more precise.
|
||||||
|
- No richer filter syntax is added yet.
|
||||||
|
|
||||||
|
## Test Plan
|
||||||
|
|
||||||
|
- Test valid `key=value` parsing with whitespace trimming.
|
||||||
|
- Test missing separator, empty key, and empty value errors.
|
||||||
|
- Verify `chromy delete --help` documents equality semantics.
|
||||||
|
|
||||||
|
## Assumptions
|
||||||
|
|
||||||
|
- Keeping the option name `--where` is acceptable for backward compatibility.
|
||||||
|
- Renaming to `--key` is not worth the CLI break for the current feature set.
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
# 15. Improve Command Registration So Parser and Dispatcher Cannot Drift
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Unify command parser metadata, handlers, aliases, and error mappings into one registry or add a startup check that prevents parser and dispatcher drift.
|
||||||
|
|
||||||
|
## Implementation Steps
|
||||||
|
|
||||||
|
- Create one command registry that includes command name, aliases, help text, arguments, handler, and expected error mappings.
|
||||||
|
- Build argparse subcommands from the registry.
|
||||||
|
- Dispatch commands through the same registry.
|
||||||
|
- Remove duplicated command declarations from separate parser and app structures.
|
||||||
|
- Add a small validation check that command names are unique and aliases do not collide.
|
||||||
|
|
||||||
|
## Public Interface Changes
|
||||||
|
|
||||||
|
- CLI command names, aliases, arguments, and help text should remain the same.
|
||||||
|
- Internal command registration becomes centralized.
|
||||||
|
|
||||||
|
## Test Plan
|
||||||
|
|
||||||
|
- Test that every registry command appears in parser help.
|
||||||
|
- Test every alias dispatches to the canonical command.
|
||||||
|
- Test duplicate command or alias validation fails fast.
|
||||||
|
- Smoke test all existing commands through parser and dispatcher.
|
||||||
|
|
||||||
|
## Assumptions
|
||||||
|
|
||||||
|
- A unified registry is preferred over only adding a drift-detection test because the current repo already has structured command metadata.
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
# 16. Add Logging for Debuggability
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Add optional diagnostic logging for ingestion, query, and Chroma operations while keeping normal CLI output clean.
|
||||||
|
|
||||||
|
## Implementation Steps
|
||||||
|
|
||||||
|
- Configure Python's `logging` module in the CLI entrypoint.
|
||||||
|
- Add a global `--verbose` flag to enable debug-level logs.
|
||||||
|
- Log file loading, chunk counts, embedding calls, Chroma writes, collection operations, and query timing.
|
||||||
|
- Send logs to stderr so stdout remains reserved for command output.
|
||||||
|
- Avoid logging document contents or full embeddings.
|
||||||
|
|
||||||
|
## Public Interface Changes
|
||||||
|
|
||||||
|
- Add global CLI flag `--verbose`.
|
||||||
|
- Normal output remains unchanged when verbose mode is not enabled.
|
||||||
|
|
||||||
|
## Test Plan
|
||||||
|
|
||||||
|
- Test parser behavior for `--verbose`.
|
||||||
|
- Test that debug logs are suppressed by default.
|
||||||
|
- Test that verbose mode emits representative diagnostic logs to stderr.
|
||||||
|
- Smoke test `add-data` and `query` with verbose mode enabled.
|
||||||
|
|
||||||
|
## Assumptions
|
||||||
|
|
||||||
|
- A single global verbosity level is enough for now.
|
||||||
|
- "Structured logging" in this plan means consistent logger names and message fields, not a JSON logging format.
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
# 17. Add Exit Code Conventions
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Document and implement consistent exit codes so scripts can distinguish success from expected user errors.
|
||||||
|
|
||||||
|
## Implementation Steps
|
||||||
|
|
||||||
|
- Define named constants or an enum for exit codes.
|
||||||
|
- Use `0` for success.
|
||||||
|
- Use `1` for expected user-facing errors initially.
|
||||||
|
- Optionally reserve distinct documented codes for validation errors, missing collections, and file errors if the CLI needs them.
|
||||||
|
- Update error handling to return constants instead of literal integers.
|
||||||
|
- Document exit code behavior in README.
|
||||||
|
|
||||||
|
## Public Interface Changes
|
||||||
|
|
||||||
|
- Existing successful commands still exit `0`.
|
||||||
|
- Existing handled errors may continue to exit `1` unless distinct codes are explicitly adopted.
|
||||||
|
- README documents the convention.
|
||||||
|
|
||||||
|
## Test Plan
|
||||||
|
|
||||||
|
- Test success returns `0`.
|
||||||
|
- Test expected user errors return the documented code.
|
||||||
|
- Test unexpected exceptions still propagate to the runtime.
|
||||||
|
- Manually verify shell exit status for representative commands.
|
||||||
|
|
||||||
|
## Assumptions
|
||||||
|
|
||||||
|
- Start with `0` and `1` unless there is a clear automation need for more granular codes.
|
||||||
|
- This plan should follow the exception cleanup plan so errors are categorized at the app level.
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
# 18. Add Repository-Level Documentation for Architecture
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Document the intended internal architecture so future changes follow the same parser, handler, service, repository, and formatter boundaries.
|
||||||
|
|
||||||
|
## Implementation Steps
|
||||||
|
|
||||||
|
- Add a "Development Architecture" section to README or create `docs/architecture.md`.
|
||||||
|
- Describe the request flow: CLI parser -> command input -> handler -> service -> Chroma repository -> formatter.
|
||||||
|
- Explain where to add a new command.
|
||||||
|
- Explain where to add a new document loader.
|
||||||
|
- Explain where to add a new embedding provider.
|
||||||
|
- Document the default Chroma persistence behavior and, once the client-configuration refactor has landed, how configuration is injected.
|
||||||
|
|
||||||
|
## Public Interface Changes
|
||||||
|
|
||||||
|
- No runtime behavior changes.
|
||||||
|
- Developer-facing documentation is added or expanded.
|
||||||
|
|
||||||
|
## Test Plan
|
||||||
|
|
||||||
|
- Verify documentation matches the actual package/module layout after refactors.
|
||||||
|
- Run README examples to ensure commands are still accurate.
|
||||||
|
- Check links and command snippets manually.
|
||||||
|
|
||||||
|
## Assumptions
|
||||||
|
|
||||||
|
- Documentation should be updated after or alongside the architecture refactor so it describes the intended final shape.
|
||||||
|
- A README section is enough unless the architecture content grows too large.
|
||||||
+2
-11
@@ -19,19 +19,10 @@ dependencies = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
chromy = "main:main"
|
chromy = "chromy.main:main"
|
||||||
|
|
||||||
[tool.setuptools]
|
[tool.setuptools]
|
||||||
packages = ["handlers"]
|
packages = ["chromy", "chromy.handlers"]
|
||||||
py-modules = [
|
|
||||||
"main",
|
|
||||||
"cli_app",
|
|
||||||
"cli_parser",
|
|
||||||
"chroma_functions",
|
|
||||||
"chunk_functions",
|
|
||||||
"embed",
|
|
||||||
"utilities",
|
|
||||||
]
|
|
||||||
|
|
||||||
[dependency-groups]
|
[dependency-groups]
|
||||||
dev = ["nuitka[onefile]>=4.0.8"]
|
dev = ["nuitka[onefile]>=4.0.8"]
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user