complete refactor

This commit is contained in:
2026-04-21 17:42:37 +02:00
parent ad73a6a985
commit a7b91b9c4e
10 changed files with 292 additions and 139 deletions
+97
View File
@@ -0,0 +1,97 @@
from __future__ import annotations
from argparse import Namespace
from collections.abc import Callable
from dataclasses import dataclass
from chromadb.errors import InternalError, NotFoundError
from handlers.add_data import handle_add_data
from handlers.count_collection import handle_count_collection
from handlers.create_collection import handle_create_collection
from handlers.delete_collection import handle_delete_collection
from handlers.list_collections import handle_list_collections
from handlers.query import handle_query
# Signature shared by every command handler: it receives the parsed CLI
# namespace and returns the integer that execute_command hands back to its caller.
CommandHandler = Callable[[Namespace], int]
# Signature of the callables that turn the parsed CLI namespace into the text
# execute_command prints when a registered exception is caught.
ErrorMessageBuilder = Callable[[Namespace], str]
@dataclass(frozen=True, slots=True)
class CliErrorHandler:
exception_type: type[BaseException]
message: ErrorMessageBuilder
@dataclass(frozen=True, slots=True)
class CommandConfig:
handler: CommandHandler
error_handlers: tuple[CliErrorHandler, ...] = ()
def _collection_exists_message(args: Namespace) -> str:
    """Build the message printed when the collection to create already exists."""
    return f"Collection '{args.collection}' already exists."


def _collection_missing_message(args: Namespace) -> str:
    """Build the message printed when the requested collection is absent."""
    return f"Collection '{args.collection}' does not exist."


def _file_missing_message(args: Namespace) -> str:
    """Build the message printed when the input file cannot be found."""
    return f"The file {args.file} was not found."


# CliErrorHandler is frozen, so a single instance can safely be shared by
# every command that operates on an existing collection.
_COLLECTION_NOT_FOUND = CliErrorHandler(
    exception_type=NotFoundError,
    message=_collection_missing_message,
)

# Registry mapping each canonical command name to its handler and to the
# exceptions that execute_command reports as a message.
COMMANDS: dict[str, CommandConfig] = {
    "list-collections": CommandConfig(handler=handle_list_collections),
    "create-collection": CommandConfig(
        handler=handle_create_collection,
        error_handlers=(
            # NOTE(review): InternalError is reported as "already exists";
            # confirm chromadb raises it only for duplicate collections.
            CliErrorHandler(
                exception_type=InternalError,
                message=_collection_exists_message,
            ),
        ),
    ),
    "delete-collection": CommandConfig(
        handler=handle_delete_collection,
        error_handlers=(_COLLECTION_NOT_FOUND,),
    ),
    "count": CommandConfig(
        handler=handle_count_collection,
        error_handlers=(_COLLECTION_NOT_FOUND,),
    ),
    "add-data": CommandConfig(
        handler=handle_add_data,
        error_handlers=(
            _COLLECTION_NOT_FOUND,
            CliErrorHandler(
                exception_type=FileNotFoundError,
                message=_file_missing_message,
            ),
        ),
    ),
    "query": CommandConfig(
        handler=handle_query,
        error_handlers=(_COLLECTION_NOT_FOUND,),
    ),
}
def execute_command(args: Namespace) -> int:
    """Run the command selected in *args* and return its exit status.

    Args:
        args: Parsed CLI namespace; ``args.command`` must be a key of
            ``COMMANDS``.

    Returns:
        The handler's return value, or ``1`` when the handler raised an
        exception that one of the command's error handlers covers (its
        message is printed first).

    Raises:
        KeyError: If ``args.command`` is not registered in ``COMMANDS``.
        BaseException: Any exception from the handler that no registered
            error handler matches is re-raised unchanged.
    """
    config = COMMANDS[args.command]
    try:
        return config.handler(args)
    except BaseException as error:
        # The first registered handler whose type matches wins.
        matching = next(
            (
                candidate
                for candidate in config.error_handlers
                if isinstance(error, candidate.exception_type)
            ),
            None,
        )
        if matching is None:
            raise
        print(matching.message(args))
        return 1
+88 -51
View File
@@ -1,59 +1,96 @@
from __future__ import annotations
import argparse import argparse
from dataclasses import dataclass
@dataclass(frozen=True, slots=True)
class ArgumentSpec:
name: str
help: str
@dataclass(frozen=True, slots=True)
class CommandSpec:
name: str
aliases: tuple[str, ...]
help: str
arguments: tuple[ArgumentSpec, ...] = ()
# Every sub-command the parser exposes, in the order build_parser registers them.
COMMAND_SPECS: tuple[CommandSpec, ...] = (
    CommandSpec(
        name="list-collections",
        aliases=("lc",),
        help="List all collections stored in the local Chroma database.",
    ),
    CommandSpec(
        name="create-collection",
        aliases=("cc",),
        help="Create a collection in the local Chroma database.",
        arguments=(
            ArgumentSpec(
                name="collection",
                help="Name of the collection to create.",
            ),
        ),
    ),
    CommandSpec(
        name="delete-collection",
        aliases=("dc",),
        help="Delete a collection from the local Chroma database.",
        arguments=(
            ArgumentSpec(
                name="collection",
                help="Name of the collection to delete.",
            ),
        ),
    ),
    CommandSpec(
        name="count",
        aliases=("co",),
        help="Count records in a collection from the local Chroma database.",
        arguments=(
            ArgumentSpec(
                name="collection",
                help="Name of the collection to count.",
            ),
        ),
    ),
    CommandSpec(
        name="add-data",
        aliases=("ad",),
        help="Chunk, embed, and add a file to a collection in the local Chroma database.",
        arguments=(
            ArgumentSpec(
                name="collection",
                help="Name of the target collection.",
            ),
            ArgumentSpec(
                name="file",
                help="Path to the file to chunk and add to the collection.",
            ),
        ),
    ),
    CommandSpec(
        name="query",
        aliases=("q",),
        help="Query a collection with the provided text.",
        arguments=(
            ArgumentSpec(
                name="collection",
                help="Name of the target collection.",
            ),
            ArgumentSpec(
                name="query_text",
                help="The text to query.",
            ),
        ),
    ),
)
def _add_command(
    subparsers: argparse._SubParsersAction[argparse.ArgumentParser],
    command: CommandSpec,
) -> None:
    """Register *command* as a sub-parser on *subparsers*.

    Args:
        subparsers: Sub-parser action the new parser is added to.
        command: Specification supplying the name, aliases, help text and
            positional arguments of the sub-command.
    """
    # The same text serves as the short help and the long description.
    summary = command.help
    subparser = subparsers.add_parser(
        command.name,
        aliases=[*command.aliases],
        help=summary,
        description=summary,
    )
    for spec in command.arguments:
        subparser.add_argument(spec.name, help=spec.help)
    # Store the canonical name as the parser default for "command".
    subparser.set_defaults(command=command.name)
def build_parser() -> argparse.ArgumentParser:
    """Build the top-level CLI parser with one sub-parser per command spec.

    Returns:
        A parser whose sub-command is mandatory and whose sub-parsers are
        created from ``COMMAND_SPECS`` by ``_add_command``.
    """
    parser = argparse.ArgumentParser(description="Inspect local Chroma collections.")
    # required=True makes argparse reject an invocation without a sub-command.
    subparsers = parser.add_subparsers(dest="command", required=True)
    for command in COMMAND_SPECS:
        _add_command(subparsers, command)
    return parser
+9
View File
@@ -0,0 +1,9 @@
from argparse import Namespace
from utilities import ingest_file
def handle_add_data(args: Namespace) -> int:
    """Ingest ``args.file`` into ``args.collection`` and report the record count.

    Returns:
        Always ``0``; failures surface as exceptions from ``ingest_file``.
    """
    added = ingest_file(args.collection, args.file)
    summary = f"Added {added} records to collection '{args.collection}'."
    print(summary)
    return 0
+8
View File
@@ -0,0 +1,8 @@
from argparse import Namespace
from chroma_functions import count_collection
def handle_count_collection(args: Namespace) -> int:
    """Print the value ``count_collection`` returns for ``args.collection``.

    Returns:
        Always ``0``; failures surface as exceptions from ``count_collection``.
    """
    record_count = count_collection(args.collection)
    print(record_count)
    return 0
+9
View File
@@ -0,0 +1,9 @@
from argparse import Namespace
from chroma_functions import create_collection
def handle_create_collection(args: Namespace) -> int:
    """Create the collection named by ``args.collection`` and confirm it.

    The confirmation shows the value returned by ``create_collection``.

    Returns:
        Always ``0``; failures surface as exceptions from ``create_collection``.
    """
    created = create_collection(args.collection)
    print(f"Created collection '{created}'.")
    return 0
+9
View File
@@ -0,0 +1,9 @@
from argparse import Namespace
from chroma_functions import delete_collection
def handle_delete_collection(args: Namespace) -> int:
    """Delete the collection named by ``args.collection`` and confirm it.

    Returns:
        Always ``0``; failures surface as exceptions from ``delete_collection``.
    """
    delete_collection(args.collection)
    confirmation = f"Deleted collection '{args.collection}'."
    print(confirmation)
    return 0
+14
View File
@@ -0,0 +1,14 @@
from argparse import Namespace
from chroma_functions import list_collections
from utilities import print_lines
def handle_list_collections(_: Namespace) -> int:
    """Print what ``list_collections`` returns, or a notice when it is empty.

    The parsed arguments are not used.

    Returns:
        Always ``0``.
    """
    names = list_collections()
    if names:
        print_lines(names)
    else:
        print("No collections found.")
    return 0
+9
View File
@@ -0,0 +1,9 @@
from argparse import Namespace
from utilities import format_query_result, print_lines, run_query
def handle_query(args: Namespace) -> int:
    """Query ``args.collection`` with ``args.query_text`` and print the result.

    Returns:
        Always ``0``; failures surface as exceptions from ``run_query``.
    """
    rendered = format_query_result(run_query(args.collection, args.query_text))
    print_lines(rendered)
    return 0
+3 -88
View File
@@ -1,100 +1,15 @@
from __future__ import annotations from __future__ import annotations
from chromadb.errors import InternalError, NotFoundError
from dotenv import load_dotenv from dotenv import load_dotenv
from chroma_functions import ( from cli_app import execute_command
add_data,
count_collection,
create_collection,
delete_collection,
list_collections,
query_data,
)
from chunk_functions import chunk_file
from cli_parser import build_parser from cli_parser import build_parser
from embed import embed
load_dotenv()
def main() -> int:
    """Load the environment file, parse the CLI arguments and run the command.

    Returns:
        The exit status produced by ``execute_command``.
    """
    load_dotenv()
    args = build_parser().parse_args()
    return execute_command(args)
if __name__ == "__main__": if __name__ == "__main__":
+46
View File
@@ -0,0 +1,46 @@
from chromadb import QueryResult
from chroma_functions import add_data, query_data
from chunk_functions import chunk_file
from embed import embed
def print_lines(lines: list[str]) -> None:
    """Print every entry of *lines* on its own line of standard output.

    Nothing is printed for an empty list.
    """
    for text in lines:
        print(text)
def ingest_file(collection_name: str, file_path: str) -> int:
    """Chunk and embed *file_path*, then add the result to *collection_name*.

    Returns:
        The length of the value returned by ``embed``.
    """
    vectors = embed(chunk_file(file_path))
    add_data(collection_name, vectors)
    return len(vectors)
def run_query(collection_name: str, query_text: str) -> QueryResult:
    """Query *collection_name* with a single text.

    ``query_data`` is given a one-element list holding *query_text*.
    """
    texts = [query_text]
    return query_data(collection_name, texts)
def format_query_result(result: QueryResult) -> list[str]:
    """Render the first result row of *result* as printable lines.

    Only index 0 of the "ids", "documents" and "distances" entries is used.
    A missing, ``None`` or empty entry is treated as having no values.

    Returns:
        ``["No results found."]`` when there are no ids; otherwise a header
        followed, for each id, by its numbered id line and, where a value
        exists at the same position, a distance line and a document line.
    """
    columns = [result.get(key, [[]]) for key in ("ids", "documents", "distances")]
    hit_ids, hit_documents, hit_distances = (
        column[0] if column else [] for column in columns
    )

    if not hit_ids:
        return ["No results found."]

    lines = ["Query results:"]
    for position, document_id in enumerate(hit_ids):
        lines.append(f"{position + 1}. id: {document_id}")
        # Distances and documents may be shorter than the id list.
        if position < len(hit_distances):
            lines.append(f" distance: {hit_distances[position]}")
        if position < len(hit_documents):
            lines.append(f" document: {hit_documents[position]}")
    return lines