complete refactor
This commit is contained in:
+97
@@ -0,0 +1,97 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from argparse import Namespace
|
||||||
|
from collections.abc import Callable
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from chromadb.errors import InternalError, NotFoundError
|
||||||
|
|
||||||
|
from handlers.add_data import handle_add_data
|
||||||
|
from handlers.count_collection import handle_count_collection
|
||||||
|
from handlers.create_collection import handle_create_collection
|
||||||
|
from handlers.delete_collection import handle_delete_collection
|
||||||
|
from handlers.list_collections import handle_list_collections
|
||||||
|
from handlers.query import handle_query
|
||||||
|
|
||||||
|
|
||||||
|
# Signature shared by every command handler: it receives the parsed CLI
# arguments and returns an integer result code.
CommandHandler = Callable[[Namespace], int]
# Builds the message printed for a handled error from the parsed arguments.
ErrorMessageBuilder = Callable[[Namespace], str]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
class CliErrorHandler:
    """Pair an exception type with the message to print when it is raised."""

    # Exception class that the raised error is matched against.
    exception_type: type[BaseException]
    # Callable that turns the parsed arguments into the message to print.
    message: ErrorMessageBuilder
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
class CommandConfig:
    """Describe how one CLI command runs and which errors it reports."""

    # Function invoked with the parsed arguments to run the command.
    handler: CommandHandler
    # Error handlers consulted in order when ``handler`` raises; none by default.
    error_handlers: tuple[CliErrorHandler, ...] = ()
|
||||||
|
|
||||||
|
|
||||||
|
# Every command that targets an existing collection reports a missing one with
# the same message, so the handler is defined once and shared.
_COLLECTION_NOT_FOUND = CliErrorHandler(
    exception_type=NotFoundError,
    message=lambda args: f"Collection '{args.collection}' does not exist.",
)

# Dispatch table: canonical command name -> handler plus the errors that are
# turned into a printed message instead of propagating.
COMMANDS: dict[str, CommandConfig] = {
    "list-collections": CommandConfig(handler=handle_list_collections),
    "create-collection": CommandConfig(
        handler=handle_create_collection,
        error_handlers=(
            # NOTE(review): every InternalError raised while creating is
            # reported as "already exists" -- confirm no other failure mode
            # surfaces as InternalError.
            CliErrorHandler(
                exception_type=InternalError,
                message=lambda args: f"Collection '{args.collection}' already exists.",
            ),
        ),
    ),
    "delete-collection": CommandConfig(
        handler=handle_delete_collection,
        error_handlers=(_COLLECTION_NOT_FOUND,),
    ),
    "count": CommandConfig(
        handler=handle_count_collection,
        error_handlers=(_COLLECTION_NOT_FOUND,),
    ),
    "add-data": CommandConfig(
        handler=handle_add_data,
        error_handlers=(
            _COLLECTION_NOT_FOUND,
            CliErrorHandler(
                exception_type=FileNotFoundError,
                message=lambda args: f"The file {args.file} was not found.",
            ),
        ),
    ),
    "query": CommandConfig(
        handler=handle_query,
        error_handlers=(_COLLECTION_NOT_FOUND,),
    ),
}
|
||||||
|
|
||||||
|
|
||||||
|
def execute_command(args: Namespace) -> int:
    """Run the command named by ``args.command`` and return its result code.

    Args:
        args: Parsed CLI arguments; ``args.command`` must be a key of
            ``COMMANDS``.

    Returns:
        The handler's return value, or 1 when the handler raised an exception
        covered by one of the command's error handlers (whose message is
        printed first).

    Raises:
        KeyError: If ``args.command`` is not a key of ``COMMANDS``.
        BaseException: Any exception from the handler that no registered
            error handler covers propagates unchanged.
    """
    command = COMMANDS[args.command]
    # Catch only what this command registered. An empty tuple catches nothing,
    # so unrelated errors (including KeyboardInterrupt / SystemExit) are never
    # intercepted.
    expected_errors = tuple(
        error_handler.exception_type for error_handler in command.error_handlers
    )

    try:
        return command.handler(args)
    except expected_errors as exc:
        # First matching handler wins, in registration order.
        for error_handler in command.error_handlers:
            if isinstance(exc, error_handler.exception_type):
                print(error_handler.message(args))
                return 1
        # Defensive: the except clause guarantees a match above.
        raise
|
||||||
+88
-51
@@ -1,59 +1,96 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
class ArgumentSpec:
    """Declare one argument of a CLI command."""

    # Name handed to ``add_argument`` when the command's parser is built.
    name: str
    # Help text shown for the argument.
    help: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
class CommandSpec:
    """Declare one CLI command: its name, aliases, help text and arguments."""

    # Canonical command name used when registering the subparser.
    name: str
    # Alternative names accepted for the command.
    aliases: tuple[str, ...]
    # Text used as both the help and the description of the subparser.
    help: str
    # Arguments added to the subparser, in order; none by default.
    arguments: tuple[ArgumentSpec, ...] = ()
|
||||||
|
|
||||||
|
|
||||||
|
# Argument shared by the commands that operate on an existing target collection.
_TARGET_COLLECTION = ArgumentSpec(
    name="collection",
    help="Name of the target collection.",
)

# Declarative description of every CLI command, in the order the subparsers
# are registered.
COMMAND_SPECS: tuple[CommandSpec, ...] = (
    CommandSpec(
        name="list-collections",
        aliases=("lc",),
        help="List all collections stored in the local Chroma database.",
    ),
    CommandSpec(
        name="create-collection",
        aliases=("cc",),
        help="Create a collection in the local Chroma database.",
        arguments=(
            ArgumentSpec(name="collection", help="Name of the collection to create."),
        ),
    ),
    CommandSpec(
        name="delete-collection",
        aliases=("dc",),
        help="Delete a collection from the local Chroma database.",
        arguments=(
            ArgumentSpec(name="collection", help="Name of the collection to delete."),
        ),
    ),
    CommandSpec(
        name="count",
        aliases=("co",),
        help="Count records in a collection from the local Chroma database.",
        arguments=(
            ArgumentSpec(name="collection", help="Name of the collection to count."),
        ),
    ),
    CommandSpec(
        name="add-data",
        aliases=("ad",),
        help="Chunk, embed, and add a file to a collection in the local Chroma database.",
        arguments=(
            _TARGET_COLLECTION,
            ArgumentSpec(
                name="file",
                help="Path to the file to chunk and add to the collection.",
            ),
        ),
    ),
    CommandSpec(
        name="query",
        aliases=("q",),
        help="Query a collection with the provided text.",
        arguments=(
            _TARGET_COLLECTION,
            ArgumentSpec(name="query_text", help="The text to query."),
        ),
    ),
)
|
||||||
|
|
||||||
|
|
||||||
|
def _add_command(
|
||||||
|
subparsers: argparse._SubParsersAction[argparse.ArgumentParser],
|
||||||
|
command: CommandSpec,
|
||||||
|
) -> None:
|
||||||
|
subparser = subparsers.add_parser(
|
||||||
|
command.name,
|
||||||
|
aliases=list(command.aliases),
|
||||||
|
help=command.help,
|
||||||
|
description=command.help,
|
||||||
|
)
|
||||||
|
|
||||||
|
for argument in command.arguments:
|
||||||
|
subparser.add_argument(argument.name, help=argument.help)
|
||||||
|
|
||||||
|
subparser.set_defaults(command=command.name)
|
||||||
|
|
||||||
|
|
||||||
def build_parser() -> argparse.ArgumentParser:
|
def build_parser() -> argparse.ArgumentParser:
|
||||||
parser = argparse.ArgumentParser(description="Inspect local Chroma collections.")
|
parser = argparse.ArgumentParser(description="Inspect local Chroma collections.")
|
||||||
subparsers = parser.add_subparsers(dest="command")
|
subparsers = parser.add_subparsers(dest="command", required=True)
|
||||||
|
|
||||||
# List existing collections
|
for command in COMMAND_SPECS:
|
||||||
subparsers.add_parser(
|
_add_command(subparsers, command)
|
||||||
"list-collections",
|
|
||||||
aliases=["lc"],
|
|
||||||
help="List all collections stored in the local Chroma database.",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create a new collection
|
|
||||||
create_parser = subparsers.add_parser(
|
|
||||||
"create-collection",
|
|
||||||
aliases=["cc"],
|
|
||||||
help="Create a collection in the local Chroma database.",
|
|
||||||
)
|
|
||||||
create_parser.add_argument("name", help="Name of the collection to create.")
|
|
||||||
|
|
||||||
# Delete a collection
|
|
||||||
delete_parser = subparsers.add_parser(
|
|
||||||
"delete-collection",
|
|
||||||
aliases=["dc"],
|
|
||||||
help="Delete a collection from the local Chroma database.",
|
|
||||||
)
|
|
||||||
delete_parser.add_argument("name", help="Name of the collection to delete.")
|
|
||||||
|
|
||||||
# Count documents in a collection
|
|
||||||
count_parser = subparsers.add_parser(
|
|
||||||
"count",
|
|
||||||
aliases=["co"],
|
|
||||||
help="Count records in a collection from the local Chroma database.",
|
|
||||||
)
|
|
||||||
count_parser.add_argument("name", help="Name of the collection to count.")
|
|
||||||
|
|
||||||
# Add documents to a collection
|
|
||||||
add_parser = subparsers.add_parser(
|
|
||||||
"add-data",
|
|
||||||
aliases=["ad"],
|
|
||||||
help="Chunk, embed, and add a file to a collection in the local Chroma database.",
|
|
||||||
)
|
|
||||||
add_parser.add_argument("collection", help="Name of the target collection.")
|
|
||||||
add_parser.add_argument(
|
|
||||||
"file", help="Path to the file to chunk and add to the collection."
|
|
||||||
)
|
|
||||||
|
|
||||||
# Query doc
|
|
||||||
query_parser = subparsers.add_parser(
|
|
||||||
"query",
|
|
||||||
aliases=["q"],
|
|
||||||
help="Query a collection with given text/s.",
|
|
||||||
)
|
|
||||||
query_parser.add_argument("collection", help="Name of the target collection.")
|
|
||||||
query_parser.add_argument("texts", help="The text/s to query.")
|
|
||||||
|
|
||||||
return parser
|
return parser
|
||||||
|
|||||||
@@ -0,0 +1,9 @@
|
|||||||
|
from argparse import Namespace
|
||||||
|
|
||||||
|
from utilities import ingest_file
|
||||||
|
|
||||||
|
|
||||||
|
def handle_add_data(args: Namespace) -> int:
    """Hand ``args.collection`` and ``args.file`` to ``ingest_file`` and report the result.

    Prints how many records ``ingest_file`` reports as added and returns 0.
    Exceptions raised by ``ingest_file`` are not caught here.
    """
    records_added = ingest_file(args.collection, args.file)
    print(f"Added {records_added} records to collection '{args.collection}'.")
    return 0
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
from argparse import Namespace
|
||||||
|
|
||||||
|
from chroma_functions import count_collection
|
||||||
|
|
||||||
|
|
||||||
|
def handle_count_collection(args: Namespace) -> int:
    """Print the value ``count_collection`` returns for ``args.collection``.

    Returns 0 after printing; exceptions from ``count_collection`` propagate.
    """
    record_count = count_collection(args.collection)
    print(record_count)
    return 0
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
from argparse import Namespace
|
||||||
|
|
||||||
|
from chroma_functions import create_collection
|
||||||
|
|
||||||
|
|
||||||
|
def handle_create_collection(args: Namespace) -> int:
    """Create the collection named ``args.collection`` and confirm it.

    Prints the name returned by ``create_collection`` and returns 0.
    Exceptions raised by ``create_collection`` are not caught here.
    """
    collection_name = create_collection(args.collection)
    print(f"Created collection '{collection_name}'.")
    return 0
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
from argparse import Namespace
|
||||||
|
|
||||||
|
from chroma_functions import delete_collection
|
||||||
|
|
||||||
|
|
||||||
|
def handle_delete_collection(args: Namespace) -> int:
    """Delete the collection named ``args.collection`` and confirm it.

    Returns 0 after printing the confirmation. Exceptions raised by
    ``delete_collection`` are not caught here.
    """
    delete_collection(args.collection)
    print(f"Deleted collection '{args.collection}'.")
    return 0
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
from argparse import Namespace
|
||||||
|
|
||||||
|
from chroma_functions import list_collections
|
||||||
|
from utilities import print_lines
|
||||||
|
|
||||||
|
|
||||||
|
def handle_list_collections(_: Namespace) -> int:
    """Print what ``list_collections`` returns, or a notice when it is empty.

    The parsed arguments are unused. Always returns 0.
    """
    names = list_collections()
    if names:
        print_lines(names)
    else:
        print("No collections found.")
    return 0
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
from argparse import Namespace
|
||||||
|
|
||||||
|
from utilities import format_query_result, print_lines, run_query
|
||||||
|
|
||||||
|
|
||||||
|
def handle_query(args: Namespace) -> int:
    """Query ``args.collection`` with ``args.query_text`` and print the hits.

    The raw result of ``run_query`` is rendered by ``format_query_result`` and
    written out by ``print_lines``. Returns 0; exceptions propagate.
    """
    hits = run_query(args.collection, args.query_text)
    rendered = format_query_result(hits)
    print_lines(rendered)
    return 0
|
||||||
@@ -1,100 +1,15 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from chromadb.errors import InternalError, NotFoundError
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
from chroma_functions import (
|
from cli_app import execute_command
|
||||||
add_data,
|
|
||||||
count_collection,
|
|
||||||
create_collection,
|
|
||||||
delete_collection,
|
|
||||||
list_collections,
|
|
||||||
query_data,
|
|
||||||
)
|
|
||||||
from chunk_functions import chunk_file
|
|
||||||
from cli_parser import build_parser
|
from cli_parser import build_parser
|
||||||
from embed import embed
|
|
||||||
|
|
||||||
load_dotenv()
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
|
def main() -> int:
|
||||||
|
load_dotenv()
|
||||||
args = build_parser().parse_args()
|
args = build_parser().parse_args()
|
||||||
|
return execute_command(args)
|
||||||
if args.command in {"list-collections", "lc"}:
|
|
||||||
collections = list_collections()
|
|
||||||
if not collections:
|
|
||||||
print("No collections found.")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
for name in collections:
|
|
||||||
print(name)
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if args.command in {"create-collection", "cc"}:
|
|
||||||
try:
|
|
||||||
collection = create_collection(args.name)
|
|
||||||
except InternalError:
|
|
||||||
print(f"Collection '{args.name}' already exists.")
|
|
||||||
return 1
|
|
||||||
|
|
||||||
print(f"Created collection '{collection}'.")
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if args.command in {"delete-collection", "dc"}:
|
|
||||||
try:
|
|
||||||
delete_collection(args.name)
|
|
||||||
except NotFoundError:
|
|
||||||
print(f"Collection '{args.name}' does not exist.")
|
|
||||||
return 1
|
|
||||||
|
|
||||||
print(f"Deleted collection '{args.name}'.")
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if args.command in {"count", "co"}:
|
|
||||||
try:
|
|
||||||
count = count_collection(args.name)
|
|
||||||
except NotFoundError:
|
|
||||||
print(f"Collection '{args.name}' does not exist.")
|
|
||||||
return 1
|
|
||||||
|
|
||||||
print(count)
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if args.command in {"add-data", "ad"}:
|
|
||||||
try:
|
|
||||||
chunks = chunk_file(args.file)
|
|
||||||
embeddings = embed(chunks)
|
|
||||||
add_data(args.collection, embeddings)
|
|
||||||
except NotFoundError:
|
|
||||||
print(f"Collection '{args.collection}' does not exist.")
|
|
||||||
return 1
|
|
||||||
except FileNotFoundError:
|
|
||||||
print(f"The file {args.file} was not found.")
|
|
||||||
return 1
|
|
||||||
|
|
||||||
print(f"Added {len(embeddings)} records to collection '{args.collection}'.")
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if args.command in {"query", "q"}:
|
|
||||||
try:
|
|
||||||
result = query_data(args.collection, [args.texts])
|
|
||||||
except NotFoundError:
|
|
||||||
print(f"Collection '{args.collection}' does not exist.")
|
|
||||||
return 1
|
|
||||||
|
|
||||||
print(result)
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
print("Nothing to do. Use -h to see available commands.")
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -0,0 +1,46 @@
|
|||||||
|
from chromadb import QueryResult
|
||||||
|
|
||||||
|
from chroma_functions import add_data, query_data
|
||||||
|
from chunk_functions import chunk_file
|
||||||
|
from embed import embed
|
||||||
|
|
||||||
|
|
||||||
|
def print_lines(lines: list[str]) -> None:
    """Print each entry of *lines* on its own line; print nothing when empty."""
    for text in lines:
        print(text)
|
||||||
|
|
||||||
|
|
||||||
|
def ingest_file(collection_name: str, file_path: str) -> int:
    """Chunk *file_path*, embed the chunks and add them to *collection_name*.

    Returns:
        The length of the value returned by ``embed``.
    """
    vectors = embed(chunk_file(file_path))
    add_data(collection_name, vectors)
    return len(vectors)
|
||||||
|
|
||||||
|
|
||||||
|
def run_query(collection_name: str, query_text: str) -> QueryResult:
    """Run *query_text* against *collection_name* via ``query_data``.

    ``query_data`` is called with a one-element list containing the text.
    """
    query_texts = [query_text]
    return query_data(collection_name, query_texts)
|
||||||
|
|
||||||
|
|
||||||
|
def format_query_result(result: QueryResult) -> list[str]:
    """Render the first batch of a query result as printable lines.

    Only the first entry of ``ids``, ``documents`` and ``distances`` is used.
    Each id yields a numbered line, followed by its distance and document
    lines when those lists are long enough to have an entry for it.

    Returns:
        ``["No results found."]`` when the first batch has no ids, otherwise
        a ``"Query results:"`` header followed by the per-hit lines.
    """

    def first_batch(key: str) -> list:
        # A missing key, a falsy value (e.g. None) and an empty outer list
        # all collapse to "no entries".
        batches = result.get(key, [[]])
        if not batches:
            return []
        return batches[0]

    first_ids = first_batch("ids")
    first_documents = first_batch("documents")
    first_distances = first_batch("distances")

    if not first_ids:
        return ["No results found."]

    lines = ["Query results:"]
    for index, document_id in enumerate(first_ids, start=1):
        entry = [f"{index}. id: {document_id}"]
        # ``index`` is 1-based, so ``index <= len(...)`` means slot index - 1 exists.
        if index <= len(first_distances):
            entry.append(f" distance: {first_distances[index - 1]}")
        if index <= len(first_documents):
            entry.append(f" document: {first_documents[index - 1]}")
        lines.extend(entry)

    return lines
|
||||||
Reference in New Issue
Block a user