decouple core data from CLI formatting
This commit is contained in:
@@ -8,7 +8,6 @@ import chromadb
|
||||
from chromadb.api import ClientAPI
|
||||
from chromadb.api.types import QueryResult, Where
|
||||
from chromadb.errors import NotFoundError
|
||||
from rich.text import Text
|
||||
|
||||
from chromy.embed import EmbeddingRecord
|
||||
|
||||
@@ -26,17 +25,14 @@ def _get_client_and_collection(
|
||||
return client, collection
|
||||
|
||||
|
||||
def list_collections() -> list[Text]:
|
||||
def list_collections() -> list[str]:
|
||||
client = chromadb.PersistentClient()
|
||||
collections = client.list_collections()
|
||||
|
||||
if not collections:
|
||||
return []
|
||||
|
||||
return [
|
||||
Text("· " + getattr(collection, "name", str(collection)))
|
||||
for collection in collections
|
||||
]
|
||||
return [getattr(collection, "name", str(collection)) for collection in collections]
|
||||
|
||||
|
||||
def create_collection(name: str) -> str:
|
||||
@@ -58,13 +54,9 @@ def delete_data(collection_name: str, where: dict[str, str]) -> int:
|
||||
return int(result.get("deleted", 0))
|
||||
|
||||
|
||||
def count_collection(collection_name: str) -> str:
|
||||
def count_collection(collection_name: str) -> int:
|
||||
_, collection = _get_client_and_collection(collection_name)
|
||||
count = collection.count()
|
||||
|
||||
return (
|
||||
f"The '{collection_name}' collection contains [bold green]{count}[/] records."
|
||||
)
|
||||
return collection.count()
|
||||
|
||||
|
||||
def add_data(
|
||||
|
||||
+2
-2
@@ -1,12 +1,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from plistlib import InvalidFileException
|
||||
from typing import Annotated, Callable
|
||||
|
||||
import typer
|
||||
from chromadb.errors import InternalError, NotFoundError
|
||||
from rich import print
|
||||
|
||||
from chromy.errors import UnsupportedTextFileError
|
||||
from chromy.handlers.count_collection import handle_count_collection
|
||||
from chromy.handlers.create_collection import handle_create_collection
|
||||
from chromy.handlers.delete_collection import (
|
||||
@@ -124,7 +124,7 @@ def import_data(
|
||||
_fail(f"Collection '{collection}' does not exist.")
|
||||
except FileNotFoundError:
|
||||
_fail(f"The file '{file}' was not found.")
|
||||
except InvalidFileException:
|
||||
except UnsupportedTextFileError:
|
||||
_fail(f"The file '{file}' is not a text file.")
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
class UnsupportedTextFileError(Exception):
|
||||
"""Raised when a file does not appear to contain supported text content."""
|
||||
@@ -3,8 +3,9 @@ from __future__ import annotations
|
||||
from rich import print
|
||||
|
||||
from chromy.chroma_functions import count_collection
|
||||
from chromy.output import format_count_message
|
||||
|
||||
|
||||
def handle_count_collection(collection: str) -> int:
|
||||
print(count_collection(collection))
|
||||
print(format_count_message(collection, count_collection(collection)))
|
||||
return 0
|
||||
|
||||
@@ -2,10 +2,10 @@ from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from plistlib import InvalidFileException
|
||||
|
||||
from rich import print
|
||||
|
||||
from chromy.errors import UnsupportedTextFileError
|
||||
from chromy.utilities import ingest_file
|
||||
|
||||
from ..utilities import is_probably_text_file
|
||||
@@ -33,8 +33,8 @@ def handle_import(collection: str, file: str) -> int:
|
||||
absolute_path = _get_absolute_path(file)
|
||||
|
||||
if not is_probably_text_file(absolute_path):
|
||||
raise InvalidFileException()
|
||||
raise UnsupportedTextFileError()
|
||||
|
||||
records_added = ingest_file(collection, _get_absolute_path(file))
|
||||
records_added = ingest_file(collection, absolute_path)
|
||||
print(f"[bold green]Added[/] {records_added} records to collection '{collection}'.")
|
||||
return 0
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from chromy.chroma_functions import list_collections
|
||||
from chromy.utilities import print_lines
|
||||
from chromy.output import format_collection_names, print_lines
|
||||
|
||||
|
||||
def handle_list_collections() -> int:
|
||||
@@ -11,6 +11,6 @@ def handle_list_collections() -> int:
|
||||
print("No collections found.")
|
||||
return 0
|
||||
|
||||
print_lines(collections)
|
||||
print_lines(format_collection_names(collections))
|
||||
|
||||
return 0
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from chromy.utilities import format_query_result, print_lines, run_query
|
||||
from chromy.output import format_query_result, print_lines
|
||||
from chromy.utilities import run_query
|
||||
|
||||
|
||||
def handle_query(collection: str, query_text: str) -> int:
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Mapping, Sequence
|
||||
|
||||
from chromadb import QueryResult
|
||||
from rich.console import Console
|
||||
from rich.rule import Rule
|
||||
from rich.text import Text
|
||||
|
||||
CONSOLE = Console()
|
||||
|
||||
|
||||
def print_lines(lines: Sequence[Rule | Text | str]) -> None:
|
||||
for line in lines:
|
||||
CONSOLE.print(line)
|
||||
|
||||
|
||||
def format_collection_names(collections: Sequence[str]) -> list[Text]:
|
||||
return [Text(f"· {collection}") for collection in collections]
|
||||
|
||||
|
||||
def format_count_message(collection_name: str, count: int) -> str:
|
||||
return (
|
||||
f"The '{collection_name}' collection contains [bold green]{count}[/] records."
|
||||
)
|
||||
|
||||
|
||||
def format_query_result(result: QueryResult) -> list[Rule | Text]:
|
||||
ids = result.get("ids", [[]])
|
||||
documents = result.get("documents", [[]])
|
||||
distances = result.get("distances", [[]])
|
||||
metadatas = result.get("metadatas", [[]])
|
||||
|
||||
first_ids = ids[0] if ids else []
|
||||
first_documents = documents[0] if documents else []
|
||||
first_distances = distances[0] if distances else []
|
||||
first_metadatas = metadatas[0] if metadatas else []
|
||||
|
||||
if not first_ids:
|
||||
return [Text.from_markup("[yellow]No results found.[/]")]
|
||||
|
||||
lines: list[Rule | Text] = [Rule(title="Query results")]
|
||||
|
||||
for index, document_id in enumerate(first_ids, start=1):
|
||||
lines.append(
|
||||
Text.from_markup(f"[bold]{index}[/].\t[green]id[/]\t\t{document_id}")
|
||||
)
|
||||
i = index - 1
|
||||
|
||||
if i < len(first_distances):
|
||||
lines.append(
|
||||
Text.from_markup(f"\t[green]distance[/]\t{first_distances[i]}")
|
||||
)
|
||||
|
||||
if i < len(first_metadatas):
|
||||
metadata = first_metadatas[i]
|
||||
|
||||
if isinstance(metadata, Mapping):
|
||||
file_name = metadata.get("file_name")
|
||||
|
||||
if file_name:
|
||||
lines.append(Text.from_markup(f"\t[green]file_name[/]\t{file_name}"))
|
||||
|
||||
if i < len(first_documents):
|
||||
lines.append(Text.from_markup("\n[bold green]Retrieved contents[/]\n"))
|
||||
lines.append(Text(first_documents[i]))
|
||||
|
||||
lines.append(Rule())
|
||||
|
||||
return lines
|
||||
@@ -1,24 +1,13 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Mapping, Sequence
|
||||
from pathlib import Path
|
||||
|
||||
from chromadb import QueryResult
|
||||
from rich.console import Console
|
||||
from rich.rule import Rule
|
||||
from rich.text import Text
|
||||
|
||||
from chromy.chroma_functions import add_data, query_data
|
||||
from chromy.chunk_functions import chunk_file
|
||||
from chromy.embed import embed
|
||||
|
||||
CONSOLE = Console()
|
||||
|
||||
|
||||
def print_lines(lines: Sequence[Rule | Text]) -> None:
|
||||
for line in lines:
|
||||
CONSOLE.print(line)
|
||||
|
||||
|
||||
def ingest_file(collection_name: str, file_path: str) -> int:
|
||||
chunks = chunk_file(file_path)
|
||||
@@ -31,54 +20,6 @@ def run_query(collection_name: str, query_text: str) -> QueryResult:
|
||||
return query_data(collection_name, [query_text])
|
||||
|
||||
|
||||
def format_query_result(result: QueryResult) -> list[Rule | Text]:
|
||||
ids = result.get("ids", [[]])
|
||||
documents = result.get("documents", [[]])
|
||||
distances = result.get("distances", [[]])
|
||||
metadatas = result.get("metadatas", [[]])
|
||||
|
||||
first_ids = ids[0] if ids else []
|
||||
first_documents = documents[0] if documents else []
|
||||
first_distances = distances[0] if distances else []
|
||||
first_metadatas = metadatas[0] if metadatas else []
|
||||
|
||||
if not first_ids:
|
||||
return [Text.from_markup("[yellow]No results found.[/]")]
|
||||
|
||||
lines: list[Rule | Text] = [Rule(title="Query results")]
|
||||
|
||||
for index, document_id in enumerate(first_ids, start=1):
|
||||
lines.append(
|
||||
Text.from_markup(f"[bold]{index}[/].\t[green]id[/]\t\t{document_id}")
|
||||
)
|
||||
i = index - 1
|
||||
|
||||
if i < len(first_distances):
|
||||
lines.append(
|
||||
Text.from_markup(f"\t[green]distance[/]\t{first_distances[i]}")
|
||||
)
|
||||
|
||||
if i < len(first_metadatas):
|
||||
metadata = first_metadatas[i]
|
||||
|
||||
if isinstance(metadata, Mapping):
|
||||
file_name = metadata.get("file_name")
|
||||
|
||||
if file_name:
|
||||
lines.append(
|
||||
Text.from_markup(f"\t[green]file_name[/]\t{file_name}")
|
||||
)
|
||||
|
||||
if i < len(first_documents):
|
||||
lines.append(Text.from_markup("\n[bold green]Retrieved contents[/]\n"))
|
||||
lines.append(Text(first_documents[i]))
|
||||
|
||||
# Print a separator between documents
|
||||
lines.append(Rule())
|
||||
|
||||
return lines
|
||||
|
||||
|
||||
def is_probably_text_file(path: str | Path, sample_size: int = 8192) -> bool:
|
||||
"""
|
||||
Return whether a file appears to contain text.
|
||||
|
||||
Reference in New Issue
Block a user