# chromy/utilities.py

from __future__ import annotations

from collections.abc import Mapping, Sequence

from chromadb import QueryResult
from rich.console import Console
from rich.markup import escape
from rich.rule import Rule
from rich.text import Text

from chromy.chroma_functions import add_data, query_data
from chromy.chunk_functions import chunk_file
from chromy.embed import embed

CONSOLE = Console()


def print_lines(lines: Sequence[str | Text | Rule]) -> None:
    """Print every entry of ``lines`` on the shared module console.

    Entries are passed to ``CONSOLE.print`` one at a time, in order. Besides
    plain strings, the rich ``Text`` and ``Rule`` objects built by
    ``format_query_result`` are accepted.
    """
    for line in lines:
        CONSOLE.print(line)


def ingest_file(collection_name: str, file_path: str) -> int:
    """Chunk ``file_path``, embed the chunks and add them to ``collection_name``.

    The file path is also handed to ``add_data`` alongside the embeddings.

    Returns:
        The length of the value returned by ``embed``.
    """
    vectors = embed(chunk_file(file_path))
    add_data(collection_name, vectors, file_path)
    return len(vectors)


def run_query(collection_name: str, query_text: str) -> QueryResult:
    """Query ``collection_name`` with one text and return the raw result."""
    # query_data takes a list of query texts; wrap the single text accordingly.
    query_texts = [query_text]
    return query_data(collection_name, query_texts)


def _first_query_batch(result: QueryResult, key: str) -> list:
    """Return the entries stored under ``key`` for the first query, or ``[]``.

    A missing key, a ``None`` value and an empty outer list all yield ``[]``.
    """
    batches = result.get(key) or [[]]
    return batches[0] or []


def format_query_result(result: QueryResult) -> list[str | Text | Rule]:
    """Turn the first query's hits in ``result`` into printable console lines.

    Only index 0 of each result field is used, i.e. the hits of the first
    query text.

    Args:
        result: Query result with the ``ids``, ``documents``, ``distances``
            and ``metadatas`` fields; any of them may be missing or ``None``.

    Returns:
        ``["No results found."]`` when there are no ids. Otherwise a title
        rule followed, per hit, by an id line, an optional distance line, an
        optional ``file_name`` line (taken from the hit's metadata), the
        retrieved document (if any) and a separator rule.
    """
    hit_ids = _first_query_batch(result, "ids")
    if not hit_ids:
        return ["No results found."]

    documents = _first_query_batch(result, "documents")
    distances = _first_query_batch(result, "distances")
    metadatas = _first_query_batch(result, "metadatas")

    lines: list[str | Text | Rule] = [Rule(title="Query results")]

    for position, document_id in enumerate(hit_ids):
        # Text.assemble takes styled (text, style) pairs, so the dynamic
        # values are never parsed as console markup: an id or file name
        # containing "[...]" is shown literally.
        lines.append(
            Text.assemble(
                (str(position + 1), "bold"),
                ".\t",
                ("id", "green"),
                f"\t\t{document_id}",
            )
        )

        if position < len(distances):
            lines.append(
                Text.assemble(
                    "\t",
                    ("distance", "green"),
                    f"\t{distances[position]}",
                )
            )

        metadata = metadatas[position] if position < len(metadatas) else None
        if isinstance(metadata, Mapping):
            file_name = metadata.get("file_name")
            if file_name:
                lines.append(
                    Text.assemble(
                        "\t",
                        ("file_name", "green"),
                        f"\t{file_name}",
                    )
                )

        document = documents[position] if position < len(documents) else None
        if document is not None:
            lines.append(Text.from_markup("\n[bold green]Retrieved contents[/]\n"))
            # The document stays a plain string, which the console would
            # parse as markup when printing; escape it so bracketed text in
            # the retrieved content is displayed verbatim.
            lines.append(escape(str(document)))

        # Separator between hits.
        lines.append(Rule())

    return lines
modernize type hints 2026-04-22 17:19:14 +02:00			`from __future__ import annotations`

use typer for better syntax highlight 2026-04-23 15:53:14 +02:00			`from rich.text import Text`
			`from rich.rule import Rule`
			`from rich.console import Console`

modernize type hints 2026-04-22 17:19:14 +02:00			`from collections.abc import Mapping, Sequence`
complete refactor 2026-04-21 17:42:37 +02:00
add ruff. fix all linting 2026-04-22 17:03:01 +02:00			`from chromadb import QueryResult`

move top-level modules into a real package 2026-04-22 15:47:46 +02:00			`from chromy.chroma_functions import add_data, query_data`
			`from chromy.chunk_functions import chunk_file`
			`from chromy.embed import embed`
complete refactor 2026-04-21 17:42:37 +02:00
use typer for better syntax highlight 2026-04-23 15:53:14 +02:00			`CONSOLE = Console()`

complete refactor 2026-04-21 17:42:37 +02:00
modernize type hints 2026-04-22 17:19:14 +02:00			`def print_lines(lines: Sequence[str]) -> None:`
complete refactor 2026-04-21 17:42:37 +02:00			`for line in lines:`
use typer for better syntax highlight 2026-04-23 15:53:14 +02:00			`CONSOLE.print(line)`
complete refactor 2026-04-21 17:42:37 +02:00

			`def ingest_file(collection_name: str, file_path: str) -> int:`
			`chunks = chunk_file(file_path)`
			`embeddings = embed(chunks)`
add metadata (file_name) 2026-04-21 18:24:49 +02:00			`add_data(collection_name, embeddings, file_path)`
complete refactor 2026-04-21 17:42:37 +02:00			`return len(embeddings)`


			`def run_query(collection_name: str, query_text: str) -> QueryResult:`
			`return query_data(collection_name, [query_text])`


			`def format_query_result(result: QueryResult) -> list[str]:`
			`ids = result.get("ids", [[]])`
			`documents = result.get("documents", [[]])`
			`distances = result.get("distances", [[]])`
add metadata (file_name) 2026-04-21 18:24:49 +02:00			`metadatas = result.get("metadatas", [[]])`
complete refactor 2026-04-21 17:42:37 +02:00
			`first_ids = ids[0] if ids else []`
			`first_documents = documents[0] if documents else []`
			`first_distances = distances[0] if distances else []`
add metadata (file_name) 2026-04-21 18:24:49 +02:00			`first_metadatas = metadatas[0] if metadatas else []`
complete refactor 2026-04-21 17:42:37 +02:00
			`if not first_ids:`
			`return ["No results found."]`

use typer for better syntax highlight 2026-04-23 15:53:14 +02:00			`lines = [Rule(title="Query results")]`
add metadata (file_name) 2026-04-21 18:24:49 +02:00
complete refactor 2026-04-21 17:42:37 +02:00			`for index, document_id in enumerate(first_ids, start=1):`
use typer for better syntax highlight 2026-04-23 15:53:14 +02:00			`# lines.append(f"{index}.\tid: {document_id}")`
			`lines.append(`
			`Text.from_markup(f"[bold]{index}[/].\t[green]id[/]\t\t{document_id}")`
			`)`
add metadata (file_name) 2026-04-21 18:24:49 +02:00			`i = index - 1`

			`if i < len(first_distances):`
use typer for better syntax highlight 2026-04-23 15:53:14 +02:00			`lines.append(`
			`Text.from_markup(f"\t[green]distance[/]\t{first_distances[i]}")`
			`)`
add metadata (file_name) 2026-04-21 18:24:49 +02:00
			`if i < len(first_metadatas):`
			`metadata = first_metadatas[i]`

			`if isinstance(metadata, Mapping):`
			`file_name = metadata.get("file_name")`

			`if file_name:`
use typer for better syntax highlight 2026-04-23 15:53:14 +02:00			`lines.append(`
			`Text.from_markup(f"\t[green]file_name[/]\t{file_name}")`
			`)`
complete refactor 2026-04-21 17:42:37 +02:00
add metadata (file_name) 2026-04-21 18:24:49 +02:00			`if i < len(first_documents):`
use typer for better syntax highlight 2026-04-23 15:53:14 +02:00			`lines.append(Text.from_markup("\n[bold green]Retrieved contents[/]\n"))`
			`lines.append(first_documents[i])`
complete refactor 2026-04-21 17:42:37 +02:00
add metadata (file_name) 2026-04-21 18:24:49 +02:00			`# Print a separator between documents`
use typer for better syntax highlight 2026-04-23 15:53:14 +02:00			`lines.append(Rule())`
complete refactor 2026-04-21 17:42:37 +02:00
			`return lines`