2026-04-22 17:19:14 +02:00
|
|
|
from __future__ import annotations
|
|
|
|
|
|
2026-04-23 15:53:14 +02:00
|
|
|
from rich.text import Text
|
|
|
|
|
from rich.rule import Rule
|
|
|
|
|
from rich.console import Console
|
|
|
|
|
|
2026-04-22 17:19:14 +02:00
|
|
|
from collections.abc import Mapping, Sequence
|
2026-04-21 17:42:37 +02:00
|
|
|
|
2026-04-22 17:03:01 +02:00
|
|
|
from chromadb import QueryResult
|
|
|
|
|
|
2026-04-22 15:47:46 +02:00
|
|
|
from chromy.chroma_functions import add_data, query_data
|
|
|
|
|
from chromy.chunk_functions import chunk_file
|
|
|
|
|
from chromy.embed import embed
|
2026-04-21 17:42:37 +02:00
|
|
|
|
2026-04-23 15:53:14 +02:00
|
|
|
# Shared rich console; print_lines() sends every entry through this instance.
CONSOLE = Console()
|
|
|
|
|
|
2026-04-21 17:42:37 +02:00
|
|
|
|
2026-04-22 17:19:14 +02:00
|
|
|
def print_lines(lines: Sequence[str | Text | Rule]) -> None:
    """Print every entry of *lines* to the shared console, in order.

    Args:
        lines: Entries to print. The annotation covers plain strings as well
            as the ``Text`` and ``Rule`` objects that
            ``format_query_result`` puts into its result list.
    """
    for renderable in lines:
        CONSOLE.print(renderable)
|
2026-04-21 17:42:37 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def ingest_file(collection_name: str, file_path: str) -> int:
    """Chunk and embed one file, then hand the embeddings to ``add_data``.

    Args:
        collection_name: Collection name forwarded to ``add_data``.
        file_path: Path given to ``chunk_file`` and forwarded to ``add_data``.

    Returns:
        The number of items returned by ``embed``.
    """
    vectors = embed(chunk_file(file_path))
    add_data(collection_name, vectors, file_path)
    return len(vectors)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def run_query(collection_name: str, query_text: str) -> QueryResult:
    """Query a collection with a single text and return the raw result.

    Args:
        collection_name: Collection name forwarded to ``query_data``.
        query_text: The query; it is wrapped in a one-element list because
            ``query_data`` is called with a list of texts.

    Returns:
        Whatever ``query_data`` returns, unmodified.
    """
    query_texts = [query_text]
    return query_data(collection_name, query_texts)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _first_batch(result: QueryResult, key: str) -> Sequence[object]:
    """Return the first inner list stored under *key*, or ``[]`` if absent/empty."""
    batches = result.get(key)
    return batches[0] if batches else []


def format_query_result(result: QueryResult) -> list[str | Text | Rule]:
    """Convert a query result into a list of printable entries.

    Only the first inner list of ``ids``, ``distances``, ``metadatas`` and
    ``documents`` is read. Values that come from the result (ids, file names,
    document text) are never passed through the rich markup parser, so
    brackets in the data are shown literally instead of being swallowed as
    style tags or raising a markup error.

    Args:
        result: Mapping-like query result. Missing, ``None`` or empty fields
            are treated as "no data" for that field.

    Returns:
        ``["No results found."]`` when there are no ids. Otherwise a titled
        ``Rule`` followed, for each id, by: an index/id line, a distance line
        (if available), a file-name line (if the metadata is a mapping with a
        truthy ``"file_name"``), a "Retrieved contents" heading plus the
        document text (if available), and a closing ``Rule`` separator.
    """
    first_ids = _first_batch(result, "ids")
    if not first_ids:
        return ["No results found."]

    first_documents = _first_batch(result, "documents")
    first_distances = _first_batch(result, "distances")
    first_metadatas = _first_batch(result, "metadatas")

    lines: list[str | Text | Rule] = [Rule(title="Query results")]

    for i, document_id in enumerate(first_ids):
        # Text.assemble attaches styles to fixed labels only; the data parts
        # are appended as plain text and are not parsed for markup.
        lines.append(
            Text.assemble(
                (str(i + 1), "bold"),
                ".\t",
                ("id", "green"),
                f"\t\t{document_id}",
            )
        )

        # The per-field lists may be shorter than the id list (or empty when
        # the field is missing), hence the bounds checks.
        if i < len(first_distances):
            lines.append(
                Text.assemble("\t", ("distance", "green"), f"\t{first_distances[i]}")
            )

        if i < len(first_metadatas):
            metadata = first_metadatas[i]
            if isinstance(metadata, Mapping):
                file_name = metadata.get("file_name")
                if file_name:
                    lines.append(
                        Text.assemble("\t", ("file_name", "green"), f"\t{file_name}")
                    )

        if i < len(first_documents):
            # Static markup only; no result data is interpolated here.
            lines.append(Text.from_markup("\n[bold green]Retrieved contents[/]\n"))
            # Wrap in Text so the console prints the document verbatim; a bare
            # str would be parsed as console markup when printed.
            lines.append(Text(str(first_documents[i])))

        # Separator between documents.
        lines.append(Rule())

    return lines
|