Files
Chromy/chromy/utilities.py
T

65 lines
1.8 KiB
Python
Raw Normal View History

2026-04-21 18:24:49 +02:00
from collections.abc import Mapping
2026-04-21 17:42:37 +02:00
2026-04-22 17:03:01 +02:00
from chromadb import QueryResult
2026-04-22 15:47:46 +02:00
from chromy.chroma_functions import add_data, query_data
from chromy.chunk_functions import chunk_file
from chromy.embed import embed
2026-04-21 17:42:37 +02:00
def print_lines(lines: list[str]) -> None:
    """Write every entry of *lines* to standard output, one per ``print`` call.

    Nothing is printed when *lines* is empty.
    """
    for text in lines:
        print(text)
def ingest_file(collection_name: str, file_path: str) -> int:
    """Chunk and embed a file, then hand the embeddings to ``add_data``.

    The file at *file_path* is split by ``chunk_file``; the resulting chunks
    are passed to ``embed``; the embeddings are then forwarded to
    ``add_data`` together with *collection_name* and *file_path*.

    Returns:
        The number of embeddings produced by ``embed``.
    """
    vectors = embed(chunk_file(file_path))
    add_data(collection_name, vectors, file_path)
    return len(vectors)
def run_query(collection_name: str, query_text: str) -> QueryResult:
    """Run a single-text query against *collection_name* via ``query_data``.

    ``query_data`` is called with a list of query texts, so *query_text* is
    wrapped in a one-element list before being forwarded.

    Returns:
        Whatever ``query_data`` returns for that one-element batch.
    """
    query_batch = [query_text]
    return query_data(collection_name, query_batch)
def format_query_result(result: QueryResult) -> list[str]:
    """Render the first batch of a query result as printable text lines.

    Only index 0 of the ``ids``, ``documents``, ``distances`` and
    ``metadatas`` entries of *result* is inspected. A missing or falsy
    entry is treated as an empty batch.

    Returns:
        ``["No results found."]`` when the first id batch is empty.
        Otherwise a header line followed, for every id, by a numbered id
        line, the distance and document lines when a value exists at that
        position, a ``file_name`` line when the metadata at that position
        is a mapping with a truthy ``"file_name"``, and a dashed separator.
    """

    def first_batch(key: str):
        # Missing keys default to one empty batch; falsy values (None, [])
        # collapse to an empty batch as well.
        batches = result.get(key, [[]])
        return batches[0] if batches else []

    hit_ids = first_batch("ids")
    hit_documents = first_batch("documents")
    hit_distances = first_batch("distances")
    hit_metadatas = first_batch("metadatas")

    if not hit_ids:
        return ["No results found."]

    separator = "-" * 60
    output = ["Query results:"]

    for position, hit_id in enumerate(hit_ids):
        output.append(f"{position + 1}.\tid: {hit_id}")

        if position < len(hit_distances):
            output.append(f"\tdistance: {hit_distances[position]}")

        if position < len(hit_metadatas):
            entry_meta = hit_metadatas[position]
            # Non-mapping metadata (e.g. None) carries no file name to show.
            source_name = (
                entry_meta.get("file_name")
                if isinstance(entry_meta, Mapping)
                else None
            )
            if source_name:
                output.append(f"\tfile_name: {source_name}")

        if position < len(hit_documents):
            output.append(f"\tdocument: {hit_documents[position]}")

        # Visual divider between consecutive hits.
        output.append(separator)

    return output