47 lines
1.3 KiB
Python
47 lines
1.3 KiB
Python
from chromadb import QueryResult
|
|
|
|
from chroma_functions import add_data, query_data
|
|
from chunk_functions import chunk_file
|
|
from embed import embed
|
|
|
|
|
|
def print_lines(lines: list[str]) -> None:
    """Print every entry of *lines* on its own line, in order.

    Nothing is printed when *lines* is empty.
    """
    for text in lines:
        print(text)
|
|
|
|
|
|
def ingest_file(collection_name: str, file_path: str) -> int:
    """Chunk a file, embed the chunks, and hand the embeddings to ``add_data``.

    Args:
        collection_name: Forwarded to ``add_data`` as its first argument.
        file_path: Forwarded to ``chunk_file`` to produce the chunks.

    Returns:
        ``len()`` of the value returned by ``embed``.
    """
    # NOTE(review): only the embeddings are passed to add_data, not the
    # chunks themselves -- confirm that is what add_data expects.
    vectors = embed(chunk_file(file_path))
    add_data(collection_name, vectors)
    return len(vectors)
|
|
|
|
|
|
def run_query(collection_name: str, query_text: str) -> QueryResult:
    """Run a single-text query against a collection via ``query_data``.

    Args:
        collection_name: Forwarded to ``query_data`` as its first argument.
        query_text: The one query string; it is wrapped in a one-element
            list before being passed on.

    Returns:
        Whatever ``query_data`` returns, unchanged.
    """
    query_texts = [query_text]
    outcome = query_data(collection_name, query_texts)
    return outcome
|
|
|
|
|
|
def format_query_result(result: QueryResult) -> list[str]:
    """Render the first row of a query result as printable lines.

    Only element ``[0]`` of the ``"ids"``, ``"documents"`` and
    ``"distances"`` entries is used; a missing or falsy entry is treated
    as having no values.

    Args:
        result: Mapping read with ``.get`` for the three keys above.

    Returns:
        ``["No results found."]`` when there are no ids. Otherwise a
        ``"Query results:"`` header followed, for each id, by a numbered
        id line and, when a value exists at the same position, a distance
        line and a document line.
    """

    def first_row(key: str) -> list:
        # A missing key falls back to [[]]; a present-but-falsy value
        # (e.g. None or []) yields an empty row.
        rows = result.get(key, [[]])
        return rows[0] if rows else []

    hit_ids = first_row("ids")
    hit_documents = first_row("documents")
    hit_distances = first_row("distances")

    if not hit_ids:
        return ["No results found."]

    output = ["Query results:"]
    for position, hit_id in enumerate(hit_ids):
        # Displayed numbering is 1-based; list positions are 0-based.
        output.append(f"{position + 1}. id: {hit_id}")

        if position < len(hit_distances):
            output.append(f" distance: {hit_distances[position]}")

        if position < len(hit_documents):
            output.append(f" document: {hit_documents[position]}")

    return output
|