add multi-file import support
build / build (push) Successful in 9s
pytest / pytest (push) Successful in 26s

This commit is contained in:
Matteo Rosati
2026-04-29 15:39:42 +02:00
parent 74e48fbcd5
commit 26df98c08e
5 changed files with 214 additions and 29 deletions
+10 -10
View File
@@ -6,7 +6,6 @@ import typer
from chromadb.errors import InternalError, NotFoundError
from rich import print
from chromy.errors import UnsupportedTextFileError
from chromy.handlers.count_collection import handle_count_collection
from chromy.handlers.create_collection import handle_create_collection
from chromy.handlers.delete_collection import (
@@ -106,26 +105,27 @@ def count(
# ------------------------------------------------------------------------------
@app.command(
"import",
help="Chunk, embed, and add a file to a collection in the local Chroma database.",
help=(
"Chunk, embed, and add one or more files to a collection in the "
"local Chroma database."
),
)
def import_data(
collection: Annotated[
str,
typer.Argument(help="Name of the target collection."),
],
file: Annotated[
str,
typer.Argument(help="Path to the file to chunk and add to the collection."),
files: Annotated[
list[str],
typer.Argument(
help="Path(s) to the file(s) to chunk and add to the collection."
),
],
) -> None:
try:
_run(lambda: handle_import(collection, file))
_run(lambda: handle_import(collection, files))
except NotFoundError:
_fail(f"Collection '{collection}' does not exist.")
except FileNotFoundError:
_fail(f"The file '{file}' was not found.")
except UnsupportedTextFileError:
_fail(f"The file '{file}' is not a text file.")
# ------------------------------------------------------------------------------
+44 -3
View File
@@ -2,6 +2,7 @@ from __future__ import annotations
import os
from pathlib import Path
from typing import Final
from rich import print
@@ -10,6 +11,9 @@ from chromy.utilities import ingest_file
from ..utilities import is_probably_text_file
SUCCESS_EXIT_CODE: Final = 0
FAILURE_EXIT_CODE: Final = 1
def _get_absolute_path(file: str) -> str:
"""
@@ -29,12 +33,49 @@ def _get_absolute_path(file: str) -> str:
return str(file_path.resolve())
def handle_import(collection: str, file: str) -> int:
def _import_one(collection: str, file: str) -> int:
absolute_path = _get_absolute_path(file)
if not Path(absolute_path).is_file():
raise FileNotFoundError()
if not is_probably_text_file(absolute_path):
raise UnsupportedTextFileError()
records_added = ingest_file(collection, absolute_path)
print(f"[bold green]Added[/] {records_added} records to collection '{collection}'.")
return 0
print(
"[bold green]Added[/] "
f"{records_added} records from '{file}' to collection '{collection}'."
)
return SUCCESS_EXIT_CODE
def handle_import(collection: str, files: list[str]) -> int:
    """
    Import every path in ``files`` into ``collection`` and print a summary.

    Each path is resolved with ``_get_absolute_path``; a path whose resolved
    form was already seen earlier in the same call is skipped and counted
    neither as a success nor as a failure. A missing file or a non-text file
    is reported on its own line and counted as a failure, after which the
    remaining paths are still processed. Any other exception is not handled
    here and propagates to the caller.

    Args:
        collection: Name of the target collection.
        files: Paths of the files to import, in the order given.

    Returns:
        ``FAILURE_EXIT_CODE`` when at least one file failed, otherwise
        ``SUCCESS_EXIT_CODE``.
    """
    successful_imports = 0
    failed_imports = 0
    visited: set[str] = set()

    for file in files:
        try:
            resolved = _get_absolute_path(file)
            if resolved not in visited:
                # Record the path before importing so that a repeated path
                # is skipped even when its first attempt fails.
                visited.add(resolved)
                _import_one(collection, file)
                successful_imports += 1
        except FileNotFoundError:
            failed_imports += 1
            print(f"[bold red]Error[/]: The file '{file}' was not found.")
        except UnsupportedTextFileError:
            failed_imports += 1
            print(f"[bold red]Error[/]: The file '{file}' is not a text file.")

    print(
        f"Imported {successful_imports} file(s) successfully; {failed_imports} failed."
    )
    return FAILURE_EXIT_CODE if failed_imports else SUCCESS_EXIT_CODE