add multi-file import support
This commit is contained in:
+10
-10
@@ -6,7 +6,6 @@ import typer
|
||||
from chromadb.errors import InternalError, NotFoundError
|
||||
from rich import print
|
||||
|
||||
from chromy.errors import UnsupportedTextFileError
|
||||
from chromy.handlers.count_collection import handle_count_collection
|
||||
from chromy.handlers.create_collection import handle_create_collection
|
||||
from chromy.handlers.delete_collection import (
|
||||
@@ -106,26 +105,27 @@ def count(
|
||||
# ------------------------------------------------------------------------------
|
||||
@app.command(
|
||||
"import",
|
||||
help="Chunk, embed, and add a file to a collection in the local Chroma database.",
|
||||
help=(
|
||||
"Chunk, embed, and add one or more files to a collection in the "
|
||||
"local Chroma database."
|
||||
),
|
||||
)
|
||||
def import_data(
|
||||
collection: Annotated[
|
||||
str,
|
||||
typer.Argument(help="Name of the target collection."),
|
||||
],
|
||||
file: Annotated[
|
||||
str,
|
||||
typer.Argument(help="Path to the file to chunk and add to the collection."),
|
||||
files: Annotated[
|
||||
list[str],
|
||||
typer.Argument(
|
||||
help="Path(s) to the file(s) to chunk and add to the collection."
|
||||
),
|
||||
],
|
||||
) -> None:
|
||||
try:
|
||||
_run(lambda: handle_import(collection, file))
|
||||
_run(lambda: handle_import(collection, files))
|
||||
except NotFoundError:
|
||||
_fail(f"Collection '{collection}' does not exist.")
|
||||
except FileNotFoundError:
|
||||
_fail(f"The file '{file}' was not found.")
|
||||
except UnsupportedTextFileError:
|
||||
_fail(f"The file '{file}' is not a text file.")
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Final
|
||||
|
||||
from rich import print
|
||||
|
||||
@@ -10,6 +11,9 @@ from chromy.utilities import ingest_file
|
||||
|
||||
from ..utilities import is_probably_text_file
|
||||
|
||||
SUCCESS_EXIT_CODE: Final = 0
|
||||
FAILURE_EXIT_CODE: Final = 1
|
||||
|
||||
|
||||
def _get_absolute_path(file: str) -> str:
|
||||
"""
|
||||
@@ -29,12 +33,49 @@ def _get_absolute_path(file: str) -> str:
|
||||
return str(file_path.resolve())
|
||||
|
||||
|
||||
def handle_import(collection: str, file: str) -> int:
|
||||
def _import_one(collection: str, file: str) -> int:
|
||||
absolute_path = _get_absolute_path(file)
|
||||
|
||||
if not Path(absolute_path).is_file():
|
||||
raise FileNotFoundError()
|
||||
|
||||
if not is_probably_text_file(absolute_path):
|
||||
raise UnsupportedTextFileError()
|
||||
|
||||
records_added = ingest_file(collection, absolute_path)
|
||||
print(f"[bold green]Added[/] {records_added} records to collection '{collection}'.")
|
||||
return 0
|
||||
print(
|
||||
"[bold green]Added[/] "
|
||||
f"{records_added} records from '{file}' to collection '{collection}'."
|
||||
)
|
||||
return SUCCESS_EXIT_CODE
|
||||
|
||||
|
||||
def handle_import(collection: str, files: list[str]) -> int:
    """
    Import several files into a collection, reporting per-file failures.

    Each entry of ``files`` is resolved with ``_get_absolute_path``; an entry
    whose resolved path was already encountered earlier in the same call is
    skipped and counted neither as a success nor as a failure. Every other
    entry is handed to ``_import_one``.

    ``FileNotFoundError`` and ``UnsupportedTextFileError`` raised while
    handling an entry are reported with an error message and counted as a
    failure, after which processing continues with the next entry. Any other
    exception propagates to the caller.

    Args:
        collection: Name of the target collection.
        files: Paths of the files to import, as given by the caller.

    Returns:
        ``FAILURE_EXIT_CODE`` if at least one file failed, otherwise
        ``SUCCESS_EXIT_CODE``.
    """
    successful_imports = 0
    failed_imports = 0
    # Resolved paths handled so far; used to import each distinct file once.
    visited: set[str] = set()

    for file in files:
        try:
            resolved = _get_absolute_path(file)
            if resolved not in visited:
                # Record the path before importing so that a failing file is
                # not retried when it is listed more than once.
                visited.add(resolved)
                _import_one(collection, file)
                successful_imports += 1
        except FileNotFoundError:
            failed_imports += 1
            print(f"[bold red]Error[/]: The file '{file}' was not found.")
        except UnsupportedTextFileError:
            failed_imports += 1
            print(f"[bold red]Error[/]: The file '{file}' is not a text file.")

    # One summary line after all files have been attempted.
    print(
        f"Imported {successful_imports} file(s) successfully; {failed_imports} failed."
    )

    return FAILURE_EXIT_CODE if failed_imports else SUCCESS_EXIT_CODE
|
||||
|
||||
Reference in New Issue
Block a user