cannot import non-text files!
build / build (push) Successful in 39s
pytest / pytest (push) Successful in 35s

This commit is contained in:
2026-04-24 18:40:51 +02:00
parent c6ad060e85
commit d71fce7a6a
3 changed files with 51 additions and 1 deletions
+4 -1
View File
@@ -1,5 +1,6 @@
from __future__ import annotations
from plistlib import InvalidFileException
from typing import Annotated, Callable
import typer
@@ -122,7 +123,9 @@ def import_data(
except NotFoundError:
_fail(f"Collection '{collection}' does not exist.")
except FileNotFoundError:
_fail(f"The file {file} was not found.")
_fail(f"The file '{file}' was not found.")
except InvalidFileException:
_fail(f"The file '{file}' is not a text file.")
# ------------------------------------------------------------------------------
+8
View File
@@ -2,11 +2,14 @@ from __future__ import annotations
import os
from pathlib import Path
from plistlib import InvalidFileException
from rich import print
from chromy.utilities import ingest_file
from ..utilities import is_probably_text_file
def _get_absolute_path(file: str) -> str:
"""
@@ -27,6 +30,11 @@ def _get_absolute_path(file: str) -> str:
def handle_import(collection: str, file: str) -> int:
    """
    Import the contents of a text file into a collection.

    The path is resolved once via ``_get_absolute_path`` and that same
    resolved path is used both for the text-file check and for ingestion,
    so the file that is validated is always the file that is ingested.

    Args:
        collection (str): The name of the collection to add records to.
        file (str): The path of the file to import, as given by the user.

    Returns:
        int: ``0`` after the records were ingested and the summary printed.

    Raises:
        InvalidFileException: If ``is_probably_text_file`` reports that the
            file does not look like text.
    """
    absolute_path = _get_absolute_path(file)

    # Refuse binary input up front instead of handing it to the ingester.
    if not is_probably_text_file(absolute_path):
        raise InvalidFileException()

    # Reuse the path that was just validated rather than resolving it again.
    records_added = ingest_file(collection, absolute_path)
    print(f"[bold green]Added[/] {records_added} records to collection '{collection}'.")
    return 0
+39
View File
@@ -1,6 +1,7 @@
from __future__ import annotations
from collections.abc import Mapping, Sequence
from pathlib import Path
from chromadb import QueryResult
from rich.console import Console
@@ -76,3 +77,41 @@ def format_query_result(result: QueryResult) -> list[Rule | Text]:
lines.append(Rule())
return lines
def is_probably_text_file(path: str | Path, sample_size: int = 8192) -> bool:
"""
Return whether a file appears to contain text.
Args:
path (str | Path): The path to the file to inspect.
sample_size (int): The maximum number of bytes to read from the file.
Returns:
bool: ``True`` if the sampled bytes decode as UTF-8, UTF-8 with BOM,
UTF-16, or UTF-32, or if the file is empty. Otherwise, ``False``.
"""
path = Path(path)
with path.open("rb") as f:
sample = f.read(sample_size)
if not sample:
return True
encodings = (
"utf-8",
"utf-8-sig",
"utf-16",
"utf-32",
)
for encoding in encodings:
try:
sample.decode(encoding)
return True
except UnicodeDecodeError:
pass
return False