cannot import non-text files!
build / build (push) Successful in 39s
pytest / pytest (push) Successful in 35s

This commit is contained in:
2026-04-24 18:40:51 +02:00
parent c6ad060e85
commit d71fce7a6a
3 changed files with 51 additions and 1 deletions
+4 -1
View File
@@ -1,5 +1,6 @@
from __future__ import annotations from __future__ import annotations
from plistlib import InvalidFileException
from typing import Annotated, Callable from typing import Annotated, Callable
import typer import typer
@@ -122,7 +123,9 @@ def import_data(
except NotFoundError: except NotFoundError:
_fail(f"Collection '{collection}' does not exist.") _fail(f"Collection '{collection}' does not exist.")
except FileNotFoundError: except FileNotFoundError:
_fail(f"The file {file} was not found.") _fail(f"The file '{file}' was not found.")
except InvalidFileException:
_fail(f"The file '{file}' is not a text file.")
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
+8
View File
@@ -2,11 +2,14 @@ from __future__ import annotations
import os import os
from pathlib import Path from pathlib import Path
from plistlib import InvalidFileException
from rich import print from rich import print
from chromy.utilities import ingest_file from chromy.utilities import ingest_file
from ..utilities import is_probably_text_file
def _get_absolute_path(file: str) -> str: def _get_absolute_path(file: str) -> str:
""" """
@@ -27,6 +30,11 @@ def _get_absolute_path(file: str) -> str:
def handle_import(collection: str, file: str) -> int:
    """
    Import a text file into a collection and report the number of records added.

    Args:
        collection (str): The name of the collection, passed to ``ingest_file``.
        file (str): The path of the file to import; it is resolved with
            ``_get_absolute_path`` before being checked and ingested.

    Returns:
        int: Always ``0`` once the import has completed.

    Raises:
        InvalidFileException: If ``is_probably_text_file`` rejects the file.
    """
    # Resolve the path once so the file that is validated is exactly the file
    # that is ingested (and the resolution work is not repeated).
    absolute_path = _get_absolute_path(file)

    if not is_probably_text_file(absolute_path):
        raise InvalidFileException()

    records_added = ingest_file(collection, absolute_path)
    print(f"[bold green]Added[/] {records_added} records to collection '{collection}'.")
    return 0
+39
View File
@@ -1,6 +1,7 @@
from __future__ import annotations from __future__ import annotations
from collections.abc import Mapping, Sequence from collections.abc import Mapping, Sequence
from pathlib import Path
from chromadb import QueryResult from chromadb import QueryResult
from rich.console import Console from rich.console import Console
@@ -76,3 +77,41 @@ def format_query_result(result: QueryResult) -> list[Rule | Text]:
lines.append(Rule()) lines.append(Rule())
return lines return lines
def is_probably_text_file(path: str | Path, sample_size: int = 8192) -> bool:
"""
Return whether a file appears to contain text.
Args:
path (str | Path): The path to the file to inspect.
sample_size (int): The maximum number of bytes to read from the file.
Returns:
bool: ``True`` if the sampled bytes decode as UTF-8, UTF-8 with BOM,
UTF-16, or UTF-32, or if the file is empty. Otherwise, ``False``.
"""
path = Path(path)
with path.open("rb") as f:
sample = f.read(sample_size)
if not sample:
return True
encodings = (
"utf-8",
"utf-8-sig",
"utf-16",
"utf-32",
)
for encoding in encodings:
try:
sample.decode(encoding)
return True
except UnicodeDecodeError:
pass
return False