cannot import non-text files!
This commit is contained in:
+4
-1
@@ -1,5 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from plistlib import InvalidFileException
|
||||||
from typing import Annotated, Callable
|
from typing import Annotated, Callable
|
||||||
|
|
||||||
import typer
|
import typer
|
||||||
@@ -122,7 +123,9 @@ def import_data(
|
|||||||
except NotFoundError:
|
except NotFoundError:
|
||||||
_fail(f"Collection '{collection}' does not exist.")
|
_fail(f"Collection '{collection}' does not exist.")
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
_fail(f"The file {file} was not found.")
|
_fail(f"The file '{file}' was not found.")
|
||||||
|
except InvalidFileException:
|
||||||
|
_fail(f"The file '{file}' is not a text file.")
|
||||||
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -2,11 +2,14 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from plistlib import InvalidFileException
|
||||||
|
|
||||||
from rich import print
|
from rich import print
|
||||||
|
|
||||||
from chromy.utilities import ingest_file
|
from chromy.utilities import ingest_file
|
||||||
|
|
||||||
|
from ..utilities import is_probably_text_file
|
||||||
|
|
||||||
|
|
||||||
def _get_absolute_path(file: str) -> str:
|
def _get_absolute_path(file: str) -> str:
|
||||||
"""
|
"""
|
||||||
@@ -27,6 +30,11 @@ def _get_absolute_path(file: str) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def handle_import(collection: str, file: str) -> int:
    """
    Import a text file into a collection and report the number of records added.

    Args:
        collection (str): The collection name handed to ``ingest_file``.
        file (str): The path of the file to import; it is resolved with
            ``_get_absolute_path`` before use.

    Returns:
        int: Always ``0``.

    Raises:
        InvalidFileException: If ``is_probably_text_file`` reports that the
            resolved file does not look like text.
    """
    absolute_path = _get_absolute_path(file)

    if not is_probably_text_file(absolute_path):
        raise InvalidFileException()

    # Ingest the exact path that was just validated instead of resolving the
    # argument a second time, so the check and the import cannot diverge.
    # NOTE(review): assumes _get_absolute_path is deterministic for a given
    # argument — confirm against its implementation.
    records_added = ingest_file(collection, absolute_path)
    print(f"[bold green]Added[/] {records_added} records to collection '{collection}'.")
    return 0
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from collections.abc import Mapping, Sequence
|
from collections.abc import Mapping, Sequence
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from chromadb import QueryResult
|
from chromadb import QueryResult
|
||||||
from rich.console import Console
|
from rich.console import Console
|
||||||
@@ -76,3 +77,41 @@ def format_query_result(result: QueryResult) -> list[Rule | Text]:
|
|||||||
lines.append(Rule())
|
lines.append(Rule())
|
||||||
|
|
||||||
return lines
|
return lines
|
||||||
|
|
||||||
|
|
||||||
|
def is_probably_text_file(path: str | Path, sample_size: int = 8192) -> bool:
    """
    Return whether a file appears to contain text.

    Only the first ``sample_size`` bytes are inspected. When the file is
    longer than the sample, a multi-byte character cut off at the end of the
    sample is tolerated rather than treated as a decoding error.

    Args:
        path (str | Path): The path to the file to inspect.
        sample_size (int): The maximum number of bytes to read from the file.

    Returns:
        bool: ``True`` if the sampled bytes decode as UTF-8, UTF-8 with BOM,
            UTF-16, or UTF-32, or if the file is empty. Otherwise, ``False``.
    """
    import codecs

    path = Path(path)

    with path.open("rb") as f:
        sample = f.read(sample_size)
        # Peek one byte past the sample: if more data follows, the sample may
        # end in the middle of a character and must not be decoded as final.
        truncated = len(sample) == sample_size and bool(f.read(1))

    if not sample:
        return True

    # "utf-8" already accepts a leading BOM (it decodes to U+FEFF), so a
    # separate "utf-8-sig" attempt could never succeed where "utf-8" failed.
    # NOTE(review): BOM-less UTF-16 accepts most even-length byte strings, so
    # some binary data can still pass this check — consider whether a
    # stricter heuristic is wanted.
    encodings = ("utf-8", "utf-16", "utf-32")

    for encoding in encodings:
        decoder = codecs.getincrementaldecoder(encoding)()
        try:
            # final=False lets the decoder buffer an incomplete trailing
            # sequence instead of raising; invalid bytes still raise.
            decoder.decode(sample, final=not truncated)
        except UnicodeDecodeError:
            continue
        return True

    return False
|
||||||
|
|||||||
Reference in New Issue
Block a user