replace existing file records on re-import
This commit is contained in:
@@ -54,6 +54,14 @@ def delete_data(collection_name: str, where: dict[str, str]) -> int:
|
||||
return int(result.get("deleted", 0))
|
||||
|
||||
|
||||
def has_data_for_file(collection_name: str, file_name: str) -> bool:
    """Return whether the collection holds at least one record for *file_name*.

    Args:
        collection_name: Name of the collection to inspect.
        file_name: Value matched against each record's ``file_name`` field.

    Returns:
        ``True`` if at least one record matches the filter, ``False`` otherwise.
    """
    _, collection = _get_client_and_collection(collection_name)
    # Only existence matters: ask for a single match and skip the record
    # payloads (ids are still returned) instead of loading every chunk
    # stored for the file.
    result = collection.get(
        where=cast(Where, {"file_name": file_name}),
        limit=1,
        include=[],
    )
    # Tolerate a missing or None "ids" entry rather than failing in len().
    return bool(result.get("ids") or [])
|
||||
|
||||
|
||||
def count_collection(collection_name: str) -> int:
    """Return the value reported by ``count()`` for the named collection.

    Args:
        collection_name: Name of the collection to look up.
    """
    _client, target = _get_client_and_collection(collection_name)
    total = target.count()
    return total
|
||||
|
||||
+4
-1
@@ -4,12 +4,15 @@ from pathlib import Path
|
||||
|
||||
from chromadb import QueryResult
|
||||
|
||||
from chromy.chroma_functions import add_data, query_data
|
||||
from chromy.chroma_functions import add_data, delete_data, has_data_for_file, query_data
|
||||
from chromy.chunk_functions import chunk_file
|
||||
from chromy.embed import embed
|
||||
|
||||
|
||||
def ingest_file(collection_name: str, file_path: str) -> int:
|
||||
if has_data_for_file(collection_name, file_path):
|
||||
delete_data(collection_name, {"file_name": file_path})
|
||||
|
||||
chunks = chunk_file(file_path)
|
||||
embeddings = embed(chunks)
|
||||
add_data(collection_name, embeddings, file_path)
|
||||
|
||||
Reference in New Issue
Block a user