2026-04-21 14:32:10 +02:00
|
|
|
from typing import List
|
2026-04-21 15:28:20 +02:00
|
|
|
from uuid import uuid4
|
|
|
|
|
|
2026-04-21 17:13:43 +02:00
|
|
|
import chromadb
|
|
|
|
|
from chromadb.errors import NotFoundError
|
|
|
|
|
|
2026-04-21 15:28:20 +02:00
|
|
|
from embed import EmbeddingRecord
|
2026-04-21 14:32:10 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def list_collections() -> List[str]:
    """Return the names of every collection known to the Chroma client.

    A new ``chromadb.PersistentClient`` (default settings) is created on
    each call. When the client reports nothing, an empty list is returned.
    """
    found = chromadb.PersistentClient().list_collections() or []

    names = []
    for entry in found:
        # Entries lacking a ``name`` attribute are represented by ``str(entry)``.
        names.append(getattr(entry, "name", str(entry)))
    return names
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_collection(name: str) -> str:
    """Create a collection called *name* and return the resulting name.

    A new ``chromadb.PersistentClient`` (default settings) is created on
    each call.
    """
    store = chromadb.PersistentClient()
    created = store.create_collection(name=name)

    # Fall back to the requested name if the returned object has no ``name``.
    return getattr(created, "name", name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def delete_collection(name: str) -> None:
    """Delete the collection called *name* via a new ``PersistentClient``."""
    chromadb.PersistentClient().delete_collection(name=name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def count_collection(name: str) -> int:
    """Return ``count()`` of the collection called *name*.

    A new ``chromadb.PersistentClient`` (default settings) is created on
    each call.

    Args:
        name: Name of the collection to look up.

    Returns:
        Whatever ``collection.count()`` reports for that collection.

    Raises:
        chromadb.errors.NotFoundError: Propagated unchanged from
            ``get_collection`` when the lookup fails with that error.
    """
    client = chromadb.PersistentClient()
    # Lookup errors (e.g. NotFoundError) are deliberately left to propagate;
    # a handler that only re-raises would add nothing.
    collection = client.get_collection(name=name)
    return collection.count()
|
2026-04-21 15:28:20 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def add_data(collection: str, data: List[EmbeddingRecord]) -> None:
    """Add embedding records to an existing collection.

    Every record is stored under a freshly generated random UUID4 string id,
    with ``record["text"]`` as its document and ``record["embedding"]`` as
    its embedding.

    Args:
        collection: Name of the collection to add to.
        data: Records to store; each must be indexable by ``"text"`` and
            ``"embedding"``. When empty, the function returns immediately
            without creating a client.

    Raises:
        chromadb.errors.NotFoundError: Propagated unchanged from
            ``get_collection`` when the lookup fails with that error.
    """
    if not data:
        return

    client = chromadb.PersistentClient()
    # Lookup errors (e.g. NotFoundError) are deliberately left to propagate;
    # a handler that only re-raises would add nothing.
    target_collection = client.get_collection(name=collection)

    target_collection.add(
        ids=[str(uuid4()) for _ in data],
        documents=[record["text"] for record in data],
        embeddings=[record["embedding"] for record in data],
    )
|
2026-04-21 17:13:43 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def query_data(collection_name: str, texts: list[str]):
    """Placeholder for querying a collection; not implemented yet.

    Raises:
        NotImplementedError: Always, regardless of the arguments.
    """
    raise NotImplementedError
|