Files
Chromy/chroma_functions.py
T

80 lines
1.8 KiB
Python
Raw Normal View History

2026-04-21 14:32:10 +02:00
from typing import List
2026-04-21 15:28:20 +02:00
from uuid import uuid4
2026-04-21 17:13:43 +02:00
import chromadb
2026-04-21 17:20:45 +02:00
from chromadb.api.types import QueryResult
2026-04-21 17:13:43 +02:00
from chromadb.errors import NotFoundError
2026-04-21 15:28:20 +02:00
from embed import EmbeddingRecord
2026-04-21 14:32:10 +02:00
def list_collections() -> List[str]:
    """Return the names of every collection reported by the client.

    A ``chromadb.PersistentClient`` is created with default arguments and
    asked for its collections. For each entry the ``name`` attribute is used
    when present; otherwise the entry itself is converted with ``str``.
    A falsy listing yields an empty list.
    """
    listed = chromadb.PersistentClient().list_collections()
    names: List[str] = []
    # ``listed or []`` keeps the "nothing there -> []" contract for any
    # falsy result from the client.
    for entry in listed or []:
        names.append(getattr(entry, "name", str(entry)))
    return names
def create_collection(name: str) -> str:
    """Create a collection called *name* and return its name.

    The name carried by the created collection object is preferred; if that
    object exposes no ``name`` attribute, the requested *name* is returned.
    """
    created = chromadb.PersistentClient().create_collection(name=name)
    return getattr(created, "name", name)
def delete_collection(name: str) -> None:
    """Delete the collection called *name* through a default persistent client."""
    chromadb.PersistentClient().delete_collection(name=name)
def count_collection(name: str) -> int:
    """Return the value reported by ``count()`` for the collection *name*.

    Args:
        name: Name of the collection to look up.

    Returns:
        Whatever ``collection.count()`` returns for that collection.

    Raises:
        NotFoundError: Propagated unchanged from ``get_collection``.
    """
    client = chromadb.PersistentClient()
    # No try/except around the lookup: a handler that only re-raises adds
    # nothing, so NotFoundError simply propagates to the caller as before.
    collection = client.get_collection(name=name)
    return collection.count()
2026-04-21 15:28:20 +02:00
def add_data(collection: str, data: List[EmbeddingRecord]) -> None:
    """Add embedding records to the collection named *collection*.

    Each record is read through its ``"text"`` and ``"embedding"`` keys and
    stored under a freshly generated ``uuid4`` string id.

    Args:
        collection: Name of the target collection.
        data: Records to store. When empty, the function returns immediately
            without creating a client or looking up the collection.

    Raises:
        NotFoundError: Propagated unchanged from ``get_collection``.
    """
    if not data:
        return

    client = chromadb.PersistentClient()
    # No try/except around the lookup: a handler that only re-raises adds
    # nothing, so NotFoundError simply propagates to the caller as before.
    target_collection = client.get_collection(name=collection)
    target_collection.add(
        ids=[str(uuid4()) for _ in data],
        documents=[record["text"] for record in data],
        embeddings=[record["embedding"] for record in data],
    )
2026-04-21 17:13:43 +02:00
2026-04-21 17:20:45 +02:00
def query_data(collection_name: str, texts: List[str]) -> QueryResult:
    """Query the collection named *collection_name* with the given texts.

    Args:
        collection_name: Name of the collection to query.
        texts: Query texts, forwarded as ``query_texts``. When empty, no
            client is created and an empty result is returned instead.

    Returns:
        The result of ``collection.query(query_texts=texts)``; for empty
        *texts*, a new dict whose ``"ids"``, ``"documents"``,
        ``"metadatas"``, ``"distances"`` and ``"embeddings"`` entries are
        all empty lists.

    Raises:
        NotFoundError: Propagated unchanged from ``get_collection``.
    """
    if not texts:
        return {
            "ids": [],
            "documents": [],
            "metadatas": [],
            "distances": [],
            "embeddings": [],
        }

    client = chromadb.PersistentClient()
    # No try/except around the lookup: a handler that only re-raises adds
    # nothing, so NotFoundError simply propagates to the caller as before.
    collection = client.get_collection(name=collection_name)
    return collection.query(query_texts=texts)