Files
Chromy/chromy/embed.py
T

30 lines
718 B
Python
Raw Normal View History

2026-04-22 17:03:01 +02:00
from __future__ import annotations
2026-04-22 17:19:14 +02:00
from collections.abc import Sequence
2026-04-22 17:03:01 +02:00
from typing import TypedDict
2026-04-21 15:06:04 +02:00
from chromadb.utils.embedding_functions import DefaultEmbeddingFunction
class EmbeddingRecord(TypedDict):
    """A text chunk paired with its embedding vector."""

    # The chunk of text this record describes.
    text: str
    # The embedding for ``text`` as a plain list of floats.
    embedding: list[float]
2026-04-21 15:06:04 +02:00
2026-04-22 17:19:14 +02:00
def embed(chunks: Sequence[str]) -> list[EmbeddingRecord]:
    """Embed every chunk and return one record per chunk.

    Args:
        chunks: The texts to embed. They are copied into a list before
            being handed to the embedding function.

    Returns:
        A list of dicts, each holding the chunk under ``"text"`` and its
        vector under ``"embedding"``. An empty ``chunks`` yields ``[]``
        without constructing the embedding function.
    """
    # Guard first so empty input never instantiates the embedding function.
    if not chunks:
        return []

    embedder = DefaultEmbeddingFunction()
    vectors = embedder(list(chunks))

    records: list[EmbeddingRecord] = []
    # NOTE(review): strict=False means zip stops at the shorter input, so a
    # short result from the embedder would silently drop trailing chunks.
    for chunk, vector in zip(chunks, vectors, strict=False):
        # Objects exposing ``tolist`` are converted through it; anything
        # else is materialised with ``list``.
        if hasattr(vector, "tolist"):
            values = vector.tolist()
        else:
            values = list(vector)
        records.append({"text": chunk, "embedding": values})
    return records