add chunk and embed

This commit is contained in:
2026-04-21 15:06:04 +02:00
parent 35992e6029
commit 746b951f0b
4 changed files with 296 additions and 0 deletions
+26
View File
@@ -0,0 +1,26 @@
from typing import List, TypedDict
from chromadb.utils.embedding_functions import DefaultEmbeddingFunction
class EmbeddingRecord(TypedDict):
    """A text chunk paired with the embedding vector computed for it."""

    # The chunk text, exactly as it was supplied for embedding.
    text: str
    # The embedding vector for ``text``, stored as a plain list of floats.
    embedding: List[float]
def embed(chunks: List[str]) -> List[EmbeddingRecord]:
    """Embed text chunks with ``DefaultEmbeddingFunction``.

    Args:
        chunks: The text chunks to embed.

    Returns:
        One record per chunk, in input order, pairing the chunk text with
        its embedding as a plain list. An empty input yields an empty list
        without constructing the embedding function.

    Raises:
        ValueError: If the embedding function returns a different number of
            vectors than there are chunks.
    """
    if not chunks:
        return []

    embedding_function = DefaultEmbeddingFunction()
    embeddings = list(embedding_function(chunks))

    # zip() stops at the shorter input, so a count mismatch would otherwise
    # silently drop chunks from the result.
    if len(embeddings) != len(chunks):
        raise ValueError(
            f"expected {len(chunks)} embeddings, got {len(embeddings)}"
        )

    return [
        {
            "text": text,
            # Array-like vectors expose tolist(); any other iterable is
            # copied into a list as-is.
            "embedding": (
                embedding.tolist()
                if hasattr(embedding, "tolist")
                else list(embedding)
            ),
        }
        for text, embedding in zip(chunks, embeddings)
    ]