add chunk and embed

This commit is contained in:
2026-04-21 15:06:04 +02:00
parent 35992e6029
commit 746b951f0b
4 changed files with 296 additions and 0 deletions
+10
View File
@@ -0,0 +1,10 @@
from typing import List
import semchunk
def chunk(text: str, chunk_size: int = 800, tokenizer: str = "gpt-4") -> List[str]:
    """Split *text* into chunks using ``semchunk``.

    Args:
        text: The text to split.
        chunk_size: Size limit handed to ``semchunk.chunkerify``; the unit
            is whatever the selected tokenizer counts (tokens for the
            default "gpt-4" tokenizer).
        tokenizer: Tokenizer identifier forwarded unchanged as the first
            argument of ``semchunk.chunkerify``. Defaults to "gpt-4",
            the value that was previously hard-coded.

    Returns:
        The list of chunks produced by the semchunk chunker, or an empty
        list when *text* is empty.
    """
    # Nothing to split: skip building the chunker (and loading a tokenizer)
    # for input that cannot produce any chunk.
    if not text:
        return []

    chunker = semchunk.chunkerify(tokenizer, chunk_size)
    return chunker(text)