add documents

This commit is contained in:
2026-04-21 15:28:20 +02:00
parent 746b951f0b
commit 18f26815e3
5 changed files with 70 additions and 12 deletions
+17
View File
@@ -0,0 +1,17 @@
from pathlib import Path
from typing import List
import semchunk
def chunk_text(text: str, chunk_size: int = 800) -> List[str]:
    """Split *text* into chunks using a semchunk chunker.

    A chunker is created with ``semchunk.chunkerify`` for the ``"gpt-4"``
    tokenizer name and immediately applied to *text*.

    Args:
        text: The text to split.
        chunk_size: Forwarded to ``semchunk.chunkerify`` as the chunk size
            (per semchunk's documentation this is the maximum number of
            tokens per chunk). Defaults to 800.

    Returns:
        Whatever the chunker yields for *text* (annotated as a list of
        strings).
    """
    # Build the chunker and invoke it in one step; a new chunker is
    # constructed on every call, exactly as before.
    return semchunk.chunkerify("gpt-4", chunk_size)(text)
def chunk_file(
    filename: str,
    chunk_size: int = 800,
    *,
    encoding: str = "utf-8",
) -> List[str]:
    """Read a text file and split its contents into chunks.

    The file is read in full and the resulting string is handed to
    ``chunk_text`` together with *chunk_size*.

    Args:
        filename: Path of the file to read.
        chunk_size: Forwarded unchanged to ``chunk_text``. Defaults to 800.
        encoding: Text encoding used to decode the file. Defaults to
            ``"utf-8"`` so that results do not depend on the platform's
            locale encoding; pass another codec name for legacy files.

    Returns:
        The chunks produced by ``chunk_text`` for the file's contents.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid in *encoding*.
    """
    # An explicit encoding avoids locale-dependent decoding (e.g. cp1252 on
    # Windows), which would otherwise fail or mis-decode UTF-8 documents.
    contents = Path(filename).read_text(encoding=encoding)
    return chunk_text(contents, chunk_size)