refactor chunking and embedding into their own modules
build / build (push) Successful in 45s
pytest / pytest (push) Successful in 26s

This commit is contained in:
2026-05-01 11:01:30 +02:00
parent 26df98c08e
commit fb62d1b539
8 changed files with 18 additions and 8 deletions
+1 -1
View File
@@ -9,7 +9,7 @@ from chromadb.api import ClientAPI
from chromadb.api.types import QueryResult, Where
from chromadb.errors import NotFoundError
-from chromy.embed import EmbeddingRecord
+from chromy.embedding import EmbeddingRecord
def _get_client_and_collection(
+5
View File
@@ -0,0 +1,5 @@
from __future__ import annotations
from chromy.chunking.service import chunk_file, chunk_text
__all__ = ["chunk_file", "chunk_text"]
@@ -3,7 +3,7 @@ from __future__ import annotations
from pathlib import Path
from typing import cast
-import semchunk
+from semchunk import semchunk
def chunk_text(text: str, chunk_size: int = 800) -> list[str]:
+5
View File
@@ -0,0 +1,5 @@
from __future__ import annotations
from chromy.embedding.service import EmbeddingRecord, embed
__all__ = ["EmbeddingRecord", "embed"]
+2 -2
View File
@@ -5,8 +5,8 @@ from pathlib import Path
from chromadb import QueryResult
from chromy.chroma_functions import add_data, delete_data, has_data_for_file, query_data
-from chromy.chunk_functions import chunk_file
-from chromy.embed import embed
+from chromy.chunking import chunk_file
+from chromy.embedding import embed
def ingest_file(collection_name: str, file_path: str) -> int:
+2 -2
View File
@@ -24,7 +24,7 @@ dependencies = [
chromy = "chromy.main:main"
[tool.setuptools]
-packages = ["chromy", "chromy.handlers"]
+packages = ["chromy", "chromy.chunking", "chromy.embedding", "chromy.handlers"]
[dependency-groups]
dev = [
@@ -72,7 +72,7 @@ module = [
ignore_missing_imports = true
[[tool.mypy.overrides]]
-module = "chromy.chunk_functions"
+module = "chromy.chunking.service"
disable_error_code = [
"attr-defined",
]
+2 -2
View File
@@ -3,7 +3,7 @@ from __future__ import annotations
import unittest
from unittest.mock import patch
-from chromy.embed import embed
+from chromy.embedding import embed
class EmbedTest(unittest.TestCase):
@@ -12,7 +12,7 @@ class EmbedTest(unittest.TestCase):
def test_embed_pairs_text_with_list_embeddings(self) -> None:
with patch(
-"chromy.embed.DefaultEmbeddingFunction",
+"chromy.embedding.service.DefaultEmbeddingFunction",
return_value=lambda chunks: ((1.0, 2.0), (3.0, 4.0)),
):
result = embed(["first", "second"])