refactor chunking and embedding into their own modules
build / build (push) Successful in 45s
pytest / pytest (push) Successful in 26s

This commit is contained in:
2026-05-01 11:01:30 +02:00
parent 26df98c08e
commit fb62d1b539
8 changed files with 18 additions and 8 deletions
+1 -1
View File
@@ -9,7 +9,7 @@ from chromadb.api import ClientAPI
from chromadb.api.types import QueryResult, Where from chromadb.api.types import QueryResult, Where
from chromadb.errors import NotFoundError from chromadb.errors import NotFoundError
-from chromy.embed import EmbeddingRecord
+from chromy.embedding import EmbeddingRecord
def _get_client_and_collection( def _get_client_and_collection(
+5
View File
@@ -0,0 +1,5 @@
from __future__ import annotations
from chromy.chunking.service import chunk_file, chunk_text
__all__ = ["chunk_file", "chunk_text"]
@@ -3,7 +3,7 @@ from __future__ import annotations
from pathlib import Path from pathlib import Path
from typing import cast from typing import cast
-import semchunk
+from semchunk import semchunk
def chunk_text(text: str, chunk_size: int = 800) -> list[str]: def chunk_text(text: str, chunk_size: int = 800) -> list[str]:
+5
View File
@@ -0,0 +1,5 @@
from __future__ import annotations
from chromy.embedding.service import EmbeddingRecord, embed
__all__ = ["EmbeddingRecord", "embed"]
+2 -2
View File
@@ -5,8 +5,8 @@ from pathlib import Path
from chromadb import QueryResult from chromadb import QueryResult
from chromy.chroma_functions import add_data, delete_data, has_data_for_file, query_data from chromy.chroma_functions import add_data, delete_data, has_data_for_file, query_data
-from chromy.chunk_functions import chunk_file
+from chromy.chunking import chunk_file
-from chromy.embed import embed
+from chromy.embedding import embed
def ingest_file(collection_name: str, file_path: str) -> int: def ingest_file(collection_name: str, file_path: str) -> int:
+2 -2
View File
@@ -24,7 +24,7 @@ dependencies = [
chromy = "chromy.main:main" chromy = "chromy.main:main"
[tool.setuptools] [tool.setuptools]
-packages = ["chromy", "chromy.handlers"]
+packages = ["chromy", "chromy.chunking", "chromy.embedding", "chromy.handlers"]
[dependency-groups] [dependency-groups]
dev = [ dev = [
@@ -72,7 +72,7 @@ module = [
ignore_missing_imports = true ignore_missing_imports = true
[[tool.mypy.overrides]] [[tool.mypy.overrides]]
-module = "chromy.chunk_functions"
+module = "chromy.chunking.service"
disable_error_code = [ disable_error_code = [
"attr-defined", "attr-defined",
] ]
+2 -2
View File
@@ -3,7 +3,7 @@ from __future__ import annotations
import unittest import unittest
from unittest.mock import patch from unittest.mock import patch
-from chromy.embed import embed
+from chromy.embedding import embed
class EmbedTest(unittest.TestCase): class EmbedTest(unittest.TestCase):
@@ -12,7 +12,7 @@ class EmbedTest(unittest.TestCase):
def test_embed_pairs_text_with_list_embeddings(self) -> None: def test_embed_pairs_text_with_list_embeddings(self) -> None:
with patch( with patch(
-"chromy.embed.DefaultEmbeddingFunction",
+"chromy.embedding.service.DefaultEmbeddingFunction",
return_value=lambda chunks: ((1.0, 2.0), (3.0, 4.0)), return_value=lambda chunks: ((1.0, 2.0), (3.0, 4.0)),
): ):
result = embed(["first", "second"]) result = embed(["first", "second"])