diff --git a/chromy/chroma_functions.py b/chromy/chroma_functions.py index 08389bd..cdf78ad 100644 --- a/chromy/chroma_functions.py +++ b/chromy/chroma_functions.py @@ -9,7 +9,7 @@ from chromadb.api import ClientAPI from chromadb.api.types import QueryResult, Where from chromadb.errors import NotFoundError -from chromy.embed import EmbeddingRecord +from chromy.embedding import EmbeddingRecord def _get_client_and_collection( diff --git a/chromy/chunking/__init__.py b/chromy/chunking/__init__.py new file mode 100644 index 0000000..2882bd2 --- /dev/null +++ b/chromy/chunking/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from chromy.chunking.service import chunk_file, chunk_text + +__all__ = ["chunk_file", "chunk_text"] diff --git a/chromy/chunk_functions.py b/chromy/chunking/service.py similarity index 93% rename from chromy/chunk_functions.py rename to chromy/chunking/service.py index c2c0c91..7f2916c 100644 --- a/chromy/chunk_functions.py +++ b/chromy/chunking/service.py @@ -3,7 +3,7 @@ from __future__ import annotations from pathlib import Path from typing import cast -import semchunk +from semchunk import semchunk def chunk_text(text: str, chunk_size: int = 800) -> list[str]: diff --git a/chromy/embedding/__init__.py b/chromy/embedding/__init__.py new file mode 100644 index 0000000..ec5520e --- /dev/null +++ b/chromy/embedding/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from chromy.embedding.service import EmbeddingRecord, embed + +__all__ = ["EmbeddingRecord", "embed"] diff --git a/chromy/embed.py b/chromy/embedding/service.py similarity index 100% rename from chromy/embed.py rename to chromy/embedding/service.py diff --git a/chromy/utilities.py b/chromy/utilities.py index 98895bf..9fa3adb 100644 --- a/chromy/utilities.py +++ b/chromy/utilities.py @@ -5,8 +5,8 @@ from pathlib import Path from chromadb import QueryResult from chromy.chroma_functions import add_data, delete_data, has_data_for_file, query_data -from chromy.chunk_functions import chunk_file -from chromy.embed import embed +from chromy.chunking import chunk_file +from chromy.embedding import embed def ingest_file(collection_name: str, file_path: str) -> int: diff --git a/pyproject.toml b/pyproject.toml index c102d50..033aaf6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ chromy = "chromy.main:main" [tool.setuptools] -packages = ["chromy", "chromy.handlers"] +packages = ["chromy", "chromy.chunking", "chromy.embedding", "chromy.handlers"] [dependency-groups] dev = [ @@ -72,7 +72,7 @@ module = [ ignore_missing_imports = true [[tool.mypy.overrides]] -module = "chromy.chunk_functions" +module = "chromy.chunking.service" disable_error_code = [ "attr-defined", ] diff --git a/tests/test_embed.py b/tests/test_embed.py index 165e85b..63e4dd2 100644 --- a/tests/test_embed.py +++ b/tests/test_embed.py @@ -3,7 +3,7 @@ from __future__ import annotations import unittest from unittest.mock import patch -from chromy.embed import embed +from chromy.embedding import embed class EmbedTest(unittest.TestCase): @@ -12,7 +12,7 @@ class EmbedTest(unittest.TestCase): def test_embed_pairs_text_with_list_embeddings(self) -> None: with patch( - "chromy.embed.DefaultEmbeddingFunction", + "chromy.embedding.service.DefaultEmbeddingFunction", return_value=lambda chunks: ((1.0, 2.0), (3.0, 4.0)), ): result = embed(["first", "second"])