refactor chunking and embedding into their own modules
This commit is contained in:
@@ -9,7 +9,7 @@ from chromadb.api import ClientAPI
|
||||
from chromadb.api.types import QueryResult, Where
|
||||
from chromadb.errors import NotFoundError
|
||||
|
||||
from chromy.embed import EmbeddingRecord
|
||||
from chromy.embedding import EmbeddingRecord
|
||||
|
||||
|
||||
def _get_client_and_collection(
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from chromy.chunking.service import chunk_file, chunk_text
|
||||
|
||||
__all__ = ["chunk_file", "chunk_text"]
|
||||
@@ -3,7 +3,7 @@ from __future__ import annotations
|
||||
from pathlib import Path
|
||||
from typing import cast
|
||||
|
||||
import semchunk
|
||||
from semchunk import semchunk
|
||||
|
||||
|
||||
def chunk_text(text: str, chunk_size: int = 800) -> list[str]:
|
||||
@@ -0,0 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from chromy.embedding.service import EmbeddingRecord, embed
|
||||
|
||||
__all__ = ["EmbeddingRecord", "embed"]
|
||||
+2
-2
@@ -5,8 +5,8 @@ from pathlib import Path
|
||||
from chromadb import QueryResult
|
||||
|
||||
from chromy.chroma_functions import add_data, delete_data, has_data_for_file, query_data
|
||||
from chromy.chunk_functions import chunk_file
|
||||
from chromy.embed import embed
|
||||
from chromy.chunking import chunk_file
|
||||
from chromy.embedding import embed
|
||||
|
||||
|
||||
def ingest_file(collection_name: str, file_path: str) -> int:
|
||||
|
||||
Reference in New Issue
Block a user