1
- from typing import Any , Iterable , List , Optional , Tuple , Type , TypeVar
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING , Any , Dict , Iterable , List , Optional , Tuple
2
4
3
5
from langchain_core .documents import Document
4
- from langchain_core .embeddings import Embeddings
5
6
from langchain_core .vectorstores import VectorStore , VectorStoreRetriever
6
7
from ragstack_colbert import Chunk
7
8
from ragstack_colbert import ColbertVectorStore as RagstackColbertVectorStore
8
- from ragstack_colbert .base_database import BaseDatabase as ColbertBaseDatabase
9
- from ragstack_colbert .base_embedding_model import (
10
- BaseEmbeddingModel as ColbertBaseEmbeddingModel ,
11
- )
12
- from ragstack_colbert .base_retriever import BaseRetriever as ColbertBaseRetriever
13
- from ragstack_colbert .base_vector_store import BaseVectorStore as ColbertBaseVectorStore
14
- from typing_extensions import override
9
+ from typing_extensions import Self , override
15
10
16
11
from ragstack_langchain .colbert .embedding import TokensEmbeddings
17
12
18
- CVS = TypeVar ("CVS" , bound = "ColbertVectorStore" )
13
+ if TYPE_CHECKING :
14
+ from langchain_core .embeddings import Embeddings
15
+ from ragstack_colbert .base_database import BaseDatabase as ColbertBaseDatabase
16
+ from ragstack_colbert .base_embedding_model import (
17
+ BaseEmbeddingModel as ColbertBaseEmbeddingModel ,
18
+ )
19
+ from ragstack_colbert .base_retriever import BaseRetriever as ColbertBaseRetriever
20
+ from ragstack_colbert .base_vector_store import (
21
+ BaseVectorStore as ColbertBaseVectorStore ,
22
+ )
19
23
20
24
21
25
class ColbertVectorStore (VectorStore ):
@@ -35,7 +39,7 @@ def _initialize(
35
39
self ,
36
40
database : ColbertBaseDatabase ,
37
41
embedding_model : ColbertBaseEmbeddingModel ,
38
- ):
42
+ ) -> None :
39
43
self ._vector_store = RagstackColbertVectorStore (
40
44
database = database , embedding_model = embedding_model
41
45
)
@@ -45,7 +49,7 @@ def _initialize(
45
49
def add_texts (
46
50
self ,
47
51
texts : Iterable [str ],
48
- metadatas : Optional [List [dict ]] = None ,
52
+ metadatas : Optional [List [Dict [ str , Any ] ]] = None ,
49
53
doc_id : Optional [str ] = None ,
50
54
** kwargs : Any ,
51
55
) -> List [str ]:
@@ -60,17 +64,18 @@ def add_texts(
60
64
Returns:
61
65
List of ids from adding the texts into the vectorstore.
62
66
"""
63
- return self ._vector_store .add_texts (
67
+ results = self ._vector_store .add_texts (
64
68
texts = list (texts ), metadatas = metadatas , doc_id = doc_id
65
69
)
70
+ return [results [0 ][0 ]] if results else []
66
71
67
72
@override
68
73
async def aadd_texts (
69
74
self ,
70
75
texts : Iterable [str ],
71
- metadatas : Optional [List [dict ]] = None ,
76
+ metadatas : Optional [List [Dict [ str , Any ] ]] = None ,
72
77
doc_id : Optional [str ] = None ,
73
- concurrent_inserts : Optional [ int ] = 100 ,
78
+ concurrent_inserts : int = 100 ,
74
79
** kwargs : Any ,
75
80
) -> List [str ]:
76
81
"""Run more texts through the embeddings and add to the vectorstore.
@@ -86,51 +91,30 @@ async def aadd_texts(
86
91
Returns:
87
92
List of ids from adding the texts into the vectorstore.
88
93
"""
89
- return await self ._vector_store .aadd_texts (
94
+ results = await self ._vector_store .aadd_texts (
90
95
texts = list (texts ),
91
96
metadatas = metadatas ,
92
97
doc_id = doc_id ,
93
98
concurrent_inserts = concurrent_inserts ,
94
99
)
100
+ return [results [0 ][0 ]] if results else []
95
101
96
102
@override
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
    """Delete the chunks stored for the given document ids.

    Args:
        ids: Document ids forwarded to the wrapped store as ``doc_ids``.
            When ``None``, nothing is deleted.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        ``None`` when ``ids`` is ``None``; otherwise whatever the wrapped
        store's ``delete_chunks`` returns.
    """
    # No ids means there is nothing to forward to the underlying store.
    if ids is None:
        return None
    return self._vector_store.delete_chunks(doc_ids=ids)
109
105
110
106
@override
async def adelete(
    self,
    ids: Optional[List[str]] = None,
    concurrent_deletes: int = 100,
    **kwargs: Any,
) -> Optional[bool]:
    """Asynchronously delete the chunks stored for the given document ids.

    Args:
        ids: Document ids forwarded to the wrapped store as ``doc_ids``.
            When ``None``, nothing is deleted.
        concurrent_deletes: Passed through unchanged to the wrapped
            store's ``adelete_chunks``. Defaults to 100.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        ``None`` when ``ids`` is ``None``; otherwise whatever the wrapped
        store's ``adelete_chunks`` returns.
    """
    # No ids means there is nothing to forward to the underlying store.
    if ids is None:
        return None
    outcome = await self._vector_store.adelete_chunks(
        doc_ids=ids, concurrent_deletes=concurrent_deletes
    )
    return outcome
136
120
@@ -215,7 +199,7 @@ def from_documents(
215
199
* ,
216
200
database : Optional [ColbertBaseDatabase ] = None ,
217
201
** kwargs : Any ,
218
- ) -> CVS :
202
+ ) -> Self :
219
203
"""Return VectorStore initialized from documents and embeddings."""
220
204
texts = [d .page_content for d in documents ]
221
205
metadatas = [d .metadata for d in documents ]
@@ -230,14 +214,14 @@ def from_documents(
230
214
@classmethod
231
215
@override
232
216
async def afrom_documents (
233
- cls : Type [ CVS ] ,
217
+ cls ,
234
218
documents : List [Document ],
235
219
embedding : Embeddings ,
236
220
* ,
237
221
database : Optional [ColbertBaseDatabase ] = None ,
238
- concurrent_inserts : Optional [ int ] = 100 ,
222
+ concurrent_inserts : int = 100 ,
239
223
** kwargs : Any ,
240
- ) -> CVS :
224
+ ) -> Self :
241
225
"""Return VectorStore initialized from documents and embeddings."""
242
226
texts = [d .page_content for d in documents ]
243
227
metadatas = [d .metadata for d in documents ]
@@ -253,14 +237,14 @@ async def afrom_documents(
253
237
@classmethod
254
238
@override
255
239
def from_texts (
256
- cls : Type [ CVS ] ,
240
+ cls ,
257
241
texts : List [str ],
258
242
embedding : Embeddings ,
259
- metadatas : Optional [List [dict ]] = None ,
243
+ metadatas : Optional [List [Dict [ str , Any ] ]] = None ,
260
244
* ,
261
245
database : Optional [ColbertBaseDatabase ] = None ,
262
246
** kwargs : Any ,
263
- ) -> CVS :
247
+ ) -> Self :
264
248
if not isinstance (embedding , TokensEmbeddings ):
265
249
raise TypeError ("ColbertVectorStore requires a TokensEmbeddings embedding." )
266
250
if database is None :
@@ -276,15 +260,15 @@ def from_texts(
276
260
@classmethod
277
261
@override
278
262
async def afrom_texts (
279
- cls : Type [ CVS ] ,
263
+ cls ,
280
264
texts : List [str ],
281
265
embedding : Embeddings ,
282
- metadatas : Optional [List [dict ]] = None ,
266
+ metadatas : Optional [List [Dict [ str , Any ] ]] = None ,
283
267
* ,
284
268
database : Optional [ColbertBaseDatabase ] = None ,
285
- concurrent_inserts : Optional [ int ] = 100 ,
269
+ concurrent_inserts : int = 100 ,
286
270
** kwargs : Any ,
287
- ) -> CVS :
271
+ ) -> Self :
288
272
if not isinstance (embedding , TokensEmbeddings ):
289
273
raise TypeError ("ColbertVectorStore requires a TokensEmbeddings embedding." )
290
274
if database is None :
0 commit comments