@@ -61,6 +61,7 @@ da = DocumentArray(
6161 ' host' : ' localhost' ,
6262 ' port' : ' 6333' ,
6363 ' n_dim' : 10 ,
64+ ' distance' : " cosine" ,
6465 },
6566)
6667
@@ -98,7 +99,7 @@ Create `docker-compose.yml`:
9899version : ' 3.4'
99100services :
100101 qdrant :
101- image : qdrant/qdrant:v0.7 .0
102+ image : qdrant/qdrant:v0.8 .0
102103 ports :
103104 - " 6333:6333"
104105 ulimits : # Only required for tests, as there are a lot of collections created
@@ -121,7 +122,9 @@ from docarray import DocumentArray
121122
122123N, D = 100 , 128
123124
124- da = DocumentArray.empty(N, storage = ' qdrant' , config = {' n_dim' : D}) # init
125+ da = DocumentArray.empty(
126+ N, storage = ' qdrant' , config = {' n_dim' : D, ' distance' : ' cosine' }
127+ ) # init
125128
126129da.embeddings = np.random.random([N, D])
127130
@@ -146,7 +149,7 @@ in [Qdrant's Documentation](https://qdrant.tech/documentation/filtering/)
146149Consider Documents with embeddings ` [0,0,0] ` up to ` [9,9,9] ` where the document with embedding ` [i,i,i] `
147150has a tag ` price ` with value ` i `. We can create such an example with the following code:
148151
149- ``` python
152+ ``` python
150153from docarray import Document, DocumentArray
151154import numpy as np
152155
@@ -175,7 +178,7 @@ for embedding, price in zip(da.embeddings, da[:, 'tags__price']):
175178
176179Consider we want the nearest vectors to the embedding ` [8. 8. 8.] ` , with the restriction that
177180prices must follow a filter. As an example, let's consider that retrieved documents must have a ` price ` value lower than
178- or equal than ` max_price ` . We can encode this information in annlite using ` filter = {'price': {'$lte': max_price}} ` .
181+ or equal to ` max_price `. We can encode this information in Qdrant using ` filter = {'price': {'$lte': max_price}} `.
179182
180183Then the search with the proposed filter can be implemented and used with the following code:
181184
@@ -206,3 +209,58 @@ Embeddings Nearest Neighbours with "price" at most 7:
206209 embedding=[5. 5. 5.], price=5
207210 embedding=[4. 4. 4.], price=4
208211```
212+ ### Example of ` .filter ` with a filter
213+ Consider Documents that have a tag ` price ` with value ` i `. We can create such an example with the following code:
214+ ``` python
215+ from docarray import Document, DocumentArray
216+ import numpy as np
217+
218+ n_dim = 3
219+ distance = ' euclidean'
220+
221+ da = DocumentArray(
222+ storage = ' qdrant' ,
223+ config = {' n_dim' : n_dim, ' columns' : {' price' : ' float' }, ' distance' : distance},
224+ )
225+
226+ print (f ' \n DocumentArray distance: { distance} ' )
227+
228+ with da:
229+ da.extend(
230+ [
231+ Document(id = f ' r { i} ' , embedding = i * np.ones(n_dim), tags = {' price' : i})
232+ for i in range (10 )
233+ ]
234+ )
235+
236+ print (' \n Indexed Prices:\n ' )
237+ for embedding, price in zip (da.embeddings, da[:, ' tags__price' ]):
238+ print (f ' \t embedding= { embedding} , \t price= { price} ' )
239+ ```
240+ Suppose we want to retrieve only the Documents whose prices satisfy a filter. As an example,
241+ let's require that retrieved Documents have a ` price ` value lower than or equal to ` max_price `. We can encode
242+ this information in Qdrant using ` filter = {'must': [{'key': 'price', 'range': {'lte': max_price}}]} `.
243+
244+ Then the search with the proposed filter can be implemented and used with the following code:
245+ ``` python
246+ max_price = 7
247+ n_limit = 4
248+
249+ filter = {' must' : [{' key' : ' price' , ' range' : {' lte' : max_price}}]}
250+ results = da.filter(filter = filter , limit = n_limit)
251+
252+ print (' \n Points with "price" at most 7:\n ' )
253+ for embedding, price in zip (results.embeddings, results[:, ' tags__price' ]):
254+ print (f ' \t embedding= { embedding} , \t price= { price} ' )
255+ ```
256+ This would print:
257+
258+ ```
259+
260+ Points with "price" at most 7:
261+
262+ embedding=[6. 6. 6.], price=6
263+ embedding=[7. 7. 7.], price=7
264+ embedding=[1. 1. 1.], price=1
265+ embedding=[2. 2. 2.], price=2
266+ ```
0 commit comments