Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion docarray/array/storage/annlite/find.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

if TYPE_CHECKING:
import numpy as np
from .... import DocumentArray

from .... import DocumentArray


class FindMixin:
Expand Down Expand Up @@ -41,3 +42,22 @@ def _find(
)

return match_docs

def _filter(
    self,
    filter: Dict,
    limit: Optional[Union[int, float]] = 20,
    only_id: bool = False,
) -> 'DocumentArray':
    """Return the stored documents that satisfy an `Annlite` filter.

    No query vector is involved: the filter alone selects the documents.

    :param filter: the `Annlite` filter, forwarded unchanged to the index
    :param limit: forwarded to the index as its ``limit`` argument
    :param only_id: if set, the index is asked not to include metadata, so the
        returned documents are expected to only contain ``id``
    :return: a `DocumentArray` built from the documents returned by the index.
    """
    # Metadata is requested exactly when the caller wants more than ids.
    with_metadata = not only_id
    matched = self._annlite.filter(
        filter=filter,
        limit=limit,
        include_metadata=with_metadata,
    )
    return DocumentArray(matched)
57 changes: 56 additions & 1 deletion docs/advanced/document-store/annlite.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,62 @@ The following configs can be set:
Search with `.find` can be restricted by user-defined filters.
Filters can be constructed following the guidelines provided in [the AnnLite source repository](https://github.com/jina-ai/annlite).

### Example of `.find` with a filter
### Example of `.find` with a filter only


Suppose you store Documents with a `price` tag in AnnLite and you want to retrieve all Documents
whose `price` is lower than or equal to some `max_price` value.


You can index such Documents as follows:
```python
from docarray import Document, DocumentArray
import numpy as np

n_dim = 3
da = DocumentArray(
storage='annlite',
config={
'n_dim': n_dim,
'columns': [('price', 'float')],
},
)

with da:
da.extend([Document(id=f'r{i}', tags={'price': i}) for i in range(10)])

print('\nIndexed Prices:\n')
for price in da[:, 'tags__price']:
print(f'\t price={price}')
```

Then you can retrieve all documents whose price is lower than or equal to `max_price` by applying the following
filter:

```python
max_price = 3
n_limit = 4

filter = {'price': {'$lte': max_price}}
results = da.find(filter=filter)

print('\n Returned examples that verify filter "price at most 3":\n')
for price in results[:, 'tags__price']:
print(f'\t price={price}')
```

This would print

```
Returned examples that verify filter "price at most 3":

price=0
price=1
price=2
price=3
```

### Example of `.find` with query vector and filter

Consider Documents with embeddings `[0,0,0]` up to `[9,9,9]`, where the Document with embedding `[i,i,i]`
has a tag `price` with value `i`. We can create such an example with the following code:
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,15 @@
'uvicorn',
'strawberry-graphql',
'weaviate-client~=3.3.0',
'annlite>=0.3.0',
'annlite>=0.3.2',
'qdrant-client~=0.7.3',
'elasticsearch>=8.2.0',
],
'qdrant': [
'qdrant-client~=0.7.3',
],
'annlite': [
'annlite>=0.3.0',
'annlite>=0.3.2',
],
'weaviate': [
'weaviate-client~=3.3.0',
Expand Down Expand Up @@ -100,7 +100,7 @@
'jupyterlab',
'transformers>=4.16.2',
'weaviate-client~=3.3.0',
'annlite>=0.3.0',
'annlite>=0.3.2',
'elasticsearch>=8.2.0',
'jina',
],
Expand Down
11 changes: 11 additions & 0 deletions tests/unit/array/mixins/test_find.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,17 @@ def test_search_pre_filtering(
)
for operator in ['gt', 'gte', 'lt', 'lte']
],
*[
tuple(
[
'annlite',
lambda operator, threshold: {'price': {operator: threshold}},
numeric_operators_annlite,
operator,
]
)
for operator in numeric_operators_annlite.keys()
],
],
)
def test_filtering(storage, filter_gen, operator, numeric_operators, start_storage):
Expand Down