docarray · JoanFM · Oct 18, 2022 · Oct 18, 2022 · Oct 18, 2022 · Oct 18, 2022
diff --git a/docarray/array/mixins/find.py b/docarray/array/mixins/find.py
@@ -154,10 +154,10 @@ def find(
                     'filter and query cannot be both dict type, set only one for filtering'
                 )
         elif query is None:
-            if isinstance(filter, dict):
+            if isinstance(filter, (str, dict)):
                 return self._filter(filter, limit=limit)
             else:
-                raise ValueError('filter must be dict when query is None')
+                raise ValueError('filter must be dict or str when query is None')
         elif isinstance(query, str) or (
             isinstance(query, list) and isinstance(query[0], str)
         ):

diff --git a/docarray/array/storage/redis/backend.py b/docarray/array/storage/redis/backend.py
@@ -46,7 +46,6 @@ class BackendMixin(BaseBackendMixin):
         'float': TypeMap(type='float', converter=NumericField),
         'double': TypeMap(type='double', converter=NumericField),
         'long': TypeMap(type='long', converter=NumericField),
-        'bool': TypeMap(type='long', converter=NumericField),
     }
 
     def _init_storage(

diff --git a/docarray/array/storage/redis/find.py b/docarray/array/storage/redis/find.py
@@ -1,3 +1,4 @@
+import warnings
 from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, TypeVar, Union
 
 import numpy as np
@@ -38,14 +39,13 @@ class FindMixin(BaseFindMixin):
     def _find_similar_vectors(
         self,
         query: 'RedisArrayType',
-        filter: Optional[Dict] = None,
+        filter: Optional[Union[str, Dict]] = None,
         limit: Union[int, float] = 20,
         **kwargs,
     ):
 
         if filter:
-            nodes = _build_query_nodes(filter)
-            query_str = intersect(*nodes).to_string()
+            query_str = _get_redis_filter_query(filter)
         else:
             query_str = '*'
 
@@ -90,11 +90,10 @@ def _find(
 
     def _find_with_filter(
         self,
-        filter: Dict,
+        filter: Union[str, Dict],
         limit: Union[int, float] = 20,
     ):
-        nodes = _build_query_nodes(filter)
-        query_str = intersect(*nodes).to_string()
+        query_str = _get_redis_filter_query(filter)
         q = Query(query_str)
         q.paging(0, limit)
 
@@ -218,3 +217,19 @@ def _build_query_nodes(filter):
 def _build_query_str(query):
     query_str = '|'.join(query.split(' '))
     return query_str
+
+
+def _get_redis_filter_query(filter: Union[str, Dict]):
+    """Return the Redis query string for ``filter``; a ``str`` is used as-is, a ``dict``
+    (deprecated, warns) is converted, and any other type raises ``ValueError``."""
+    if isinstance(filter, str):
+        return filter
+    if not isinstance(filter, dict):
+        raise ValueError(
+            f'Unexpected type of filter: {type(filter)}, expected str or dict'
+        )
+    warnings.warn(
+        "Dict syntax for redis filter will be deprecated, use string literals instead",
+        DeprecationWarning,
+    )
+    return intersect(*_build_query_nodes(filter)).to_string()
diff --git a/docs/advanced/document-store/redis.md b/docs/advanced/document-store/redis.md
@@ -113,17 +113,41 @@ da2.summary()
 
 Other functions behave the same as in-memory DocumentArray.
 
+## Configuration
 
-### Vector search with filter query
+The following configs can be set:
+
+| Name              | Description                                                                                       | Default                                           |
+|-------------------|---------------------------------------------------------------------------------------------------|-------------------------------------------------- |
+| `host`            | Host address of the Redis server                                                                  | `'localhost'`                                     |
+| `port`            | Port of the Redis Server                                                                          | `6379`                                            |
+| `redis_config`    | Other Redis configs in a Dict, passed to `Redis` client constructor, e.g. `socket_timeout`, `ssl` | `{}`                                              |
+| `index_name`      | Redis index name; the name of RedisSearch index to set this DocumentArray                         | `None`                                            |
+| `n_dim`           | Dimensionality of the embeddings                                                                  | `None`                                            |
+| `update_schema`   | Boolean flag indicating whether to update Redis Search schema                                     | `True`                                            |
+| `distance`        | Similarity distance metric in Redis, one of {`'L2'`, `'IP'`, `'COSINE'`}                          | `'COSINE'`                                        |
+| `batch_size`      | Batch size used to handle storage updates                                                         | `64`                                              |
+| `method`          | Vector similarity index algorithm in Redis, either `FLAT` or `HNSW`                               | `'HNSW'`                                          |
+| `index_text`      | Boolean flag indicating whether to index `.text`. `True` will enable full text search on `.text`  | `None`                                            |
+| `tag_indices`     | List of tags to index as text field                                                               | `[]`                                              |
+| `ef_construction` | Optional parameter for Redis HNSW algorithm                                                       | `200`                                             |
+| `m`               | Optional parameter for Redis HNSW algorithm                                                       | `16`                                              |
+| `ef_runtime`      | Optional parameter for Redis HNSW algorithm                                                       | `10`                                              |
+| `block_size`      | Optional parameter for Redis FLAT algorithm                                                       | `1048576`                                         |
+| `initial_cap`     | Optional parameter for Redis HNSW and FLAT algorithm                                              | `None`, defaults to the default value in Redis    |
+| `columns`         | Other fields to store in Document and build schema                                                | `None`                                            |
 
-You can perform Vector Similarity Search based on [FLAT or HNSW algorithm](vector-search-index) and pre-filter results using a filter query that is based on [MongoDB's Query](https://www.mongodb.com/docs/manual/reference/operator/query/). The following tags filters can be combine with `$and` and `$or`:
+You can check the default values in [the docarray source code](https://github.com/jina-ai/docarray/blob/main/docarray/array/storage/redis/backend.py).
+For vector search configurations, default values are those of the database backend, which you can find in the [Redis documentation](https://redis.io/docs/stack/search/reference/vectors/).
 
-- `$eq` - Equal to (number, string)
-- `$ne` - Not equal to (number, string)
-- `$gt` - Greater than (number)
-- `$gte` - Greater than or equal to (number)
-- `$lt` - Less than (number)
-- `$lte` - Less than or equal to (number)
+```{note}
+We will support geo-filtering soon. 
+The benchmark test is on the way.
+```
+
+### Vector search with filter query
+
+You can perform Vector Similarity Search based on [FLAT or HNSW algorithm](vector-search-index) and pre-filter results using [Redis' Search Query Syntax](https://redis.io/docs/stack/search/reference/query_syntax/).
 
 
 Consider Documents with embeddings `[0, 0, 0]` up to `[9, 9, 9]` where the Document with embedding `[i, i, i]`
@@ -139,7 +163,7 @@ da = DocumentArray(
     storage='redis',
     config={
         'n_dim': n_dim,
-        'columns': {'price': 'int', 'color': 'str', 'stock': 'bool'},
+        'columns': {'price': 'int', 'color': 'str', 'stock': 'int'},
         'distance': 'L2',
     },
 )
@@ -150,7 +174,7 @@ with da:
             Document(
                 id=f'{i}',
                 embedding=i * np.ones(n_dim),
-                tags={'price': i, 'color': 'blue', 'stock': i % 2 == 0},
+                tags={'price': i, 'color': 'blue', 'stock': int(i % 2 == 0)},
             )
             for i in range(10)
         ]
@@ -160,7 +184,7 @@ with da:
             Document(
                 id=f'{i+10}',
                 embedding=i * np.ones(n_dim),
-                tags={'price': i, 'color': 'red', 'stock': i % 2 == 0},
+                tags={'price': i, 'color': 'red', 'stock': int(i % 2 == 0)},
             )
             for i in range(10)
         ]
@@ -176,22 +200,7 @@ for doc in da:
 Consider the case where you want the nearest vectors to the embedding `[8.,  8.,  8.]`, with the restriction that prices, colors and stock must pass a filter. For example, let's consider that retrieved Documents must have a `price` value lower than or equal to `max_price`, have `color` equal to `blue` and have `stock` equal to `True`. We can encode this information in Redis using
 
 ```text
-{
-    "price": {"$lte": max_price},
-    "color": {"$gt": color},
-    "stock": {"$eq": True},
-}
-```
-or 
-
-```text
-{
-    "$and": {
-        "price": {"$lte": max_price},
-        "color": {"$gt": color},
-        "stock": {"$eq": True},
-    }
-}
+@price:[-inf {max_price}] @color:{color} @stock:[1 1]
 ```
 
 Then the search with the proposed filter can be used as follows:
@@ -203,11 +212,7 @@ n_limit = 5
 np_query = np.ones(n_dim) * 8
 print(f'\nQuery vector: \t{np_query}')
 
-filter = {
-    "price": {"$lte": max_price},
-    "color": {"$eq": color},
-    "stock": {"$eq": True},
-}
+filter = f'@price:[-inf {max_price}] @color:{color} @stock:[1 1]'
 
 results = da.find(np_query, filter=filter, limit=n_limit)
 
@@ -225,49 +230,73 @@ This will print:
 ```console
 Embeddings Approximate Nearest Neighbours with "price" at most 7, "color" blue and "stock" True:
 
- score=12,	 embedding=[6. 6. 6.],	 price=6,	 color=blue,	 stock=True
- score=48,	 embedding=[4. 4. 4.],	 price=4,	 color=blue,	 stock=True
- score=108,	 embedding=[2. 2. 2.],	 price=2,	 color=blue,	 stock=True
- score=192,	 embedding=[0. 0. 0.],	 price=0,	 color=blue,	 stock=True
-```
-More example filter expresses
-- A Nike shoes or price less than `100`
-
-```JSON
-{
-    "$or": {
-        "brand": {"$eq": "Nike"},
-        "price": {"$lt": 100}
-    }
-}
+ score=12,	 embedding=[6. 6. 6.],	 price=6,	 color=blue,	 stock=1
+ score=48,	 embedding=[4. 4. 4.],	 price=4,	 color=blue,	 stock=1
+ score=108,	 embedding=[2. 2. 2.],	 price=2,	 color=blue,	 stock=1
+ score=192,	 embedding=[0. 0. 0.],	 price=0,	 color=blue,	 stock=1
 ```
 
-- A Nike shoes **and** either price is less than `100` or color is `"blue"`
+````{admonition} Note
+:class: note
+Redis does not support Boolean types in attributes. Therefore, you need to configure your boolean field as
+an integer in the `columns` configuration (`'field': 'int'`) and use a filter query that treats the field as an
+integer (`@field:[1 1]`).
+````
+
+### Search by filter query
+
+One can search with user-defined query filters using the `.find` method. Such queries follow the [Redis Search Query Syntax](https://redis.io/docs/stack/search/reference/query_syntax/).
+
+Consider a case where you store Documents with a tag of `price` into Redis and you want to retrieve all Documents
+with `price` less than or equal to some `max_price` value.
+
+You can index such Documents as follows:
 
-```JSON
-{
-    "brand": {"$eq": "Nike"},
-    "$or": {
-        "price": {"$lt": 100},
-        "color": {"$eq": "blue"},
+```python
+from docarray import Document, DocumentArray
+
+n_dim = 3
+da = DocumentArray(
+    storage='redis',
+    config={
+        'n_dim': n_dim,
+        'columns': {'price': 'float'},
     },
-}
+)
+
+with da:
+    da.extend([Document(id=f'r{i}', tags={'price': i}) for i in range(10)])
+
+print('\nIndexed Prices:\n')
+for price in da[:, 'tags__price']:
+    print(f'\t price={price}')
+```
+
+Then you can retrieve all documents whose price is less than or equal to `max_price` by applying the following
+filter:
+
+```python
+max_price = 3
+n_limit = 4
+
+filter = f'@price:[-inf {max_price}] '
+results = da.find(filter=filter, limit=n_limit)
+
+print('\n Returned examples that verify filter "price at most 3":\n')
+for price in results[:, 'tags__price']:
+    print(f'\t price={price}')
 ```
 
-- A Nike shoes **or** both price is less than `100` and color is `"blue"`
-
-```JSON
-{
-    "$or": {
-        "brand": {"$eq": "Nike"},
-        "$and": {
-            "price": {"$lt": 100},
-            "color": {"$eq": "blue"},
-        },
-    }
-}
+This will print:
+
 ```
+ Returned examples that verify filter "price at most 3":
 
+  price=0
+  price=1
+  price=2
+  price=3
+```
 
 (vector-search-index)=
 ### Update Vector Search Indexing Schema
@@ -471,34 +500,3 @@ results = da.find('cheap', index='price')
 
 
 
-## Configuration
-
-The following configs can be set:
-
-| Name              | Description                                                                                       | Default                                           |
-|-------------------|---------------------------------------------------------------------------------------------------|-------------------------------------------------- |
-| `host`            | Host address of the Redis server                                                                  | `'localhost'`                                     |
-| `port`            | Port of the Redis Server                                                                          | `6379`                                            |
-| `redis_config`    | Other Redis configs in a Dict and pass to `Redis` client constructor, e.g. `socket_timeout`, `ssl`| `{}`                                              |
-| `index_name`      | Redis index name; the name of RedisSearch index to set this DocumentArray                         | `None`                                            |
-| `n_dim`           | Dimensionality of the embeddings                                                                  | `None`                                            |
-| `update_schema`   | Boolean flag indicating whether to update Redis Search schema                                     | `True`                                            |
-| `distance`        | Similarity distance metric in Redis, one of {`'L2'`, `'IP'`, `'COSINE'`}                          | `'COSINE'`                                        |
-| `batch_size`      | Batch size used to handle storage updates                                                         | `64`                                              |
-| `method`          | Vector similarity index algorithm in Redis, either `FLAT` or `HNSW`                               | `'HNSW'`                                          |
-| `index_text`      | Boolean flag indicating whether to index `.text`. `True` will enable full text search on `.text`  | `None`                                            |
-| `tag_indices`     | List of tags to index as text field                                                               | `[]`                                              |
-| `ef_construction` | Optional parameter for Redis HNSW algorithm                                                       | `200`                                             |
-| `m`               | Optional parameter for Redis HNSW algorithm                                                       | `16`                                              |
-| `ef_runtime`      | Optional parameter for Redis HNSW algorithm                                                       | `10`                                              |
-| `block_size`      | Optional parameter for Redis FLAT algorithm                                                       | `1048576`                                         |
-| `initial_cap`     | Optional parameter for Redis HNSW and FLAT algorithm                                              | `None`, defaults to the default value in Redis    |
-| `columns`         | Other fields to store in Document and build schema                                                | `None`                                            |
-
-You can check the default values in [the docarray source code](https://github.com/jina-ai/docarray/blob/main/docarray/array/storage/redis/backend.py)
-For vector search configurations, default values are those of the database backend, you can find them in [redis documentation](https://redis.io/docs/stack/search/reference/vectors/)
-
-```{note}
-We will support geo-filtering soon. 
-The benchmark test is on the way.
-```
diff --git a/tests/unit/array/mixins/test_exception.py b/tests/unit/array/mixins/test_exception.py
@@ -34,5 +34,7 @@ def test_embedding_ops_error():
     with pytest.raises(ValueError, match='Did you forget to set'):
         db.find(da)
     da.embeddings = np.random.random([100, 256])
-    with pytest.raises(ValueError, match='filter must be dict when query is None'):
+    with pytest.raises(
+        ValueError, match='filter must be dict or str when query is None'
+    ):
         da.find(None)