docarray · hanxiao · Apr 7, 2022 · Apr 7, 2022
diff --git a/docarray/array/mixins/plot.py b/docarray/array/mixins/plot.py
@@ -37,6 +37,8 @@ def summary(self):
         console = Console()
 
         all_attrs = self._get_attributes('non_empty_fields')
+        # remove underscore attribute
+        all_attrs = [tuple(vv for vv in v if not vv.startswith('_')) for v in all_attrs]
         attr_counter = Counter(all_attrs)
 
         table = Table(box=box.SIMPLE, highlight=True)
@@ -72,6 +74,9 @@ def summary(self):
                     _text = f'{_doc_text} attributes'
                 table.add_row(_text, str(_a))
 
+        is_multimodal = all(d.is_multimodal for d in self)
+        table.add_row('Multimodal dataclass', str(is_multimodal))
+
         tables.append(Panel(table, title='Documents Summary', expand=False))
 
         all_attrs_names = tuple(sorted(all_attrs_names))
@@ -339,7 +344,7 @@ def plot_image_sprites(
             img_size = min_size
             img_per_row = int(canvas_size / img_size)
 
-        max_num_img = img_per_row ** 2
+        max_num_img = img_per_row**2
         sprite_img = np.zeros(
             [img_size * img_per_row, img_size * img_per_row, 3], dtype='uint8'
         )

diff --git a/docarray/helper.py b/docarray/helper.py
@@ -60,16 +60,19 @@ def _f(*args, **kwargs):
 
 def dunder_get(_dict: Any, key: str) -> Any:
     """Returns value for a specified dunderkey
+
     A "dunderkey" is just a fieldname that may or may not contain
     double underscores (dunderscores!) for referencing nested keys in
     a dict. eg::
-         >>> data = {'a': {'b': 1}}
-         >>> dunder_get(data, 'a__b')
-         1
+     >>> data = {'a': {'b': 1}}
+     >>> dunder_get(data, 'a__b')
+
     key 'b' can be referrenced as 'a__b'
-    :param _dict : (dict, list, struct or object) which we want to index into
-    :param key   : (str) that represents a first level or nested key in the dict
+
+    :param _dict: (dict, list, struct or object) which we want to index into
+    :param key: (str) that represents a first level or nested key in the dict
     :return: (mixed) value corresponding to the key
+
     """
 
     if not _dict:

diff --git a/docs/fundamentals/dataclass/access.md b/docs/fundamentals/dataclass/access.md
@@ -1,2 +1,330 @@
 # Access Modality
 
+```{tip}
+It is strongly recommended to go through {ref}`access-elements` before continuing.
+```
+
+Accessing a modality means accessing the sub-Documents that correspond to a dataclass field.
+
+In the last chapter, we learned how to represent a multimodal document via `@dataclass` and type annotation from `docarray.typing`. We also learned that a multimodal dataclass can be converted into a `Document` object easily. That means if we have a list of multimodal dataclass objects, we can build a DocumentArray out of them:
+
+```python
+from docarray import Document, dataclass, DocumentArray
+from docarray.typing import Image, Text
+
+
+@dataclass
+class MMDoc:
+    banner: Image
+    description: Text
+
+
+da = DocumentArray(
+    [
+        Document(
+            MMDoc(banner='test-1.jpeg', description='this is a test white-noise image')
+        ),
+        Document(
+            MMDoc(banner='test-2.jpeg', description='another test image but in black')
+        ),
+    ]
+)
+
+da.summary()
+```
+
+```text
+╭────────────── Documents Summary ───────────────╮
+│                                                │
+│   Length                    2                  │
+│   Homogenous Documents      True               │
+│   Has nested Documents in   ('chunks',)        │
+│   Common Attributes         ('id', 'chunks')   │
+│   Multimodal dataclass      True               │
+│                                                │
+╰────────────────────────────────────────────────╯
+╭──────────────────────── Attributes Summary ────────────────────────╮
+│                                                                    │
+│   Attribute   Data type         #Unique values   Has empty value   │
+│  ────────────────────────────────────────────────────────────────  │
+│   chunks      ('ChunkArray',)   2                False             │
+│   id          ('str',)          2                False             │
+│                                                                    │
+╰────────────────────────────────────────────────────────────────────╯
+```
+
+A natural question is: how do we select the Documents that correspond to `MMDoc.banner`?
+
+This chapter describes how to select the sub-documents that correspond to a modality from a DocumentArray. To reiterate the logic: when calling `Document()` to build a Document object from a dataclass object, each field in that dataclass generates a sub-document nested under `.chunks`, or even under `.chunks.chunks.chunks` at an arbitrary level. To process a dataclass field via existing DocArray API/Jina/Hub Executor, we need a way to accurately select those sub-documents from the nested structure, which is the purpose of this chapter.
+
+## Selector Syntax
+
+Following the syntax convention described in {ref}`access-elements`, a modality selector also starts with `@`; it uses `.` to indicate the field of the dataclass. Selecting from a DocumentArray always results in another DocumentArray.
+
+```text
+@.[field1, field2, ...]
+^^ ~~~~~~  ~~~~~~
+||   |       |
+||   |-------|
+||       |
+||       | --- indicate the field of dataclass
+||
+|| ------ indicate the start of modality selector
+|
+| ---- indicate the start of selector
+```
+
+Use the above DocumentArray as an example,
+
+````{tab} Select Documents corresponding to .banner
+
+```python
+da['@.[banner]']
+```
+
+```text
+╭───────────────────────────── Documents Summary ──────────────────────────────╮
+│                                                                              │
+│   Length                 2                                                   │
+│   Homogenous Documents   True                                                │
+│   Common Attributes      ('id', 'parent_id', 'granularity', 'tensor',        │
+│                          'mime_type', 'uri', 'modality')                     │
+│   Multimodal dataclass   False                                               │
+│                                                                              │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─────────────────────── Attributes Summary ────────────────────────╮
+│                                                                   │
+│   Attribute     Data type      #Unique values   Has empty value   │
+│  ───────────────────────────────────────────────────────────────  │
+│   granularity   ('int',)       1                False             │
+│   id            ('str',)       2                False             │
+│   mime_type     ('str',)       1                False             │
+│   modality      ('str',)       1                False             │
+│   parent_id     ('str',)       2                False             │
+│   tensor        ('ndarray',)   2                False             │
+│   uri           ('str',)       2                False             │
+│                                                                   │
+╰───────────────────────────────────────────────────────────────────╯
+```
+
+
+````
+
+````{tab} Select Documents corresponding to .description
+
+```python
+da['@.[description]']
+```
+
+```text
+╭───────────────────────────── Documents Summary ──────────────────────────────╮
+│                                                                              │
+│   Length                 2                                                   │
+│   Homogenous Documents   True                                                │
+│   Common Attributes      ('id', 'parent_id', 'granularity', 'text',          │
+│                          'modality')                                         │
+│   Multimodal dataclass   False                                               │
+│                                                                              │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭────────────────────── Attributes Summary ──────────────────────╮
+│                                                                │
+│   Attribute     Data type   #Unique values   Has empty value   │
+│  ────────────────────────────────────────────────────────────  │
+│   granularity   ('int',)    1                False             │
+│   id            ('str',)    2                False             │
+│   modality      ('str',)    1                False             │
+│   parent_id     ('str',)    2                False             │
+│   text          ('str',)    2                False             │
+│                                                                │
+╰────────────────────────────────────────────────────────────────╯
+```
+
+
+````
+
+### Select multiple fields
+
+Square brackets are required when you want to select multiple fields; the field names must be separated by commas (`,`).
+
+````{tab} Select Documents corresponding to two fields
+
+```python
+da['@.[description, banner]']
+```
+````
+
+````{tab} Result
+
+
+```text
+╭───────────────────────────── Documents Summary ──────────────────────────────╮
+│                                                                              │
+│   Length                        4                                            │
+│   Homogenous Documents          False                                        │
+│   2 Documents have attributes   ('id', 'parent_id', 'granularity', 'text',   │
+│                                 'modality')                                  │
+│   2 Documents have attributes   ('id', 'parent_id', 'granularity',           │
+│                                 'tensor', 'mime_type', 'uri', 'modality')    │
+│   Multimodal dataclass          False                                        │
+│                                                                              │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭───────────────────────────── Attributes Summary ─────────────────────────────╮
+│                                                                              │
+│   Attribute     Data type                 #Unique values   Has empty value   │
+│  ──────────────────────────────────────────────────────────────────────────  │
+│   granularity   ('int',)                  1                False             │
+│   id            ('str',)                  4                False             │
+│   mime_type     ('str',)                  2                False             │
+│   modality      ('str',)                  2                False             │
+│   parent_id     ('str',)                  2                False             │
+│   tensor        ('ndarray', 'NoneType')   4                True              │
+│   text          ('str',)                  3                False             │
+│   uri           ('str',)                  3                False             │
+│                                                                              │
+╰──────────────────────────────────────────────────────────────────────────────╯
+```
+````
+
+### Slice dataclass objects
+
+Remember that each dataclass object corresponds to one Document object, so you can first slice the DocumentArray before selecting the field. Specifically, you can use:
+
+```text
+@r[slice].[field1, field2, ...]
+```
+
+where `slice` can be any slice syntax accepted in {ref}`access-elements`.
+
+For example, to select the sub-Document `.banner` for only the first Document,
+
+````{tab} Select .banner of the first dataclass  
+
+```python
+da['@r[:1].[banner]']
+```
+
+````
+
+````{tab} Result
+
+```text
+╭───────────────────────────── Documents Summary ──────────────────────────────╮
+│                                                                              │
+│   Length                 1                                                   │
+│   Homogenous Documents   True                                                │
+│   Common Attributes      ('id', 'parent_id', 'granularity', 'tensor',        │
+│                          'mime_type', 'uri', 'modality')                     │
+│   Multimodal dataclass   False                                               │
+│                                                                              │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─────────────────────── Attributes Summary ────────────────────────╮
+│                                                                   │
+│   Attribute     Data type      #Unique values   Has empty value   │
+│  ───────────────────────────────────────────────────────────────  │
+│   granularity   ('int',)       1                False             │
+│   id            ('str',)       1                False             │
+│   mime_type     ('str',)       1                False             │
+│   modality      ('str',)       1                False             │
+│   parent_id     ('str',)       1                False             │
+│   tensor        ('ndarray',)   1                False             │
+│   uri           ('str',)       1                False             │
+│                                                                   │
+╰───────────────────────────────────────────────────────────────────╯
+
+```
+
+````
+
+### Slice `List[Type]` fields
+
+If a field is annotated as a `List` of DocArray types, it creates a DocumentArray; you can add slicing after the field selector to further restrict the number of sub-Documents.
+
+```{code-block} python
+---
+emphasize-lines: 30
+---
+from typing import List
+
+from docarray import Document, dataclass, DocumentArray
+from docarray.typing import Image, Text
+
+
+@dataclass
+class MMDoc:
+    banner: List[Image]
+    description: Text
+
+
+da = DocumentArray(
+    [
+        Document(
+            MMDoc(
+                banner=['test-1.jpeg', 'test-2.jpeg'],
+                description='this is a test white image',
+            )
+        ),
+        Document(
+            MMDoc(
+                banner=['test-1.jpeg', 'test-2.jpeg'],
+                description='another test image but in black',
+            )
+        ),
+    ]
+)
+
+for d in da['@.[banner]:1']:
+    print(d.uri)
+```
+
+
+```text
+test-1.jpeg
+test-1.jpeg
+```
+
+In summary, slicing can be put in front of the field selector to restrict the number of dataclass objects, or after the field selector to restrict the number of sub-Documents.
+
+### Select nested fields
+
+A field can be annotated as a DocArray dataclass. In this case, the nested structure from the latter dataclass is copied to the former's `.chunks`. To select a deeply nested field, use the following syntax:
+
+```text
+@.[field1, field2, ...].[nested_field1, nested_field2, ...]
+```
+
+For example,
+
+```{code-block} python
+---
+emphasize-lines: 23
+---
+from docarray import dataclass, Document, DocumentArray
+from docarray.typing import Image, Text
+
+
+@dataclass
+class BannerDoc:
+    description: Text = 'this is a test empty image'
+    banner: Image = 'test-1.jpeg'
+
+
+@dataclass
+class ColumnArticle:
+    featured: BannerDoc
+    description: Text = 'this is a column article'
+    website: str = 'https://jina.ai'
+
+
+c1 = ColumnArticle(featured=BannerDoc(banner='test-1.jpeg'))
+c2 = ColumnArticle(featured=BannerDoc(banner='test-2.jpeg'))
+
+da = DocumentArray([Document(c1), Document(c2)])
+
+for d in da['@.[featured].[banner]']:
+    print(d.uri)
+```
+
+```text
+test-1.jpeg
+test-2.jpeg
+```
diff --git a/docs/fundamentals/dataclass/construct.md b/docs/fundamentals/dataclass/construct.md
@@ -21,6 +21,8 @@ class MyMultiModalDoc:
 m = MyMultiModalDoc(avatar='test-1.jpeg', description='hello, world')
 ```
 
+**Each field is a modality.** The above example contains two modalities: image and text. 
+
 To convert it into a `Document` object, simply:
 
 ```python