Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 28 additions & 6 deletions docarray/index/backends/in_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,33 @@


class InMemoryExactNNIndex(BaseDocIndex, Generic[TSchema]):
def __init__(
    self,
    docs: Optional[DocList] = None,
    index_file_path: Optional[str] = None,
    **kwargs,
):
    """Initialize InMemoryExactNNIndex.

    :param docs: documents to use as the content of the index. When neither
        `docs` nor `index_file_path` is given, an empty `DocList` typed with
        the index schema is created.
    :param index_file_path: path of a binary file from which the documents
        are loaded via `DocList.load_binary` (for example a file written by
        `persist`).
    :param kwargs: forwarded unchanged to the base class constructor.
    :raises ValueError: if both `docs` (non-empty) and `index_file_path`
        are provided.
    """
    super().__init__(db_config=None, **kwargs)
    self._runtime_config = self.RuntimeConfig()

    # The two sources are mutually exclusive. `docs` is tested for
    # truthiness, so an empty DocList passed together with a path does not
    # raise; the file content is used in that case.
    if docs and index_file_path:
        raise ValueError(
            'Initialize `InMemoryExactNNIndex` with either `docs` or '
            '`index_file_path`, not both. Provide `docs` for a fresh index, or '
            '`index_file_path` to use an existing file.'
        )

    if index_file_path:
        # Restore the documents from disk, typed with this index's schema.
        doc_list_cls = DocList.__class_getitem__(
            cast(Type[BaseDoc], self._schema)
        )
        self._docs = doc_list_cls.load_binary(file=index_file_path)
    elif docs is not None:
        # Use the caller's DocList as-is (it may be empty).
        self._docs = docs
    else:
        # Nothing supplied: start from an empty, schema-typed DocList.
        self._docs = DocList.__class_getitem__(
            cast(Type[BaseDoc], self._schema)
        )()

def python_type_to_db_type(self, python_type: Type) -> Any:
"""Map python type to database type.
Expand Down Expand Up @@ -293,3 +311,7 @@ def _text_search_batched(
self, queries: Sequence[str], limit: int, search_field: str = ''
) -> _FindResultBatched:
raise NotImplementedError(f'{type(self)} does not support text search.')

def persist(self, file: str = 'in_memory_index.bin') -> None:
    """Write the documents held by this index to a binary file.

    :param file: path of the file to write; defaults to
        `'in_memory_index.bin'`.
    """
    stored_docs = self._docs
    stored_docs.save_binary(file=file)
9 changes: 9 additions & 0 deletions docs/user_guide/storing/index_in_memory.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,15 @@ doc_index.index(docs)
doc_index = InMemoryExactNNIndex[MyDoc](docs)
```

Additionally, you can persist your index to a binary file and later initialize a new index from that file:
```python
# Save your existing index as a binary file
doc_index.persist('docs.bin')

# Initialize a new document index using the saved binary file
new_doc_index = InMemoryExactNNIndex[MyDoc](index_file_path='docs.bin')
```

## Configuration

This section lays out the configurations and options that are specific to [InMemoryExactNNIndex][docarray.index.backends.in_memory.InMemoryExactNNIndex].
Expand Down
15 changes: 15 additions & 0 deletions tests/index/in_memory/test_in_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,18 @@ def test_concatenated_queries(doc_index):
docs, scores = doc_index.execute_query(q)

assert len(docs) == 4


def test_save_and_load(doc_index, tmpdir):
    """An index persisted to a file and reloaded from it stays searchable and complete."""
    expected_count = doc_index.num_docs()

    # Round trip: write the index to disk, then build a new index from that file.
    index_path = str(tmpdir / 'docs.bin')
    doc_index.persist(index_path)
    restored_index = InMemoryExactNNIndex[SchemaDoc](index_file_path=index_path)

    matches, match_scores = restored_index.find(
        np.ones(10), search_field='tensor', limit=5
    )

    assert len(matches) == 5
    assert len(match_scores) == 5
    assert restored_index.num_docs() == expected_count