Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions docarray/array/doc_list/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,11 +546,11 @@ def to_dataframe(self) -> 'pd.DataFrame':
def _stream_header(self) -> bytes:
# Binary format for streaming case

# V1 DocList streaming serialization format
# | 1 byte | 8 bytes | 4 bytes | variable | 4 bytes | variable ...
# V2 DocList streaming serialization format
# | 1 byte | 8 bytes | 4 bytes | variable(docarray v2) | 4 bytes | variable(docarray v2) ...

# 1 byte (uint8)
version_byte = b'\x01'
version_byte = b'\x02'
# 8 bytes (uint64)
num_docs_as_bytes = len(self).to_bytes(8, 'big', signed=False)
return version_byte + num_docs_as_bytes
Expand Down Expand Up @@ -597,6 +597,12 @@ def _load_binary_all(
from docarray.utils._internal.progress_bar import _get_progressbar

# 1 byte (uint8)
version_num = int.from_bytes(d[0:1], 'big', signed=False)
if version_num != 2:
raise ValueError(
f'Unsupported version number {version_num} in binary format, expected 2'
)

# 8 bytes (uint64)
num_docs = int.from_bytes(d[1:9], 'big', signed=False)

Expand Down Expand Up @@ -655,6 +661,14 @@ def _load_binary_stream(
with file_ctx as f:
version_numdocs_lendoc0 = f.read(9)
# 1 byte (uint8)
version_num = int.from_bytes(
version_numdocs_lendoc0[0:1], 'big', signed=False
)
if version_num != 2:
raise ValueError(
f'Unsupported version number {version_num} in binary format, expected 2'
)

# 8 bytes (uint64)
num_docs = int.from_bytes(version_numdocs_lendoc0[1:9], 'big', signed=False)

Expand Down