Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions docarray/array/storage/qdrant/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
PointsList,
PointStruct,
HnswConfigDiff,
VectorParams,
)

from docarray import Document
Expand Down Expand Up @@ -133,11 +134,11 @@ def _initialize_qdrant_schema(self):
full_scan_threshold=self._config.full_scan_threshold,
m=self._config.m,
)
vectors_config = VectorParams(size=self._n_dim, distance=Distance.COSINE)
self.client.http.collections_api.create_collection(
self.collection_name,
CreateCollection(
vector_size=self.n_dim,
distance=self.distance,
collection_name=self.collection_name,
create_collection=CreateCollection(
vectors=vectors_config,
hnsw_config=hnsw_config,
),
)
Expand Down Expand Up @@ -175,10 +176,13 @@ def _get_offset2ids_meta(self) -> List[str]:
).result.payload['offset2id']

def _update_offset2ids_meta(self):
from qdrant_client.http.models import Distance, VectorParams

vectors_config = VectorParams(size=100, distance=Distance.COSINE)
if not self._collection_exists(self.collection_name_meta):
self.client.http.collections_api.create_collection(
self.collection_name_meta,
CreateCollection(vector_size=1, distance=Distance.COSINE),
CreateCollection(vectors=vectors_config),
)

self.client.http.points_api.upsert_points(
Expand Down
46 changes: 26 additions & 20 deletions docarray/document/mixins/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,28 +43,34 @@ def load_uri_to_audio_tensor(self: 'T') -> 'T':

:return: Document itself after processed
"""
ifile = wave.open(
with wave.open(
self.uri
) #: note wave is Python built-in module https://docs.python.org/3/library/wave.html
samples = ifile.getnframes()
audio = ifile.readframes(samples)
) as ifile: #: note wave is Python built-in module https://docs.python.org/3/library/wave.html
samples = ifile.getnframes()
audio = ifile.readframes(samples)

# Convert buffer to float32 using NumPy
audio_as_np_int16 = np.frombuffer(audio, dtype=np.int16)
audio_as_np_float32 = audio_as_np_int16.astype(np.float32)
# Convert buffer to float32 using NumPy
audio_as_np_int16 = np.frombuffer(audio, dtype=np.int16)
audio_as_np_float32 = audio_as_np_int16.astype(np.float32)

# Normalise float32 array so that values are between -1.0 and +1.0
max_int16 = 2**15
audio_normalised = audio_as_np_float32 / max_int16
# Normalise float32 array so that values are between -1.0 and +1.0
max_int16 = 2**15
audio_normalised = audio_as_np_float32 / max_int16

channels = ifile.getnchannels()
if channels == 2:
# 1 for mono, 2 for stereo
audio_stereo = np.empty((int(len(audio_normalised) / channels), channels))
audio_stereo[:, 0] = audio_normalised[range(0, len(audio_normalised), 2)]
audio_stereo[:, 1] = audio_normalised[range(1, len(audio_normalised), 2)]
channels = ifile.getnchannels()
if channels == 2:
# 1 for mono, 2 for stereo
audio_stereo = np.empty(
(int(len(audio_normalised) / channels), channels)
)
audio_stereo[:, 0] = audio_normalised[
range(0, len(audio_normalised), 2)
]
audio_stereo[:, 1] = audio_normalised[
range(1, len(audio_normalised), 2)
]

self.tensor = audio_stereo
else:
self.tensor = audio_normalised
return self
self.tensor = audio_stereo
else:
self.tensor = audio_normalised
return self
4 changes: 2 additions & 2 deletions tests/unit/array/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ services:
AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
qdrant:
image: qdrant/qdrant:v0.7.0
image: qdrant/qdrant:v0.10.0
ports:
- "6333:6333"
ulimits: # Only required for tests, as there are a lot of collections created
Expand All @@ -33,4 +33,4 @@ services:

networks:
elastic:
name: elastic
name: elastic