Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/dbsp/src/storage/file/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1409,7 +1409,7 @@ where
if let Some(prev0) = &self.prev0 {
debug_assert!(
&**prev0 < key0,
"can't write {prev0:?} then {key0:?} to column 0",
"can't write {prev0:?} >= {key0:?} to column 0",
);
}
self.prev0 = Some(clone_box(key0));
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
14 changes: 11 additions & 3 deletions crates/storage-test-compat/src/bin/golden-writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,23 @@ use dbsp::storage::file::Factories;
use feldera_types::config::{StorageConfig, StorageOptions};

use storage_test_compat::{
buffer_cache, golden_aux, golden_row, golden_row_small, storage_base_and_path, GoldenRow,
GoldenRowSmall, DEFAULT_ROWS,
buffer_cache, golden_aux, golden_row, golden_row_small, golden_row_variant,
storage_base_and_path, GoldenRow, GoldenRowSmall, GoldenRowVariant, DEFAULT_ROWS,
};

/// The row shape a golden file is generated for.
///
/// `main` iterates over every shape and dispatches on it to pick the row type
/// and row generator passed to `write_golden`.
#[derive(Clone, Copy)]
enum GoldenSize {
    Large,
    Small,
    Variant,
}

impl GoldenSize {
    /// Returns the lowercase label of this shape: `"large"`, `"small"` or
    /// `"variant"`.
    fn suffix(self) -> &'static str {
        match self {
            Self::Large => "large",
            Self::Small => "small",
            Self::Variant => "variant",
        }
    }
}
Expand Down Expand Up @@ -123,7 +125,7 @@ where

fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut config = Config::default();
for size in [GoldenSize::Large, GoldenSize::Small] {
for size in [GoldenSize::Large, GoldenSize::Small, GoldenSize::Variant] {
config.size = size;
for compression in [None, Some(Compression::Snappy)] {
config.compression = compression;
Expand All @@ -144,6 +146,12 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
config.compression,
golden_row_small,
)?,
GoldenSize::Variant => write_golden::<GoldenRowVariant>(
&output,
config.rows,
config.compression,
golden_row_variant,
)?,
}
}
}
Expand Down
152 changes: 144 additions & 8 deletions crates/storage-test-compat/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
//!
//! This crate defines deterministic rows for three tuple shapes:
//! - `GoldenRow` (Tup65) for the large, optimized format.
//! - `GoldenRowVariant` (Tup25) for a row containing only variant values.
//! - `GoldenRowSmall` (Tup8) for the legacy/rkyv default layout.
//!
//! The `golden-files/` directory contains serialized batches generated by the
Expand All @@ -18,8 +19,8 @@ use dbsp::utils::Tup8;
use uuid::Uuid as RawUuid;

use feldera_sqllib::{
to_array, to_map, Array, ByteArray, Date, GeoPoint, LongInterval, Map, ShortInterval,
SqlDecimal, SqlString, Time, Timestamp, Uuid, Variant,
to_array, to_map, Array, ByteArray, Date, DynamicDecimal, GeoPoint, LongInterval, Map,
ShortInterval, SqlDecimal, SqlString, Time, Timestamp, Uuid, Variant,
};

type Opt<T> = Option<T>;
Expand All @@ -36,6 +37,14 @@ feldera_macros::declare_tuple! {
T40, T41, T42, T43, T44, T45, T46, T47, T48, T49,
T50, T51, T52, T53, T54, T55, T56, T57, T58, T59,
T60, T61, T62, T63, T64
>
}

// Declares `Tup25`, the 25-field tuple used as the shape of `GoldenRowVariant`.
feldera_macros::declare_tuple! {
Tup25<
T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
T20, T21, T22, T23, T24
>
}

Expand Down Expand Up @@ -111,6 +120,35 @@ pub type GoldenRow = Tup65<
// None optimization added to larger tups with the v4 storage format.
pub type GoldenRowSmall = Tup8<u64, bool, i32, i64, F32, SqlString, Opt<i32>, Opt<ByteArray>>;

// There are 25 kinds of variant

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: there are actually 25 Variant variants (SqlNull, VariantNull, Boolean, TinyInt, SmallInt, Int, BigInt, UTinyInt, USmallInt, UInt, UBigInt, Real, Double, SqlDecimal, String, Date, Time, Timestamp, ShortInterval, LongInterval, Binary, Geometry, Uuid, Array, Map) — which matches Tup25 and the 25 fields you build in golden_row_variant. The comment understates by one.

pub type GoldenRowVariant = Tup25<
Variant,

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we make some of them Option to get more coverage (maybe every 2nd one?), it changes the layout

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are no option Variant values. A NULL is stored as a NULL value at runtime.

Variant,
Variant,
Variant,
Variant,
Variant,
Variant,
Variant,
Variant,
Variant,
Variant,
Variant,
Variant,
Variant,
Variant,
Variant,
Variant,
Variant,
Variant,
Variant,
Variant,
Variant,
Variant,
Variant,
Variant,
>;

pub const DEFAULT_ROWS: usize = 256;

pub fn golden_file_directory() -> PathBuf {
Expand Down Expand Up @@ -369,6 +407,96 @@ pub fn golden_row(row: usize) -> GoldenRow {
)
}

pub fn golden_row_variant(row: usize) -> GoldenRowVariant {
let row_u64 = 0x0101_0000_0000_0000u64.wrapping_add(row as u64);

// This has to be monotone

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we're not building any nested variants here? should we?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are no nested variants - we are building one of each of the possible types.
The only array supported is VARIANT ARRAY.
Similar for MAP.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There should be full coverage of the Variant enum type in this test.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo: // This has the me monotone — should be // This has to be monotone (or just merge with the comment a few lines below).

let i8_v = Variant::TinyInt(0x11i8.wrapping_add(row as i8));
let i16_v = Variant::SmallInt(0x2222i16.wrapping_add(row as i16));
let i32_v = Variant::Int(0x3333_3333i32.wrapping_add(row as i32));
let i64_v = Variant::BigInt(0x4444_4444_4444_4444i64.wrapping_add(row as i64));
let u8_v = Variant::UTinyInt(0x66u8.wrapping_add(row as u8));
let u16_v = Variant::USmallInt(0x7777u16.wrapping_add(row as u16));
let u32_v = Variant::UInt(0x8888_8888u32.wrapping_add(row as u32));
let u64_v = Variant::UBigInt(row_u64);
let f32_v = Variant::Real(F32::from(1234.25 + row as f32));
let f64_v = Variant::Double(F64::from(5678.75 + row as f64));
let string_v = Variant::String(SqlString::from(format!("string-{row:04x}")));

let bytes = [0xBA, 0x5E, (row & 0xFF) as u8, ((row >> 8) & 0xFF) as u8];
let byte_array_v = Variant::Binary(ByteArray::new(&bytes));
let geo_point_v = Variant::Geometry(GeoPoint::new(1000.25 + row as f64, -1000.75 - row as f64));
let short_interval_v = Variant::ShortInterval(ShortInterval::from_microseconds(
0x1111_0000i64.wrapping_add(row as i64),
));
let long_interval_v = Variant::LongInterval(LongInterval::from_months(
0x2222i32.wrapping_add(row as i32),
));
let timestamp_v = Variant::Timestamp(Timestamp::from_microseconds(
1_600_000_000_000i64.wrapping_add(row as i64),
));
let date_v = Variant::Date(Date::from_days(18_000i32.wrapping_add(row as i32)));
let time_v = Variant::Time(Time::from_nanoseconds(
43_200_000_000_000u64.wrapping_add((row as u64) % 1_000_000),
));
let uuid_v = Variant::Uuid(Uuid::from(RawUuid::from_u128(
0x1111_2222_3333_4444_5555_6666_0000_0000u128 | row as u128,
)));
let dec = Dec12_2::for_i32(12_345i32.wrapping_add(row as i32));
let dd: DynamicDecimal = dec.into();
let dec12_2_v = Variant::SqlDecimal((dd.significand(), dd.exponent()));

let array_v = Variant::Array(to_array(vec![
Variant::String(SqlString::from("array-base")),
Variant::String(SqlString::from(format!("array-{row:04x}"))),
]));

let map_v = {
let mut map = BTreeMap::new();
map.insert(
Variant::String(SqlString::from("k1")),
Variant::String(SqlString::from(format!("v1-{row:04x}"))),
);
map.insert(
Variant::String(SqlString::from("k2")),
Variant::String(SqlString::from(format!("v2-{row:04x}"))),
);
Variant::Map(to_map(map))
};
let null_v = Variant::SqlNull;
let vnull_v = Variant::VariantNull;
let bool_v = Variant::Boolean(row.is_multiple_of(2));

// Values have to be monotone, so we start with u32
Tup25(
u32_v,
i8_v,
i16_v,
i32_v,
i64_v,
u8_v,
u16_v,
u64_v,
f32_v,
f64_v,
dec12_2_v,
string_v,
byte_array_v,
geo_point_v,
short_interval_v,
long_interval_v,
timestamp_v,
date_v,
time_v,
uuid_v,
array_v,
map_v,
null_v,
vnull_v,
bool_v,
)
}

pub fn golden_row_small(row: usize) -> GoldenRowSmall {
let row_u64 = 0x0202_0000_0000_0000u64.wrapping_add(row as u64);

Expand Down Expand Up @@ -399,8 +527,8 @@ mod tests {
use std::io;

use super::{
buffer_cache, golden_aux, golden_file_directory, golden_row, golden_row_small, GoldenRow,
GoldenRowSmall,
buffer_cache, golden_aux, golden_file_directory, golden_row, golden_row_small,
golden_row_variant, GoldenRow, GoldenRowSmall, GoldenRowVariant,
};
use dbsp::dynamic::{DowncastTrait, DynData, Erase};
use dbsp::storage::backend::{StorageBackend, StoragePath};
Expand Down Expand Up @@ -500,17 +628,25 @@ mod tests {
}
println!("processing {}", file_name);

let is_small = file_name.contains("-small");
let storage_path = StoragePath::from(file_name);
if is_small {
let storage_path = StoragePath::from(file_name.clone());
if file_name.contains("-small") {
validate_rows::<GoldenRowSmall>(
&*storage_backend,
storage_path,
golden_row_small,
golden_aux,
);
} else {
} else if file_name.contains("-large") {
validate_rows::<GoldenRow>(&*storage_backend, storage_path, golden_row, golden_aux);
} else if file_name.contains("-variant") {
validate_rows::<GoldenRowVariant>(
&*storage_backend,
storage_path,
golden_row_variant,
golden_aux,
);
} else {
panic!("Unexpected file name {}", file_name);
}
}

Expand Down
4 changes: 2 additions & 2 deletions docs.feldera.com/docs/sql/aggregates.md
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ The following window aggregate functions are supported:
<th>Description</th>
</tr>
<tr>
<td><a id="window-avg"></a>AVG(numeric)</td>
<td><a id="window-avg"></a><code>AVG(</code><em>numeric</em><code>)</code></td>
<td>Returns the average (arithmetic mean) of <em>numeric</em> across all values in window</td>
</tr>
<tr>
Expand Down Expand Up @@ -217,7 +217,7 @@ The following window aggregate functions are supported:
`ROW_NUMBER` is currently only supported if the window is used to compute a TopK aggregate.</td>
</tr>
<tr>
<td><a id="window-sum"></a><code>SUM</code>(<em>numeric</em>)</td>
<td><a id="window-sum"></a><code>SUM(</code><em>numeric</em><code>)</code></td>
<td>Returns the sum of <em>numeric</em> across all values in window</td>
</tr>
</table>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import org.dbsp.sqlCompiler.ir.aggregate.DBSPFold;
import org.dbsp.sqlCompiler.ir.aggregate.DBSPMinMax;
import org.dbsp.sqlCompiler.ir.expression.DBSPApplyExpression;
import org.dbsp.sqlCompiler.ir.expression.DBSPConstructorExpression;
import org.dbsp.sqlCompiler.ir.statement.DBSPStaticItem;
import org.dbsp.util.HashString;
import org.dbsp.util.NullPrintStream;
Expand Down