-
Notifications
You must be signed in to change notification settings - Fork 134
Add new golden files with VARIANT values #6140
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,7 @@ | |
| //! | ||
| //! This crate defines deterministic rows for two tuple shapes: | ||
| //! - `GoldenRow` (Tup65) for the large, optimized format. | ||
| //! - `GoldenRowVariant` (Tup25) for a row containing only variant values. | ||
| //! - `GoldenRowSmall` (Tup8) for the legacy/rkyv default layout. | ||
| //! | ||
| //! The `golden-files/` directory contains serialized batches generated by the | ||
|
|
@@ -18,8 +19,8 @@ use dbsp::utils::Tup8; | |
| use uuid::Uuid as RawUuid; | ||
|
|
||
| use feldera_sqllib::{ | ||
| to_array, to_map, Array, ByteArray, Date, GeoPoint, LongInterval, Map, ShortInterval, | ||
| SqlDecimal, SqlString, Time, Timestamp, Uuid, Variant, | ||
| to_array, to_map, Array, ByteArray, Date, DynamicDecimal, GeoPoint, LongInterval, Map, | ||
| ShortInterval, SqlDecimal, SqlString, Time, Timestamp, Uuid, Variant, | ||
| }; | ||
|
|
||
| type Opt<T> = Option<T>; | ||
|
|
@@ -36,6 +37,14 @@ feldera_macros::declare_tuple! { | |
| T40, T41, T42, T43, T44, T45, T46, T47, T48, T49, | ||
| T50, T51, T52, T53, T54, T55, T56, T57, T58, T59, | ||
| T60, T61, T62, T63, T64 | ||
| > | ||
| } | ||
|
|
||
| feldera_macros::declare_tuple! { | ||
| Tup25< | ||
| T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, | ||
| T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, | ||
| T20, T21, T22, T23, T24 | ||
| > | ||
| } | ||
|
|
||
|
|
@@ -111,6 +120,35 @@ pub type GoldenRow = Tup65< | |
| // None optimization added to larger tups with the v4 storage format. | ||
| pub type GoldenRowSmall = Tup8<u64, bool, i32, i64, F32, SqlString, Opt<i32>, Opt<ByteArray>>; | ||
|
|
||
| // There are 25 kinds of variant | ||
| pub type GoldenRowVariant = Tup25< | ||
| Variant, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we make some of them `Option` to get more coverage (maybe every 2nd one?), since it changes the layout?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are no option Variant values. A NULL is stored as a NULL value at runtime. |
||
| Variant, | ||
| Variant, | ||
| Variant, | ||
| Variant, | ||
| Variant, | ||
| Variant, | ||
| Variant, | ||
| Variant, | ||
| Variant, | ||
| Variant, | ||
| Variant, | ||
| Variant, | ||
| Variant, | ||
| Variant, | ||
| Variant, | ||
| Variant, | ||
| Variant, | ||
| Variant, | ||
| Variant, | ||
| Variant, | ||
| Variant, | ||
| Variant, | ||
| Variant, | ||
| Variant, | ||
| >; | ||
|
|
||
| pub const DEFAULT_ROWS: usize = 256; | ||
|
|
||
| pub fn golden_file_directory() -> PathBuf { | ||
|
|
@@ -369,6 +407,96 @@ pub fn golden_row(row: usize) -> GoldenRow { | |
| ) | ||
| } | ||
|
|
||
| pub fn golden_row_variant(row: usize) -> GoldenRowVariant { | ||
| let row_u64 = 0x0101_0000_0000_0000u64.wrapping_add(row as u64); | ||
|
|
||
| // This has to be monotone | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We're not building any nested variants here? Should we?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are no nested variants - we are building one of each of the possible types.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There should be full coverage of the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Typo: |
||
| let i8_v = Variant::TinyInt(0x11i8.wrapping_add(row as i8)); | ||
| let i16_v = Variant::SmallInt(0x2222i16.wrapping_add(row as i16)); | ||
| let i32_v = Variant::Int(0x3333_3333i32.wrapping_add(row as i32)); | ||
| let i64_v = Variant::BigInt(0x4444_4444_4444_4444i64.wrapping_add(row as i64)); | ||
| let u8_v = Variant::UTinyInt(0x66u8.wrapping_add(row as u8)); | ||
| let u16_v = Variant::USmallInt(0x7777u16.wrapping_add(row as u16)); | ||
| let u32_v = Variant::UInt(0x8888_8888u32.wrapping_add(row as u32)); | ||
| let u64_v = Variant::UBigInt(row_u64); | ||
| let f32_v = Variant::Real(F32::from(1234.25 + row as f32)); | ||
| let f64_v = Variant::Double(F64::from(5678.75 + row as f64)); | ||
| let string_v = Variant::String(SqlString::from(format!("string-{row:04x}"))); | ||
|
|
||
| let bytes = [0xBA, 0x5E, (row & 0xFF) as u8, ((row >> 8) & 0xFF) as u8]; | ||
| let byte_array_v = Variant::Binary(ByteArray::new(&bytes)); | ||
| let geo_point_v = Variant::Geometry(GeoPoint::new(1000.25 + row as f64, -1000.75 - row as f64)); | ||
| let short_interval_v = Variant::ShortInterval(ShortInterval::from_microseconds( | ||
| 0x1111_0000i64.wrapping_add(row as i64), | ||
| )); | ||
| let long_interval_v = Variant::LongInterval(LongInterval::from_months( | ||
| 0x2222i32.wrapping_add(row as i32), | ||
| )); | ||
| let timestamp_v = Variant::Timestamp(Timestamp::from_microseconds( | ||
| 1_600_000_000_000i64.wrapping_add(row as i64), | ||
| )); | ||
| let date_v = Variant::Date(Date::from_days(18_000i32.wrapping_add(row as i32))); | ||
| let time_v = Variant::Time(Time::from_nanoseconds( | ||
| 43_200_000_000_000u64.wrapping_add((row as u64) % 1_000_000), | ||
| )); | ||
| let uuid_v = Variant::Uuid(Uuid::from(RawUuid::from_u128( | ||
| 0x1111_2222_3333_4444_5555_6666_0000_0000u128 | row as u128, | ||
| ))); | ||
| let dec = Dec12_2::for_i32(12_345i32.wrapping_add(row as i32)); | ||
| let dd: DynamicDecimal = dec.into(); | ||
| let dec12_2_v = Variant::SqlDecimal((dd.significand(), dd.exponent())); | ||
|
|
||
| let array_v = Variant::Array(to_array(vec![ | ||
| Variant::String(SqlString::from("array-base")), | ||
| Variant::String(SqlString::from(format!("array-{row:04x}"))), | ||
| ])); | ||
|
|
||
| let map_v = { | ||
| let mut map = BTreeMap::new(); | ||
| map.insert( | ||
| Variant::String(SqlString::from("k1")), | ||
| Variant::String(SqlString::from(format!("v1-{row:04x}"))), | ||
| ); | ||
| map.insert( | ||
| Variant::String(SqlString::from("k2")), | ||
| Variant::String(SqlString::from(format!("v2-{row:04x}"))), | ||
| ); | ||
| Variant::Map(to_map(map)) | ||
| }; | ||
| let null_v = Variant::SqlNull; | ||
| let vnull_v = Variant::VariantNull; | ||
| let bool_v = Variant::Boolean(row.is_multiple_of(2)); | ||
|
|
||
| // Values have to be monotone, so we start with u32 | ||
| Tup25( | ||
| u32_v, | ||
| i8_v, | ||
| i16_v, | ||
| i32_v, | ||
| i64_v, | ||
| u8_v, | ||
| u16_v, | ||
| u64_v, | ||
| f32_v, | ||
| f64_v, | ||
| dec12_2_v, | ||
| string_v, | ||
| byte_array_v, | ||
| geo_point_v, | ||
| short_interval_v, | ||
| long_interval_v, | ||
| timestamp_v, | ||
| date_v, | ||
| time_v, | ||
| uuid_v, | ||
| array_v, | ||
| map_v, | ||
| null_v, | ||
| vnull_v, | ||
| bool_v, | ||
| ) | ||
| } | ||
|
|
||
| pub fn golden_row_small(row: usize) -> GoldenRowSmall { | ||
| let row_u64 = 0x0202_0000_0000_0000u64.wrapping_add(row as u64); | ||
|
|
||
|
|
@@ -399,8 +527,8 @@ mod tests { | |
| use std::io; | ||
|
|
||
| use super::{ | ||
| buffer_cache, golden_aux, golden_file_directory, golden_row, golden_row_small, GoldenRow, | ||
| GoldenRowSmall, | ||
| buffer_cache, golden_aux, golden_file_directory, golden_row, golden_row_small, | ||
| golden_row_variant, GoldenRow, GoldenRowSmall, GoldenRowVariant, | ||
| }; | ||
| use dbsp::dynamic::{DowncastTrait, DynData, Erase}; | ||
| use dbsp::storage::backend::{StorageBackend, StoragePath}; | ||
|
|
@@ -500,17 +628,25 @@ mod tests { | |
| } | ||
| println!("processing {}", file_name); | ||
|
|
||
| let is_small = file_name.contains("-small"); | ||
| let storage_path = StoragePath::from(file_name); | ||
| if is_small { | ||
| let storage_path = StoragePath::from(file_name.clone()); | ||
| if file_name.contains("-small") { | ||
| validate_rows::<GoldenRowSmall>( | ||
| &*storage_backend, | ||
| storage_path, | ||
| golden_row_small, | ||
| golden_aux, | ||
| ); | ||
| } else { | ||
| } else if file_name.contains("-large") { | ||
| validate_rows::<GoldenRow>(&*storage_backend, storage_path, golden_row, golden_aux); | ||
| } else if file_name.contains("-variant") { | ||
| validate_rows::<GoldenRowVariant>( | ||
| &*storage_backend, | ||
| storage_path, | ||
| golden_row_variant, | ||
| golden_aux, | ||
| ); | ||
| } else { | ||
| panic!("Unexpected file name {}", file_name); | ||
| } | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: there are actually 25 `Variant` variants (`SqlNull`, `VariantNull`, `Boolean`, `TinyInt`, `SmallInt`, `Int`, `BigInt`, `UTinyInt`, `USmallInt`, `UInt`, `UBigInt`, `Real`, `Double`, `SqlDecimal`, `String`, `Date`, `Time`, `Timestamp`, `ShortInterval`, `LongInterval`, `Binary`, `Geometry`, `Uuid`, `Array`, `Map`) — which matches `Tup25` and the 25 fields you build in `golden_row_variant`. The comment understates by one.