From 344bfd5192a8563fc22dc6416e02a78060428a97 Mon Sep 17 00:00:00 2001 From: JAYICE <49588871+JayiceZ@users.noreply.github.com> Date: Wed, 13 Apr 2022 21:56:33 +0800 Subject: [PATCH 1/5] executor: use DataChunkbuilder to refactor ValuesExecutor (#623) * use DataChunkbuilder to refactor ValuesExecutor Signed-off-by: Jayice <1185430411@qq.com> * code format Signed-off-by: Jayice <1185430411@qq.com> * code style Signed-off-by: Jayice <1185430411@qq.com> Signed-off-by: Shmiwy --- src/executor/values.rs | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/src/executor/values.rs b/src/executor/values.rs index 8831c27ce..469a0063b 100644 --- a/src/executor/values.rs +++ b/src/executor/values.rs @@ -3,7 +3,7 @@ use itertools::izip; use super::*; -use crate::array::{ArrayBuilderImpl, DataChunk}; +use crate::array::{DataChunk, DataChunkBuilder}; use crate::binder::BoundExpr; use crate::types::{DataType, DataTypeKind}; @@ -18,18 +18,12 @@ pub struct ValuesExecutor { impl ValuesExecutor { #[try_stream(boxed, ok = DataChunk, error = ExecutorError)] pub async fn execute(self) { - for chunk in self.values.chunks(PROCESSING_WINDOW_SIZE) { - type Type = DataTypeKind; - // Create array builders. - let column_types = &self.column_types; - let mut builders = column_types - .iter() - .map(|ty| ArrayBuilderImpl::with_capacity(chunk.len(), ty)) - .collect_vec(); - // Push value into the builder. - let dummy = DataChunk::single(0); - for row in chunk { - for (expr, column_type, builder) in izip!(row, column_types, &mut builders) { + type Type = DataTypeKind; + let mut builder = DataChunkBuilder::new(self.column_types.iter(), PROCESSING_WINDOW_SIZE); + let dummy = DataChunk::single(0); + for row in self.values { + let row_data: Result, _> = izip!(row, &self.column_types) + .map(|(expr, column_type)| { let value = expr.eval(&dummy)?; let size = match column_type.kind { Type::Varchar(size) => size, @@ -51,11 +45,15 @@ impl ValuesExecutor { }); } } - builder.push(&value.get(0)); - } + Ok(value.get(0)) + }) + .collect(); + if let Some(chunk) = builder.push_row(row_data?) { + yield chunk; } - // Finish build and yield chunk. - yield builders.into_iter().collect(); + } + if let Some(chunk) = builder.take() { + yield chunk; } } } From 55a36b43390b0d94f4b3da0b3699ba1e8457ff3f Mon Sep 17 00:00:00 2001 From: Shmiwy Date: Wed, 13 Apr 2022 19:24:16 +0800 Subject: [PATCH 2/5] fix: make header correct when using group by(#538) Signed-off-by: Shmiwy --- src/binder/expression/mod.rs | 20 +++++++++++++++++++ src/optimizer/plan_nodes/logical_aggregate.rs | 20 +++++++++++++++---- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/src/binder/expression/mod.rs b/src/binder/expression/mod.rs index 205535b24..83108dee9 100644 --- a/src/binder/expression/mod.rs +++ b/src/binder/expression/mod.rs @@ -133,6 +133,26 @@ impl BoundExpr { visitor.visit_expr(self); visitor.0 } + + pub fn format_name(&self, child_schema: &Vec) -> String { + println!("{:?}", self); + match self { + Self::Constant(DataValue::Int64(num)) => format!("{}", num), + Self::Constant(DataValue::Int32(num)) => format!("{}", num), + Self::Constant(DataValue::Float64(num)) => format!("{}", num), + Self::BinaryOp(expr) => { + let left_expr_name = expr.left_expr.format_name(child_schema); + let right_expr_name = expr.right_expr.format_name(child_schema); + format!("{}{}{}", left_expr_name, expr.op, right_expr_name) + } + Self::UnaryOp(expr) => { + let expr_name = expr.expr.format_name(child_schema); + format!("{}{}", expr.op, expr_name) + } + Self::InputRef(expr) => child_schema[expr.index].name().to_string(), + _ => "".to_string(), + } + } } impl std::fmt::Debug for BoundExpr { diff --git a/src/optimizer/plan_nodes/logical_aggregate.rs b/src/optimizer/plan_nodes/logical_aggregate.rs index fe277e5c0..066130611 100644 --- a/src/optimizer/plan_nodes/logical_aggregate.rs +++ b/src/optimizer/plan_nodes/logical_aggregate.rs @@ -69,13 +69,25 @@ impl_plan_tree_node_for_unary!(LogicalAggregate); impl PlanNode for LogicalAggregate { fn schema(&self) -> Vec { let child_schema = self.child.schema(); + let mut out_names = vec![]; let mut input_refs = vec![]; - self.group_keys - .iter() - .for_each(|expr| expr.resolve_input_ref(&mut input_refs)); + self.group_keys.iter().for_each(|expr| { + out_names.push(expr.format_name(&child_schema)); + expr.resolve_input_ref(&mut input_refs) + }); input_refs .iter() - .map(|expr| child_schema[expr.index].clone()) + .map(|expr| { + if expr.index < out_names.len() { + ColumnDesc::new( + expr.return_type.clone(), + out_names[expr.index].clone(), + false, + ) + } else { + child_schema[expr.index].clone() + } + }) .chain(self.agg_calls.iter().map(|agg_call| { agg_call .return_type From 7399c8826edca68a0b6962d64518acdc55127b62 Mon Sep 17 00:00:00 2001 From: Shmiwy Date: Wed, 13 Apr 2022 20:32:26 +0800 Subject: [PATCH 3/5] fix: map group_keys to ColumnDesc Signed-off-by: Shmiwy wyf000219@gamil.com Signed-off-by: Shmiwy --- src/binder/expression/mod.rs | 1 - src/optimizer/plan_nodes/logical_aggregate.rs | 25 ++++++------------- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/src/binder/expression/mod.rs b/src/binder/expression/mod.rs index 83108dee9..8e3fbe206 100644 --- a/src/binder/expression/mod.rs +++ b/src/binder/expression/mod.rs @@ -135,7 +135,6 @@ impl BoundExpr { } pub fn format_name(&self, child_schema: &Vec) -> String { - println!("{:?}", self); match self { Self::Constant(DataValue::Int64(num)) => format!("{}", num), Self::Constant(DataValue::Int32(num)) => format!("{}", num), diff --git a/src/optimizer/plan_nodes/logical_aggregate.rs b/src/optimizer/plan_nodes/logical_aggregate.rs index 066130611..0303196f2 100644 --- a/src/optimizer/plan_nodes/logical_aggregate.rs +++ b/src/optimizer/plan_nodes/logical_aggregate.rs @@ -69,24 +69,15 @@ impl_plan_tree_node_for_unary!(LogicalAggregate); impl PlanNode for LogicalAggregate { fn schema(&self) -> Vec { let child_schema = self.child.schema(); - let mut out_names = vec![]; - let mut input_refs = vec![]; - self.group_keys.iter().for_each(|expr| { - out_names.push(expr.format_name(&child_schema)); - expr.resolve_input_ref(&mut input_refs) - }); - input_refs + self.group_keys .iter() - .map(|expr| { - if expr.index < out_names.len() { - ColumnDesc::new( - expr.return_type.clone(), - out_names[expr.index].clone(), - false, - ) - } else { - child_schema[expr.index].clone() - } + .enumerate() + .map(|(index, expr)| { + ColumnDesc::new( + child_schema[index].datatype().clone(), + expr.format_name(&child_schema), + false, + ) }) .chain(self.agg_calls.iter().map(|agg_call| { agg_call From cf9480d052605a9eb59d084c1d3b291a39cddca2 Mon Sep 17 00:00:00 2001 From: Shmiwy Date: Sun, 17 Apr 2022 16:06:55 +0800 Subject: [PATCH 4/5] fix: use expr's return type instead of child schema's Signed-off-by: Shmiwy wyf000219@gamil.com Signed-off-by: Shmiwy --- src/binder/expression/mod.rs | 85 ++++++++++++++++++- src/optimizer/plan_nodes/logical_aggregate.rs | 5 +- 2 files changed, 86 insertions(+), 4 deletions(-) diff --git a/src/binder/expression/mod.rs b/src/binder/expression/mod.rs index 8e3fbe206..df2ed18fd 100644 --- a/src/binder/expression/mod.rs +++ b/src/binder/expression/mod.rs @@ -138,7 +138,7 @@ impl BoundExpr { match self { Self::Constant(DataValue::Int64(num)) => format!("{}", num), Self::Constant(DataValue::Int32(num)) => format!("{}", num), - Self::Constant(DataValue::Float64(num)) => format!("{}", num), + Self::Constant(DataValue::Float64(num)) => format!("{:.3}", num), Self::BinaryOp(expr) => { let left_expr_name = expr.left_expr.format_name(child_schema); let right_expr_name = expr.right_expr.format_name(child_schema); @@ -301,3 +301,86 @@ impl From<&Value> for DataValue { } } } + +#[cfg(test)] +mod tests { + use sqlparser::ast::{BinaryOperator, UnaryOperator}; + + use crate::binder::{BoundBinaryOp, BoundExpr, BoundInputRef, BoundUnaryOp}; + use crate::catalog::ColumnDesc; + use crate::types::{DataType, DataTypeKind, DataValue}; + + // test when BoundExpr is Constant + #[test] + fn test_format_name_constant() { + let expr = BoundExpr::Constant(DataValue::Int32(1_i32)); + assert_eq!("1", expr.format_name(&vec![])); + let expr = BoundExpr::Constant(DataValue::Int64(1_i64)); + assert_eq!("1", expr.format_name(&vec![])); + let expr = BoundExpr::Constant(DataValue::Float64(32.0_f64)); + assert_eq!("32.000", expr.format_name(&vec![])); + } + + // test when BoundExpr is UnaryOp(form like -a) + #[test] + fn test_format_name_unary_op() { + let data_type = DataType::new(DataTypeKind::Int(None), true); + let expr = BoundExpr::InputRef(BoundInputRef { + index: 0, + return_type: data_type.clone(), + }); + let child_schema = vec![ColumnDesc::new(data_type.clone(), "a".to_string(), false)]; + let expr = BoundExpr::UnaryOp(BoundUnaryOp { + op: UnaryOperator::Minus, + expr: Box::new(expr), + return_type: Some(data_type), + }); + assert_eq!("-a", expr.format_name(&child_schema)); + } + + // test when BoundExpr is BinaryOp + #[test] + fn test_format_name_binary_op() { + // forms like a + 1 + { + let left_data_type = DataType::new(DataTypeKind::Int(None), true); + let left_expr = BoundExpr::InputRef(BoundInputRef { + index: 0, + return_type: left_data_type.clone(), + }); + let right_expr = BoundExpr::Constant(DataValue::Int64(1_i64)); + let child_schema = vec![ColumnDesc::new(left_data_type, "a".to_string(), false)]; + let expr = BoundExpr::BinaryOp(BoundBinaryOp { + op: BinaryOperator::Plus, + left_expr: Box::new(left_expr), + right_expr: Box::new(right_expr), + return_type: Some(DataType::new(DataTypeKind::Int(None), true)), + }); + assert_eq!("a+1", expr.format_name(&child_schema)); + } + // forms like a + b + { + let data_type = DataType::new(DataTypeKind::Int(None), true); + let left_expr = BoundExpr::InputRef(BoundInputRef { + index: 0, + return_type: data_type.clone(), + }); + let right_expr = BoundExpr::InputRef(BoundInputRef { + index: 1, + return_type: data_type.clone(), + }); + let child_schema = vec![ + ColumnDesc::new(data_type.clone(), "a".to_string(), false), + ColumnDesc::new(data_type.clone(), "b".to_string(), false), + ]; + + let expr = BoundExpr::BinaryOp(BoundBinaryOp { + op: BinaryOperator::Plus, + left_expr: Box::new(left_expr), + right_expr: Box::new(right_expr), + return_type: Some(data_type), + }); + assert_eq!("a+b", expr.format_name(&child_schema)); + } + } +} diff --git a/src/optimizer/plan_nodes/logical_aggregate.rs b/src/optimizer/plan_nodes/logical_aggregate.rs index 0303196f2..06c9e4db6 100644 --- a/src/optimizer/plan_nodes/logical_aggregate.rs +++ b/src/optimizer/plan_nodes/logical_aggregate.rs @@ -71,10 +71,9 @@ impl PlanNode for LogicalAggregate { let child_schema = self.child.schema(); self.group_keys .iter() - .enumerate() - .map(|(index, expr)| { + .map(|expr| { ColumnDesc::new( - child_schema[index].datatype().clone(), + expr.return_type().unwrap(), expr.format_name(&child_schema), false, ) From 8ad9389409affb4154ede2386ff3f2e54a59ce4e Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Sat, 16 Apr 2022 17:45:52 +0800 Subject: [PATCH 5/5] build: bump toolchain and deps, remove unused dep (#629) Signed-off-by: TennyZhuang Signed-off-by: Shmiwy --- Cargo.lock | 81 +++++++++++++++++++++++--------------------------- Cargo.toml | 2 -- rust-toolchain | 2 +- 3 files changed, 38 insertions(+), 47 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 72be2f51a..104256710 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -32,17 +32,6 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" -[[package]] -name = "async-channel" -version = "1.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2114d64672151c0c5eaa5e131ec84a74f06e1e559830dabba01ca30605d66319" -dependencies = [ - "concurrent-queue", - "event-listener", - "futures-core", -] - [[package]] name = "async-io" version = "1.6.0" @@ -303,16 +292,16 @@ dependencies = [ [[package]] name = "clap" -version = "3.1.8" +version = "3.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71c47df61d9e16dc010b55dba1952a57d8c215dbb533fd13cdd13369aac73b1c" +checksum = "6aad2534fad53df1cc12519c5cda696dd3e20e6118a027e24054aea14a0bdcbe" dependencies = [ "atty", "bitflags", "clap_derive", + "clap_lex", "indexmap", "lazy_static", - "os_str_bytes", "strsim", "termcolor", "textwrap 0.15.0", @@ -331,6 +320,15 @@ dependencies = [ "syn", ] +[[package]] +name = "clap_lex" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "189ddd3b5d32a70b35e7686054371742a937b0d99128e76dde6340210e966669" +dependencies = [ + "os_str_bytes", +] + [[package]] name = "clipboard-win" version = "4.4.1" @@ -1022,9 +1020,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.122" +version = "0.2.123" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec647867e2bf0772e28c8bcde4f0d19a9216916e890543b5a03ed8ef27b8f259" +checksum = "cb691a747a7ab48abc15c5b42066eaafde10dc427e3b6ee2a1cf43db04c763bd" [[package]] name = "linux-raw-sys" @@ -1287,9 +1285,6 @@ name = "os_str_bytes" version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e22443d1643a904602595ba1cd8f7d896afe56d26712531c5ff73a15b2fbf64" -dependencies = [ - "memchr", -] [[package]] name = "parking" @@ -1469,9 +1464,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bd5316aa8f5c82add416dfbc25116b84b748a21153f512917e8143640a71bbd" +checksum = "a07b0857a71a8cb765763950499cae2413c3f9cede1133478c43600d9e146890" dependencies = [ "bytes", "prost-derive", @@ -1479,9 +1474,9 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "328f9f29b82409216decb172d81e936415d21245befa79cd34c3f29d87d1c50b" +checksum = "120fbe7988713f39d780a58cf1a7ef0d7ef66c6d87e5aa3438940c05357929f4" dependencies = [ "bytes", "cfg-if 1.0.0", @@ -1501,9 +1496,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df35198f0777b75e9ff669737c6da5136b59dba33cf5a010a6d1cc4d56defc6f" +checksum = "7b670f45da57fb8542ebdbb6105a925fe571b67f9e7ed9f47a06a84e72b4e7cc" dependencies = [ "anyhow", "itertools", @@ -1514,9 +1509,9 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "926681c118ae6e512a3ccefd4abbe5521a14f4cc1e207356d4d00c0b7f2006fd" +checksum = "2d0a014229361011dc8e69c8a1ec6c2e8d0f2af7c91e3ea3f5b2170298461e68" dependencies = [ "bytes", "prost", @@ -1551,9 +1546,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "632d02bff7f874a36f33ea8bb416cd484b90cc66c1194b1a1110d067a7013f58" +checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" dependencies = [ "proc-macro2", ] @@ -1585,9 +1580,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.5.1" +version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90" +checksum = "fd249e82c21598a9a426a4e00dd7adc1d640b22445ec8545feef801d1a74c221" dependencies = [ "autocfg", "crossbeam-deque", @@ -1597,14 +1592,13 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.9.1" +version = "1.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" +checksum = "9f51245e1e62e1f1629cbfec37b5793bbabcaeb90f30e94d2ba03564687353e4" dependencies = [ "crossbeam-channel", "crossbeam-deque", "crossbeam-utils 0.8.8", - "lazy_static", "num_cpus", ] @@ -1674,7 +1668,6 @@ name = "risinglight" version = "0.1.3" dependencies = [ "anyhow", - "async-channel", "async-recursion", "async-stream", "async-trait", @@ -1684,7 +1677,7 @@ dependencies = [ "btreemultimap", "bytes", "chrono", - "clap 3.1.8", + "clap 3.1.9", "comfy-table", "crc32fast", "criterion", @@ -1770,9 +1763,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.34.2" +version = "0.34.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96619609a54d638872db136f56941d34e2a00bb0acf3fa783a90d6b96a093ba2" +checksum = "cb617eb09c4ef1536405e357e3b63f39e3ab4cc2159db05395278ad5c352bb16" dependencies = [ "bitflags", "errno", @@ -2212,9 +2205,9 @@ dependencies = [ [[package]] name = "tracing" -version = "0.1.32" +version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a1bdf54a7c28a2bbf701e1d2233f6c77f473486b94bee4f9678da5a148dca7f" +checksum = "5d0ecdcb44a79f0fe9844f0c4f33a342cbcbb5117de8001e6ba0dc2351327d09" dependencies = [ "cfg-if 1.0.0", "pin-project-lite", @@ -2235,9 +2228,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.24" +version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90442985ee2f57c9e1b548ee72ae842f4a9a20e3f417cc38dbc5dc684d9bb4ee" +checksum = "f54c8ca710e81886d498c2fd3331b56c93aa248d49de2222ad2742247c60072f" dependencies = [ "lazy_static", "valuable", @@ -2256,9 +2249,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.10" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9df98b037d039d03400d9dd06b0f8ce05486b5f25e9a2d7d36196e142ebbc52" +checksum = "4bc28f93baff38037f64e6f43d34cfa1605f27a49c34e8a04c5e78b0babf2596" dependencies = [ "ansi_term", "lazy_static", diff --git a/Cargo.toml b/Cargo.toml index 4d60802c4..b97bdd742 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,10 +15,8 @@ default = ["jemalloc"] simd = [] jemalloc = ["tikv-jemallocator"] - [dependencies] anyhow = "1" -async-channel = "1" async-recursion = "1" async-stream = "0.3" async-trait = "0.1" diff --git a/rust-toolchain b/rust-toolchain index b5eabc535..f201a5aac 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -nightly-2022-03-30 +nightly-2022-04-16