diff --git a/prqlc/prqlc/src/ir/pl/extra.rs b/prqlc/prqlc/src/ir/pl/extra.rs index 2fd5cd265738..fd4325cebf07 100644 --- a/prqlc/prqlc/src/ir/pl/extra.rs +++ b/prqlc/prqlc/src/ir/pl/extra.rs @@ -90,7 +90,10 @@ pub enum TransformKind { range: Range, pipeline: Box, }, - Append(Box), + Append { + by: AppendBy, + bottom: Box, + }, Loop(Box), } @@ -115,6 +118,12 @@ pub enum JoinSide { Full, } +#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, JsonSchema)] +pub enum AppendBy { + Name, + Position, +} + impl Expr { pub fn new(kind: impl Into) -> Self { Expr { diff --git a/prqlc/prqlc/src/ir/pl/fold.rs b/prqlc/prqlc/src/ir/pl/fold.rs index 435947a34d6b..341620a64284 100644 --- a/prqlc/prqlc/src/ir/pl/fold.rs +++ b/prqlc/prqlc/src/ir/pl/fold.rs @@ -256,7 +256,10 @@ pub fn fold_transform_kind( with: Box::new(fold.fold_expr(*with)?), filter: Box::new(fold.fold_expr(*filter)?), }, - Append(bottom) => Append(Box::new(fold.fold_expr(*bottom)?)), + Append { by, bottom } => Append { + by, + bottom: Box::new(fold.fold_expr(*bottom)?), + }, Group { by, pipeline } => Group { by: Box::new(fold.fold_expr(*by)?), pipeline: Box::new(fold.fold_expr(*pipeline)?), diff --git a/prqlc/prqlc/src/ir/rq/fold.rs b/prqlc/prqlc/src/ir/rq/fold.rs index a9169730b3ef..b236b8957588 100644 --- a/prqlc/prqlc/src/ir/rq/fold.rs +++ b/prqlc/prqlc/src/ir/rq/fold.rs @@ -185,7 +185,10 @@ pub fn fold_transform( with: fold.fold_table_ref(with)?, filter: fold.fold_expr(filter)?, }, - Append(bottom) => Append(fold.fold_table_ref(bottom)?), + Append { by, bottom } => Append { + by, + bottom: fold.fold_table_ref(bottom)?, + }, Loop(transforms) => Loop(fold_transforms(fold, transforms)?), }; Ok(transform) diff --git a/prqlc/prqlc/src/ir/rq/transform.rs b/prqlc/prqlc/src/ir/rq/transform.rs index 87dc7abf39bb..638f2fb86223 100644 --- a/prqlc/prqlc/src/ir/rq/transform.rs +++ b/prqlc/prqlc/src/ir/rq/transform.rs @@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize}; use super::*; use crate::ir::generic::ColumnSort; use crate::ir::generic::WindowFrame; -use crate::ir::pl::JoinSide; +use crate::ir::pl::{AppendBy, JoinSide}; /// Transformation of a table. #[derive( @@ -27,7 +27,10 @@ pub enum Transform { with: TableRef, filter: Expr, }, - Append(TableRef), + Append { + by: AppendBy, + bottom: TableRef, + }, Loop(Vec), } diff --git a/prqlc/prqlc/src/semantic/lowering.rs b/prqlc/prqlc/src/semantic/lowering.rs index 8b949f1ff617..4d17109020d9 100644 --- a/prqlc/prqlc/src/semantic/lowering.rs +++ b/prqlc/prqlc/src/semantic/lowering.rs @@ -612,11 +612,11 @@ impl Lowerer { }; self.pipeline.push(transform); } - pl::TransformKind::Append(bottom) => { + pl::TransformKind::Append { by, bottom } => { let mut bottom = self.lower_table_ref(*bottom)?; bottom.prefer_cte = false; - self.pipeline.push(Transform::Append(bottom)); + self.pipeline.push(Transform::Append { by, bottom }); } pl::TransformKind::Loop(pipeline) => { let relation = self.lower_relation(*pipeline)?; diff --git a/prqlc/prqlc/src/semantic/reporting.rs b/prqlc/prqlc/src/semantic/reporting.rs index 386a86b54c45..868b6847637f 100644 --- a/prqlc/prqlc/src/semantic/reporting.rs +++ b/prqlc/prqlc/src/semantic/reporting.rs @@ -257,7 +257,7 @@ impl PlFold for FrameCollector { pl::TransformKind::Derive { assigns: ref e } | pl::TransformKind::Select { assigns: ref e } | pl::TransformKind::Filter { filter: ref e } - | pl::TransformKind::Append(ref e) + | pl::TransformKind::Append { bottom: ref e, .. } | pl::TransformKind::Loop(ref e) | pl::TransformKind::Group { pipeline: ref e, .. diff --git a/prqlc/prqlc/src/semantic/resolver/flatten.rs b/prqlc/prqlc/src/semantic/resolver/flatten.rs index 37cd2004ec99..8e52bc941c5b 100644 --- a/prqlc/prqlc/src/semantic/resolver/flatten.rs +++ b/prqlc/prqlc/src/semantic/resolver/flatten.rs @@ -151,9 +151,11 @@ impl PlFold for Flattener { // in scope for downstream transforms in the outer pipeline. Per the PRQL // spec a join retains the left (input) side's order, so snapshot the // input's sort and restore it after folding the kind. - let input_sort = - matches!(kind, TransformKind::Join { .. } | TransformKind::Append(_)) - .then(|| self.sort.clone()); + let input_sort = matches!( + kind, + TransformKind::Join { .. } | TransformKind::Append { .. } + ) + .then(|| self.sort.clone()); let kind = fold_transform_kind(self, kind)?; @@ -177,8 +179,10 @@ impl PlFold for Flattener { // derive {`album_name` = `name`} // select {`artist_id`, `album_name`} // ) (this.id == that.artist_id) - let sort = if matches!(kind, TransformKind::Join { .. } | TransformKind::Append(_)) - { + let sort = if matches!( + kind, + TransformKind::Join { .. } | TransformKind::Append { .. } + ) { vec![] } else { self.sort.clone() diff --git a/prqlc/prqlc/src/semantic/resolver/mod.rs b/prqlc/prqlc/src/semantic/resolver/mod.rs index 3453dd7cb13e..dc50be5e9fcc 100644 --- a/prqlc/prqlc/src/semantic/resolver/mod.rs +++ b/prqlc/prqlc/src/semantic/resolver/mod.rs @@ -44,7 +44,7 @@ impl Resolver<'_> { pub(super) mod test { use insta::assert_yaml_snapshot; - use crate::ir::pl::{Expr, Lineage, PlFold}; + use crate::ir::pl::{AppendBy, Expr, Lineage, PlFold}; use crate::{Errors, Result}; pub fn erase_ids(expr: Expr) -> Expr { @@ -463,7 +463,10 @@ pub(super) mod test { bottom_expr.lineage = Some(bottom_lineage); let transform_call = TransformCall { - kind: Box::new(TransformKind::Append(Box::new(bottom_expr))), + kind: Box::new(TransformKind::Append { + by: AppendBy::Position, + bottom: Box::new(bottom_expr), + }), input: Box::new(top_expr), partition: None, frame: crate::ir::pl::WindowFrame::default(), diff --git a/prqlc/prqlc/src/semantic/resolver/transforms.rs b/prqlc/prqlc/src/semantic/resolver/transforms.rs index d6f2430b8291..bf269cfa138a 100644 --- a/prqlc/prqlc/src/semantic/resolver/transforms.rs +++ b/prqlc/prqlc/src/semantic/resolver/transforms.rs @@ -1,11 +1,12 @@ -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::iter::zip; use itertools::Itertools; use serde::Deserialize; -use super::types::{ty_tuple_kind, type_intersection}; +use super::types::{ty_tuple_kind, type_intersection, type_union_of_tuples}; use super::Resolver; +use crate::codegen::write_ty_kind; use crate::ir::decl::{Decl, DeclKind, Module}; use crate::ir::generic::{SortDirection, WindowKind}; use crate::ir::pl::*; @@ -254,9 +255,35 @@ impl Resolver<'_> { (transform_kind, tbl) } "append" => { - let [bottom, top] = unpack::<2>(func.args); + let [by, bottom, top] = unpack::<3>(func.args); - (TransformKind::Append(Box::new(bottom)), top) + let by = { + let span = by.span; + let ident = by + .clone() + .try_cast(ExprKind::into_ident, Some("by"), "ident")?; + + match ident.to_string().as_str() { + "position" => AppendBy::Position, + "name" => AppendBy::Name, + _ => { + return Err(Error::new(Reason::Expected { + who: Some("`by`".to_string()), + expected: "position or name".to_string(), + found: ident.to_string(), + }) + .with_span(span)) + } + } + }; + + ( + TransformKind::Append { + by, + bottom: Box::new(bottom), + }, + top, + ) } "loop" => { let [pipeline, tbl] = unpack::<2>(func.args); @@ -600,11 +627,26 @@ impl Resolver<'_> { let pipeline = pipeline.kind.into_function().unwrap().unwrap(); pipeline.return_ty.map(|x| *x) } - TransformKind::Append(bottom) => { + TransformKind::Append { bottom, by } => { let top = transform_call.input.ty.clone().unwrap(); + let bottom_span = bottom.span; let bottom = bottom.ty.clone().unwrap(); - Some(type_intersection(top, bottom).with_span(transform_call.input.span)?) + if *by == AppendBy::Position { + Some(type_intersection(top, bottom).with_span(bottom_span)?) + } else if top.clone().is_relation() && bottom.clone().is_relation() { + Some(type_union_of_tuples( + top.into_relation().unwrap(), + bottom.into_relation().unwrap(), + )?) + } else { + return Err(Error::new_simple(format!( + "cannot append type `{}` to `{}`", + write_ty_kind(&bottom.kind), + write_ty_kind(&top.kind) + ))) + .with_span(bottom_span); + } } }) } @@ -757,10 +799,13 @@ impl TransformCall { let right = lineage_or_default(with)?; join(left, right) } - Append(bottom) => { + Append { bottom, by } => { let top = lineage_or_default(&self.input)?; - let bottom = lineage_or_default(bottom)?; - append(top, bottom)? + let bot = lineage_or_default(bottom)?; + match by { + AppendBy::Position => append(top, bot).with_span(bottom.span)?, + AppendBy::Name => append_by_name(top, bot).with_span(bottom.span)?, + } } Loop(_) => lineage_or_default(&self.input)?, Sort { .. } | Filter { .. } | Take { .. } => lineage_or_default(&self.input)?, @@ -855,6 +900,41 @@ fn append(mut top: Lineage, bottom: Lineage) -> Result { Ok(top) } +fn append_by_name(mut top: Lineage, bottom: Lineage) -> Result { + // Merge inputs from both relations so lineage can track both sources + // This is similar to how `join` handles inputs + top.inputs.extend(bottom.inputs); + + // start with all the columns from top + let mut columns = top.columns.clone(); + let top_names: HashSet = top + .columns + .into_iter() + .filter_map(|c| match c { + LineageColumn::Single { name, .. } => Some(name?.name), + _ => None, + }) + .collect(); + + // add columns from bottom that aren't already in top + for column in bottom.columns { + match column { + LineageColumn::Single { ref name, .. } => { + if let Some(name) = name.clone() { + if !top_names.contains(&name.name) { + columns.push(column); + } + } + } + LineageColumn::All { .. } => todo!(), + } + } + + log::trace!("append_by_name columns: {columns:#?}"); + top.columns = columns; + Ok(top) +} + impl Lineage { pub fn clear(&mut self) { self.prev_columns.clear(); @@ -1183,17 +1263,17 @@ mod tests { assert_snapshot!(crate::tests::compile( "from a | select {x, y} | append (from b | select {x})" ) - .unwrap_err(), @r" + .unwrap_err(), @r###" Error: - ╭─[ :1:10 ] + ╭─[ :1:43 ] │ 1 │ from a | select {x, y} | append (from b | select {x}) - │ ──────┬────── - │ ╰──────── cannot combine relations with different numbers of columns + │ ─────┬──── + │ ╰────── cannot combine relations with different numbers of columns │ │ Help: `append` requires both tables to have matching columns ───╯ - "); + "###); } // `append` of relations whose columns have incompatible types must also @@ -1203,15 +1283,15 @@ mod tests { assert_snapshot!(crate::tests::compile( "from a | select {x = 1} | append (from b | select {x = 1.0})" ) - .unwrap_err(), @r" + .unwrap_err(), @r###" Error: - ╭─[ :1:10 ] + ╭─[ :1:44 ] │ 1 │ from a | select {x = 1} | append (from b | select {x = 1.0}) - │ ───────┬────── - │ ╰──────── cannot combine types `int` and `float` + │ ────────┬─────── + │ ╰───────── cannot combine types `int` and `float` ───╯ - "); + "###); } #[test] diff --git a/prqlc/prqlc/src/semantic/resolver/types.rs b/prqlc/prqlc/src/semantic/resolver/types.rs index 4f3f74f67dc1..a49776a1553d 100644 --- a/prqlc/prqlc/src/semantic/resolver/types.rs +++ b/prqlc/prqlc/src/semantic/resolver/types.rs @@ -4,6 +4,7 @@ use crate::ir::pl::*; use crate::pr::{PrimitiveSet, Ty, TyKind, TyTupleField}; use crate::Result; use crate::{Error, Reason, WithErrorInfo}; +use itertools::Itertools; impl Resolver<'_> { pub fn infer_type(expr: &Expr) -> Result> { @@ -455,3 +456,39 @@ fn different_column_count_error() -> Error { Error::new_simple("cannot combine relations with different numbers of columns") .push_hint("`append` requires both tables to have matching columns") } + +pub fn type_union_of_tuples(a: Vec, b: Vec) -> Result { + let has_other = a.iter().any(|f| f.is_wildcard()) || b.iter().any(|f| f.is_wildcard()); + + let mut fields: Vec = a.into_iter().filter(|f| f.is_single()).collect_vec(); + + for b_field in b.into_iter().filter(|f| f.is_single()) { + match b_field { + TyTupleField::Single(b_name, b_ty) => { + match fields + .iter() + .position(|f| f.clone().into_single().ok().unwrap().0 == b_name) + { + Some(i) => { + let TyTupleField::Single(a_name, a_ty) = fields[i].clone() else { + unreachable!() + }; + if let (Some(a_ty), Some(b_ty)) = (a_ty, b_ty) { + fields[i] = + TyTupleField::Single(a_name, Some(type_intersection(a_ty, b_ty)?)); + } + } + None => { + fields.push(TyTupleField::Single(b_name, b_ty)); + } + } + } + _ => unreachable!(), + } + } + if has_other { + fields.push(TyTupleField::Wildcard(None)); + } + + Ok(Ty::new(TyKind::Tuple(fields))) +} diff --git a/prqlc/prqlc/src/semantic/std.prql b/prqlc/prqlc/src/semantic/std.prql index 24b709f0cd4f..71522473113e 100644 --- a/prqlc/prqlc/src/semantic/std.prql +++ b/prqlc/prqlc/src/semantic/std.prql @@ -118,7 +118,12 @@ let window = func tbl -> internal window -let append = `default_db.bottom` top -> internal append +let append = func + `noresolve.by`:position + `default_db.bottom` + top + -> internal append + let intersect = `default_db.bottom` top -> ( t = top join (b = bottom) (tuple_reduce std.and (tuple_map _eq (tuple_zip t.* b.*))) diff --git a/prqlc/prqlc/src/sql/dialect.rs b/prqlc/prqlc/src/sql/dialect.rs index 24a9862a5ce6..52f57635c7d9 100644 --- a/prqlc/prqlc/src/sql/dialect.rs +++ b/prqlc/prqlc/src/sql/dialect.rs @@ -216,6 +216,11 @@ pub(super) trait DialectHandler: Any + Debug { true } + /// Support for UNION [..] BY NAME. + fn union_by_name(&self) -> bool { + false + } + /// Support or EXCEPT ALL. /// When not supported, fallback to anti join. fn except_all(&self) -> bool { @@ -243,6 +248,7 @@ pub(super) trait DialectHandler: Any + Debug { true } + /// Support for SELECT DISTINCT ON(partition) fn supports_distinct_on(&self) -> bool { false } @@ -634,6 +640,11 @@ impl DialectHandler for BigQueryDialect { true } + fn union_by_name(&self) -> bool { + // https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#union + true + } + fn prefers_subquery_parentheses_shorthand(&self) -> bool { true } @@ -684,6 +695,11 @@ impl DialectHandler for SnowflakeDialect { false } + fn union_by_name(&self) -> bool { + // https://docs.snowflake.com/en/sql-reference/operators-query + true + } + fn interval_quoting_style(&self, _dtf: &DateTimeField) -> IntervalQuotingStyle { // Snowflake requires quotes around value and unit together // https://docs.snowflake.com/en/sql-reference/data-types-datetime#interval-constants @@ -709,6 +725,11 @@ impl DialectHandler for DuckDbDialect { } fn supports_distinct_on(&self) -> bool { + // https://duckdb.org/docs/current/sql/query_syntax/select#distinct-on-clause + true + } + + fn union_by_name(&self) -> bool { true } diff --git a/prqlc/prqlc/src/sql/gen_query.rs b/prqlc/prqlc/src/sql/gen_query.rs index cbd0589e77ee..58f28171cdaf 100644 --- a/prqlc/prqlc/src/sql/gen_query.rs +++ b/prqlc/prqlc/src/sql/gen_query.rs @@ -15,7 +15,7 @@ use super::operators::translate_operator; use super::pq::ast::{Cte, CteKind, RelationExpr, RelationExprKind, SqlRelation, SqlTransform}; use super::{Context, Dialect}; use crate::debug; -use crate::ir::pl::{JoinSide, Literal}; +use crate::ir::pl::{AppendBy, JoinSide, Literal}; use crate::ir::rq::{CId, Expr, ExprKind, RelationLiteral, RelationalQuery}; use crate::utils::{BreakUp, Pluck}; use crate::{Error, Result, WithErrorInfo}; @@ -292,10 +292,15 @@ fn translate_set_ops_pipeline( _ => unreachable!(), }; - let (distinct, bottom) = match transform { - Union { distinct, bottom } - | Except { distinct, bottom } - | Intersect { distinct, bottom } => (distinct, bottom), + let (distinct, bottom, by) = match transform { + Union { + distinct, + bottom, + by, + } => (distinct, bottom, by), + Except { distinct, bottom } | Intersect { distinct, bottom } => { + (distinct, bottom, AppendBy::Position) + } _ => unreachable!(), }; @@ -330,14 +335,13 @@ fn translate_set_ops_pipeline( top = default_query(SetExpr::SetOperation { left, right, - set_quantifier: if distinct { - if context.dialect.set_ops_distinct() { - sql_ast::SetQuantifier::Distinct - } else { - sql_ast::SetQuantifier::None - } - } else { - sql_ast::SetQuantifier::All + set_quantifier: match (distinct, context.dialect.set_ops_distinct(), by) { + (false, _, AppendBy::Position) => sql_ast::SetQuantifier::All, + (false, _, AppendBy::Name) => sql_ast::SetQuantifier::AllByName, + (true, true, AppendBy::Position) => sql_ast::SetQuantifier::Distinct, + (true, true, AppendBy::Name) => sql_ast::SetQuantifier::DistinctByName, + (true, false, AppendBy::Position) => sql_ast::SetQuantifier::None, + (true, false, AppendBy::Name) => sql_ast::SetQuantifier::ByName, }, op, }); diff --git a/prqlc/prqlc/src/sql/pq/ast.rs b/prqlc/prqlc/src/sql/pq/ast.rs index d6422b15bfdf..fc941093d9ab 100644 --- a/prqlc/prqlc/src/sql/pq/ast.rs +++ b/prqlc/prqlc/src/sql/pq/ast.rs @@ -10,7 +10,7 @@ use serde::Serialize; use super::context::RIId; use crate::ir::generic::ColumnSort; -use crate::ir::pl::JoinSide; +use crate::ir::pl::{AppendBy, JoinSide}; use crate::ir::rq::{self, fold_column_sorts, RelationLiteral, RqFold}; use crate::Result; @@ -111,6 +111,7 @@ pub enum SqlTransform { Union { bottom: Rel, distinct: bool, + by: AppendBy, }, } @@ -176,9 +177,14 @@ pub fn fold_sql_transform< SqlTransform::Distinct => SqlTransform::Distinct, SqlTransform::DistinctOn(ids) => SqlTransform::DistinctOn(fold.fold_cids(ids)?), - SqlTransform::Union { bottom, distinct } => SqlTransform::Union { + SqlTransform::Union { + bottom, + distinct, + by, + } => SqlTransform::Union { bottom: fold.fold_rel(bottom)?, distinct, + by, }, SqlTransform::Except { bottom, distinct } => SqlTransform::Except { bottom: fold.fold_rel(bottom)?, diff --git a/prqlc/prqlc/src/sql/pq/gen_query.rs b/prqlc/prqlc/src/sql/pq/gen_query.rs index 7e55630858d8..c1c6b452264e 100644 --- a/prqlc/prqlc/src/sql/pq/gen_query.rs +++ b/prqlc/prqlc/src/sql/pq/gen_query.rs @@ -161,7 +161,7 @@ impl PqMapper for TransformCompiler<' rq::Transform::Sort(v) => pq::SqlTransform::Sort(v), rq::Transform::Take(v) => pq::SqlTransform::Take(v), rq::Transform::Compute(_) - | rq::Transform::Append(_) + | rq::Transform::Append { .. } | rq::Transform::Loop(_) => { // these are not used from here on return Ok(None); diff --git a/prqlc/prqlc/src/sql/pq/preprocess.rs b/prqlc/prqlc/src/sql/pq/preprocess.rs index 91cdb1aa3e64..fbc70c91478d 100644 --- a/prqlc/prqlc/src/sql/pq/preprocess.rs +++ b/prqlc/prqlc/src/sql/pq/preprocess.rs @@ -7,7 +7,7 @@ use super::anchor::{infer_complexity, CidCollector, Complexity}; use super::ast::*; use crate::ir::generic::{ColumnSort, SortDirection, WindowFrame, WindowKind}; -use crate::ir::pl::{JoinSide, Literal}; +use crate::ir::pl::{AppendBy, JoinSide, Literal}; use crate::ir::rq::{ self, maybe_binop, new_binop, CId, Compute, Expr, ExprKind, RqFold, Transform, Window, }; @@ -284,7 +284,8 @@ pub(in crate::sql) fn union( let mut res = Vec::with_capacity(pipeline.len()); let mut pipeline = pipeline.into_iter().peekable(); while let Some(t) = pipeline.next() { - let Super(Append(bottom)) = t else { + let Super(Append { by, bottom }) = t else { + log::trace!("seen {t:#?}"); res.push(t); continue; }; @@ -297,7 +298,88 @@ pub(in crate::sql) fn union( false }; - res.push(SqlTransform::Union { bottom, distinct }); + if by == AppendBy::Name && !ctx.dialect.union_by_name() { + let top_cols = ctx.anchor.determine_select_columns(&res[0..res.len()]); + let bottom_cols = ctx + .anchor + .relation_instances + .get(&bottom) + .unwrap() + .table_ref + .columns + .iter() + .map(|(_, c)| *c) + .collect_vec(); + + if ctx.anchor.contains_wildcard(&top_cols) || ctx.anchor.contains_wildcard(&bottom_cols) + { + return Err(Error::new_simple(format!( + "Target dialect does not support UNION BY NAME" + )) + .push_hint( + "providing more column information may allow the query to be translated.", + )); + } + + // fallback mode: determine the union of both sets of columns, then add + // selects to both top and bottom that align the columns and add nulls + let mut union_cols: HashMap, Option)> = HashMap::new(); + let mut union_col_names: Vec = Vec::new(); + for cid in top_cols { + let cname = ctx.anchor.ensure_column_name(cid).unwrap(); + union_col_names.push(cname.clone()); + union_cols.insert(cname.clone(), (Some(cid), None)); + } + for cid in bottom_cols { + let cname = ctx.anchor.ensure_column_name(cid).unwrap(); + match union_cols.get(cname) { + Some((Some(top_cid), None)) => { + union_cols.insert(cname.clone(), (Some(*top_cid), Some(cid))); + } + None => { + union_col_names.push(cname.clone()); + union_cols.insert(cname.clone(), (None, Some(cid))); + } + _ => unreachable!(), + } + } + + let null_expr = Expr { + span: None, + kind: ExprKind::Literal(Literal::Null), + }; + let null_cid = ctx.anchor.cid.gen(); + ctx.anchor.register_compute(rq::Compute { + id: null_cid, + expr: null_expr, + window: None, + is_aggregation: false, + }); + + log::trace!("push select {union_col_names:#?}"); + + // res.push(SqlTransform::Super(Transform::Select( + // union_col_names + // .iter() + // .map(|cname| match union_cols.get(cname) { + // Some((Some(cid), _)) => cid.clone(), + // _ => null_cid.clone(), + // }) + // .collect_vec(), + // ))); + + res.push(SqlTransform::Union { + bottom, + distinct, + by: AppendBy::Position, + }); + } else { + res.push(SqlTransform::Union { + bottom, + distinct, + by, + }); + } } Ok(res) } diff --git a/prqlc/prqlc/tests/integration/error_messages.rs b/prqlc/prqlc/tests/integration/error_messages.rs index d17d3df97315..e6c333efe63d 100644 --- a/prqlc/prqlc/tests/integration/error_messages.rs +++ b/prqlc/prqlc/tests/integration/error_messages.rs @@ -490,3 +490,28 @@ fn bare_lambda_expression() { ───╯ "); } + +#[test] +fn append_by_name_unsupported_dialect() { + assert_snapshot!(compile(r###" + from foo | append by:name bar + "###).unwrap_err(), @r###" + Error: Target dialect does not support UNION BY NAME + ↳ Hint: providing more column information may allow the query to be translated. + "###); +} + +#[test] +fn append_by_name_bad_arg() { + assert_snapshot!(compile(r###" + from foo | append by:jove bar + "###).unwrap_err(), @r###" + Error: + ╭─[ :2:26 ] + │ + 2 │ from foo | append by:jove bar + │ ──┬─ + │ ╰─── `by` expected position or name, but found jove + ───╯ + "###); +} diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__set_ops_remove.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__set_ops_remove.snap index cd69dacc93d0..18fe9addc9c5 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__set_ops_remove.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__set_ops_remove.snap @@ -18,7 +18,7 @@ frames: table: - default_db - _literal_127 -- - 0:3172-3258 +- - 0:3210-3296 - columns: - !Single name: @@ -43,7 +43,7 @@ frames: table: - default_db - _literal_122 -- - 0:3261-3315 +- - 0:3299-3353 - columns: - !Single name: @@ -157,14 +157,14 @@ nodes: - 122 - id: 190 kind: RqOperator - span: 0:3201-3257 + span: 0:3239-3295 targets: - 181 - 184 parent: 192 - id: 192 kind: 'TransformCall: Join' - span: 0:3172-3258 + span: 0:3210-3296 children: - 154 - 122 @@ -172,7 +172,7 @@ nodes: parent: 211 - id: 203 kind: Ident - span: 0:6033-6041 + span: 0:6071-6079 ident: !Ident - this - b @@ -181,17 +181,17 @@ nodes: - 122 - id: 207 kind: RqOperator - span: 0:3269-3314 + span: 0:3307-3352 targets: - 203 - 210 parent: 211 - id: 210 kind: Literal - span: 0:6045-6049 + span: 0:6083-6087 - id: 211 kind: 'TransformCall: Filter' - span: 0:3261-3315 + span: 0:3299-3353 children: - 192 - 207 @@ -207,7 +207,7 @@ nodes: parent: 214 - id: 214 kind: Tuple - span: 0:3325-3328 + span: 0:3363-3366 children: - 213 parent: 215 diff --git a/prqlc/prqlc/tests/integration/sql.rs b/prqlc/prqlc/tests/integration/sql.rs index 0f2cc0ebecc1..e9a8025aee70 100644 --- a/prqlc/prqlc/tests/integration/sql.rs +++ b/prqlc/prqlc/tests/integration/sql.rs @@ -7449,3 +7449,111 @@ fn test_tuple_map_aliases() { foo "###); } + +#[test] +fn test_append_by_name_1() { + assert_snapshot!(compile(r###" + prql target:sql.duckdb + + from foo + append by:name bar + "###).unwrap(), @r###" + SELECT + * + FROM + foo + UNION + ALL BY NAME + SELECT + * + FROM + bar + "###); +} + +#[test] +fn test_append_by_name_2() { + assert_snapshot!(compile(r###" + prql target:sql.duckdb + + from foo + select {x, y} + append by:name (from bar | select {y, z}) + "###).unwrap(), @r###" + SELECT + x, + y + FROM + foo + UNION + ALL BY NAME + SELECT + y, + z + FROM + bar + "###); +} + +#[test] +fn test_append_by_name_distinct() { + assert_snapshot!(compile(r###" + prql target:sql.duckdb + let distinct = rel -> (_param.rel | group this (take 1)) + + from foo + append by:name bar + distinct + "###).unwrap(), @r###" + SELECT + * + FROM + foo + UNION + DISTINCT BY NAME + SELECT + * + FROM + bar + "###); + + assert_snapshot!(compile(r###" + prql target:sql.snowflake + let distinct = rel -> (_param.rel | group this (take 1)) + + from foo + append by:name bar + distinct + "###).unwrap(), @r###" + SELECT + * + FROM + "foo" + UNION + BY NAME + SELECT + * + FROM + "bar" + "###); +} + +#[test] +fn test_append_by_name_fallback() { + assert_snapshot!(compile(r###" + from foo + select {x, y} + append by:name (from bar | select {y, b}) + "###).unwrap(), @r###" + SELECT + x, y, NULL AS b + FROM + foo + UNION + ALL + SELECT + NULL AS x, y, b + FROM + bar + "###); +}