Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 46 additions & 3 deletions src/compute-types/src/explain/text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ impl Plan {
key_val_plan,
plan,
mfp_after,
temporal_bucketing_strategy: _,
} => {
ctx.indent.set();
if !mfp_after.expressions.is_empty() || !mfp_after.predicates.is_empty() {
Expand Down Expand Up @@ -374,7 +375,11 @@ impl Plan {

ctx.indent.reset();
}
TopK { input, top_k_plan } => {
TopK {
input,
top_k_plan,
temporal_bucketing_strategy: _,
} => {
use crate::plan::top_k::TopKPlan;
match top_k_plan {
TopKPlan::MonotonicTop1(plan) => {
Expand Down Expand Up @@ -469,6 +474,7 @@ impl Plan {
Union {
inputs,
consolidate_output,
temporal_bucketing_strategies: _,
} => {
write!(f, "{}→", ctx.indent)?;
if *consolidate_output {
Expand Down Expand Up @@ -739,6 +745,7 @@ impl Plan {
key_val_plan,
plan,
mfp_after,
temporal_bucketing_strategy,
} => {
use crate::plan::reduce::ReducePlan;
match plan {
Expand All @@ -764,6 +771,13 @@ impl Plan {
let key = CompactScalars(key);
writeln!(f, "{}input_key={}", ctx.indent, key)?;
}
if !matches!(temporal_bucketing_strategy, ArrangementStrategy::Direct) {
writeln!(
f,
"{}temporal_bucketing_strategy={}",
ctx.indent, temporal_bucketing_strategy
)?;
}
if key_val_plan.key_plan.deref().is_identity() {
writeln!(f, "{}key_plan=id", ctx.indent)?;
} else {
Expand All @@ -790,7 +804,11 @@ impl Plan {
input.fmt_text(f, ctx)
})?;
}
TopK { input, top_k_plan } => {
TopK {
input,
top_k_plan,
temporal_bucketing_strategy,
} => {
use crate::plan::top_k::TopKPlan;
match top_k_plan {
TopKPlan::MonotonicTop1(plan) => {
Expand Down Expand Up @@ -851,7 +869,16 @@ impl Plan {
}
}
writeln!(f, "{}", annotations)?;
ctx.indented(|ctx| input.fmt_text(f, ctx))?;
ctx.indented(|ctx| {
if !matches!(temporal_bucketing_strategy, ArrangementStrategy::Direct) {
writeln!(
f,
"{}temporal_bucketing_strategy={}",
ctx.indent, temporal_bucketing_strategy
)?;
}
input.fmt_text(f, ctx)
})?;
}
Negate { input } => {
writeln!(f, "{}Negate{}", ctx.indent, annotations)?;
Expand All @@ -876,6 +903,7 @@ impl Plan {
Union {
inputs,
consolidate_output,
temporal_bucketing_strategies,
} => {
if *consolidate_output {
writeln!(
Expand All @@ -887,6 +915,21 @@ impl Plan {
writeln!(f, "{}Union{}", ctx.indent, annotations)?;
}
ctx.indented(|ctx| {
if temporal_bucketing_strategies
.iter()
.any(|s| !matches!(s, ArrangementStrategy::Direct))
{
let strategies = temporal_bucketing_strategies
.iter()
.map(|s| format!("{}", s))
.collect::<Vec<_>>()
.join(", ");
writeln!(
f,
"{}temporal_bucketing_strategies=[{}]",
ctx.indent, strategies
)?;
}
for input in inputs.iter() {
input.fmt_text(f, ctx)?;
}
Expand Down
85 changes: 51 additions & 34 deletions src/compute-types/src/plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,15 @@ pub enum ArrangementStrategy {
TemporalBucketing,
}

impl std::fmt::Display for ArrangementStrategy {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ArrangementStrategy::Direct => write!(f, "Direct"),
ArrangementStrategy::TemporalBucketing => write!(f, "TemporalBucketing"),
}
}
}

/// An identifier for an LIR node.
#[derive(
Clone,
Expand Down Expand Up @@ -308,6 +317,20 @@ pub enum PlanNode {
/// predicates so that it can be readily evaluated.
/// TODO(ggevay): should we wrap this in [`mz_expr::SafeMfpPlan`]?
mfp_after: MapFilterProject,
/// Strategy for forming the internal input arrangement built by `Reduce`
/// (materialized via `key_val_plan`).
///
/// Set by the lowering from the input's `has_future_updates` flag. The
/// renderer applies it to the keyed `(key, val)` stream feeding the
/// reduce. See `render_reduce` for the rationale on why this is
/// plumbed through `Reduce` rather than handled at the arrangement site.
///
/// Note: unrelated to the hash buckets used by hierarchical reductions
/// (e.g. `ReducePlan::Hierarchical`'s `buckets`), which are an internal
/// sharding scheme for `min`/`max`-style aggregations. Here "bucketing"
/// refers exclusively to temporal (time-domain) bucketing of
/// future-stamped updates.
temporal_bucketing_strategy: ArrangementStrategy,
},
/// Key-based "Top K" operator, retaining the first K records in each group.
TopK {
Expand All @@ -319,6 +342,14 @@ pub enum PlanNode {
/// on the properties of the reduction, and the input itself. Please check
/// out the documentation for this type for more detail.
top_k_plan: TopKPlan,
/// Strategy for bucketing the input collection ahead of the Top-K operator.
///
/// Set by the lowering from the input's `has_future_updates` flag. The
/// renderer applies it to the per-row input stream at the top of
/// `render_topk`, covering all three `TopKPlan` arms uniformly. See
/// `PlanNode::Reduce::temporal_bucketing_strategy` for the underlying
/// convention.
temporal_bucketing_strategy: ArrangementStrategy,
},
/// Inverts the sign of each update.
Negate {
Expand Down Expand Up @@ -350,6 +381,15 @@ pub enum PlanNode {
inputs: Vec<Plan>,
/// Whether to consolidate the output, e.g., cancel negated records.
consolidate_output: bool,
/// Per-input bucketing strategies. Lockstep with `inputs`: index `i` is the
/// strategy applied to `inputs[i]` before concatenation.
///
/// Set by the lowering from each input's `has_future_updates` flag. Only
/// consolidating Unions (`consolidate_output: true`) carry non-`Direct`
/// entries, because bucketing only pays off ahead of a consolidating
/// downstream operator. See `PlanNode::Reduce::temporal_bucketing_strategy`
/// for the underlying convention.
temporal_bucketing_strategies: Vec<ArrangementStrategy>,
},
/// The `input` plan, but with additional arrangements.
///
Expand Down Expand Up @@ -513,9 +553,14 @@ impl Plan {
// Subsequently, we perform plan refinements for the dataflow.
Self::refine_source_mfps(&mut dataflow);

if features.enable_consolidate_after_union_negate {
Self::refine_union_negate_consolidation(&mut dataflow);
}
// Note: `consolidate_output` for `Union` and per-input
// `temporal_bucketing_strategies` are decided at lowering time (see the
// `Union` arm of `lower_mir_expr_stack_safe`). The pre-existing
// `refine_union_negate_consolidation` pass — which used to flip
// `consolidate_output` to `true` for Unions with a `Negate` child — has
// been folded into the lowering, since lowering is the only point where
// the bucketing decision (which depends on `has_future_updates`) is
// available.

if dataflow.is_single_time() {
Self::refine_single_time_operator_selection(&mut dataflow);
Expand Down Expand Up @@ -634,37 +679,6 @@ impl Plan {
mz_repr::explain::trace_plan(dataflow);
}

/// Sets `consolidate_output` to `true` on every `Union` that has at least one
/// direct `Negate` input.
///
/// Each plan in `dataflow.objects_to_build` is traversed with an explicit
/// worklist (no recursion). Unions without a `Negate` input are left untouched;
/// the flag is only ever switched on, never off. Once all plans have been
/// visited, `dataflow` is handed to `mz_repr::explain::trace_plan`.
#[mz_ore::instrument(
    target = "optimizer",
    level = "debug",
    fields(path.segment = "refine_union_negate_consolidation")
)]
fn refine_union_negate_consolidation(dataflow: &mut DataflowDescription<Self>) {
    for build_desc in dataflow.objects_to_build.iter_mut() {
        let mut worklist = vec![&mut build_desc.plan];
        while let Some(plan) = worklist.pop() {
            let node = &mut plan.node;
            if let PlanNode::Union {
                inputs,
                consolidate_output,
                ..
            } = node
            {
                // Only immediate children are inspected here; deeper nodes are
                // reached through the worklist below.
                let has_negated_input = inputs
                    .iter()
                    .any(|input| matches!(input.node, PlanNode::Negate { .. }));
                if has_negated_input {
                    *consolidate_output = true;
                }
            }
            // Queue the children of every node, not just of Unions.
            worklist.extend(node.children_mut());
        }
    }
    mz_repr::explain::trace_plan(dataflow);
}

/// Refines the plans of objects to be built as part of `dataflow` to take advantage
/// of monotonic operators if the dataflow refers to a single-time, i.e., is for a
Expand Down Expand Up @@ -775,6 +789,7 @@ impl CollectionPlan for PlanNode {
| PlanNode::Union {
inputs,
consolidate_output: _,
temporal_bucketing_strategies: _,
} => {
for input in inputs {
input.depends_on_into(out);
Expand Down Expand Up @@ -805,10 +820,12 @@ impl CollectionPlan for PlanNode {
key_val_plan: _,
plan: _,
mfp_after: _,
temporal_bucketing_strategy: _,
}
| PlanNode::TopK {
input,
top_k_plan: _,
temporal_bucketing_strategy: _,
}
| PlanNode::Negate { input }
| PlanNode::Threshold {
Expand Down
16 changes: 14 additions & 2 deletions src/compute-types/src/plan/interpret/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,7 @@ where
key_val_plan,
plan,
mfp_after,
temporal_bucketing_strategy: _,
} => {
// Descend recursively into all children.
let input = self.apply_rec(input, rg)?;
Expand All @@ -406,7 +407,11 @@ where
mfp_after,
))
}
TopK { input, top_k_plan } => {
TopK {
input,
top_k_plan,
temporal_bucketing_strategy: _,
} => {
// Descend recursively into all children.
let input = self.apply_rec(input, rg)?;
// Interpret the current node.
Expand All @@ -430,6 +435,7 @@ where
Union {
inputs,
consolidate_output,
temporal_bucketing_strategies: _,
} => {
// Descend recursively into all children.
let inputs = inputs
Expand Down Expand Up @@ -676,6 +682,7 @@ where
key_val_plan,
plan,
mfp_after,
temporal_bucketing_strategy: _,
} => {
// Descend recursively into all children.
let input = self.apply_rec(input, rg)?;
Expand All @@ -693,7 +700,11 @@ where
// Pass the interpretation result up.
Ok(result)
}
TopK { input, top_k_plan } => {
TopK {
input,
top_k_plan,
temporal_bucketing_strategy: _,
} => {
// Descend recursively into all children.
let input = self.apply_rec(input, rg)?;
// Interpret the current node.
Expand Down Expand Up @@ -731,6 +742,7 @@ where
Union {
inputs,
consolidate_output,
temporal_bucketing_strategies: _,
} => {
// Descend recursively into all children.
let inputs: Vec<_> = inputs
Expand Down
Loading
Loading