diff --git a/src/datahike/query/execute.cljc b/src/datahike/query/execute.cljc index 148a0a4b..eeb774c5 100644 --- a/src/datahike/query/execute.cljc +++ b/src/datahike/query/execute.cljc @@ -2903,8 +2903,38 @@ (some #(#{:entity-group :pattern-scan} (:op %)) (:ops cv-plan))) rec-clause-versions) + ;; delta-driven-expand emits `[entity-id, + ;; propagated-y]` per AVET hit. That's correct + ;; only for simple transitive closure shapes + ;; — `(rule ?x ?y) [?x :attr ?prev-y] + ;; (rule ?prev-y ?z)` and the like, where the + ;; recursive body is exhausted by the + ;; reverse-index step. Any `:function` (e.g. + ;; arithmetic on a counter, get-else + ;; column-extraction), `:predicate` + ;; (filtering on a get-else output), or + ;; `:attached-preds` on the entity-group is + ;; SILENTLY SKIPPED by the shortcut, producing + ;; wrong tuples (e.g. `[entity-id, 0]` instead + ;; of `[node-id, depth+1]` for a tree-walk + ;; CTE). Only fire the optimization when every + ;; recursive-clause version consists solely of + ;; `:rule-lookup` / `:pattern-scan` ops and + ;; `:entity-group` ops with empty `:attached-preds`. + rec-shape-simple? + (every? (fn [cv-plan] + (every? (fn [op] + (case (:op op) + (:rule-lookup :pattern-scan) + true + :entity-group + (empty? (:attached-preds op)) + false)) + (:ops cv-plan))) + rec-clause-versions) use-delta-driven? (and base-scan-attr rec-has-db-pattern? + rec-shape-simple? (= rn rule-name) (= 1 (count scc-rule-names)) (= 2 (count head-vars)) diff --git a/src/datahike/query/lower.cljc b/src/datahike/query/lower.cljc index 454687d3..d896d930 100644 --- a/src/datahike/query/lower.cljc +++ b/src/datahike/query/lower.cljc @@ -15,11 +15,13 @@ consumed by execute.cljc." (:require [clojure.set] + [clojure.walk] [datahike.db.interface :as dbi] [datahike.index.interface :as di] [datahike.query.analyze :as analyze] [datahike.query.estimate :as estimate] [datahike.query.ir :as ir] + [datahike.query.logical :as logical] [datahike.query.plan :as plan])) #?(:clj (set!
*warn-on-reflection* true)) @@ -112,6 +114,385 @@ {:type (.-type p) :clause (.-clause p) :vars (.-vars p)}) +;; --------------------------------------------------------------------------- +;; Sub-plan factories (OR / NOT / rule expansion) +;; +;; These factories used to live in plan.cljc, but they need to call the +;; lowering pipeline recursively (for each OR branch, NOT body, or rule +;; branch the planner sees, we re-run build-logical-plan → lower so the +;; sub-plan gets the same logical recognition top-level queries get). +;; Putting them here keeps the dependency one-way (lower → plan, never +;; the reverse) and lets the sub-plan calls go straight to `lower` +;; without the `requiring-resolve` dance the previous layout required. + +(declare lower) + +(defn- plan-via-ir + "Lower a sub-plan via the full IR pipeline (`build-logical-plan → + lower`). The single point where the sub-plan factories below enter + the lowering pipeline. + + `bound-vars` may arrive as a Set (legacy interface) or a Map + (`{var → cardinality}` — the form lower threads through as `bvc`). + `build-logical-plan` wants a Set; lower itself accepts either." + ([db clauses bound-vars rules] + (plan-via-ir db clauses bound-vars rules nil)) + ([db clauses bound-vars rules guarded-rules] + (let [bound-set (cond (map? bound-vars) (set (keys bound-vars)) + (set? bound-vars) bound-vars + :else (set bound-vars)) + logical-plan (logical/build-logical-plan db (vec clauses) bound-set + rules guarded-rules)] + (lower logical-plan db rules)))) + +(defn- normalize-and-plan-branches + "Normalize branch clauses and create sub-plans for each branch. + Used by both OR and OR-JOIN planning. + + Branch forms: + - Single data pattern: [?e :attr ?v] → wrap as [[?e :attr ?v]] + - Single predicate: [(pred ?a ?b)] → wrap as [[(pred ?a ?b)]] + - Multiple clauses: [[?e :a ?v] [(> ?v 5)]] → use as-is + - AND compound: (and [?e :a ?v] ...) 
→ use as-is + + The key distinction: a multi-clause branch has a vector as its first element + (Datalog clauses are vectors). A single predicate like [(= ?a 1)] has a list + as its first element (the function call expression). + + Sub-plans are built via the full IR pipeline so each branch gets the + logical pass (LOptionalScan recognition for get-else, etc.) before + the physical lowering — same as top-level queries." + [db branches bound-vars rules] + (mapv (fn [branch] + (let [branch-clauses (if (and (sequential? branch) + (not (vector? (first branch)))) + [branch] + (vec branch))] + (plan-via-ir db branch-clauses bound-vars rules))) + branches)) + +(defn plan-or-op + "Plan an OR or OR-JOIN clause. When join-vars? is true, validates and + includes :join-vars in the op (OR-JOIN semantics)." + ([db clause-info bound-vars rules] + (plan-or-op db clause-info bound-vars rules false)) + ([db clause-info bound-vars rules join-vars?] + (let [join-vars (when join-vars? + (let [raw (:join-vars clause-info)] + (when (some sequential? raw) + (throw (ex-info (str "Insufficient bindings: " + (into #{} (mapcat analyze/extract-vars) raw) + " not bound in " (:clause clause-info)) + {:error :query/where :form (:clause clause-info)}))) + (set raw))) + sub-plans (normalize-and-plan-branches db (:branches clause-info) bound-vars rules) + total-est (reduce + 0 (keep (fn [p] (some :estimated-card (:ops p))) sub-plans))] + (cond-> {:op (if join-vars? :or-join :or) + :clause (:clause clause-info) + :branches sub-plans + :vars (:vars clause-info) + :estimated-card (max 1 total-est)} + join-vars? (assoc :join-vars join-vars))))) + +(defn plan-not-op + "Plan a NOT or NOT-JOIN clause. When join-vars? is true, scopes the + sub-plan to only the join-vars (NOT-JOIN semantics)." + ([db clause-info bound-vars rules] + (plan-not-op db clause-info bound-vars rules false)) + ([db clause-info bound-vars rules join-vars?] + (let [join-vars (when join-vars? 
(set (:join-vars clause-info))) + sub-plan (plan-via-ir db (:sub-clauses clause-info) + (if join-vars? join-vars bound-vars) rules)] + (cond-> {:op (if join-vars? :not-join :not) + :clause (:clause clause-info) + :sub-plan sub-plan + :vars (:vars clause-info) + :estimated-card nil} + join-vars? (assoc :join-vars join-vars))))) + +(defn- rename-branch-vars + "Rename variables in a rule branch body, substituting rule-args with call-args. + Constant call-args get synthetic variables with identity-binding preamble so they + are available to function clauses in the body. Internal vars get unique suffixes. + Returns vector of renamed clauses." + [branch call-args seqid db] + (let [[[_ & rule-args] & clauses] branch + ;; Replace constant call-args with synthetic variables + call-args-safe (map-indexed (fn [i arg] + (if (analyze/free-var? arg) + arg + (symbol (str "?__const__" i "__auto__" seqid)))) + call-args) + ;; Build identity-binding clauses for constant args (not equality predicates!) + ;; [(identity 42) ?__const__0__auto__1] creates a binding for the synthetic var. + const-bindings (into [] + (keep (fn [[safe orig]] + (when (not= safe orig) + [(list 'identity orig) safe]))) + (map vector call-args-safe call-args)) + replacements (zipmap rule-args call-args-safe) + ;; Resolve keyword attrs to entity refs in attribute-refs mode. + ;; + ;; The resolution must recurse through compound clause forms + ;; (or, or-join, and, not, not-join — source-prefixed forms look unhandled by resolve-recursive below; TODO confirm) to reach + ;; data patterns nested inside them. Without recursion, a rule + ;; body like `(or-join [...] [?e :attr ?v] ...)` would keep its + ;; inner pattern's attribute as a keyword while the outer + ;; query's clauses get resolved by substitute-consts-with-lookup-refs + ;; — at execute-time the lookup-batch-search path then slices + ;; AEVT for the keyword, finds zero datoms (datoms in + ;; :attribute-refs? mode are stored with attr=eid, not keyword), + ;; and the rule branch silently produces empty results.
Surface + ;; symptom in jobtech: 3 changelog tests on HistoricalDB return + ;; 0 rows instead of the expected change events. + attr-refs? (:attribute-refs? (dbi/-config db)) + data-pattern? (fn [x] + (and (vector? x) + (let [f (first x)] + (or (and (symbol? f) (analyze/free-var? f)) + (number? f) + (and (vector? f) (= 2 (count f))))) + (or (keyword? (second x)) + (and (symbol? (second x)) (analyze/free-var? (second x))) + (number? (second x))))) + resolve-attr-in-pattern (fn [pat] + (if (and attr-refs? (keyword? (second pat))) + (assoc pat 1 (dbi/-ref-for db (second pat))) + pat)) + resolve-recursive (fn resolve-recursive [form] + (cond + ;; Data pattern: resolve its attr. + (data-pattern? form) + (resolve-attr-in-pattern form) + + ;; Compound list form (or, or-join, and, not, not-join, etc.) + ;; — recurse into elements that look like clauses. + (and (sequential? form) + (symbol? (first form)) + (#{'or 'or-join 'and 'not 'not-join} (first form))) + (let [head (first form) + ;; or-join / not-join have a vars vector after the head + [pre-rest body] (case head + (or-join not-join) [(take 2 form) (drop 2 form)] + [(take 1 form) (rest form)])] + (concat pre-rest (map resolve-recursive body))) + + :else form)) + renamed (mapv (fn [c] + (resolve-recursive + (clojure.walk/postwalk + (fn [x] + (if (analyze/free-var? x) + (if (contains? replacements x) + (get replacements x) + (symbol (str (name x) "__auto__" seqid))) + x)) + c))) + clauses)] + ;; Put const-bindings first so synthetic vars are bound before body uses them + (into const-bindings renamed))) + +(defn plan-rule-op [db clause-info bound-vars rules scc-info] + (let [[rule-name & call-args] (:clause clause-info) + ;; Validate: non-var rule args must be scalars (not collections/maps) + _ (doseq [arg call-args] + (when (and (not (analyze/free-var? arg)) + (not (nil? arg)) + (or (and (vector? arg) + (not= 2 (count arg))) ;; allow lookup-refs [attr val] + (map? arg) + (set? 
arg))) + (throw (ex-info (str "Bad format for value in pattern, must be a scalar, nil or a vector of two elements. Got: " (pr-str arg)) + {:error :query/where + :form (:clause clause-info)})))) + branches (get rules rule-name)] + (if (not branches) + (plan/plan-passthrough-op clause-info) + (let [{:keys [scc recursive?]} (get scc-info rule-name)] + (if recursive? + ;; Recursive rule — pre-build branch plans with clause versions. + ;; IMPORTANT: We use the rule head vars (all free) for branch renaming, + ;; NOT the call-args (which may contain constants like 62). + ;; Constants are filtered AFTER the fixpoint completes. + ;; This ensures the recursive accumulator contains the full relation. + ;; + ;; For mutual recursion (SCC with multiple rules), we collect base/rec + ;; branches from ALL rules in the SCC. Each rule has its own accumulator. + (let [seqid (gensym "r") + scc-rule-names scc + is-scc-call? (fn [c] + (and (sequential? c) + (symbol? (first c)) + (contains? scc-rule-names (first c)))) + ;; For each SCC rule, extract head vars and build branch plans + scc-rule-plans + (into {} + (map (fn [rn] + (let [rn-branches (get rules rn) + head-vars (vec (rest (first (first rn-branches)))) + free-call-args (mapv (fn [hv] + (if (analyze/free-var? hv) + hv + (symbol (str "?" (name hv))))) + head-vars) + is-base? (fn [branch] + (let [[_head & body] branch] + (not (some is-scc-call? body)))) + base-bs (filterv is-base? rn-branches) + rec-bs (filterv (complement is-base?) rn-branches) + ;; Head vars are NOT pre-bound at the start of either + ;; base or recursive branch bodies: + ;; + ;; - Base branch: head-vars are produced by the body + ;; (a pattern binds them, a function computes them). + ;; - Recursive branch: head-vars are produced by the + ;; branch's rule-lookup op (the accumulator scan), + ;; which is itself a regular op the planner orders. + ;; Rule-lookup's outputs propagate through the + ;; bindedness tracker like any other producer. 
+ ;; + ;; Earlier code added head-vars to branch-bound with a + ;; conservative card-1 placeholder, intending it as a + ;; cardinality hint. Under the new op-required-vars + ;; contract, however, any entry in bound-vars is + ;; treated as runnability-bound — so :function ops + ;; that reference a head-var (e.g. `[(str ?id "/") + ;; ?path]`) would appear runnable from clause-zero, + ;; get cost-ordered AHEAD of the pattern that + ;; actually binds the var, and produce a relation + ;; missing the function's output binding. The + ;; downstream rel-dedup-into! then sees a head-var + ;; missing from :attrs and NPEs. + ;; + ;; Fix: keep branch-bound = the OUTER scope's bound + ;; vars only. Cardinality estimation for patterns + ;; that reference head-vars now correctly treats them + ;; as free (worst-case attribute-total estimate) + ;; until the body's own producer op binds them with + ;; a known card. + branch-bound bound-vars + ;; Rule branch bodies go through the + ;; shared IR-pipeline helper — same + ;; routing OR / NOT / AND sub-plans + ;; use, so kernel features that + ;; depend on logical recognition + ;; (most visibly `[(get-else $ ?e + ;; :attr default) ?v]` becoming an + ;; LOptionalScan that binds `?e`) + ;; apply uniformly inside rule + ;; bodies. + plan-branch (fn plan-branch + [branch-clauses guarded] + (plan-via-ir db branch-clauses branch-bound + rules guarded)) + base-ps (mapv (fn [b] + (let [renamed (rename-branch-vars b free-call-args seqid db)] + (plan-branch (vec renamed) nil))) + base-bs) + rec-cvs + (vec (mapcat + (fn [branch] + (let [renamed (rename-branch-vars branch free-call-args seqid db) + scc-indices (keep-indexed + (fn [i c] (when (is-scc-call? c) i)) + renamed)] + (map (fn [delta-idx] + (let [versioned + (vec (map-indexed + (fn [i c] + (if (is-scc-call? 
c) + (with-meta (vec c) + {:rule-lookup-mode + (if (= i delta-idx) :delta :main)}) + c)) + renamed))] + (plan-branch versioned scc-rule-names))) + scc-indices))) + rec-bs))] + [rn {:head-vars free-call-args + :base-plans base-ps + :rec-clause-versions rec-cvs}]))) + scc-rule-names) + ;; Note: an earlier `has-scanless-base?` guard nilled out + ;; `scc-rule-plans` whenever a base case lacked an + ;; `:entity-group` / `:pattern-scan` op (e.g. SQL + ;; `WITH RECURSIVE … (SELECT 1 …)` anchor lowering to + ;; `[(identity 1) ?n]`, or a `[(ground […]) [?v ...]]` + ;; collection seed), routing such rules to `legacy/solve-rule`. + ;; Legacy can't evaluate recursive bodies that bind head vars + ;; through `:function` ops and then filter them with predicates + ;; — those queries hung or failed. + ;; + ;; The fixpoint executor already handles function-only base + ;; cases: `execute-branch-plans` runs the base plan against + ;; an empty ctx, `legacy/bind-by-fn` produces a single-tuple + ;; Relation, and the recursive branch's `rule-lookup` ops feed + ;; off the accumulator as usual. Magic sets are silently + ;; skipped when `base-scan-attr` is nil (the + ;; `and magic-demand base-scan-attr …` check in + ;; `execute-recursive-rule`). The guard was redundant and + ;; introduced a real regression for scanless-base recursion. 
+ ;; Extract base scan attribute for magic set optimization + base-scan-attr + (when scc-rule-plans + (let [bp (first (:base-plans (get scc-rule-plans rule-name)))] + (when bp + (some (fn [op] + (case (:op op) + :entity-group (get (:clause (:scan-op op)) 1) + :pattern-scan (get (:clause op) 1) + nil)) + (:ops bp)))))] + {:op :recursive-rule + :clause (:clause clause-info) + :rule-name rule-name + :call-args (vec call-args) + :head-vars (when scc-rule-plans (:head-vars (get scc-rule-plans rule-name))) + :scc-rule-names scc-rule-names + :scc-rule-plans scc-rule-plans + :base-plans (when scc-rule-plans (:base-plans (get scc-rule-plans rule-name))) + :rec-clause-versions (when scc-rule-plans (:rec-clause-versions (get scc-rule-plans rule-name))) + :base-scan-attr base-scan-attr + :vars (:vars clause-info) + :estimated-card nil}) + ;; Non-recursive — expand to OR + (let [seqid (gensym "r") + expanded (for [branch branches] + (rename-branch-vars branch call-args seqid db)) + ;; Body's bound-vars = outer scope's bound-vars only. + ;; + ;; The const-safe-vars (synthetic vars wrapping non-var call-args) + ;; are produced by the [(identity X) safe-var] preamble that + ;; rename-branch-vars prepends to each branch body. They are + ;; NOT bound at the start of the body — they're bound by the + ;; identity op's execution. Pre-binding them in rule-bound + ;; would look like a cardinality hint (card 1, since identity + ;; produces a single value) but under the central op-required- + ;; vars contract, any entry in bound-vars is read as runnability + ;; -bound. A predicate `[(< ?safe 100)]` referencing a pre- + ;; bound safe-var would then appear runnable from clause-zero, + ;; cost-order ahead of the identity preamble, and run against + ;; an empty :rels at execute time. 
+ ;; + ;; The card-1 cardinality hint we want for downstream + ;; estimation reaches subsequent clauses naturally via + ;; bvc-eff threading: identity is the first clause processed + ;; during lowering (this was create-plan's reduce; TODO confirm `lower` threads bvc-eff the same way), its function-output-var-cards + ;; map (?safe → 1) is folded into bvc-eff by extend-bvc, and + ;; every later clause sees safe-var → 1 in its planning + ;; environment without it leaking into the runnability + ;; check's seed-bound set. + ;; Same IR-pipeline routing as the recursive branch above + ;; and as the OR / NOT sub-plan factories above (plan-or-op / plan-not-op). + sub-plans (mapv (fn [branch-clauses] + (plan-via-ir db branch-clauses bound-vars rules)) + expanded) + total-est (reduce + 0 (keep (fn [p] (some :estimated-card (:ops p))) sub-plans))] + {:op :or + :clause (:clause clause-info) + :branches sub-plans + :vars (:vars clause-info) + :estimated-card (max 1 total-est)})))))) ;; --------------------------------------------------------------------------- ;; Collect all scans from logical plan (including from entity joins) @@ -423,7 +804,7 @@ (instance? datahike.query.ir.LAntiJoin node) (let [ci (anti-join->classified node) join-vars? (= :not-join (.-type ^datahike.query.ir.LAntiJoin node)) - op (plan/plan-not-op db ci all-clause-vars rules join-vars?)] + op (plan-not-op db ci all-clause-vars rules join-vars?)] (if join-vars? (update acc :ops conj op) (update acc :not-ops conj op))) @@ -432,12 +813,12 @@ (instance? datahike.query.ir.LUnion node) (let [ci (union->classified node) join-vars? (= :or-join (.-type ^datahike.query.ir.LUnion node))] - (update acc :ops conj (plan/plan-or-op db ci bvc rules join-vars?))) + (update acc :ops conj (plan-or-op db ci bvc rules join-vars?))) - ;; LRuleCall → delegate to plan/plan-rule-op + ;; LRuleCall → delegate to plan-rule-op (instance?
datahike.query.ir.LRuleCall node) (let [ci (rule-call->classified node)] - (update acc :ops conj (plan/plan-rule-op db ci bvc rules scc-info))) + (update acc :ops conj (plan-rule-op db ci bvc rules scc-info))) ;; LRuleLookup → rule lookup op (instance? datahike.query.ir.LRuleLookup node) @@ -457,14 +838,14 @@ inner-ci (analyze/classify-clause inner-clause) inner-ci (assoc inner-ci :source source-sym)] (case (:type inner-ci) - :not (let [op (plan/plan-not-op db inner-ci all-clause-vars rules)] + :not (let [op (plan-not-op db inner-ci all-clause-vars rules)] (update acc :ops conj (assoc op :source source-sym))) - :not-join (let [op (plan/plan-not-op db inner-ci all-clause-vars rules true)] + :not-join (let [op (plan-not-op db inner-ci all-clause-vars rules true)] (update acc :ops conj (assoc op :source source-sym))) :or (update acc :ops conj - (assoc (plan/plan-or-op db inner-ci all-clause-vars rules) :source source-sym)) + (assoc (plan-or-op db inner-ci all-clause-vars rules) :source source-sym)) :or-join (update acc :ops conj - (assoc (plan/plan-or-op db inner-ci all-clause-vars rules true) :source source-sym)) + (assoc (plan-or-op db inner-ci all-clause-vars rules true) :source source-sym)) (update acc :ops conj (plan/plan-passthrough-op ci)))) ;; Regular passthrough (update acc :ops conj (plan/plan-passthrough-op ci)))) diff --git a/src/datahike/query/plan.cljc b/src/datahike/query/plan.cljc index 7bb98ab7..2fe913b4 100644 --- a/src/datahike/query/plan.cljc +++ b/src/datahike/query/plan.cljc @@ -25,8 +25,6 @@ #?(:clj (set! *warn-on-reflection* true)) -(declare create-plan) - ;; --------------------------------------------------------------------------- ;; Index selection @@ -1145,8 +1143,8 @@ ops like `:not` whose op-required-vars contract demands those vars bound will be marked unrunnable and surface as `Insufficient bindings` even though they're well-formed at execute time. 
Top- - level callers (create-plan's outer scope) pass `nil` / omit and - we treat the outer scope as empty." + level callers (lower's outer scope) pass `nil` / omit and we treat + the outer scope as empty." ([ops] (order-plan-ops ops nil)) ([ops outer-bound-vars] (let [;; LOptionalScan-derived pattern-scans (get-else with default) need @@ -1228,67 +1226,6 @@ ;; --------------------------------------------------------------------------- ;; OR / NOT / Rule plan ops -(defn- normalize-and-plan-branches - "Normalize branch clauses and create sub-plans for each branch. - Used by both OR and OR-JOIN planning. - - Branch forms: - - Single data pattern: [?e :attr ?v] → wrap as [[?e :attr ?v]] - - Single predicate: [(pred ?a ?b)] → wrap as [[(pred ?a ?b)]] - - Multiple clauses: [[?e :a ?v] [(> ?v 5)]] → use as-is - - AND compound: (and [?e :a ?v] ...) → use as-is - - The key distinction: a multi-clause branch has a vector as its first element - (Datalog clauses are vectors). A single predicate like [(= ?a 1)] has a list - as its first element (the function call expression)." - [db branches bound-vars rules] - (mapv (fn [branch] - (let [branch-clauses (if (and (sequential? branch) - (not (vector? (first branch)))) - [branch] - (vec branch))] - (create-plan db branch-clauses bound-vars rules))) - branches)) - -(defn plan-or-op - "Plan an OR or OR-JOIN clause. When join-vars? is true, validates and - includes :join-vars in the op (OR-JOIN semantics)." - ([db clause-info bound-vars rules] - (plan-or-op db clause-info bound-vars rules false)) - ([db clause-info bound-vars rules join-vars?] - (let [join-vars (when join-vars? - (let [raw (:join-vars clause-info)] - (when (some sequential? 
raw) - (throw (ex-info (str "Insufficient bindings: " - (into #{} (mapcat analyze/extract-vars) raw) - " not bound in " (:clause clause-info)) - {:error :query/where :form (:clause clause-info)}))) - (set raw))) - sub-plans (normalize-and-plan-branches db (:branches clause-info) bound-vars rules) - total-est (reduce + 0 (keep (fn [p] (some :estimated-card (:ops p))) sub-plans))] - (cond-> {:op (if join-vars? :or-join :or) - :clause (:clause clause-info) - :branches sub-plans - :vars (:vars clause-info) - :estimated-card (max 1 total-est)} - join-vars? (assoc :join-vars join-vars))))) - -(defn plan-not-op - "Plan a NOT or NOT-JOIN clause. When join-vars? is true, scopes the - sub-plan to only the join-vars (NOT-JOIN semantics)." - ([db clause-info bound-vars rules] - (plan-not-op db clause-info bound-vars rules false)) - ([db clause-info bound-vars rules join-vars?] - (let [join-vars (when join-vars? (set (:join-vars clause-info))) - sub-plan (create-plan db (vec (:sub-clauses clause-info)) - (if join-vars? join-vars bound-vars) rules)] - (cond-> {:op (if join-vars? :not-join :not) - :clause (:clause clause-info) - :sub-plan sub-plan - :vars (:vars clause-info) - :estimated-card nil} - join-vars? (assoc :join-vars join-vars))))) - ;; --------------------------------------------------------------------------- ;; Rule SCC detection and expansion @@ -1363,272 +1300,6 @@ (map (fn [r] [r {:scc scc :recursive? recursive?}]) scc)))) sccs))) -(defn- rename-branch-vars - "Rename variables in a rule branch body, substituting rule-args with call-args. - Constant call-args get synthetic variables with identity-binding preamble so they - are available to function clauses in the body. Internal vars get unique suffixes. - Returns vector of renamed clauses." - [branch call-args seqid db] - (let [[[_ & rule-args] & clauses] branch - ;; Replace constant call-args with synthetic variables - call-args-safe (map-indexed (fn [i arg] - (if (analyze/free-var? 
arg) - arg - (symbol (str "?__const__" i "__auto__" seqid)))) - call-args) - ;; Build identity-binding clauses for constant args (not equality predicates!) - ;; [(identity 42) ?__const__0__auto__1] creates a binding for the synthetic var. - const-bindings (into [] - (keep (fn [[safe orig]] - (when (not= safe orig) - [(list 'identity orig) safe]))) - (map vector call-args-safe call-args)) - replacements (zipmap rule-args call-args-safe) - ;; Resolve keyword attrs to entity refs in attribute-refs mode. - ;; - ;; The resolution must recurse through compound clause forms - ;; (or, or-join, and, not, not-join, source-prefixed) to reach - ;; data patterns nested inside them. Without recursion, a rule - ;; body like `(or-join [...] [?e :attr ?v] ...)` would keep its - ;; inner pattern's attribute as a keyword while the outer - ;; query's clauses get resolved by substitute-consts-with-lookup-refs - ;; — at execute-time the lookup-batch-search path then slices - ;; AEVT for the keyword, finds zero datoms (datoms in - ;; :attribute-refs? mode are stored with attr=eid, not keyword), - ;; and the rule branch silently produces empty results. Surface - ;; symptom in jobtech: 3 changelog tests on HistoricalDB return - ;; 0 rows instead of the expected change events. - attr-refs? (:attribute-refs? (dbi/-config db)) - data-pattern? (fn [x] - (and (vector? x) - (let [f (first x)] - (or (and (symbol? f) (analyze/free-var? f)) - (number? f) - (and (vector? f) (= 2 (count f))))) - (or (keyword? (second x)) - (and (symbol? (second x)) (analyze/free-var? (second x))) - (number? (second x))))) - resolve-attr-in-pattern (fn [pat] - (if (and attr-refs? (keyword? (second pat))) - (assoc pat 1 (dbi/-ref-for db (second pat))) - pat)) - resolve-recursive (fn resolve-recursive [form] - (cond - ;; Data pattern: resolve its attr. - (data-pattern? form) - (resolve-attr-in-pattern form) - - ;; Compound list form (or, or-join, and, not, not-join, etc.) 
- ;; — recurse into elements that look like clauses. - (and (sequential? form) - (symbol? (first form)) - (#{'or 'or-join 'and 'not 'not-join} (first form))) - (let [head (first form) - ;; or-join / not-join have a vars vector after the head - [pre-rest body] (case head - (or-join not-join) [(take 2 form) (drop 2 form)] - [(take 1 form) (rest form)])] - (concat pre-rest (map resolve-recursive body))) - - :else form)) - renamed (mapv (fn [c] - (resolve-recursive - (clojure.walk/postwalk - (fn [x] - (if (analyze/free-var? x) - (if (contains? replacements x) - (get replacements x) - (symbol (str (name x) "__auto__" seqid))) - x)) - c))) - clauses)] - ;; Put const-bindings first so synthetic vars are bound before body uses them - (into const-bindings renamed))) - -(defn plan-rule-op [db clause-info bound-vars rules scc-info] - (let [[rule-name & call-args] (:clause clause-info) - ;; Validate: non-var rule args must be scalars (not collections/maps) - _ (doseq [arg call-args] - (when (and (not (analyze/free-var? arg)) - (not (nil? arg)) - (or (and (vector? arg) - (not= 2 (count arg))) ;; allow lookup-refs [attr val] - (map? arg) - (set? arg))) - (throw (ex-info (str "Bad format for value in pattern, must be a scalar, nil or a vector of two elements. Got: " (pr-str arg)) - {:error :query/where - :form (:clause clause-info)})))) - branches (get rules rule-name)] - (if (not branches) - (plan-passthrough-op clause-info) - (let [{:keys [scc recursive?]} (get scc-info rule-name)] - (if recursive? - ;; Recursive rule — pre-build branch plans with clause versions. - ;; IMPORTANT: We use the rule head vars (all free) for branch renaming, - ;; NOT the call-args (which may contain constants like 62). - ;; Constants are filtered AFTER the fixpoint completes. - ;; This ensures the recursive accumulator contains the full relation. - ;; - ;; For mutual recursion (SCC with multiple rules), we collect base/rec - ;; branches from ALL rules in the SCC. Each rule has its own accumulator. 
- (let [seqid (gensym "r") - scc-rule-names scc - is-scc-call? (fn [c] - (and (sequential? c) - (symbol? (first c)) - (contains? scc-rule-names (first c)))) - ;; For each SCC rule, extract head vars and build branch plans - scc-rule-plans - (into {} - (map (fn [rn] - (let [rn-branches (get rules rn) - head-vars (vec (rest (first (first rn-branches)))) - free-call-args (mapv (fn [hv] - (if (analyze/free-var? hv) - hv - (symbol (str "?" (name hv))))) - head-vars) - is-base? (fn [branch] - (let [[_head & body] branch] - (not (some is-scc-call? body)))) - base-bs (filterv is-base? rn-branches) - rec-bs (filterv (complement is-base?) rn-branches) - ;; Head vars are NOT pre-bound at the start of either - ;; base or recursive branch bodies: - ;; - ;; - Base branch: head-vars are produced by the body - ;; (a pattern binds them, a function computes them). - ;; - Recursive branch: head-vars are produced by the - ;; branch's rule-lookup op (the accumulator scan), - ;; which is itself a regular op the planner orders. - ;; Rule-lookup's outputs propagate through the - ;; bindedness tracker like any other producer. - ;; - ;; Earlier code added head-vars to branch-bound with a - ;; conservative card-1 placeholder, intending it as a - ;; cardinality hint. Under the new op-required-vars - ;; contract, however, any entry in bound-vars is - ;; treated as runnability-bound — so :function ops - ;; that reference a head-var (e.g. `[(str ?id "/") - ;; ?path]`) would appear runnable from clause-zero, - ;; get cost-ordered AHEAD of the pattern that - ;; actually binds the var, and produce a relation - ;; missing the function's output binding. The - ;; downstream rel-dedup-into! then sees a head-var - ;; missing from :attrs and NPEs. - ;; - ;; Fix: keep branch-bound = the OUTER scope's bound - ;; vars only. 
Cardinality estimation for patterns - ;; that reference head-vars now correctly treats them - ;; as free (worst-case attribute-total estimate) - ;; until the body's own producer op binds them with - ;; a known card. - branch-bound bound-vars - base-ps (mapv (fn [b] - (let [renamed (rename-branch-vars b free-call-args seqid db)] - (create-plan db (vec renamed) branch-bound rules))) - base-bs) - rec-cvs - (vec (mapcat - (fn [branch] - (let [renamed (rename-branch-vars branch free-call-args seqid db) - scc-indices (keep-indexed - (fn [i c] (when (is-scc-call? c) i)) - renamed)] - (map (fn [delta-idx] - (let [versioned - (vec (map-indexed - (fn [i c] - (if (is-scc-call? c) - (with-meta (vec c) - {:rule-lookup-mode - (if (= i delta-idx) :delta :main)}) - c)) - renamed))] - (create-plan db versioned branch-bound rules scc-rule-names))) - scc-indices))) - rec-bs))] - [rn {:head-vars free-call-args - :base-plans base-ps - :rec-clause-versions rec-cvs}]))) - scc-rule-names) - ;; Note: an earlier `has-scanless-base?` guard nilled out - ;; `scc-rule-plans` whenever a base case lacked an - ;; `:entity-group` / `:pattern-scan` op (e.g. SQL - ;; `WITH RECURSIVE … (SELECT 1 …)` anchor lowering to - ;; `[(identity 1) ?n]`, or a `[(ground […]) [?v ...]]` - ;; collection seed), routing such rules to `legacy/solve-rule`. - ;; Legacy can't evaluate recursive bodies that bind head vars - ;; through `:function` ops and then filter them with predicates - ;; — those queries hung or failed. - ;; - ;; The fixpoint executor already handles function-only base - ;; cases: `execute-branch-plans` runs the base plan against - ;; an empty ctx, `legacy/bind-by-fn` produces a single-tuple - ;; Relation, and the recursive branch's `rule-lookup` ops feed - ;; off the accumulator as usual. Magic sets are silently - ;; skipped when `base-scan-attr` is nil (the - ;; `and magic-demand base-scan-attr …` check in - ;; `execute-recursive-rule`). 
The guard was redundant and - ;; introduced a real regression for scanless-base recursion. - ;; Extract base scan attribute for magic set optimization - base-scan-attr - (when scc-rule-plans - (let [bp (first (:base-plans (get scc-rule-plans rule-name)))] - (when bp - (some (fn [op] - (case (:op op) - :entity-group (get (:clause (:scan-op op)) 1) - :pattern-scan (get (:clause op) 1) - nil)) - (:ops bp)))))] - {:op :recursive-rule - :clause (:clause clause-info) - :rule-name rule-name - :call-args (vec call-args) - :head-vars (when scc-rule-plans (:head-vars (get scc-rule-plans rule-name))) - :scc-rule-names scc-rule-names - :scc-rule-plans scc-rule-plans - :base-plans (when scc-rule-plans (:base-plans (get scc-rule-plans rule-name))) - :rec-clause-versions (when scc-rule-plans (:rec-clause-versions (get scc-rule-plans rule-name))) - :base-scan-attr base-scan-attr - :vars (:vars clause-info) - :estimated-card nil}) - ;; Non-recursive — expand to OR - (let [seqid (gensym "r") - expanded (for [branch branches] - (rename-branch-vars branch call-args seqid db)) - ;; Body's bound-vars = outer scope's bound-vars only. - ;; - ;; The const-safe-vars (synthetic vars wrapping non-var call-args) - ;; are produced by the [(identity X) safe-var] preamble that - ;; rename-branch-vars prepends to each branch body. They are - ;; NOT bound at the start of the body — they're bound by the - ;; identity op's execution. Pre-binding them in rule-bound - ;; would look like a cardinality hint (card 1, since identity - ;; produces a single value) but under the central op-required- - ;; vars contract, any entry in bound-vars is read as runnability - ;; -bound. A predicate `[(< ?safe 100)]` referencing a pre- - ;; bound safe-var would then appear runnable from clause-zero, - ;; cost-order ahead of the identity preamble, and run against - ;; an empty :rels at execute time. 
- ;; - ;; The card-1 cardinality hint we want for downstream - ;; estimation reaches subsequent clauses naturally via - ;; bvc-eff threading: identity is the first clause processed - ;; in create-plan's reduce, its function-output-var-cards - ;; map (?safe → 1) is folded into bvc-eff by extend-bvc, and - ;; every later clause sees safe-var → 1 in its planning - ;; environment without it leaking into the runnability - ;; check's seed-bound set. - sub-plans (mapv #(create-plan db (vec %) bound-vars rules) expanded) - total-est (reduce + 0 (keep (fn [p] (some :estimated-card (:ops p))) sub-plans))] - {:op :or - :clause (:clause clause-info) - :branches sub-plans - :vars (:vars clause-info) - :estimated-card (max 1 total-est)})))))) - (defn plan-rule-lookup-op "Create a plan op for a rule-call inside a recursive branch. The :mode is :delta or :main, indicating which accumulator to read from." @@ -1645,286 +1316,6 @@ ;; --------------------------------------------------------------------------- ;; Public API -(defn create-plan - "Create an optimized query plan from where clauses. - Steps: - 1. Classify all clauses - 2. Detect pushable predicates - 3. Build pattern ops, fold into entity-groups with anti-merge NOT - 4. Order groups + remaining ops by estimated cardinality - 5. Detect inter-group value joins (probe-vars) - - The optional `guarded-rules` param is a set of rule names that are in the - current SCC — calls to these become :rule-lookup ops instead of recursive expansion." - ([db where-clauses bound-vars] - (create-plan db where-clauses bound-vars nil)) - ([db where-clauses bound-vars rules] - (create-plan db where-clauses bound-vars rules nil)) - ([db where-clauses bound-vars rules guarded-rules] - (let [;; Step 1: Classify - classified (mapv analyze/classify-clause where-clauses) - - ;; Step 1b: Pre-collect all pattern vars from the full clause tree. - ;; This includes vars from patterns inside NOT/OR/source-prefix. 
- ;; Used as enriched bound-vars when building sub-plans, so nested - ;; NOT/OR know what vars will be available from the outer context. - ;; - ;; bound-vars may be a Map (var → card) under the bound-var-cards - ;; model or a Set (legacy interface). When a Map: preserve the - ;; outer entries verbatim (so cards reach inner pattern estimation - ;; via plan-or-op / plan-rule-op / plan-not-op → create-plan), and - ;; mark newly-extracted in-scope vars with a non-numeric sentinel. - ;; estimate-pattern-with-bindings's card-of returns nil for - ;; non-numbers, so sentinel entries are ignored for cardinality - ;; estimation while still passing `contains?` membership checks - ;; needed for join-var validation and rule-call detection. - all-clause-vars (let [extracted (into #{} (mapcat analyze/extract-vars) where-clauses)] - (cond - (map? bound-vars) - (into bound-vars - (map (fn [v] [v ::in-scope])) - (remove (set (keys bound-vars)) extracted)) - - (set? bound-vars) - (into bound-vars extracted) - - :else - (into (set bound-vars) extracted))) - - ;; Pre-compute SCC info for rules (once per plan, not per rule-op) - scc-info (when rules (compute-rule-sccs rules)) - - ;; Step 2: Detect pushdown candidates - {:keys [pushdowns consumed]} (analyze/detect-pushdown classified bound-vars) - - ;; Step 3: Build raw ops — track consumed pushdown preds. - ;; - ;; bvc-eff threads cardinality propagation through the clause stream - ;; in classification order: each op's :output-var-cards extends the - ;; effective bound-var-cards for SUBSEQUENT ops in the same plan - ;; scope. Critical for queries where a function or pattern - ;; introduces a tightly-bounded var that a later pattern can use as - ;; a probe constraint — e.g. the `(ground inv-map) [[?type ?reverse-type]]` - ;; preamble inside the -reverse-edge rule body, which binds - ;; ?reverse-type to a card-N collection that the next pattern - ;; `[?r :type ?reverse-type]` should use as scan-card. 
Without - ;; this thread, every pattern in the body sees only outer - ;; bound-vars and falls back to base attribute estimates, - ;; producing wrong scan-vs-merge selection (the regression that - ;; made the GraphQL edge fetcher 40×+ slower than legacy). - init-bvc (cond (map? bound-vars) bound-vars - (set? bound-vars) (zipmap bound-vars (repeat ::in-scope)) - :else (zipmap (set bound-vars) (repeat ::in-scope))) - {:keys [ops actual-consumed not-ops]} - (reduce - (fn [acc ci] - ;; Check for rule calls (classified as :pattern but first elem is rule name) - (let [bvc-eff (:bvc-eff acc) - is-rule-call? (and rules - (= :pattern (:type ci)) - (symbol? (:e ci)) - (not (analyze/free-var? (:e ci))) - (contains? rules (:e ci))) - ;; Check for metadata-tagged rule-lookup (from clause version generation) - lookup-mode (when is-rule-call? - (:rule-lookup-mode (meta (:clause ci)))) - ;; Extend an acc's :bvc-eff with op's :output-var-cards - ;; (numeric entries only — sentinels untouched). Returns acc'. - ;; MIN on collisions keeps the tightest known bound. - ;; - ;; Restricted to FUNCTION ops only: pattern-scan ops also expose - ;; :output-var-cards, but propagating them eagerly would mark - ;; sibling pattern entity-vars (e.g. ?r in `[?r :concept-1 ?c]` - ;; followed by `[?r :type ?t]`) as upstream-bound when planning - ;; the next pattern. dp-order-fuse-ops fuses these into a - ;; single entity-group at execution time — the scan PRODUCES - ;; ?r, it doesn't consume it — so estimating later patterns - ;; with ?r bound yields the both-bound branch min(e,v,base) - ;; which underestimates by ~base/distinct-v×. Function ops - ;; (identity / ground) genuinely bind their output vars - ;; before subsequent ops execute, so threading those is safe. - extend-bvc (fn [acc' op] - (if (and (map? op) (= :function (:op op))) - (if-let [out (:output-var-cards op)] - (assoc acc' :bvc-eff - (reduce-kv - (fn [m v c] - (let [cur (get m v)] - (cond - (and (number? cur) (number? 
c)) (assoc m v (long (min cur c))) - (number? c) (assoc m v c) - :else m))) - (:bvc-eff acc') out)) - acc') - acc'))] - (if (and is-rule-call? guarded-rules - (contains? guarded-rules (:e ci))) - ;; Inside a recursive branch plan — emit :rule-lookup - (let [rule-ci (assoc ci :type :rule-call - :vars (into #{} (filter analyze/free-var?) (rest (:clause ci)))) - mode (or lookup-mode :main)] - (update acc :ops conj (plan-rule-lookup-op rule-ci mode))) - (if is-rule-call? - (let [rule-ci (assoc ci :type :rule-call - :vars (into #{} (filter analyze/free-var?) (rest (:clause ci)))) - op (plan-rule-op db rule-ci bvc-eff rules scc-info)] - (-> acc (update :ops conj op) (extend-bvc op))) - - (case (:type ci) - :pattern - (let [schema-info (analyze/pattern-schema-info db ci) - preds (get pushdowns (:clause ci) []) - [op consumed-preds] (plan-pattern-op db ci schema-info preds bvc-eff)] - (-> acc - (update :ops conj op) - (update :actual-consumed into (or consumed-preds #{})) - (extend-bvc op))) - - :function (let [op (plan-function-op ci db)] - (-> acc (update :ops conj op) (extend-bvc op))) - :predicate (update acc :ops conj [:maybe-pred ci]) - :or (let [op (plan-or-op db ci bvc-eff rules)] - (-> acc (update :ops conj op) (extend-bvc op))) - :or-join (let [op (plan-or-op db ci bvc-eff rules true)] - (-> acc (update :ops conj op) (extend-bvc op))) - ;; Collect NOT ops separately for anti-merge folding - :not (update acc :not-ops conj (plan-not-op db ci bvc-eff rules)) - :not-join (update acc :ops conj (plan-not-op db ci bvc-eff rules true)) - :and (let [sub-plan (create-plan db (vec (:sub-clauses ci)) bvc-eff rules)] - (update acc :ops into (:ops sub-plan))) - - ;; Source-prefixed clause: ($2 not ...) 
or [$2 ?e :attr ?v] - ;; Unwrap the source, re-classify inner clause, and tag with :source - :source-prefix - (let [source-sym (:source-sym ci) - inner (:inner-clause ci) - inner-ci (analyze/classify-clause inner) - inner-ci (assoc inner-ci :source source-sym)] - (case (:type inner-ci) - :pattern - (let [schema-info (analyze/pattern-schema-info db inner-ci) - [op _] (plan-pattern-op db inner-ci schema-info [] bvc-eff)] - (-> acc (update :ops conj (assoc op :source source-sym)) (extend-bvc op))) - :not - (let [op (plan-not-op db inner-ci bvc-eff rules)] - (update acc :ops conj (assoc op :source source-sym))) - :not-join - (let [op (plan-not-op db inner-ci bvc-eff rules true)] - (update acc :ops conj (assoc op :source source-sym))) - :or - (update acc :ops conj (assoc (plan-or-op db inner-ci bvc-eff rules) :source source-sym)) - :or-join - (update acc :ops conj (assoc (plan-or-op db inner-ci bvc-eff rules true) :source source-sym)) - ;; Source-prefix wrapping another source-prefix or unknown - (update acc :ops conj (plan-passthrough-op ci)))) - - (update acc :ops conj (plan-passthrough-op ci))))))) - {:ops [] :actual-consumed #{} :not-ops [] :bvc-eff init-bvc} - classified) - - ;; Collect pattern ops first for predicate sampling - raw-pattern-ops (filterv #(and (map? %) (= :pattern-scan (:op %))) ops) - - ;; Resolve deferred predicates (with sampling for selectivity) - resolved-ops (reduce - (fn [acc op] - (if (and (vector? op) (= :maybe-pred (first op))) - (let [ci (second op)] - (if (contains? 
actual-consumed (:clause ci)) - acc - (conj acc (plan-predicate-op ci db raw-pattern-ops)))) - (conj acc op))) - [] - ops) - - ;; Step 3b: Separate pattern ops from non-pattern ops - pattern-ops (filterv #(= :pattern-scan (:op %)) resolved-ops) - other-ops (filterv #(not= :pattern-scan (:op %)) resolved-ops) - - ;; Step 3c: Build entity groups (includes anti-merge NOT folding) - total-entities (estimate/estimate-total-entities db) - {:keys [entity-groups remaining-nots]} - (if (seq pattern-ops) - (build-entity-groups db pattern-ops not-ops total-entities) - {:entity-groups [] :remaining-nots not-ops}) - - ;; Step 3d: Restore predicates that were consumed as pushdowns but ended up - ;; on merge-ops (where pushdowns can't be applied — merges use EAVT lookupGE). - merge-lost-pred-clauses (into #{} (comp (filter #(= :entity-group (:op %))) - (mapcat :merge-lost-preds)) - entity-groups) - restored-preds (when (seq merge-lost-pred-clauses) - (let [ci-by-clause (into {} (map (fn [ci] [(:clause ci) ci])) classified)] - (mapv (fn [pred-clause] - (plan-predicate-op (ci-by-clause pred-clause) db raw-pattern-ops)) - merge-lost-pred-clauses))) - other-ops (if (seq restored-preds) - (into other-ops restored-preds) - other-ops) - ;; Strip internal :merge-lost-preds from entity-group ops - entity-groups (mapv #(dissoc % :merge-lost-preds) entity-groups) - - ;; Step 4: Order everything together. Seed the cost-based - ;; runnability check with the outer scope's bound-vars so - ;; nested plans (or-join branches, :not bodies, …) recognise - ;; vars bound by their ancestors. Without the seed, an - ;; or-join branch that wraps a `:not` referencing a shared-var - ;; bound from the outer scope is rejected with `Insufficient - ;; bindings` — the shared-var is bound at execute time but - ;; the local order-plan-ops doesn't know that. 
- all-ops (into (into (vec entity-groups) other-ops) remaining-nots) - ordered-ops (order-plan-ops all-ops bound-vars) - - ;; Step 5: Detect inter-group value joins - group-joins (detect-inter-group-joins - (filterv #(#{:entity-group :pattern-scan} (:op %)) ordered-ops)) - - ;; Step 6: Validate NOT/NOT-JOIN bindings post-ordering. - ;; Walk the ordered ops and accumulate bound vars. Each NOT must have - ;; at least one var bound by ops that PRECEDE it in the execution order. - ;; This correctly handles reordering: (not [?e ...]) [?e :name] gets - ;; reordered to [?e :name] (not [?e ...]), and ?e is bound before NOT. - _ (loop [remaining ordered-ops - ;; bound-vars may be a set (legacy) or a map (var → card). - ;; vars-so-far is purely a membership set for NOT validation, - ;; so normalise to a set up front. - vars-so-far (cond (map? bound-vars) (set (keys bound-vars)) - (set? bound-vars) bound-vars - :else (set bound-vars))] - (when (seq remaining) - (let [op (first remaining)] - (when (#{:not :not-join} (:op op)) - (let [not-vars (:vars op)] - (when (empty? (clojure.set/intersection not-vars vars-so-far)) - (throw (ex-info (str "Insufficient bindings: none of " not-vars - " is bound in " (:clause op)) - {:error :query/where - :form (:clause op)}))))) - (recur (rest remaining) - (into vars-so-far - (case (:op op) - :entity-group (into (:vars (:scan-op op)) - (mapcat :vars (:merge-ops op))) - :pattern-scan (:vars op) - ;; plan-function-op stores the result var(s) in - ;; :binding (scalar, tuple, list, or map). The - ;; legacy `:bind-vars` key is never set — - ;; reading it lost the result-var contribution - ;; and falsely tripped the NOT validation when - ;; a function-chain output was the only var - ;; reaching a NOT clause. Mirror lower.cljc's - ;; identical loop. 
- :function (analyze/extract-vars (:binding op)) - nil))))))] - - {:ops ordered-ops - :consumed-preds (if (seq merge-lost-pred-clauses) - (reduce disj actual-consumed merge-lost-pred-clauses) - actual-consumed) - :classified classified - :group-joins group-joins - :has-passthrough? (some #(= :passthrough (:op %)) ordered-ops)}))) - (defn replan "Adaptively re-plan remaining operations after observing actual cardinality." [plan executed-idx actual-card db] diff --git a/test/datahike/test/query_ir_test.clj b/test/datahike/test/query_ir_test.clj index 609ded3e..882bdab0 100644 --- a/test/datahike/test/query_ir_test.clj +++ b/test/datahike/test/query_ir_test.clj @@ -23,23 +23,6 @@ (let [logical (logical/build-logical-plan db clauses #{} nil)] (lower/lower logical db nil))) -(defn- plan-match? - "Check that IR pipeline and create-plan produce structurally equivalent plans. - The IR path may attach predicates to groups (optimization); normalize by - flattening attached-preds back to standalone ops for comparison." - [db clauses] - (let [old (plan/create-plan db clauses #{} nil) - new (ir-plan db clauses) - ;; Normalize: flatten attached-preds back to standalone ops - normalize-ops (fn [ops] - (mapv :op (into [] (mapcat (fn [op] - (if (seq (:attached-preds op)) - (cons op (:attached-preds op)) - [op]))) - ops)))] - (and (= (mapv :op (:ops old)) (normalize-ops (:ops new))) - (= (:group-joins old) (:group-joins new))))) - (defn- query-both "Run a query through both planner and legacy paths, return [planner legacy]." [query db] @@ -132,41 +115,26 @@ (is (every? #(instance? datahike.query.ir.LScan %) nodes)))) ;; --------------------------------------------------------------------------- -;; Lowering equivalence tests - -(deftest test-lower-matches-create-plan - (let [db (db/empty-db {:name {:db/index true} :age {} :email {} :flag {} :friend {}})] - (testing "Single pattern" - (is (plan-match? db '[[?e :name "Alice"]]))) - (testing "Entity join" - (is (plan-match? 
db '[[?e :name ?n] [?e :age ?a]]))) - (testing "3-way entity join" - (is (plan-match? db '[[?e :name ?n] [?e :age ?a] [?e :email ?em]]))) - (testing "Value join" - (is (plan-match? db '[[?e :name ?n] [?e2 :friend ?n]]))) - (testing "With predicate" - (is (plan-match? db '[[?e :age ?a] [(> ?a 18)]]))) - (testing "Pushdown predicate" - (is (plan-match? db '[[?e :name ?n] [(> ?n "M")]]))) - (testing "Anti-merge" - (is (plan-match? db '[[?e :name ?n] [?e :age ?a] (not [?e :flag :deleted])]))) - (testing "OR clause" - (is (plan-match? db '[[?e :name ?n] (or [?e :age 30] [?e :age 25])]))))) - -(deftest test-lower-deep-structural-equality - (testing "Entity group internals match exactly" +;; Lowering shape tests +;; +;; These tests previously cross-checked the IR pipeline against the legacy +;; physical-only `plan/create-plan` to guard the migration. Now that the +;; legacy path is gone (PR removing `create-plan`) and every sub-plan +;; routes through the IR pipeline, the comparison would be a tautology. +;; End-to-end correctness is covered by `test-end-to-end-queries` (below) +;; and by the broader suite (`query-rules-test`, `jobtech-patterns-test`, +;; the planner-vs-legacy diff harness in `assert-engines-agree`). 
+ +(deftest test-lowered-entity-group-shape + (testing "Lowered entity-group has the structural fields callers expect" (let [db (db/empty-db {:name {} :age {} :email {}}) clauses '[[?e :name ?n] [?e :age ?a] [?e :email ?em]] - old-eg (first (:ops (plan/create-plan db clauses #{} nil))) - new-eg (first (:ops (ir-plan db clauses)))] - (is (= (:clause (:scan-op old-eg)) (:clause (:scan-op new-eg)))) - (is (= (:index (:scan-op old-eg)) (:index (:scan-op new-eg)))) - (is (= (mapv :clause (:merge-ops old-eg)) (mapv :clause (:merge-ops new-eg)))) - (is (= (mapv :index (:merge-ops old-eg)) (mapv :index (:merge-ops new-eg)))) - (is (= (:output-vars old-eg) (:output-vars new-eg))) - (is (= old-eg new-eg) "Full entity-group equality (both paths now produce :pipeline)") - (is (some? (:pipeline new-eg)) "IR plan has :pipeline annotation") - (is (some? (:pipeline old-eg)) "Legacy plan has :pipeline annotation")))) + eg (first (:ops (ir-plan db clauses)))] + (is (= :entity-group (:op eg))) + (is (= '?e (:entity-var eg))) + (is (some? (:scan-op eg))) + (is (vector? (:merge-ops eg))) + (is (some? (:pipeline eg)))))) ;; --------------------------------------------------------------------------- ;; End-to-end query correctness tests @@ -217,11 +185,8 @@ test-db)) (deftest test-ir-pipeline-with-in-bindings - (let [[current ir] (query-both '[:find ?e :in $ ?name :where [?e :name ?name]] - test-db)] - ;; Can't use assert-ir-match directly since d/q needs extra args - ;; Test the plan equivalence instead - (is (plan-match? 
(db/empty-db {:name {}}) '[[?e :name ?name]])))) + (testing "IR plan builds cleanly for a `:in ?name` binding shape" + (is (seq (:ops (ir-plan (db/empty-db {:name {}}) '[[?e :name ?name]])))))) ;; --------------------------------------------------------------------------- ;; Pipeline annotation tests diff --git a/test/datahike/test/query_rules_test.cljc b/test/datahike/test/query_rules_test.cljc index 98f524a6..d59af538 100644 --- a/test/datahike/test/query_rules_test.cljc +++ b/test/datahike/test/query_rules_test.cljc @@ -160,6 +160,40 @@ [(>= ?a 18)]]]) #{["Oleg"]})))) +(deftest test-get-else-inside-rule-body + ;; Regression for plan-rule-op routing branch bodies through the + ;; logical IR pipeline. Top-level queries get `[(get-else $ ?e :a v) + ;; ?x]` promoted to an LOptionalScan that binds ?e via an attribute + ;; scan; previously rule body planning skipped that pass and ?e was + ;; left unbound, so the same body returned `#{[nil …]}` instead of + ;; actual matches. + (let [db (d/db-with (db/empty-db {:concept/id {:db/unique :db.unique/identity} + :concept/preferred {} + :concept/deprecated {}}) + [{:db/id 1 :concept/id "c1" :concept/preferred "Alice"} + {:db/id 2 :concept/id "c2" :concept/preferred "Bob" :concept/deprecated true} + {:db/id 3 :concept/id "c3" :concept/preferred "Carol"}])] + (testing "scalar get-else binds the entity through the optional scan" + (is (= (d/q '{:find [?id ?dep] + :in [$ %] + :where [(maybe-dep ?id ?dep)]} + db + '[[(maybe-dep ?id ?dep) + [(get-else $ ?e :concept/deprecated false) ?dep] + [?e :concept/id ?id]]]) + #{["c1" false] ["c2" true] ["c3" false]}))) + + (testing "predicate on get-else output inside a rule" + (is (= (d/q '{:find [?id] + :in [$ %] + :where [(active? ?id)]} + db + '[[(active? ?id) + [?e :concept/id ?id] + [(get-else $ ?e :concept/deprecated false) ?dep] + [(not ?dep)]]]) + #{["c1"] ["c3"]}))))) + ;; https://github.com/tonsky/datahike/issues/218 (deftest test-false-arguments