diff --git a/src/datahike/query.cljc b/src/datahike/query.cljc index 922b8d4b3..9fdbf878d 100644 --- a/src/datahike/query.cljc +++ b/src/datahike/query.cljc @@ -1227,9 +1227,22 @@ :rels (->> (:rels context) (keep #(limit-rel % vars))))) +(defn- ctx-bound-vars [context] + (set (concat (mapcat #(keys (:attrs %)) (:rels context)) + (keys (:consts context))))) + +(defn all-bound? + "True iff every var in `vars` is currently bound in `context`." + [context vars] + (set/subset? (set vars) (ctx-bound-vars context))) + +(defn some-bound? + "True iff at least one var in `vars` is currently bound in `context`." + [context vars] + (boolean (seq (set/intersection (set vars) (ctx-bound-vars context))))) + (defn check-all-bound [context vars form] - (let [bound (set (concat (mapcat #(keys (:attrs %)) (:rels context)) - (keys (:consts context))))] + (let [bound (ctx-bound-vars context)] (when-not (set/subset? vars bound) (let [missing (set/difference (set vars) bound)] (log/raise "Insufficient bindings: " missing " not bound in " form @@ -1238,12 +1251,10 @@ :vars missing}))))) (defn check-some-bound [context vars form] - (let [bound (set (concat (mapcat #(keys (:attrs %)) (:rels context)) - (keys (:consts context))))] - (when (empty? (set/intersection vars bound)) - (log/raise "Insufficient bindings: none of " vars " is bound in " form - {:error :query/where - :form form})))) + (when (empty? (set/intersection vars (ctx-bound-vars context))) + (log/raise "Insufficient bindings: none of " vars " is bound in " form + {:error :query/where + :form form}))) (defn resolve-context [context clauses] (dt/resolve-clauses resolve-clause context clauses)) @@ -1894,8 +1905,14 @@ ([context clause orig-clause] (condp looks-like? clause [[symbol? '*]] ;; predicate [(pred ?a ?b ?c)] - (do (check-all-bound context (identity (filter free-var? (first clause))) orig-clause) - (filter-by-pred context clause)) + ;; Defer if any input var isn't bound yet — the iterative resolver + ;; (datahike.tools/resolve-clauses) will retry once binders fire. + ;; If the var is never bound, the resolver raises "Cannot resolve any + ;; more clauses" with the full pending list, which is more useful + ;; than a misleading single-clause error from this site. + (let [vars (filter free-var? (first clause))] + (when (all-bound? context vars) + (filter-by-pred context clause))) [[symbol? '*] '_] ;; function [(fn ?a ?b) ?res] (bind-by-fn context clause) @@ -1918,8 +1935,8 @@ '[or-join [[*] *] *] ;; (or-join [[req-vars] vars] ...) (let [[_ [req-vars & vars] & branches] clause] - (check-all-bound context req-vars orig-clause) - (recur context (list* 'or-join (concat req-vars vars) branches) clause)) + (when (all-bound? context req-vars) + (recur context (list* 'or-join (concat req-vars vars) branches) clause))) '[or-join [*] *] ;; (or-join [vars] ...) ;; TODO required vars @@ -1953,34 +1970,34 @@ '[not *] ;; (not ...) (let [[_ & clauses] clause - negation-vars (collect-vars clauses) - _ (check-some-bound context negation-vars orig-clause) - join-rel (reduce hash-join (:rels context)) - negation-context (-> context - (assoc :rels [join-rel]) - (assoc :stats []) - (resolve-context clauses)) - negation-join-rel (reduce hash-join (:rels negation-context)) - negation (subtract-rel join-rel negation-join-rel)] - (cond-> (assoc context :rels [negation]) - (:stats context) (assoc :tmp-stats {:type :not - :branches (:stats negation-context)}))) + negation-vars (collect-vars clauses)] + (when (some-bound? context negation-vars) + (let [join-rel (reduce hash-join (:rels context)) + negation-context (-> context + (assoc :rels [join-rel]) + (assoc :stats []) + (resolve-context clauses)) + negation-join-rel (reduce hash-join (:rels negation-context)) + negation (subtract-rel join-rel negation-join-rel)] + (cond-> (assoc context :rels [negation]) + (:stats context) (assoc :tmp-stats {:type :not + :branches (:stats negation-context)}))))) '[not-join [*] *] ;; (not-join [vars] ...) - (let [[_ vars & clauses] clause - _ (check-all-bound context vars orig-clause) - join-rel (reduce hash-join (:rels context)) - negation-context (-> context - (assoc :rels [join-rel]) - (assoc :stats []) - (limit-context vars) - (resolve-context clauses) - (limit-context vars)) - negation-join-rel (reduce hash-join (:rels negation-context)) - negation (subtract-rel join-rel negation-join-rel)] - (cond-> (assoc context :rels [negation]) - (:stats context) (assoc :tmp-stats {:type :not - :branches (:stats negation-context)}))) + (let [[_ vars & clauses] clause] + (when (all-bound? context vars) + (let [join-rel (reduce hash-join (:rels context)) + negation-context (-> context + (assoc :rels [join-rel]) + (assoc :stats []) + (limit-context vars) + (resolve-context clauses) + (limit-context vars)) + negation-join-rel (reduce hash-join (:rels negation-context)) + negation (subtract-rel join-rel negation-join-rel)] + (cond-> (assoc context :rels [negation]) + (:stats context) (assoc :tmp-stats {:type :not + :branches (:stats negation-context)}))))) '[*] ;; pattern (let [source rel/*implicit-source* diff --git a/src/datahike/query/execute.cljc b/src/datahike/query/execute.cljc index 664c25d63..6cb99b688 100644 --- a/src/datahike/query/execute.cljc +++ b/src/datahike/query/execute.cljc @@ -246,9 +246,27 @@ d))))))))) (defn- adopt-vector - "Create a PersistentVector from an object array without copying." + "Create a PersistentVector from an object array. + + `clojure.lang.PersistentVector/adopt` is fast (zero-copy) but only + correct when `arr.length <= 32`. It constructs the vector with + `root = EMPTY_NODE` and the data in the tail, which is the + PersistentVector internal layout for short vectors. For arrays + longer than 32 the result is silently corrupt: `cnt > 32` but + `tailoff() = cnt-32 > 0`, and any `arrayFor(i)` for i < tailoff + walks `EMPTY_NODE.array` and NPEs on the first level. + + Real-world repro: SELECT against a 33+-column table from pgwire + (Odoo's res_partner has 34 columns). The corrupt row crashes at + the first `seq`/`nth`/`take` with `Cannot read field \"array\" + because \"node\" is null`. + + `LazilyPersistentVector/createOwning` does the right dispatch: + the cheap adopt for length ≤ 32, and `PersistentVector/create` + (transient-build, valid tree) for longer arrays. Still no copy + in the short path." [^objects arr] - #?(:clj (clojure.lang.PersistentVector/adopt arr) + #?(:clj (clojure.lang.LazilyPersistentVector/createOwning arr) :cljs (vec arr))) ;; --------------------------------------------------------------------------- diff --git a/src/datahike/query/lower.cljc b/src/datahike/query/lower.cljc index b8085ed2e..590220245 100644 --- a/src/datahike/query/lower.cljc +++ b/src/datahike/query/lower.cljc @@ -407,7 +407,24 @@ (filterv #(#{:entity-group :pattern-scan} (:op %)) ordered-ops)) ;; --------------------------------------------------------------- - ;; Step 7: NOT binding validation + ;; Step 7: NOT binding validation. + ;; Walks the ordered ops in execution order, tracking which vars + ;; are bound after each op runs. NOT/NOT-JOIN must have at least + ;; one of its vars bound by a prior op (legacy semantics). + ;; + ;; The per-op contribution-set must mirror what the executor + ;; will actually bind: + ;; - :entity-group → scan + merge vars + ;; - :pattern-scan → pattern vars + ;; - :function → the result-binding var (`:binding` from + ;; plan-function-op). Predicates produce no + ;; new bindings; or/or-join handle their own + ;; binding internally. + ;; Earlier this case used `(:bind-vars op)` which plan-function-op + ;; never sets — function ops looked like they bound nothing, so + ;; any subsequent NOT/predicate whose only required var came from + ;; a function chain (e.g. `format_type(...)` feeding NOT IN) was + ;; falsely rejected with "Insufficient bindings". _ (loop [remaining ordered-ops vars-so-far bound-vars] (when (seq remaining) @@ -425,7 +442,7 @@ :entity-group (into (:vars (:scan-op op)) (mapcat :vars (:merge-ops op))) :pattern-scan (:vars op) - :function (into #{} (filter analyze/free-var?) (:bind-vars op)) + :function (analyze/extract-vars (:binding op)) nil))))))] {:ops ordered-ops diff --git a/src/datahike/query/plan.cljc b/src/datahike/query/plan.cljc index 29ce5ac89..ee4ffdb14 100644 --- a/src/datahike/query/plan.cljc +++ b/src/datahike/query/plan.cljc @@ -1397,7 +1397,15 @@ :entity-group (into (:vars (:scan-op op)) (mapcat :vars (:merge-ops op))) :pattern-scan (:vars op) - :function (into #{} (filter analyze/free-var?) (:bind-vars op)) + ;; plan-function-op stores the result var(s) in + ;; :binding (scalar, tuple, list, or map). The + ;; legacy `:bind-vars` key is never set — + ;; reading it lost the result-var contribution + ;; and falsely tripped the NOT validation when + ;; a function-chain output was the only var + ;; reaching a NOT clause. Mirror lower.cljc's + ;; identical loop. + :function (analyze/extract-vars (:binding op)) nil))))))] {:ops ordered-ops diff --git a/src/datahike/query_stats.cljc b/src/datahike/query_stats.cljc index ecaec3442..2cd311e04 100644 --- a/src/datahike/query_stats.cljc +++ b/src/datahike/query_stats.cljc @@ -17,17 +17,22 @@ (:rels context))}) (defn update-ctx-with-stats - "update-fn must expect [context] as argument" + "update-fn must expect [context] as argument. + Returns nil when update-fn returns nil — that is the iterative + resolver's defer signal (datahike.tools/resolve-clauses re-queues + the clause for the next pass). Without the nil propagation, stats + collection would silently keep a half-built map and confuse retries." [context clause update-fn] (if (:stats context) - (let [{:keys [res t]} (dt/timed #(update-fn context)) - clause-stats (merge (get-stats res) - {:clause clause - :t t} - (:tmp-stats res))] - (-> res - (update :stats conj clause-stats) - (dissoc :tmp-stats))) + (let [{:keys [res t]} (dt/timed #(update-fn context))] + (when res + (let [clause-stats (merge (get-stats res) + {:clause clause + :t t} + (:tmp-stats res))] + (-> res + (update :stats conj clause-stats) + (dissoc :tmp-stats))))) (update-fn context))) (defn extend-stat diff --git a/test/datahike/test/attribute_refs/query_not_test.cljc b/test/datahike/test/attribute_refs/query_not_test.cljc index 387399c6a..5bdcf6f39 100644 --- a/test/datahike/test/attribute_refs/query_not_test.cljc +++ b/test/datahike/test/attribute_refs/query_not_test.cljc @@ -176,31 +176,19 @@ (shift-in #{[4 3] [3 3] [4 4]} [0 1] ref-e0))) (deftest test-insufficient-bindings - (if datahike.test.core-test/compiled-engine? - ;; Compiled engine reorders NOT after its bindings — reorderable cases are valid queries - (do - (testing "reorderable NOT — compiled engine handles correctly" - (is (set? (d/q '[:find ?e :where (not [?e :mname "Ivan"]) [?e :mname]] test-db)))) - (testing "NOT-JOIN with inner vars bound within body" - (is (set? (d/q '[:find ?e :where [?e :mname] - (not-join [?e] (not [1 :age ?a]) [?e :age ?a])] - test-db))))) - ;; Legacy engine requires bindings before NOT - (are [q msg] (thrown-with-msg? Throwable msg - (d/q (into '[:find ?e :where] q) - test-db)) - '[(not [?e :mname "Ivan"]) - [?e :mname]] - #"Insufficient bindings: none of #\{\?e\} is bound" - - '[[?e :mname] - (not-join [?e] - (not [1 :age ?a]) - [?e :age ?a])] - #"Insufficient bindings: none of #\{\?a\} is bound")) - - ;; Both engines: truly unbound vars must throw + ;; Both engines now accept NOT before its binder — see + ;; datahike.test.query-not-test for the rationale (legacy engine's + ;; iterative resolver defers and retries NOT/predicate clauses). + (testing "reorderable NOT — both engines handle correctly" + (is (set? (d/q '[:find ?e :where (not [?e :mname "Ivan"]) [?e :mname]] test-db)))) + (testing "NOT-JOIN with inner vars bound within body" + (is (set? (d/q '[:find ?e :where [?e :mname] + (not-join [?e] (not [1 :age ?a]) [?e :age ?a])] + test-db)))) + + ;; Truly unbound vars still error — message changes from + ;; "Insufficient bindings" to "Cannot resolve any more clauses". (testing "truly unbound vars throw" - (is (thrown-with-msg? Throwable #"Insufficient bindings" + (is (thrown-with-msg? Throwable #"Cannot resolve any more clauses|Insufficient bindings" (d/q '[:find ?e :where [?e :mname] (not [?a :mname "Ivan"])] test-db))))) diff --git a/test/datahike/test/attribute_refs/query_or_test.cljc b/test/datahike/test/attribute_refs/query_or_test.cljc index 6ff0318d5..2ef558427 100644 --- a/test/datahike/test/attribute_refs/query_or_test.cljc +++ b/test/datahike/test/attribute_refs/query_or_test.cljc @@ -130,7 +130,9 @@ [?e :age ?a])] test-db))) - (is (thrown-with-msg? Throwable #"Insufficient bindings: #\{\?e\} not bound" + ;; or-join required-vars now defers; if no clause binds them, the + ;; iterative resolver raises "Cannot resolve any more clauses". + (is (thrown-with-msg? Throwable #"Cannot resolve any more clauses|Insufficient bindings" (d/q '[:find ?e :where (or-join [[?e]] [?e :weight 40])] diff --git a/test/datahike/test/query_not_test.cljc b/test/datahike/test/query_not_test.cljc index 9d7671619..73a24d0ec 100644 --- a/test/datahike/test/query_not_test.cljc +++ b/test/datahike/test/query_not_test.cljc @@ -172,32 +172,74 @@ #{[4 3] [3 3] [4 4]})) (deftest test-insufficient-bindings - (if datahike.test.core-test/compiled-engine? - ;; Compiled engine reorders NOT after its bindings — these are valid queries - (do - (testing "reorderable NOT — compiled engine handles correctly" - (is (= #{[3] [4]} - (d/q '[:find ?e :where (not [?e :name "Ivan"]) [?e :name]] @test-db)))) - (testing "NOT-JOIN with inner vars bound within body" - (is (= #{[1] [3] [5]} - (d/q '[:find ?e :where [?e :name] - (not-join [?e] (not [1 :age ?a]) [?e :age ?a])] - @test-db))))) - ;; Legacy engine requires bindings before NOT - (are [q msg] (thrown-with-msg? Throwable msg - (d/q (into '[:find ?e :where] (quote q)) @test-db)) - [(not [?e :name "Ivan"]) - [?e :name]] - #"Insufficient bindings: none of #\{\?e\} is bound" - - [[?e :name] - (not-join [?e] - (not [1 :age ?a]) - [?e :age ?a])] - #"Insufficient bindings: none of #\{\?a\} is bound")) - - ;; Both engines: truly unbound vars must throw + ;; Both engines now accept NOT before its binder — the legacy engine's + ;; iterative resolver defers an unresolvable NOT and retries it after + ;; binders fire (datahike/tools.cljc:resolve-clauses), matching the + ;; compiled engine's plan-time topological reordering. Previously the + ;; legacy engine raised "Insufficient bindings" eagerly. + (testing "reorderable NOT — both engines handle correctly" + (is (= #{[3] [4]} + (d/q '[:find ?e :where (not [?e :name "Ivan"]) [?e :name]] @test-db)))) + (testing "NOT-JOIN with inner vars bound within body" + (is (= #{[1] [3] [5]} + (d/q '[:find ?e :where [?e :name] + (not-join [?e] (not [1 :age ?a]) [?e :age ?a])] + @test-db)))) + + ;; Truly unbound vars must still error — the iterative resolver gives + ;; up after a fixed-point pass with no progress, raising + ;; "Cannot resolve any more clauses" with the failed-clauses list. (testing "truly unbound vars throw" - (is (thrown-with-msg? Throwable #"Insufficient bindings" + (is (thrown-with-msg? Throwable #"Cannot resolve any more clauses|Insufficient bindings" (d/q '[:find ?e :where [?e :name] (not [?a :name "Ivan"])] @test-db))))) + +(deftest test-deferred-clause-binding + ;; Regression test for a planner gap: clauses that gate on bound vars + ;; (predicates, NOT, NOT-JOIN, OR-JOIN-with-required-vars) used to + ;; raise "Insufficient bindings" on the first pass instead of deferring + ;; like bind-by-fn does. When their inputs traced back to a deferred + ;; binder (e.g. a get-else whose entity var was itself bound by a + ;; later pattern), the eager raise masked perfectly resolvable queries. + ;; + ;; The user-facing repro lives in pgwire-datahike: a SQL `WHERE + ;; format_type(a.atttypid, a.atttypmod) NOT IN (…)` translated to: + ;; [?e :pg_attribute/db-row-exists true] ; row marker — last + ;; [(get-else $ ?e :…/atttypid :__null__) ?a] ; deferred until ?e bound + ;; [(get-else $ ?e :…/atttypmod :__null__) ?b] + ;; [(?fmt ?a ?b) ?v1] ; deferred until ?a ?b bound + ;; (not [(contains? #{…} ?v1)]) ; deferred until ?v1 bound + ;; Old planner: raised on the (not …) before the chain could fire. + ;; New: each clause defers, the resolver iterates, all clauses resolve. + (let [db (d/db-with (db/empty-db) + [{:db/id 1 :name "Ivan"} + {:db/id 2 :name "Oleg"} + {:db/id 3 :name "Ivan"}])] + (testing "predicate before its binder defers" + (is (= #{[1] [3]} + (d/q '[:find ?e + :where [(= ?n "Ivan")] + [?e :name ?n]] + db)))) + + (testing "NOT before its binder defers" + (is (= #{[2]} + (d/q '[:find ?e + :where (not [?e :name "Ivan"]) + [?e :name]] + db)))) + + (testing "fn-call deferred chain feeding (not [pred])" + ;; ?upper resolves only after both the binder and the deferred + ;; fn-call run; the NOT clause must wait through the cascade. + ;; Tests both engines: + ;; - Legacy: iterative resolver retries the deferred clauses. + ;; - New planner: relies on lower.cljc's NOT validation reading + ;; :function ops' :binding (was :bind-vars — wrong key, never set). + (is (= #{[2]} + (d/q '[:find ?e + :in $ ?up + :where (not [(contains? #{"IVAN"} ?upper)]) + [(?up ?n) ?upper] + [?e :name ?n]] + db (fn [^String s] (.toUpperCase s)))))))) diff --git a/test/datahike/test/query_or_test.cljc b/test/datahike/test/query_or_test.cljc index 0788366fd..c14483250 100644 --- a/test/datahike/test/query_or_test.cljc +++ b/test/datahike/test/query_or_test.cljc @@ -203,7 +203,11 @@ [?e :age ?a])] @test-db))) - (is (thrown-with-msg? Throwable #"Insufficient bindings: #\{\?e\} not bound" + ;; or-join with required vars defers if those vars aren't bound. + ;; When no other clause can bind them, the iterative resolver raises + ;; "Cannot resolve any more clauses" instead of the previous eager + ;; "Insufficient bindings" — same semantics, less misleading message. + (is (thrown-with-msg? Throwable #"Cannot resolve any more clauses|Insufficient bindings" (d/q '[:find ?e :where (or-join [[?e]] [?e :name "Ivan"])] diff --git a/test/datahike/test/query_test.cljc b/test/datahike/test/query_test.cljc index 400004c71..1eeabf1d3 100644 --- a/test/datahike/test/query_test.cljc +++ b/test/datahike/test/query_test.cljc @@ -397,19 +397,14 @@ [(= ?age 37)]]} :args [db]}) #{[2] [3]})) - (if core-test/compiled-engine? - ;; Compiled engine reorders predicate after its binding - (is (= #{[2] [3]} - (d/q {:query '{:find [?e] - :where [[(= ?age 37)] - [?e :age ?age]]} - :args [db]}))) - ;; Legacy engine requires correct ordering - (is (thrown-with-msg? Throwable #"Insufficient bindings: #\{\?age\} not bound" - (d/q {:query '{:find [?e] - :where [[(= ?age 37)] - [?e :age ?age]]} - :args [db]}))))))) + ;; Both engines handle predicate-before-binding now — the legacy + ;; engine's iterative resolver defers the predicate and retries + ;; after the pattern binds ?age, matching the compiled engine. + (is (= #{[2] [3]} + (d/q {:query '{:find [?e] + :where [[(= ?age 37)] + [?e :age ?age]]} + :args [db]})))))) (deftest test-zeros-in-pattern (let [cfg {:store {:backend :memory @@ -991,3 +986,34 @@ we query all (parent, child) pairs." (let [f (dq/basic-index-selector 5)] (is (= [10 7] ((f [1 3]) [9 10 4 7 1234]))) (is (= [7 10] ((f [3 1]) [9 10 4 7 1234]))))) + +(deftest test-find-arity-greater-than-32 + ;; Regression for the executor's row-materialization path: it used + ;; `clojure.lang.PersistentVector/adopt` to wrap the result Object[] + ;; without copying. That call is only correct for arrays of length + ;; ≤ 32 — for longer arrays it builds a corrupt PersistentVector + ;; (cnt > 32 with root = EMPTY_NODE), which silently NPEs on the + ;; first `seq`/`nth`/`take`. Exercised in production via Odoo's + ;; res_partner SELECT with 34 columns. + (let [n 35 + attrs (vec (for [i (range n)] + (keyword "t" (str "c" i)))) + db (-> (db/empty-db) + (d/db-with (vec (for [a attrs] + {:db/ident a :db/valueType :db.type/long :db/cardinality :db.cardinality/one}))) + (d/db-with [(into {} (map-indexed (fn [i a] [a i]) attrs))])) + q `{:find ~(vec (map #(symbol (str "?v" %)) (range n))) + :where ~(vec (map-indexed + (fn [i a] ['?e a (symbol (str "?v" i))]) + attrs))} + result (d/q q db) + row (first result)] + (is (= n (count row))) + (testing "row is a valid PersistentVector — seq/nth/take all succeed" + (is (= (vec (range n)) (vec (seq row)))) + (is (= 0 (nth row 0))) + (is (= 17 (nth row 17))) + (is (= 34 (nth row 34))) + (is (= (vec (range n)) (vec (take 100 row))))) + (testing "round-trip through (vec (take k row)) — the pgwire pattern" + (is (= (vec (range 30)) (vec (take 30 row)))))))