Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 55 additions & 38 deletions src/datahike/query.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -1227,9 +1227,22 @@
:rels (->> (:rels context)
(keep #(limit-rel % vars)))))

(defn- ctx-bound-vars [context]
(set (concat (mapcat #(keys (:attrs %)) (:rels context))
(keys (:consts context)))))

(defn all-bound?
"True iff every var in `vars` is currently bound in `context`."
[context vars]
(set/subset? (set vars) (ctx-bound-vars context)))

(defn some-bound?
"True iff at least one var in `vars` is currently bound in `context`."
[context vars]
(boolean (seq (set/intersection (set vars) (ctx-bound-vars context)))))

(defn check-all-bound [context vars form]
(let [bound (set (concat (mapcat #(keys (:attrs %)) (:rels context))
(keys (:consts context))))]
(let [bound (ctx-bound-vars context)]
(when-not (set/subset? vars bound)
(let [missing (set/difference (set vars) bound)]
(log/raise "Insufficient bindings: " missing " not bound in " form
Expand All @@ -1238,12 +1251,10 @@
:vars missing})))))

(defn check-some-bound [context vars form]
(let [bound (set (concat (mapcat #(keys (:attrs %)) (:rels context))
(keys (:consts context))))]
(when (empty? (set/intersection vars bound))
(log/raise "Insufficient bindings: none of " vars " is bound in " form
{:error :query/where
:form form}))))
(when (empty? (set/intersection vars (ctx-bound-vars context)))
(log/raise "Insufficient bindings: none of " vars " is bound in " form
{:error :query/where
:form form})))

(defn resolve-context [context clauses]
(dt/resolve-clauses resolve-clause context clauses))
Expand Down Expand Up @@ -1894,8 +1905,14 @@
([context clause orig-clause]
(condp looks-like? clause
[[symbol? '*]] ;; predicate [(pred ?a ?b ?c)]
(do (check-all-bound context (identity (filter free-var? (first clause))) orig-clause)
(filter-by-pred context clause))
;; Defer if any input var isn't bound yet — the iterative resolver
;; (datahike.tools/resolve-clauses) will retry once binders fire.
;; If the var is never bound, the resolver raises "Cannot resolve any
;; more clauses" with the full pending list, which is more useful
;; than a misleading single-clause error from this site.
(let [vars (filter free-var? (first clause))]
(when (all-bound? context vars)
(filter-by-pred context clause)))

[[symbol? '*] '_] ;; function [(fn ?a ?b) ?res]
(bind-by-fn context clause)
Expand All @@ -1918,8 +1935,8 @@

'[or-join [[*] *] *] ;; (or-join [[req-vars] vars] ...)
(let [[_ [req-vars & vars] & branches] clause]
(check-all-bound context req-vars orig-clause)
(recur context (list* 'or-join (concat req-vars vars) branches) clause))
(when (all-bound? context req-vars)
(recur context (list* 'or-join (concat req-vars vars) branches) clause)))

'[or-join [*] *] ;; (or-join [vars] ...)
;; TODO required vars
Expand Down Expand Up @@ -1953,34 +1970,34 @@

'[not *] ;; (not ...)
(let [[_ & clauses] clause
negation-vars (collect-vars clauses)
_ (check-some-bound context negation-vars orig-clause)
join-rel (reduce hash-join (:rels context))
negation-context (-> context
(assoc :rels [join-rel])
(assoc :stats [])
(resolve-context clauses))
negation-join-rel (reduce hash-join (:rels negation-context))
negation (subtract-rel join-rel negation-join-rel)]
(cond-> (assoc context :rels [negation])
(:stats context) (assoc :tmp-stats {:type :not
:branches (:stats negation-context)})))
negation-vars (collect-vars clauses)]
(when (some-bound? context negation-vars)
(let [join-rel (reduce hash-join (:rels context))
negation-context (-> context
(assoc :rels [join-rel])
(assoc :stats [])
(resolve-context clauses))
negation-join-rel (reduce hash-join (:rels negation-context))
negation (subtract-rel join-rel negation-join-rel)]
(cond-> (assoc context :rels [negation])
(:stats context) (assoc :tmp-stats {:type :not
:branches (:stats negation-context)})))))

'[not-join [*] *] ;; (not-join [vars] ...)
(let [[_ vars & clauses] clause
_ (check-all-bound context vars orig-clause)
join-rel (reduce hash-join (:rels context))
negation-context (-> context
(assoc :rels [join-rel])
(assoc :stats [])
(limit-context vars)
(resolve-context clauses)
(limit-context vars))
negation-join-rel (reduce hash-join (:rels negation-context))
negation (subtract-rel join-rel negation-join-rel)]
(cond-> (assoc context :rels [negation])
(:stats context) (assoc :tmp-stats {:type :not
:branches (:stats negation-context)})))
(let [[_ vars & clauses] clause]
(when (all-bound? context vars)
(let [join-rel (reduce hash-join (:rels context))
negation-context (-> context
(assoc :rels [join-rel])
(assoc :stats [])
(limit-context vars)
(resolve-context clauses)
(limit-context vars))
negation-join-rel (reduce hash-join (:rels negation-context))
negation (subtract-rel join-rel negation-join-rel)]
(cond-> (assoc context :rels [negation])
(:stats context) (assoc :tmp-stats {:type :not
:branches (:stats negation-context)})))))

'[*] ;; pattern
(let [source rel/*implicit-source*
Expand Down
22 changes: 20 additions & 2 deletions src/datahike/query/execute.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -246,9 +246,27 @@
d)))))))))

(defn- adopt-vector
"Create a PersistentVector from an object array without copying."
"Create a PersistentVector from an object array.

`clojure.lang.PersistentVector/adopt` is fast (zero-copy) but only
correct when `arr.length <= 32`. It constructs the vector with
`root = EMPTY_NODE` and the data in the tail, which is the
PersistentVector internal layout for short vectors. For arrays
longer than 32 the result is silently corrupt: `cnt > 32` but
`tailoff() = cnt-32 > 0`, and any `arrayFor(i)` for i < tailoff
walks `EMPTY_NODE.array` and NPEs on the first level.

Real-world repro: SELECT against a 33+-column table from pgwire
(Odoo's res_partner has 34 columns). The corrupt row crashes at
the first `seq`/`nth`/`take` with `Cannot read field \"array\"
because \"node\" is null`.

`LazilyPersistentVector/createOwning` does the right dispatch:
the cheap adopt for length ≤ 32, and `PersistentVector/create`
(transient-build, valid tree) for longer arrays. Still no copy
in the short path."
[^objects arr]
#?(:clj (clojure.lang.PersistentVector/adopt arr)
#?(:clj (clojure.lang.LazilyPersistentVector/createOwning arr)
:cljs (vec arr)))

;; ---------------------------------------------------------------------------
Expand Down
21 changes: 19 additions & 2 deletions src/datahike/query/lower.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,24 @@
(filterv #(#{:entity-group :pattern-scan} (:op %)) ordered-ops))

;; ---------------------------------------------------------------
;; Step 7: NOT binding validation
;; Step 7: NOT binding validation.
;; Walks the ordered ops in execution order, tracking which vars
;; are bound after each op runs. NOT/NOT-JOIN must have at least
;; one of its vars bound by a prior op (legacy semantics).
;;
;; The per-op contribution-set must mirror what the executor
;; will actually bind:
;; - :entity-group → scan + merge vars
;; - :pattern-scan → pattern vars
;; - :function → the result-binding var (`:binding` from
;; plan-function-op). Predicates produce no
;; new bindings; or/or-join handle their own
;; binding internally.
;; Earlier this case used `(:bind-vars op)` which plan-function-op
;; never sets — function ops looked like they bound nothing, so
;; any subsequent NOT/predicate whose only required var came from
;; a function chain (e.g. `format_type(...)` feeding NOT IN) was
;; falsely rejected with "Insufficient bindings".
_ (loop [remaining ordered-ops
vars-so-far bound-vars]
(when (seq remaining)
Expand All @@ -425,7 +442,7 @@
:entity-group (into (:vars (:scan-op op))
(mapcat :vars (:merge-ops op)))
:pattern-scan (:vars op)
:function (into #{} (filter analyze/free-var?) (:bind-vars op))
:function (analyze/extract-vars (:binding op))
nil))))))]

{:ops ordered-ops
Expand Down
10 changes: 9 additions & 1 deletion src/datahike/query/plan.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -1397,7 +1397,15 @@
:entity-group (into (:vars (:scan-op op))
(mapcat :vars (:merge-ops op)))
:pattern-scan (:vars op)
:function (into #{} (filter analyze/free-var?) (:bind-vars op))
;; plan-function-op stores the result var(s) in
;; :binding (scalar, tuple, list, or map). The
;; legacy `:bind-vars` key is never set —
;; reading it lost the result-var contribution
;; and falsely tripped the NOT validation when
;; a function-chain output was the only var
;; reaching a NOT clause. Mirror lower.cljc's
;; identical loop.
:function (analyze/extract-vars (:binding op))
nil))))))]

{:ops ordered-ops
Expand Down
23 changes: 14 additions & 9 deletions src/datahike/query_stats.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,22 @@
(:rels context))})

(defn update-ctx-with-stats
"update-fn must expect [context] as argument"
"update-fn must expect [context] as argument.
Returns nil when update-fn returns nil — that is the iterative
resolver's defer signal (datahike.tools/resolve-clauses re-queues
the clause for the next pass). Without the nil propagation, stats
collection would silently keep a half-built map and confuse retries."
[context clause update-fn]
(if (:stats context)
(let [{:keys [res t]} (dt/timed #(update-fn context))
clause-stats (merge (get-stats res)
{:clause clause
:t t}
(:tmp-stats res))]
(-> res
(update :stats conj clause-stats)
(dissoc :tmp-stats)))
(let [{:keys [res t]} (dt/timed #(update-fn context))]
(when res
(let [clause-stats (merge (get-stats res)
{:clause clause
:t t}
(:tmp-stats res))]
(-> res
(update :stats conj clause-stats)
(dissoc :tmp-stats)))))
(update-fn context)))

(defn extend-stat
Expand Down
38 changes: 13 additions & 25 deletions test/datahike/test/attribute_refs/query_not_test.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -176,31 +176,19 @@
(shift-in #{[4 3] [3 3] [4 4]} [0 1] ref-e0)))

(deftest test-insufficient-bindings
(if datahike.test.core-test/compiled-engine?
;; Compiled engine reorders NOT after its bindings — reorderable cases are valid queries
(do
(testing "reorderable NOT — compiled engine handles correctly"
(is (set? (d/q '[:find ?e :where (not [?e :mname "Ivan"]) [?e :mname]] test-db))))
(testing "NOT-JOIN with inner vars bound within body"
(is (set? (d/q '[:find ?e :where [?e :mname]
(not-join [?e] (not [1 :age ?a]) [?e :age ?a])]
test-db)))))
;; Legacy engine requires bindings before NOT
(are [q msg] (thrown-with-msg? Throwable msg
(d/q (into '[:find ?e :where] q)
test-db))
'[(not [?e :mname "Ivan"])
[?e :mname]]
#"Insufficient bindings: none of #\{\?e\} is bound"

'[[?e :mname]
(not-join [?e]
(not [1 :age ?a])
[?e :age ?a])]
#"Insufficient bindings: none of #\{\?a\} is bound"))

;; Both engines: truly unbound vars must throw
;; Both engines now accept NOT before its binder — see
;; datahike.test.query-not-test for the rationale (legacy engine's
;; iterative resolver defers and retries NOT/predicate clauses).
(testing "reorderable NOT — both engines handle correctly"
(is (set? (d/q '[:find ?e :where (not [?e :mname "Ivan"]) [?e :mname]] test-db))))
(testing "NOT-JOIN with inner vars bound within body"
(is (set? (d/q '[:find ?e :where [?e :mname]
(not-join [?e] (not [1 :age ?a]) [?e :age ?a])]
test-db))))

;; Truly unbound vars still error — message changes from
;; "Insufficient bindings" to "Cannot resolve any more clauses".
(testing "truly unbound vars throw"
(is (thrown-with-msg? Throwable #"Insufficient bindings"
(is (thrown-with-msg? Throwable #"Cannot resolve any more clauses|Insufficient bindings"
(d/q '[:find ?e :where [?e :mname] (not [?a :mname "Ivan"])]
test-db)))))
4 changes: 3 additions & 1 deletion test/datahike/test/attribute_refs/query_or_test.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,9 @@
[?e :age ?a])]
test-db)))

(is (thrown-with-msg? Throwable #"Insufficient bindings: #\{\?e\} not bound"
;; or-join required-vars now defers; if no clause binds them, the
;; iterative resolver raises "Cannot resolve any more clauses".
(is (thrown-with-msg? Throwable #"Cannot resolve any more clauses|Insufficient bindings"
(d/q '[:find ?e
:where (or-join [[?e]]
[?e :weight 40])]
Expand Down
Loading