From b58e7176820d42024a4c32ee34629e400d8c59bc Mon Sep 17 00:00:00 2001
From: James Coglan <james@neighbourhood.ie>
Date: Tue, 13 Jan 2026 14:28:28 +0000
Subject: [PATCH 1/9] feat: Make mango_selector:match/2 return a list of
 failures

Rather than returning a boolean to indicate just success or failure,
`mango_selector:match/2` now returns a list of "failures" describing the
ways in which the selector failed to match the input. If this list is
empty, the match was a success.
---
 src/mango/src/mango_selector.erl | 256 +++++++++++++++++--------------
 1 file changed, 144 insertions(+), 112 deletions(-)

diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl
index e53820c4a0..925fedfeed 100644
--- a/src/mango/src/mango_selector.erl
+++ b/src/mango/src/mango_selector.erl
@@ -23,6 +23,12 @@
 -include_lib("couch/include/couch_db.hrl").
 -include("mango.hrl").
 
+-record(failure, {
+    op,
+    type = mismatch,
+    params = []
+}).
+
 % Validate and normalize each operator. This translates
 % every selector operator into a consistent version that
 % we can then rely on for all other selector functions.
@@ -53,12 +59,19 @@ match(Selector, D) ->
     couch_stats:increment_counter([mango, evaluate_selector]),
     match_int(Selector, D).
 
+match_int(Selector, D) ->
+    case match_failures(Selector, D) of
+        [] -> true;
+        [_ | _] -> false;
+        Other -> Other
+    end.
+
 % An empty selector matches any value.
-match_int({[]}, _) ->
-    true;
-match_int(Selector, #doc{body = Body}) ->
+match_failures({[]}, _) ->
+    [];
+match_failures(Selector, #doc{body = Body}) ->
     match(Selector, Body, fun mango_json:cmp/2);
-match_int(Selector, {Props}) ->
+match_failures(Selector, {Props}) ->
     match(Selector, {Props}, fun mango_json:cmp/2).
 
 % Convert each operator into a normalized version as well
@@ -364,35 +377,45 @@ negate({[{Field, Cond}]}) ->
 % We need to treat an empty array as always true. This will be applied
 % for $or, $in, $all, $nin as well.
 match({[{<<"$and">>, []}]}, _, _) ->
-    true;
+    [];
 match({[{<<"$and">>, Args}]}, Value, Cmp) ->
-    Pred = fun(SubSel) -> match(SubSel, Value, Cmp) end,
-    lists:all(Pred, Args);
+    MatchSubSel = fun(SubSel) -> match(SubSel, Value, Cmp) end,
+    lists:flatmap(MatchSubSel, Args);
 match({[{<<"$or">>, []}]}, _, _) ->
-    true;
+    [];
 match({[{<<"$or">>, Args}]}, Value, Cmp) ->
-    Pred = fun(SubSel) -> match(SubSel, Value, Cmp) end,
-    lists:any(Pred, Args);
+    SubSelFailures = [match(A, Value, Cmp) || A <- Args],
+    case lists:member([], SubSelFailures) of
+        true -> [];
+        _ -> lists:flatten(SubSelFailures)
+    end;
+% TODO: producing good failure messages requires that normalize/1 fully removes
+% $not from the tree by pushing it to the leaves.
 match({[{<<"$not">>, Arg}]}, Value, Cmp) ->
-    not match(Arg, Value, Cmp);
-match({[{<<"$all">>, []}]}, _, _) ->
-    false;
+    case match(Arg, Value, Cmp) of
+        [] -> [#failure{op = 'not'}];
+        _ -> []
+    end;
 % All of the values in Args must exist in Values or
 % Values == hd(Args) if Args is a single element list
 % that contains a list.
+match({[{<<"$all">>, []}]}, _Values, _Cmp) ->
+    % { "$all": [] } is defined to eval to false, so return a failure
+    [#failure{op = all, params = [[]]}];
+match({[{<<"$all">>, [A]}]}, Values, _Cmp) when is_list(A), A == Values ->
+    [];
 match({[{<<"$all">>, Args}]}, Values, _Cmp) when is_list(Values) ->
-    Pred = fun(A) -> lists:member(A, Values) end,
-    HasArgs = lists:all(Pred, Args),
-    IsArgs =
-        case Args of
-            [A] when is_list(A) ->
-                A == Values;
-            _ ->
-                false
+    lists:flatmap(
+        fun(Arg) ->
+            case lists:member(Arg, Values) of
+                true -> [];
+                _ -> [#failure{op = all, params = [Arg]}]
+            end
         end,
-    HasArgs orelse IsArgs;
-match({[{<<"$all">>, _Args}]}, _Values, _Cmp) ->
-    false;
+        Args
+    );
+match({[{<<"$all">>, _}]}, Value, _Cmp) ->
+    [#failure{op = all, type = bad_value, params = [Value]}];
 %% This is for $elemMatch, $allMatch, and possibly $in because of our normalizer.
 %% A selector such as {"field_name": {"$elemMatch": {"$gte": 80, "$lt": 85}}}
 %% gets normalized to:
@@ -409,83 +432,48 @@ match({[{<<>>, Arg}]}, Values, Cmp) ->
     match(Arg, Values, Cmp);
 % Matches when any element in values matches the
 % sub-selector Arg.
+match({[{<<"$elemMatch">>, _Arg}]}, [], _Cmp) ->
+    [#failure{op = elemMatch, type = empty_list}];
 match({[{<<"$elemMatch">>, Arg}]}, Values, Cmp) when is_list(Values) ->
-    try
-        lists:foreach(
-            fun(V) ->
-                case match(Arg, V, Cmp) of
-                    true -> throw(matched);
-                    _ -> ok
-                end
-            end,
-            Values
-        ),
-        false
-    catch
-        throw:matched ->
-            true;
-        _:_ ->
-            false
+    ValueFailures = [match(Arg, V, Cmp) || V <- Values],
+    case lists:member([], ValueFailures) of
+        true -> [];
+        _ -> lists:flatten(ValueFailures)
     end;
-match({[{<<"$elemMatch">>, _Arg}]}, _Value, _Cmp) ->
-    false;
+match({[{<<"$elemMatch">>, _}]}, Value, _Cmp) ->
+    [#failure{op = elemMatch, type = bad_value, params = [Value]}];
 % Matches when all elements in values match the
 % sub-selector Arg.
 match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, Cmp) ->
-    try
-        lists:foreach(
-            fun(V) ->
-                case match(Arg, V, Cmp) of
-                    false -> throw(unmatched);
-                    _ -> ok
-                end
-            end,
-            Values
-        ),
-        true
-    catch
-        _:_ ->
-            false
-    end;
-match({[{<<"$allMatch">>, _Arg}]}, _Value, _Cmp) ->
-    false;
+    MatchValue = fun(Value) -> match(Arg, Value, Cmp) end,
+    lists:flatmap(MatchValue, Values);
+match({[{<<"$allMatch">>, _}]}, Value, _Cmp) ->
+    [#failure{op = allMatch, type = bad_value, params = [Value]}];
 % Matches when any key in the map value matches the
 % sub-selector Arg.
-match({[{<<"$keyMapMatch">>, Arg}]}, Value, Cmp) when is_tuple(Value) ->
-    try
-        lists:foreach(
-            fun(V) ->
-                case match(Arg, V, Cmp) of
-                    true -> throw(matched);
-                    _ -> ok
-                end
-            end,
-            [Key || {Key, _} <- element(1, Value)]
-        ),
-        false
-    catch
-        throw:matched ->
-            true;
-        _:_ ->
-            false
+match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, _Cmp) ->
+    [#failure{op = keyMapMatch, type = empty_list}];
+match({[{<<"$keyMapMatch">>, Arg}]}, {Value}, Cmp) when is_list(Value) ->
+    KeyFailures = [match(Arg, K, Cmp) || {K, _} <- Value],
+    case lists:member([], KeyFailures) of
+        true -> [];
+        _ -> lists:flatten(KeyFailures)
     end;
-match({[{<<"$keyMapMatch">>, _Arg}]}, _Value, _Cmp) ->
-    false;
+match({[{<<"$keyMapMatch">>, _}]}, Value, _Cmp) ->
+    [#failure{op = keyMapMatch, type = bad_value, params = [Value]}];
 % Our comparison operators are fairly straight forward
 match({[{<<"$lt">>, Arg}]}, Value, Cmp) ->
-    Cmp(Value, Arg) < 0;
+    compare(lt, Arg, Cmp(Value, Arg) < 0);
 match({[{<<"$lte">>, Arg}]}, Value, Cmp) ->
-    Cmp(Value, Arg) =< 0;
+    compare(lte, Arg, Cmp(Value, Arg) =< 0);
 match({[{<<"$eq">>, Arg}]}, Value, Cmp) ->
-    Cmp(Value, Arg) == 0;
+    compare(eq, Arg, Cmp(Value, Arg) == 0);
 match({[{<<"$ne">>, Arg}]}, Value, Cmp) ->
-    Cmp(Value, Arg) /= 0;
+    compare(ne, Arg, Cmp(Value, Arg) /= 0);
 match({[{<<"$gte">>, Arg}]}, Value, Cmp) ->
-    Cmp(Value, Arg) >= 0;
+    compare(gte, Arg, Cmp(Value, Arg) >= 0);
 match({[{<<"$gt">>, Arg}]}, Value, Cmp) ->
-    Cmp(Value, Arg) > 0;
-match({[{<<"$in">>, []}]}, _, _) ->
-    false;
+    compare(gt, Arg, Cmp(Value, Arg) > 0);
 match({[{<<"$in">>, Args}]}, Values, Cmp) when is_list(Values) ->
     Pred = fun(Arg) ->
         lists:foldl(
@@ -496,50 +484,88 @@ match({[{<<"$in">>, Args}]}, Values, Cmp) when is_list(Values) ->
             Values
         )
     end,
-    lists:any(Pred, Args);
+    case lists:any(Pred, Args) of
+        true -> [];
+        _ -> [#failure{op = in, params = [Args]}]
+    end;
 match({[{<<"$in">>, Args}]}, Value, Cmp) ->
     Pred = fun(Arg) -> Cmp(Value, Arg) == 0 end,
-    lists:any(Pred, Args);
-match({[{<<"$nin">>, []}]}, _, _) ->
-    true;
+    case lists:any(Pred, Args) of
+        true -> [];
+        _ -> [#failure{op = in, params = [Args]}]
+    end;
 match({[{<<"$nin">>, Args}]}, Values, Cmp) when is_list(Values) ->
-    not match({[{<<"$in">>, Args}]}, Values, Cmp);
+    Pred = fun(Arg) ->
+        lists:foldl(
+            fun(Value, Match) ->
+                (Cmp(Value, Arg) /= 0) and Match
+            end,
+            true,
+            Values
+        )
+    end,
+    case lists:all(Pred, Args) of
+        true -> [];
+        _ -> [#failure{op = nin, params = [Args]}]
+    end;
 match({[{<<"$nin">>, Args}]}, Value, Cmp) ->
     Pred = fun(Arg) -> Cmp(Value, Arg) /= 0 end,
-    lists:all(Pred, Args);
+    case lists:all(Pred, Args) of
+        true -> [];
+        _ -> [#failure{op = nin, params = [Args]}]
+    end;
 % This logic is a bit subtle. Basically, if value is
 % not undefined, then it exists.
 match({[{<<"$exists">>, ShouldExist}]}, Value, _Cmp) ->
-    Exists = Value /= undefined,
-    ShouldExist andalso Exists;
+    case {ShouldExist, Value} of
+        {true, undefined} -> [#failure{op = exists, params = [ShouldExist]}];
+        {true, _} -> [];
+        {false, undefined} -> [];
+        {false, _} -> [#failure{op = exists, params = [ShouldExist]}]
+    end;
 match({[{<<"$type">>, Arg}]}, Value, _Cmp) when is_binary(Arg) ->
-    Arg == mango_json:type(Value);
+    case mango_json:type(Value) of
+        Arg -> [];
+        _ -> [#failure{op = type, params = [Arg]}]
+    end;
 match({[{<<"$mod">>, [D, R]}]}, Value, _Cmp) when is_integer(Value) ->
-    Value rem D == R;
-match({[{<<"$mod">>, _}]}, _Value, _Cmp) ->
-    false;
+    case Value rem D of
+        R -> [];
+        _ -> [#failure{op = mod, params = [D, R]}]
+    end;
+match({[{<<"$mod">>, _}]}, Value, _Cmp) ->
+    [#failure{op = mod, type = bad_value, params = [Value]}];
 match({[{<<"$beginsWith">>, Prefix}]}, Value, _Cmp) when is_binary(Prefix), is_binary(Value) ->
-    string:prefix(Value, Prefix) /= nomatch;
+    case string:prefix(Value, Prefix) of
+        nomatch -> [#failure{op = beginsWith, params = [Prefix]}];
+        _ -> []
+    end;
 % When Value is not a string, do not match
-match({[{<<"$beginsWith">>, Prefix}]}, _, _Cmp) when is_binary(Prefix) ->
-    false;
+match({[{<<"$beginsWith">>, Prefix}]}, Value, _Cmp) when is_binary(Prefix) ->
+    [#failure{op = beginsWith, type = bad_value, params = [Value]}];
 match({[{<<"$regex">>, Regex}]}, Value, _Cmp) when is_binary(Value) ->
     try
-        match == re:run(Value, Regex, [{capture, none}])
+        case re:run(Value, Regex, [{capture, none}]) of
+            match -> [];
+            _ -> [#failure{op = regex, params = [Regex]}]
+        end
     catch
         _:_ ->
-            false
+            [#failure{op = regex, params = [Regex]}]
     end;
-match({[{<<"$regex">>, _}]}, _Value, _Cmp) ->
-    false;
+match({[{<<"$regex">>, _}]}, Value, _Cmp) ->
+    [#failure{op = regex, type = bad_value, params = [Value]}];
 match({[{<<"$size">>, Arg}]}, Values, _Cmp) when is_list(Values) ->
-    length(Values) == Arg;
-match({[{<<"$size">>, _}]}, _Value, _Cmp) ->
-    false;
+    case length(Values) of
+        Arg -> [];
+        _ -> [#failure{op = size, params = [Arg]}]
+    end;
+match({[{<<"$size">>, _}]}, Value, _Cmp) ->
+    [#failure{op = size, type = bad_value, params = [Value]}];
 % We don't have any choice but to believe that the text
 % index returned valid matches
 match({[{<<"$default">>, _}]}, _Value, _Cmp) ->
-    true;
+    [];
 % All other operators are internal assertion errors for
 % matching because we either should've removed them during
 % normalization or something else broke.
@@ -551,11 +577,11 @@ match({[{<<"$", _/binary>> = Op, _}]}, _, _) ->
 match({[{Field, Cond}]}, Value, Cmp) ->
     case mango_doc:get_field(Value, Field) of
         not_found when Cond == {[{<<"$exists">>, false}]} ->
-            true;
+            [];
         not_found ->
-            false;
+            [#failure{op = '$'}];
         bad_path ->
-            false;
+            [#failure{op = '$'}];
         SubValue when Field == <<"_id">> ->
             match(Cond, SubValue, fun mango_json:cmp_raw/2);
         SubValue ->
@@ -564,6 +590,12 @@ match({[{Field, Cond}]}, Value, Cmp) ->
 match({[_, _ | _] = _Props} = Sel, _Value, _Cmp) ->
     error({unnormalized_selector, Sel}).
 
+compare(Op, Arg, Cond) ->
+    case Cond of
+        true -> [];
+        _ -> [#failure{op = Op, params = [Arg]}]
+    end.
+
 % Returns true if Selector requires all
 % fields in RequiredFields to exist in any matching documents.
 

From 09b3ed5ae9353056b6ee857bbe1b254571cb704f Mon Sep 17 00:00:00 2001
From: James Coglan <james@neighbourhood.ie>
Date: Thu, 15 Jan 2026 15:13:30 +0000
Subject: [PATCH 2/9] chore: Replace `Cmp` argument to `mango_selector:match/3`
 with a record

We will need to pass other things around between `match` calls as well
the current `Cmp` function, so here we replace this argument with a
`#ctx` record that intially just contains a `cmp` field.
---
 src/mango/src/mango_selector.erl | 106 ++++++++++++++++---------------
 1 file changed, 55 insertions(+), 51 deletions(-)

diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl
index 925fedfeed..8582f093ec 100644
--- a/src/mango/src/mango_selector.erl
+++ b/src/mango/src/mango_selector.erl
@@ -23,6 +23,10 @@
 -include_lib("couch/include/couch_db.hrl").
 -include("mango.hrl").
 
+-record(ctx, {
+    cmp
+}).
+
 -record(failure, {
     op,
     type = mismatch,
@@ -70,9 +74,9 @@ match_int(Selector, D) ->
 match_failures({[]}, _) ->
     [];
 match_failures(Selector, #doc{body = Body}) ->
-    match(Selector, Body, fun mango_json:cmp/2);
+    match_failures(Selector, Body);
 match_failures(Selector, {Props}) ->
-    match(Selector, {Props}, fun mango_json:cmp/2).
+    match(Selector, {Props}, #ctx{cmp = fun mango_json:cmp/2}).
 
 % Convert each operator into a normalized version as well
 % as convert an implicit operators into their explicit
@@ -378,33 +382,33 @@ negate({[{Field, Cond}]}) ->
 % for $or, $in, $all, $nin as well.
 match({[{<<"$and">>, []}]}, _, _) ->
     [];
-match({[{<<"$and">>, Args}]}, Value, Cmp) ->
-    MatchSubSel = fun(SubSel) -> match(SubSel, Value, Cmp) end,
+match({[{<<"$and">>, Args}]}, Value, Ctx) ->
+    MatchSubSel = fun(SubSel) -> match(SubSel, Value, Ctx) end,
     lists:flatmap(MatchSubSel, Args);
 match({[{<<"$or">>, []}]}, _, _) ->
     [];
-match({[{<<"$or">>, Args}]}, Value, Cmp) ->
-    SubSelFailures = [match(A, Value, Cmp) || A <- Args],
-    case lists:member([], SubSelFailures) of
+match({[{<<"$or">>, Args}]}, Value, Ctx) ->
+    SubSelFailures = [match(A, Value, Ctx) || A <- Args],
+    case lists:any(fun(Res) -> Res == [] end, SubSelFailures) of
         true -> [];
         _ -> lists:flatten(SubSelFailures)
     end;
 % TODO: producing good failure messages requires that normalize/1 fully removes
 % $not from the tree by pushing it to the leaves.
-match({[{<<"$not">>, Arg}]}, Value, Cmp) ->
-    case match(Arg, Value, Cmp) of
+match({[{<<"$not">>, Arg}]}, Value, Ctx) ->
+    case match(Arg, Value, Ctx) of
         [] -> [#failure{op = 'not'}];
         _ -> []
     end;
 % All of the values in Args must exist in Values or
 % Values == hd(Args) if Args is a single element list
 % that contains a list.
-match({[{<<"$all">>, []}]}, _Values, _Cmp) ->
+match({[{<<"$all">>, []}]}, _Values, _Ctx) ->
     % { "$all": [] } is defined to eval to false, so return a failure
     [#failure{op = all, params = [[]]}];
-match({[{<<"$all">>, [A]}]}, Values, _Cmp) when is_list(A), A == Values ->
+match({[{<<"$all">>, [A]}]}, Values, _Ctx) when is_list(A), A == Values ->
     [];
-match({[{<<"$all">>, Args}]}, Values, _Cmp) when is_list(Values) ->
+match({[{<<"$all">>, Args}]}, Values, _Ctx) when is_list(Values) ->
     lists:flatmap(
         fun(Arg) ->
             case lists:member(Arg, Values) of
@@ -414,7 +418,7 @@ match({[{<<"$all">>, Args}]}, Values, _Cmp) when is_list(Values) ->
         end,
         Args
     );
-match({[{<<"$all">>, _}]}, Value, _Cmp) ->
+match({[{<<"$all">>, _}]}, Value, _Ctx) ->
     [#failure{op = all, type = bad_value, params = [Value]}];
 %% This is for $elemMatch, $allMatch, and possibly $in because of our normalizer.
 %% A selector such as {"field_name": {"$elemMatch": {"$gte": 80, "$lt": 85}}}
@@ -428,53 +432,53 @@ match({[{<<"$all">>, _}]}, Value, _Cmp) ->
 %%     }]}
 %% }]}.
 %% So we filter out the <<>>.
-match({[{<<>>, Arg}]}, Values, Cmp) ->
-    match(Arg, Values, Cmp);
+match({[{<<>>, Arg}]}, Values, Ctx) ->
+    match(Arg, Values, Ctx);
 % Matches when any element in values matches the
 % sub-selector Arg.
-match({[{<<"$elemMatch">>, _Arg}]}, [], _Cmp) ->
+match({[{<<"$elemMatch">>, _Arg}]}, [], _Ctx) ->
     [#failure{op = elemMatch, type = empty_list}];
-match({[{<<"$elemMatch">>, Arg}]}, Values, Cmp) when is_list(Values) ->
-    ValueFailures = [match(Arg, V, Cmp) || V <- Values],
+match({[{<<"$elemMatch">>, Arg}]}, Values, Ctx) when is_list(Values) ->
+    ValueFailures = [match(Arg, V, Ctx) || V <- Values],
     case lists:member([], ValueFailures) of
         true -> [];
         _ -> lists:flatten(ValueFailures)
     end;
-match({[{<<"$elemMatch">>, _}]}, Value, _Cmp) ->
+match({[{<<"$elemMatch">>, _}]}, Value, _Ctx) ->
     [#failure{op = elemMatch, type = bad_value, params = [Value]}];
 % Matches when all elements in values match the
 % sub-selector Arg.
-match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, Cmp) ->
-    MatchValue = fun(Value) -> match(Arg, Value, Cmp) end,
+match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, Ctx) ->
+    MatchValue = fun(Value) -> match(Arg, Value, Ctx) end,
     lists:flatmap(MatchValue, Values);
-match({[{<<"$allMatch">>, _}]}, Value, _Cmp) ->
+match({[{<<"$allMatch">>, _}]}, Value, _Ctx) ->
     [#failure{op = allMatch, type = bad_value, params = [Value]}];
 % Matches when any key in the map value matches the
 % sub-selector Arg.
-match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, _Cmp) ->
+match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, _Ctx) ->
     [#failure{op = keyMapMatch, type = empty_list}];
-match({[{<<"$keyMapMatch">>, Arg}]}, {Value}, Cmp) when is_list(Value) ->
-    KeyFailures = [match(Arg, K, Cmp) || {K, _} <- Value],
+match({[{<<"$keyMapMatch">>, Arg}]}, {Value}, Ctx) when is_list(Value) ->
+    KeyFailures = [match(Arg, K, Ctx) || {K, _} <- Value],
     case lists:member([], KeyFailures) of
         true -> [];
         _ -> lists:flatten(KeyFailures)
     end;
-match({[{<<"$keyMapMatch">>, _}]}, Value, _Cmp) ->
+match({[{<<"$keyMapMatch">>, _}]}, Value, _Ctx) ->
     [#failure{op = keyMapMatch, type = bad_value, params = [Value]}];
 % Our comparison operators are fairly straight forward
-match({[{<<"$lt">>, Arg}]}, Value, Cmp) ->
+match({[{<<"$lt">>, Arg}]}, Value, #ctx{cmp = Cmp}) ->
     compare(lt, Arg, Cmp(Value, Arg) < 0);
-match({[{<<"$lte">>, Arg}]}, Value, Cmp) ->
+match({[{<<"$lte">>, Arg}]}, Value, #ctx{cmp = Cmp}) ->
     compare(lte, Arg, Cmp(Value, Arg) =< 0);
-match({[{<<"$eq">>, Arg}]}, Value, Cmp) ->
+match({[{<<"$eq">>, Arg}]}, Value, #ctx{cmp = Cmp}) ->
     compare(eq, Arg, Cmp(Value, Arg) == 0);
-match({[{<<"$ne">>, Arg}]}, Value, Cmp) ->
+match({[{<<"$ne">>, Arg}]}, Value, #ctx{cmp = Cmp}) ->
     compare(ne, Arg, Cmp(Value, Arg) /= 0);
-match({[{<<"$gte">>, Arg}]}, Value, Cmp) ->
+match({[{<<"$gte">>, Arg}]}, Value, #ctx{cmp = Cmp}) ->
     compare(gte, Arg, Cmp(Value, Arg) >= 0);
-match({[{<<"$gt">>, Arg}]}, Value, Cmp) ->
+match({[{<<"$gt">>, Arg}]}, Value, #ctx{cmp = Cmp}) ->
     compare(gt, Arg, Cmp(Value, Arg) > 0);
-match({[{<<"$in">>, Args}]}, Values, Cmp) when is_list(Values) ->
+match({[{<<"$in">>, Args}]}, Values, #ctx{cmp = Cmp}) when is_list(Values) ->
     Pred = fun(Arg) ->
         lists:foldl(
             fun(Value, Match) ->
@@ -488,13 +492,13 @@ match({[{<<"$in">>, Args}]}, Values, Cmp) when is_list(Values) ->
         true -> [];
         _ -> [#failure{op = in, params = [Args]}]
     end;
-match({[{<<"$in">>, Args}]}, Value, Cmp) ->
+match({[{<<"$in">>, Args}]}, Value, #ctx{cmp = Cmp}) ->
     Pred = fun(Arg) -> Cmp(Value, Arg) == 0 end,
     case lists:any(Pred, Args) of
         true -> [];
         _ -> [#failure{op = in, params = [Args]}]
     end;
-match({[{<<"$nin">>, Args}]}, Values, Cmp) when is_list(Values) ->
+match({[{<<"$nin">>, Args}]}, Values, #ctx{cmp = Cmp}) when is_list(Values) ->
     Pred = fun(Arg) ->
         lists:foldl(
             fun(Value, Match) ->
@@ -508,7 +512,7 @@ match({[{<<"$nin">>, Args}]}, Values, Cmp) when is_list(Values) ->
         true -> [];
         _ -> [#failure{op = nin, params = [Args]}]
     end;
-match({[{<<"$nin">>, Args}]}, Value, Cmp) ->
+match({[{<<"$nin">>, Args}]}, Value, #ctx{cmp = Cmp}) ->
     Pred = fun(Arg) -> Cmp(Value, Arg) /= 0 end,
     case lists:all(Pred, Args) of
         true -> [];
@@ -516,34 +520,34 @@ match({[{<<"$nin">>, Args}]}, Value, Cmp) ->
     end;
 % This logic is a bit subtle. Basically, if value is
 % not undefined, then it exists.
-match({[{<<"$exists">>, ShouldExist}]}, Value, _Cmp) ->
+match({[{<<"$exists">>, ShouldExist}]}, Value, _Ctx) ->
     case {ShouldExist, Value} of
         {true, undefined} -> [#failure{op = exists, params = [ShouldExist]}];
         {true, _} -> [];
         {false, undefined} -> [];
         {false, _} -> [#failure{op = exists, params = [ShouldExist]}]
     end;
-match({[{<<"$type">>, Arg}]}, Value, _Cmp) when is_binary(Arg) ->
+match({[{<<"$type">>, Arg}]}, Value, _Ctx) when is_binary(Arg) ->
     case mango_json:type(Value) of
         Arg -> [];
         _ -> [#failure{op = type, params = [Arg]}]
     end;
-match({[{<<"$mod">>, [D, R]}]}, Value, _Cmp) when is_integer(Value) ->
+match({[{<<"$mod">>, [D, R]}]}, Value, _Ctx) when is_integer(Value) ->
     case Value rem D of
         R -> [];
         _ -> [#failure{op = mod, params = [D, R]}]
     end;
-match({[{<<"$mod">>, _}]}, Value, _Cmp) ->
+match({[{<<"$mod">>, _}]}, Value, _Ctx) ->
     [#failure{op = mod, type = bad_value, params = [Value]}];
-match({[{<<"$beginsWith">>, Prefix}]}, Value, _Cmp) when is_binary(Prefix), is_binary(Value) ->
+match({[{<<"$beginsWith">>, Prefix}]}, Value, _Ctx) when is_binary(Prefix), is_binary(Value) ->
     case string:prefix(Value, Prefix) of
         nomatch -> [#failure{op = beginsWith, params = [Prefix]}];
         _ -> []
     end;
 % When Value is not a string, do not match
-match({[{<<"$beginsWith">>, Prefix}]}, Value, _Cmp) when is_binary(Prefix) ->
+match({[{<<"$beginsWith">>, Prefix}]}, Value, _Ctx) when is_binary(Prefix) ->
     [#failure{op = beginsWith, type = bad_value, params = [Value]}];
-match({[{<<"$regex">>, Regex}]}, Value, _Cmp) when is_binary(Value) ->
+match({[{<<"$regex">>, Regex}]}, Value, _Ctx) when is_binary(Value) ->
     try
         case re:run(Value, Regex, [{capture, none}]) of
             match -> [];
@@ -553,18 +557,18 @@ match({[{<<"$regex">>, Regex}]}, Value, _Cmp) when is_binary(Value) ->
         _:_ ->
             [#failure{op = regex, params = [Regex]}]
     end;
-match({[{<<"$regex">>, _}]}, Value, _Cmp) ->
+match({[{<<"$regex">>, _}]}, Value, _Ctx) ->
     [#failure{op = regex, type = bad_value, params = [Value]}];
-match({[{<<"$size">>, Arg}]}, Values, _Cmp) when is_list(Values) ->
+match({[{<<"$size">>, Arg}]}, Values, _Ctx) when is_list(Values) ->
     case length(Values) of
         Arg -> [];
         _ -> [#failure{op = size, params = [Arg]}]
     end;
-match({[{<<"$size">>, _}]}, Value, _Cmp) ->
+match({[{<<"$size">>, _}]}, Value, _Ctx) ->
     [#failure{op = size, type = bad_value, params = [Value]}];
 % We don't have any choice but to believe that the text
 % index returned valid matches
-match({[{<<"$default">>, _}]}, _Value, _Cmp) ->
+match({[{<<"$default">>, _}]}, _Value, _Ctx) ->
     [];
 % All other operators are internal assertion errors for
 % matching because we either should've removed them during
@@ -574,7 +578,7 @@ match({[{<<"$", _/binary>> = Op, _}]}, _, _) ->
 % We need to traverse value to find field. The call to
 % mango_doc:get_field/2 may return either not_found or
 % bad_path in which case matching fails.
-match({[{Field, Cond}]}, Value, Cmp) ->
+match({[{Field, Cond}]}, Value, Ctx) ->
     case mango_doc:get_field(Value, Field) of
         not_found when Cond == {[{<<"$exists">>, false}]} ->
             [];
@@ -583,11 +587,11 @@ match({[{Field, Cond}]}, Value, Cmp) ->
         bad_path ->
             [#failure{op = '$'}];
         SubValue when Field == <<"_id">> ->
-            match(Cond, SubValue, fun mango_json:cmp_raw/2);
+            match(Cond, SubValue, Ctx#ctx{cmp = fun mango_json:cmp_raw/2});
         SubValue ->
-            match(Cond, SubValue, Cmp)
+            match(Cond, SubValue, Ctx)
     end;
-match({[_, _ | _] = _Props} = Sel, _Value, _Cmp) ->
+match({[_, _ | _] = _Props} = Sel, _Value, _Ctx) ->
     error({unnormalized_selector, Sel}).
 
 compare(Op, Arg, Cond) ->

From 412bb3e67ee9e972135f24a0d444a69ae47f7da4 Mon Sep 17 00:00:00 2001
From: James Coglan <james@neighbourhood.ie>
Date: Thu, 15 Jan 2026 16:57:29 +0000
Subject: [PATCH 3/9] feat: Record the paths where selectors fail to match

To give detailed feedback to the caller, the `#ctx` argument to
`mango_selector:match/3` now records the path that was taken to reach
each value, and this path is added to the `#failure` records.

Each path segment is either a binary, if it represents an object
property, or an integer if it represents an array index. Items are
pushed on the front of `#ctx.path` as this is faster than pushing onto
the back of a list. This list can then be reversed once the final list
of failures has been generated, before the failures are presented to the
caller.
---
 src/mango/src/mango_selector.erl | 329 ++++++++++++++++++++++++-------
 1 file changed, 256 insertions(+), 73 deletions(-)

diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl
index 8582f093ec..e532a6b9a9 100644
--- a/src/mango/src/mango_selector.erl
+++ b/src/mango/src/mango_selector.erl
@@ -24,13 +24,15 @@
 -include("mango.hrl").
 
 -record(ctx, {
-    cmp
+    cmp,
+    path = []
 }).
 
 -record(failure, {
     op,
     type = mismatch,
-    params = []
+    params = [],
+    path = []
 }).
 
 % Validate and normalize each operator. This translates
@@ -397,29 +399,29 @@ match({[{<<"$or">>, Args}]}, Value, Ctx) ->
 % $not from the tree by pushing it to the leaves.
 match({[{<<"$not">>, Arg}]}, Value, Ctx) ->
     case match(Arg, Value, Ctx) of
-        [] -> [#failure{op = 'not'}];
+        [] -> [#failure{op = 'not', path = Ctx#ctx.path}];
         _ -> []
     end;
 % All of the values in Args must exist in Values or
 % Values == hd(Args) if Args is a single element list
 % that contains a list.
-match({[{<<"$all">>, []}]}, _Values, _Ctx) ->
+match({[{<<"$all">>, []}]}, _Values, Ctx) ->
     % { "$all": [] } is defined to eval to false, so return a failure
-    [#failure{op = all, params = [[]]}];
+    [#failure{op = all, params = [[]], path = Ctx#ctx.path}];
 match({[{<<"$all">>, [A]}]}, Values, _Ctx) when is_list(A), A == Values ->
     [];
-match({[{<<"$all">>, Args}]}, Values, _Ctx) when is_list(Values) ->
+match({[{<<"$all">>, Args}]}, Values, Ctx) when is_list(Values) ->
     lists:flatmap(
         fun(Arg) ->
             case lists:member(Arg, Values) of
                 true -> [];
-                _ -> [#failure{op = all, params = [Arg]}]
+                _ -> [#failure{op = all, params = [Arg], path = Ctx#ctx.path}]
             end
         end,
         Args
     );
-match({[{<<"$all">>, _}]}, Value, _Ctx) ->
-    [#failure{op = all, type = bad_value, params = [Value]}];
+match({[{<<"$all">>, _}]}, Value, Ctx) ->
+    [#failure{op = all, type = bad_value, params = [Value], path = Ctx#ctx.path}];
 %% This is for $elemMatch, $allMatch, and possibly $in because of our normalizer.
 %% A selector such as {"field_name": {"$elemMatch": {"$gte": 80, "$lt": 85}}}
 %% gets normalized to:
@@ -436,49 +438,53 @@ match({[{<<>>, Arg}]}, Values, Ctx) ->
     match(Arg, Values, Ctx);
 % Matches when any element in values matches the
 % sub-selector Arg.
-match({[{<<"$elemMatch">>, _Arg}]}, [], _Ctx) ->
-    [#failure{op = elemMatch, type = empty_list}];
-match({[{<<"$elemMatch">>, Arg}]}, Values, Ctx) when is_list(Values) ->
-    ValueFailures = [match(Arg, V, Ctx) || V <- Values],
+match({[{<<"$elemMatch">>, _Arg}]}, [], Ctx) ->
+    [#failure{op = elemMatch, type = empty_list, path = Ctx#ctx.path}];
+match({[{<<"$elemMatch">>, Arg}]}, Values, #ctx{path = Path} = Ctx) when is_list(Values) ->
+    ValueFailures = [
+        match(Arg, V, Ctx#ctx{path = [Idx | Path]})
+     || {Idx, V} <- lists:enumerate(0, Values)
+    ],
     case lists:member([], ValueFailures) of
         true -> [];
         _ -> lists:flatten(ValueFailures)
     end;
-match({[{<<"$elemMatch">>, _}]}, Value, _Ctx) ->
-    [#failure{op = elemMatch, type = bad_value, params = [Value]}];
+match({[{<<"$elemMatch">>, _}]}, Value, Ctx) ->
+    [#failure{op = elemMatch, type = bad_value, params = [Value], path = Ctx#ctx.path}];
 % Matches when all elements in values match the
 % sub-selector Arg.
-match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, Ctx) ->
-    MatchValue = fun(Value) -> match(Arg, Value, Ctx) end,
-    lists:flatmap(MatchValue, Values);
-match({[{<<"$allMatch">>, _}]}, Value, _Ctx) ->
-    [#failure{op = allMatch, type = bad_value, params = [Value]}];
+match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, #ctx{path = Path} = Ctx) ->
+    EnumValues = lists:enumerate(0, Values),
+    MatchValue = fun({Idx, Value}) -> match(Arg, Value, Ctx#ctx{path = [Idx | Path]}) end,
+    lists:flatmap(MatchValue, EnumValues);
+match({[{<<"$allMatch">>, _}]}, Value, Ctx) ->
+    [#failure{op = allMatch, type = bad_value, params = [Value], path = Ctx#ctx.path}];
 % Matches when any key in the map value matches the
 % sub-selector Arg.
-match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, _Ctx) ->
-    [#failure{op = keyMapMatch, type = empty_list}];
+match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, Ctx) ->
+    [#failure{op = keyMapMatch, type = empty_list, path = Ctx#ctx.path}];
 match({[{<<"$keyMapMatch">>, Arg}]}, {Value}, Ctx) when is_list(Value) ->
     KeyFailures = [match(Arg, K, Ctx) || {K, _} <- Value],
     case lists:member([], KeyFailures) of
         true -> [];
         _ -> lists:flatten(KeyFailures)
     end;
-match({[{<<"$keyMapMatch">>, _}]}, Value, _Ctx) ->
-    [#failure{op = keyMapMatch, type = bad_value, params = [Value]}];
+match({[{<<"$keyMapMatch">>, _}]}, Value, Ctx) ->
+    [#failure{op = keyMapMatch, type = bad_value, params = [Value], path = Ctx#ctx.path}];
 % Our comparison operators are fairly straight forward
-match({[{<<"$lt">>, Arg}]}, Value, #ctx{cmp = Cmp}) ->
-    compare(lt, Arg, Cmp(Value, Arg) < 0);
-match({[{<<"$lte">>, Arg}]}, Value, #ctx{cmp = Cmp}) ->
-    compare(lte, Arg, Cmp(Value, Arg) =< 0);
-match({[{<<"$eq">>, Arg}]}, Value, #ctx{cmp = Cmp}) ->
-    compare(eq, Arg, Cmp(Value, Arg) == 0);
-match({[{<<"$ne">>, Arg}]}, Value, #ctx{cmp = Cmp}) ->
-    compare(ne, Arg, Cmp(Value, Arg) /= 0);
-match({[{<<"$gte">>, Arg}]}, Value, #ctx{cmp = Cmp}) ->
-    compare(gte, Arg, Cmp(Value, Arg) >= 0);
-match({[{<<"$gt">>, Arg}]}, Value, #ctx{cmp = Cmp}) ->
-    compare(gt, Arg, Cmp(Value, Arg) > 0);
-match({[{<<"$in">>, Args}]}, Values, #ctx{cmp = Cmp}) when is_list(Values) ->
+match({[{<<"$lt">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) ->
+    compare(lt, Arg, Path, Cmp(Value, Arg) < 0);
+match({[{<<"$lte">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) ->
+    compare(lte, Arg, Path, Cmp(Value, Arg) =< 0);
+match({[{<<"$eq">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) ->
+    compare(eq, Arg, Path, Cmp(Value, Arg) == 0);
+match({[{<<"$ne">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) ->
+    compare(ne, Arg, Path, Cmp(Value, Arg) /= 0);
+match({[{<<"$gte">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) ->
+    compare(gte, Arg, Path, Cmp(Value, Arg) >= 0);
+match({[{<<"$gt">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) ->
+    compare(gt, Arg, Path, Cmp(Value, Arg) > 0);
+match({[{<<"$in">>, Args}]}, Values, #ctx{cmp = Cmp, path = Path}) when is_list(Values) ->
     Pred = fun(Arg) ->
         lists:foldl(
             fun(Value, Match) ->
@@ -490,15 +496,15 @@ match({[{<<"$in">>, Args}]}, Values, #ctx{cmp = Cmp}) when is_list(Values) ->
     end,
     case lists:any(Pred, Args) of
         true -> [];
-        _ -> [#failure{op = in, params = [Args]}]
+        _ -> [#failure{op = in, params = [Args], path = Path}]
     end;
-match({[{<<"$in">>, Args}]}, Value, #ctx{cmp = Cmp}) ->
+match({[{<<"$in">>, Args}]}, Value, #ctx{cmp = Cmp, path = Path}) ->
     Pred = fun(Arg) -> Cmp(Value, Arg) == 0 end,
     case lists:any(Pred, Args) of
         true -> [];
-        _ -> [#failure{op = in, params = [Args]}]
+        _ -> [#failure{op = in, params = [Args], path = Path}]
     end;
-match({[{<<"$nin">>, Args}]}, Values, #ctx{cmp = Cmp}) when is_list(Values) ->
+match({[{<<"$nin">>, Args}]}, Values, #ctx{cmp = Cmp, path = Path}) when is_list(Values) ->
     Pred = fun(Arg) ->
         lists:foldl(
             fun(Value, Match) ->
@@ -510,62 +516,62 @@ match({[{<<"$nin">>, Args}]}, Values, #ctx{cmp = Cmp}) when is_list(Values) ->
     end,
     case lists:all(Pred, Args) of
         true -> [];
-        _ -> [#failure{op = nin, params = [Args]}]
+        _ -> [#failure{op = nin, params = [Args], path = Path}]
     end;
-match({[{<<"$nin">>, Args}]}, Value, #ctx{cmp = Cmp}) ->
+match({[{<<"$nin">>, Args}]}, Value, #ctx{cmp = Cmp, path = Path}) ->
     Pred = fun(Arg) -> Cmp(Value, Arg) /= 0 end,
     case lists:all(Pred, Args) of
         true -> [];
-        _ -> [#failure{op = nin, params = [Args]}]
+        _ -> [#failure{op = nin, params = [Args], path = Path}]
     end;
 % This logic is a bit subtle. Basically, if value is
 % not undefined, then it exists.
-match({[{<<"$exists">>, ShouldExist}]}, Value, _Ctx) ->
+match({[{<<"$exists">>, ShouldExist}]}, Value, Ctx) ->
     case {ShouldExist, Value} of
-        {true, undefined} -> [#failure{op = exists, params = [ShouldExist]}];
+        {true, undefined} -> [#failure{op = exists, params = [ShouldExist], path = Ctx#ctx.path}];
         {true, _} -> [];
         {false, undefined} -> [];
-        {false, _} -> [#failure{op = exists, params = [ShouldExist]}]
+        {false, _} -> [#failure{op = exists, params = [ShouldExist], path = Ctx#ctx.path}]
     end;
-match({[{<<"$type">>, Arg}]}, Value, _Ctx) when is_binary(Arg) ->
+match({[{<<"$type">>, Arg}]}, Value, Ctx) when is_binary(Arg) ->
     case mango_json:type(Value) of
         Arg -> [];
-        _ -> [#failure{op = type, params = [Arg]}]
+        _ -> [#failure{op = type, params = [Arg], path = Ctx#ctx.path}]
     end;
-match({[{<<"$mod">>, [D, R]}]}, Value, _Ctx) when is_integer(Value) ->
+match({[{<<"$mod">>, [D, R]}]}, Value, Ctx) when is_integer(Value) ->
     case Value rem D of
         R -> [];
-        _ -> [#failure{op = mod, params = [D, R]}]
+        _ -> [#failure{op = mod, params = [D, R], path = Ctx#ctx.path}]
     end;
-match({[{<<"$mod">>, _}]}, Value, _Ctx) ->
-    [#failure{op = mod, type = bad_value, params = [Value]}];
-match({[{<<"$beginsWith">>, Prefix}]}, Value, _Ctx) when is_binary(Prefix), is_binary(Value) ->
+match({[{<<"$mod">>, _}]}, Value, Ctx) ->
+    [#failure{op = mod, type = bad_value, params = [Value], path = Ctx#ctx.path}];
+match({[{<<"$beginsWith">>, Prefix}]}, Value, Ctx) when is_binary(Prefix), is_binary(Value) ->
     case string:prefix(Value, Prefix) of
-        nomatch -> [#failure{op = beginsWith, params = [Prefix]}];
+        nomatch -> [#failure{op = beginsWith, params = [Prefix], path = Ctx#ctx.path}];
         _ -> []
     end;
 % When Value is not a string, do not match
-match({[{<<"$beginsWith">>, Prefix}]}, Value, _Ctx) when is_binary(Prefix) ->
-    [#failure{op = beginsWith, type = bad_value, params = [Value]}];
-match({[{<<"$regex">>, Regex}]}, Value, _Ctx) when is_binary(Value) ->
+match({[{<<"$beginsWith">>, Prefix}]}, Value, Ctx) when is_binary(Prefix) ->
+    [#failure{op = beginsWith, type = bad_value, params = [Value], path = Ctx#ctx.path}];
+match({[{<<"$regex">>, Regex}]}, Value, Ctx) when is_binary(Value) ->
     try
         case re:run(Value, Regex, [{capture, none}]) of
             match -> [];
-            _ -> [#failure{op = regex, params = [Regex]}]
+            _ -> [#failure{op = regex, params = [Regex], path = Ctx#ctx.path}]
         end
     catch
         _:_ ->
-            [#failure{op = regex, params = [Regex]}]
+            [#failure{op = regex, params = [Regex], path = Ctx#ctx.path}]
     end;
-match({[{<<"$regex">>, _}]}, Value, _Ctx) ->
-    [#failure{op = regex, type = bad_value, params = [Value]}];
-match({[{<<"$size">>, Arg}]}, Values, _Ctx) when is_list(Values) ->
+match({[{<<"$regex">>, _}]}, Value, Ctx) ->
+    [#failure{op = regex, type = bad_value, params = [Value], path = Ctx#ctx.path}];
+match({[{<<"$size">>, Arg}]}, Values, Ctx) when is_list(Values) ->
     case length(Values) of
         Arg -> [];
-        _ -> [#failure{op = size, params = [Arg]}]
+        _ -> [#failure{op = size, params = [Arg], path = Ctx#ctx.path}]
     end;
-match({[{<<"$size">>, _}]}, Value, _Ctx) ->
-    [#failure{op = size, type = bad_value, params = [Value]}];
+match({[{<<"$size">>, _}]}, Value, Ctx) ->
+    [#failure{op = size, type = bad_value, params = [Value], path = Ctx#ctx.path}];
 % We don't have any choice but to believe that the text
 % index returned valid matches
 match({[{<<"$default">>, _}]}, _Value, _Ctx) ->
@@ -578,26 +584,33 @@ match({[{<<"$", _/binary>> = Op, _}]}, _, _) ->
 % We need to traverse value to find field. The call to
 % mango_doc:get_field/2 may return either not_found or
 % bad_path in which case matching fails.
-match({[{Field, Cond}]}, Value, Ctx) ->
+match({[{Field, Cond}]}, Value, #ctx{path = Path} = Ctx) ->
+    InnerPath = extend_path(Field, Path),
+    InnerCtx = Ctx#ctx{path = InnerPath},
     case mango_doc:get_field(Value, Field) of
         not_found when Cond == {[{<<"$exists">>, false}]} ->
             [];
         not_found ->
-            [#failure{op = '$'}];
+            [#failure{op = field, type = not_found, path = InnerCtx#ctx.path}];
         bad_path ->
-            [#failure{op = '$'}];
+            [#failure{op = field, type = bad_path, path = InnerCtx#ctx.path}];
         SubValue when Field == <<"_id">> ->
-            match(Cond, SubValue, Ctx#ctx{cmp = fun mango_json:cmp_raw/2});
+            match(Cond, SubValue, InnerCtx#ctx{cmp = fun mango_json:cmp_raw/2});
         SubValue ->
-            match(Cond, SubValue, Ctx)
+            match(Cond, SubValue, InnerCtx)
     end;
 match({[_, _ | _] = _Props} = Sel, _Value, _Ctx) ->
     error({unnormalized_selector, Sel}).
 
-compare(Op, Arg, Cond) ->
+extend_path(Field, Path) when is_binary(Field) ->
+    [Field | Path];
+extend_path(Field, Path) when is_list(Field) ->
+    lists:foldl(fun(F, Acc) -> [F | Acc] end, Path, Field).
+
+compare(Op, Arg, Path, Cond) ->
     case Cond of
         true -> [];
-        _ -> [#failure{op = Op, params = [Arg]}]
+        _ -> [#failure{op = Op, params = [Arg], path = Path}]
     end.
 
 % Returns true if Selector requires all
@@ -1748,4 +1761,174 @@ match_nor_test() ->
     ?assertEqual(false, match_int(SelMulti, {[{<<"x">>, 9}]})),
     ?assertEqual(false, match_int(SelMulti, {[]})).
 
+match_failures_object_test() ->
+    Selector = normalize(
+        {[
+            {<<"a">>, 1},
+            {<<"b">>, {[{<<"c">>, 3}]}}
+        ]}
+    ),
+
+    Fails0 = match_failures(
+        Selector,
+        {[
+            {<<"a">>, 1},
+            {<<"b">>, {[{<<"c">>, 3}]}}
+        ]}
+    ),
+    ?assertEqual([], Fails0),
+
+    Fails1 = match_failures(
+        Selector,
+        {[
+            {<<"a">>, 0},
+            {<<"b">>, {[{<<"c">>, 3}]}}
+        ]}
+    ),
+    ?assertEqual(
+        [#failure{op = eq, type = mismatch, params = [1], path = [<<"a">>]}],
+        Fails1
+    ),
+
+    Fails2 = match_failures(
+        Selector,
+        {[
+            {<<"a">>, 1},
+            {<<"b">>, {[{<<"c">>, 4}]}}
+        ]}
+    ),
+    ?assertEqual(
+        [#failure{op = eq, type = mismatch, params = [3], path = [<<"c">>, <<"b">>]}],
+        Fails2
+    ),
+
+    Fails3 = match_failures(
+        Selector,
+        {[
+            {<<"a">>, 2},
+            {<<"b">>, {[{<<"c">>, 4}]}}
+        ]}
+    ),
+    ?assertEqual(
+        [
+            #failure{op = eq, type = mismatch, params = [1], path = [<<"a">>]},
+            #failure{op = eq, type = mismatch, params = [3], path = [<<"c">>, <<"b">>]}
+        ],
+        Fails3
+    ).
+
+match_failures_elemmatch_test() ->
+    SelElemMatch = normalize(
+        {[
+            {<<"a">>,
+                {[
+                    {<<"$elemMatch">>, {[{<<"$gt">>, 4}]}}
+                ]}}
+        ]}
+    ),
+
+    Fails0 = match_failures(
+        SelElemMatch, {[{<<"a">>, [5, 3, 2]}]}
+    ),
+    ?assertEqual([], Fails0),
+
+    Fails1 = match_failures(
+        SelElemMatch, {[{<<"a">>, []}]}
+    ),
+    ?assertEqual(
+        [#failure{op = elemMatch, type = empty_list, params = [], path = [<<"a">>]}],
+        Fails1
+    ),
+
+    Fails2 = match_failures(
+        SelElemMatch, {[{<<"a">>, [3, 2]}]}
+    ),
+    ?assertEqual(
+        [
+            #failure{op = gt, type = mismatch, params = [4], path = [0, <<"a">>]},
+            #failure{op = gt, type = mismatch, params = [4], path = [1, <<"a">>]}
+        ],
+        Fails2
+    ).
+
+match_failures_allmatch_test() ->
+    SelAllMatch = normalize(
+        {[
+            {<<"a">>,
+                {[
+                    {<<"$allMatch">>, {[{<<"$gt">>, 4}]}}
+                ]}}
+        ]}
+    ),
+
+    Fails0 = match_failures(
+        SelAllMatch, {[{<<"a">>, [5]}]}
+    ),
+    ?assertEqual([], Fails0),
+
+    Fails1 = match_failures(
+        SelAllMatch, {[{<<"a">>, [4]}]}
+    ),
+    ?assertEqual(
+        [#failure{op = gt, type = mismatch, params = [4], path = [0, <<"a">>]}],
+        Fails1
+    ),
+
+    Fails2 = match_failures(
+        SelAllMatch, {[{<<"a">>, [5, 6, 3, 7, 0]}]}
+    ),
+    ?assertEqual(
+        [
+            #failure{op = gt, type = mismatch, params = [4], path = [2, <<"a">>]},
+            #failure{op = gt, type = mismatch, params = [4], path = [4, <<"a">>]}
+        ],
+        Fails2
+    ).
+
+match_failures_allmatch_object_test() ->
+    SelAllMatch = normalize(
+        {[
+            {<<"a.b">>,
+                {[
+                    {<<"$allMatch">>, {[{<<"c">>, {[{<<"$gt">>, 4}]}}]}}
+                ]}}
+        ]}
+    ),
+
+    Fails0 = match_failures(
+        SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 5}]}]}]}}]}
+    ),
+    ?assertEqual([], Fails0),
+
+    Fails1 = match_failures(
+        SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 4}]}]}]}}]}
+    ),
+    ?assertEqual(
+        [#failure{op = gt, type = mismatch, params = [4], path = [<<"c">>, 0, <<"b">>, <<"a">>]}],
+        Fails1
+    ),
+
+    Fails2 = match_failures(
+        SelAllMatch,
+        {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 5}]}, {[{<<"c">>, 6}]}, {[{<<"c">>, 3}]}]}]}}]}
+    ),
+    ?assertEqual(
+        [#failure{op = gt, type = mismatch, params = [4], path = [<<"c">>, 2, <<"b">>, <<"a">>]}],
+        Fails2
+    ),
+
+    Fails3 = match_failures(
+        SelAllMatch,
+        {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 1}]}, {[]}]}]}}]}
+    ),
+    ?assertEqual(
+        [
+            #failure{op = gt, type = mismatch, params = [4], path = [<<"c">>, 0, <<"b">>, <<"a">>]},
+            #failure{
+                op = field, type = not_found, params = [], path = [<<"c">>, 1, <<"b">>, <<"a">>]
+            }
+        ],
+        Fails3
+    ).
+
 -endif.

From d05fbe29fe29ece0e1f102283afa673bda8538c6 Mon Sep 17 00:00:00 2001
From: James Coglan <james@neighbourhood.ie>
Date: Fri, 16 Jan 2026 15:41:43 +0000
Subject: [PATCH 4/9] feat: "verbose" mode for `mango_selector:match/3`

Collecting detailed `#failure` records rather than a boolean true/false
when evaluating selectors imposes a performance penalty, so we would
like to only do this when a selector is used for a VDU, not when it is
used for indexing/filtering.

To this end we introduce "verbose" mode signalled via the `#ctx.verbose`
field, and each branch of `mango_selector:match/3` now has 3 distinct
versions:

- `#ctx{verbose = false}`: this is the original version that returns
  true/false, taken when a selector is used for Mango queries.

- `#ctx{verbose = true, negate = false}`: verbose mode, when the
  operator is not negated by an enclosing `$not` operator. Returns a
  list of `#failure` records which may be empty.

- `#ctx{verbose = true, negate = true}`: verbose mode, when the operator
  is negated by an enclosing `$not` operator. Returns a list of
  `#failure` records.

The different negation modes are needed because, in order to generate
meaningful failure messages, we need to record whether an operator was
negated. The behaviour of combinators like `$and`, `$or`, `$allMatch`
and `$elemMatch` means not all `$not` operators can be normalized out of
the selector before evaluation. Instead, when we encounter a `$not`
during evaluation, we flip the `#ctx.negate` field before evaluating the
inner operator.
---
 src/mango/src/mango_selector.erl | 487 ++++++++++++++++++++-----------
 1 file changed, 312 insertions(+), 175 deletions(-)

diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl
index e532a6b9a9..347bd74bfc 100644
--- a/src/mango/src/mango_selector.erl
+++ b/src/mango/src/mango_selector.erl
@@ -15,6 +15,7 @@
 -export([
     normalize/1,
     match/2,
+    match_failures/2,
     has_required_fields/2,
     is_constant_field/2,
     fields/1
@@ -25,6 +26,8 @@
 
 -record(ctx, {
     cmp,
+    verbose = false,
+    negate = false,
     path = []
 }).
 
@@ -32,7 +35,7 @@
     op,
     type = mismatch,
     params = [],
-    path = []
+    ctx
 }).
 
 % Validate and normalize each operator. This translates
@@ -65,20 +68,19 @@ match(Selector, D) ->
     couch_stats:increment_counter([mango, evaluate_selector]),
     match_int(Selector, D).
 
+match_failures(Selector, D) ->
+    couch_stats:increment_counter([mango, evaluate_selector]),
+    match_int(Selector, D, true).
+
 match_int(Selector, D) ->
-    case match_failures(Selector, D) of
-        [] -> true;
-        [_ | _] -> false;
-        Other -> Other
-    end.
+    match_int(Selector, D, false).
 
-% An empty selector matches any value.
-match_failures({[]}, _) ->
-    [];
-match_failures(Selector, #doc{body = Body}) ->
-    match_failures(Selector, Body);
-match_failures(Selector, {Props}) ->
-    match(Selector, {Props}, #ctx{cmp = fun mango_json:cmp/2}).
+match_int(Selector, D, Verbose) ->
+    Ctx = #ctx{cmp = fun mango_json:cmp/2, verbose = Verbose},
+    case D of
+        #doc{body = Body} -> match(Selector, Body, Ctx);
+        Other -> match(Selector, Other, Ctx)
+    end.
 
 % Convert each operator into a normalized version as well
 % as convert an implicit operators into their explicit
@@ -380,48 +382,76 @@ negate({[{<<"$", _/binary>>, _}]} = Cond) ->
 negate({[{Field, Cond}]}) ->
     {[{Field, negate(Cond)}]}.
 
+% An empty selector matches any value.
+match({[]}, _, #ctx{verbose = false}) ->
+    true;
+match({[]}, _, #ctx{verbose = true}) ->
+    [];
 % We need to treat an empty array as always true. This will be applied
 % for $or, $in, $all, $nin as well.
-match({[{<<"$and">>, []}]}, _, _) ->
+match({[{<<"$and">>, []}]}, _, #ctx{verbose = false}) ->
+    true;
+match({[{<<"$and">>, []}]}, _, #ctx{negate = false}) ->
     [];
+match({[{<<"$and">>, []}]}, _, Ctx) ->
+    [#failure{op = 'and', type = empty_list, params = [[]], ctx = Ctx}];
+match({[{<<"$and">>, Args}]}, Value, #ctx{verbose = false} = Ctx) ->
+    Pred = fun(SubSel) -> match(SubSel, Value, Ctx) end,
+    lists:all(Pred, Args);
+match({[{<<"$and">>, Args}]}, Value, #ctx{negate = true} = Ctx) ->
+    NotArgs = [{[{<<"$not">>, A}]} || A <- Args],
+    PosCtx = Ctx#ctx{negate = false},
+    match({[{<<"$or">>, NotArgs}]}, Value, PosCtx);
 match({[{<<"$and">>, Args}]}, Value, Ctx) ->
     MatchSubSel = fun(SubSel) -> match(SubSel, Value, Ctx) end,
     lists:flatmap(MatchSubSel, Args);
-match({[{<<"$or">>, []}]}, _, _) ->
+match({[{<<"$or">>, []}]}, _, #ctx{verbose = false}) ->
+    true;
+match({[{<<"$or">>, []}]}, _, #ctx{negate = false}) ->
     [];
+match({[{<<"$or">>, []}]}, _, Ctx) ->
+    [#failure{op = 'or', type = empty_list, params = [[]], ctx = Ctx}];
+match({[{<<"$or">>, Args}]}, Value, #ctx{verbose = false} = Ctx) ->
+    Pred = fun(SubSel) -> match(SubSel, Value, Ctx) end,
+    lists:any(Pred, Args);
+match({[{<<"$or">>, Args}]}, Value, #ctx{negate = true} = Ctx) ->
+    NotArgs = [{[{<<"$not">>, A}]} || A <- Args],
+    PosCtx = Ctx#ctx{negate = false},
+    match({[{<<"$and">>, NotArgs}]}, Value, PosCtx);
 match({[{<<"$or">>, Args}]}, Value, Ctx) ->
     SubSelFailures = [match(A, Value, Ctx) || A <- Args],
-    case lists:any(fun(Res) -> Res == [] end, SubSelFailures) of
+    case lists:member([], SubSelFailures) of
         true -> [];
-        _ -> lists:flatten(SubSelFailures)
-    end;
-% TODO: producing good failure messages requires that normalize/1 fully removes
-% $not from the tree by pushing it to the leaves.
-match({[{<<"$not">>, Arg}]}, Value, Ctx) ->
-    case match(Arg, Value, Ctx) of
-        [] -> [#failure{op = 'not', path = Ctx#ctx.path}];
-        _ -> []
+        false -> lists:flatten(SubSelFailures)
     end;
+match({[{<<"$not">>, Arg}]}, Value, #ctx{verbose = false} = Ctx) ->
+    not match(Arg, Value, Ctx);
+match({[{<<"$not">>, Arg}]}, Value, #ctx{negate = Neg} = Ctx) ->
+    match(Arg, Value, Ctx#ctx{negate = not Neg});
+match({[{<<"$all">>, []}]}, _, #ctx{verbose = false}) ->
+    false;
+match({[{<<"$all">>, []}]}, _, #ctx{negate = false} = Ctx) ->
+    [#failure{op = all, type = empty_list, params = [[]], ctx = Ctx}];
+match({[{<<"$all">>, []}]}, _, #ctx{negate = true}) ->
+    [];
 % All of the values in Args must exist in Values or
 % Values == hd(Args) if Args is a single element list
 % that contains a list.
-match({[{<<"$all">>, []}]}, _Values, Ctx) ->
-    % { "$all": [] } is defined to eval to false, so return a failure
-    [#failure{op = all, params = [[]], path = Ctx#ctx.path}];
-match({[{<<"$all">>, [A]}]}, Values, _Ctx) when is_list(A), A == Values ->
-    [];
-match({[{<<"$all">>, Args}]}, Values, Ctx) when is_list(Values) ->
-    lists:flatmap(
-        fun(Arg) ->
-            case lists:member(Arg, Values) of
-                true -> [];
-                _ -> [#failure{op = all, params = [Arg], path = Ctx#ctx.path}]
-            end
+match({[{<<"$all">>, Args}]}, Values, #ctx{verbose = false}) when is_list(Values) ->
+    Pred = fun(A) -> lists:member(A, Values) end,
+    HasArgs = lists:all(Pred, Args),
+    IsArgs =
+        case Args of
+            [A] when is_list(A) ->
+                A == Values;
+            _ ->
+                false
         end,
-        Args
-    );
-match({[{<<"$all">>, _}]}, Value, Ctx) ->
-    [#failure{op = all, type = bad_value, params = [Value], path = Ctx#ctx.path}];
+    HasArgs orelse IsArgs;
+match({[{<<"$all">>, _Args}]}, _Values, #ctx{verbose = false}) ->
+    false;
+match({[{<<"$all">>, Args}]} = Expr, Values, Ctx) ->
+    match_with_failure(Expr, Values, all, [Args], Ctx);
 %% This is for $elemMatch, $allMatch, and possibly $in because of our normalizer.
 %% A selector such as {"field_name": {"$elemMatch": {"$gte": 80, "$lt": 85}}}
 %% gets normalized to:
@@ -438,53 +468,130 @@ match({[{<<>>, Arg}]}, Values, Ctx) ->
     match(Arg, Values, Ctx);
 % Matches when any element in values matches the
 % sub-selector Arg.
-match({[{<<"$elemMatch">>, _Arg}]}, [], Ctx) ->
-    [#failure{op = elemMatch, type = empty_list, path = Ctx#ctx.path}];
-match({[{<<"$elemMatch">>, Arg}]}, Values, #ctx{path = Path} = Ctx) when is_list(Values) ->
+match({[{<<"$elemMatch">>, Arg}]}, Values, #ctx{verbose = false} = Ctx) when is_list(Values) ->
+    try
+        lists:foreach(
+            fun(V) ->
+                case match(Arg, V, Ctx) of
+                    true -> throw(matched);
+                    _ -> ok
+                end
+            end,
+            Values
+        ),
+        false
+    catch
+        throw:matched ->
+            true;
+        _:_ ->
+            false
+    end;
+match({[{<<"$elemMatch">>, _Arg}]}, _Value, #ctx{verbose = false}) ->
+    false;
+match({[{<<"$elemMatch">>, _Arg}]}, [], #ctx{negate = false} = Ctx) ->
+    [#failure{op = elemMatch, type = empty_list, ctx = Ctx}];
+match({[{<<"$elemMatch">>, _Arg}]}, [], #ctx{negate = true}) ->
+    [];
+match({[{<<"$elemMatch">>, Arg}]}, Values, #ctx{negate = true} = Ctx) ->
+    PosCtx = Ctx#ctx{negate = false},
+    match({[{<<"$allMatch">>, {[{<<"$not">>, Arg}]}}]}, Values, PosCtx);
+match({[{<<"$elemMatch">>, Arg}]}, Values, #ctx{path = Path} = Ctx) ->
     ValueFailures = [
         match(Arg, V, Ctx#ctx{path = [Idx | Path]})
      || {Idx, V} <- lists:enumerate(0, Values)
     ],
     case lists:member([], ValueFailures) of
         true -> [];
-        _ -> lists:flatten(ValueFailures)
+        false -> lists:flatten(ValueFailures)
     end;
-match({[{<<"$elemMatch">>, _}]}, Value, Ctx) ->
-    [#failure{op = elemMatch, type = bad_value, params = [Value], path = Ctx#ctx.path}];
 % Matches when all elements in values match the
 % sub-selector Arg.
-match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, #ctx{path = Path} = Ctx) ->
+match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, #ctx{verbose = false} = Ctx) ->
+    try
+        lists:foreach(
+            fun(V) ->
+                case match(Arg, V, Ctx) of
+                    false -> throw(unmatched);
+                    _ -> ok
+                end
+            end,
+            Values
+        ),
+        true
+    catch
+        _:_ ->
+            false
+    end;
+match({[{<<"$allMatch">>, _Arg}]}, _Value, #ctx{verbose = false}) ->
+    false;
+match({[{<<"$allMatch">>, _Arg}]}, [], #ctx{negate = false} = Ctx) ->
+    [#failure{op = allMatch, type = empty_list, ctx = Ctx}];
+match({[{<<"$allMatch">>, _Arg}]}, [], #ctx{negate = true}) ->
+    [];
+match({[{<<"$allMatch">>, Arg}]}, Values, #ctx{negate = true} = Ctx) ->
+    PosCtx = Ctx#ctx{negate = false},
+    match({[{<<"$elemMatch">>, {[{<<"$not">>, Arg}]}}]}, Values, PosCtx);
+match({[{<<"$allMatch">>, Arg}]}, Values, #ctx{path = Path} = Ctx) ->
+    MatchValue = fun({Idx, V}) -> match(Arg, V, Ctx#ctx{path = [Idx | Path]}) end,
     EnumValues = lists:enumerate(0, Values),
-    MatchValue = fun({Idx, Value}) -> match(Arg, Value, Ctx#ctx{path = [Idx | Path]}) end,
     lists:flatmap(MatchValue, EnumValues);
-match({[{<<"$allMatch">>, _}]}, Value, Ctx) ->
-    [#failure{op = allMatch, type = bad_value, params = [Value], path = Ctx#ctx.path}];
 % Matches when any key in the map value matches the
 % sub-selector Arg.
-match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, Ctx) ->
-    [#failure{op = keyMapMatch, type = empty_list, path = Ctx#ctx.path}];
-match({[{<<"$keyMapMatch">>, Arg}]}, {Value}, Ctx) when is_list(Value) ->
-    KeyFailures = [match(Arg, K, Ctx) || {K, _} <- Value],
+match({[{<<"$keyMapMatch">>, Arg}]}, Value, #ctx{verbose = false} = Ctx) when is_tuple(Value) ->
+    try
+        lists:foreach(
+            fun(V) ->
+                case match(Arg, V, Ctx) of
+                    true -> throw(matched);
+                    _ -> ok
+                end
+            end,
+            [Key || {Key, _} <- element(1, Value)]
+        ),
+        false
+    catch
+        throw:matched ->
+            true;
+        _:_ ->
+            false
+    end;
+match({[{<<"$keyMapMatch">>, _Arg}]}, _Value, #ctx{verbose = false}) ->
+    false;
+match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, #ctx{negate = false} = Ctx) ->
+    [#failure{op = keyMapMatch, type = empty_list, ctx = Ctx}];
+match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, #ctx{negate = true}) ->
+    [];
+match({[{<<"$keyMapMatch">>, Arg}]}, Value, #ctx{negate = true, path = Path} = Ctx) when
+    is_tuple(Value)
+->
+    Keys = [Key || {Key, _} <- element(1, Value)],
+    MatchKey = fun(K) -> match(Arg, K, Ctx#ctx{path = [K | Path]}) end,
+    lists:flatmap(MatchKey, Keys);
+match({[{<<"$keyMapMatch">>, Arg}]}, Value, #ctx{path = Path} = Ctx) when is_tuple(Value) ->
+    Keys = [Key || {Key, _} <- element(1, Value)],
+    KeyFailures = [match(Arg, K, Ctx#ctx{path = [K | Path]}) || K <- Keys],
     case lists:member([], KeyFailures) of
         true -> [];
-        _ -> lists:flatten(KeyFailures)
+        false -> lists:flatten(KeyFailures)
     end;
-match({[{<<"$keyMapMatch">>, _}]}, Value, Ctx) ->
-    [#failure{op = keyMapMatch, type = bad_value, params = [Value], path = Ctx#ctx.path}];
+match({[{<<"$keyMapMatch">>, _Arg}]}, _Value, Ctx) ->
+    [#failure{op = keyMapMatch, type = bad_value, ctx = Ctx}];
 % Our comparison operators are fairly straight forward
-match({[{<<"$lt">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) ->
-    compare(lt, Arg, Path, Cmp(Value, Arg) < 0);
-match({[{<<"$lte">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) ->
-    compare(lte, Arg, Path, Cmp(Value, Arg) =< 0);
-match({[{<<"$eq">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) ->
-    compare(eq, Arg, Path, Cmp(Value, Arg) == 0);
-match({[{<<"$ne">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) ->
-    compare(ne, Arg, Path, Cmp(Value, Arg) /= 0);
-match({[{<<"$gte">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) ->
-    compare(gte, Arg, Path, Cmp(Value, Arg) >= 0);
-match({[{<<"$gt">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) ->
-    compare(gt, Arg, Path, Cmp(Value, Arg) > 0);
-match({[{<<"$in">>, Args}]}, Values, #ctx{cmp = Cmp, path = Path}) when is_list(Values) ->
+match({[{<<"$lt">>, Arg}]}, Value, #ctx{cmp = Cmp} = Ctx) ->
+    compare(lt, Arg, Ctx, Cmp(Value, Arg) < 0);
+match({[{<<"$lte">>, Arg}]}, Value, #ctx{cmp = Cmp} = Ctx) ->
+    compare(lte, Arg, Ctx, Cmp(Value, Arg) =< 0);
+match({[{<<"$eq">>, Arg}]}, Value, #ctx{cmp = Cmp} = Ctx) ->
+    compare(eq, Arg, Ctx, Cmp(Value, Arg) == 0);
+match({[{<<"$ne">>, Arg}]}, Value, #ctx{cmp = Cmp} = Ctx) ->
+    compare(ne, Arg, Ctx, Cmp(Value, Arg) /= 0);
+match({[{<<"$gte">>, Arg}]}, Value, #ctx{cmp = Cmp} = Ctx) ->
+    compare(gte, Arg, Ctx, Cmp(Value, Arg) >= 0);
+match({[{<<"$gt">>, Arg}]}, Value, #ctx{cmp = Cmp} = Ctx) ->
+    compare(gt, Arg, Ctx, Cmp(Value, Arg) > 0);
+match({[{<<"$in">>, []}]}, _, #ctx{verbose = false}) ->
+    false;
+match({[{<<"$in">>, Args}]}, Values, #ctx{verbose = false, cmp = Cmp}) when is_list(Values) ->
     Pred = fun(Arg) ->
         lists:foldl(
             fun(Value, Match) ->
@@ -494,88 +601,68 @@ match({[{<<"$in">>, Args}]}, Values, #ctx{cmp = Cmp, path = Path}) when is_list(
             Values
         )
     end,
-    case lists:any(Pred, Args) of
-        true -> [];
-        _ -> [#failure{op = in, params = [Args], path = Path}]
-    end;
-match({[{<<"$in">>, Args}]}, Value, #ctx{cmp = Cmp, path = Path}) ->
+    lists:any(Pred, Args);
+match({[{<<"$in">>, Args}]}, Value, #ctx{verbose = false, cmp = Cmp}) ->
     Pred = fun(Arg) -> Cmp(Value, Arg) == 0 end,
-    case lists:any(Pred, Args) of
-        true -> [];
-        _ -> [#failure{op = in, params = [Args], path = Path}]
-    end;
-match({[{<<"$nin">>, Args}]}, Values, #ctx{cmp = Cmp, path = Path}) when is_list(Values) ->
-    Pred = fun(Arg) ->
-        lists:foldl(
-            fun(Value, Match) ->
-                (Cmp(Value, Arg) /= 0) and Match
-            end,
-            true,
-            Values
-        )
-    end,
-    case lists:all(Pred, Args) of
-        true -> [];
-        _ -> [#failure{op = nin, params = [Args], path = Path}]
-    end;
-match({[{<<"$nin">>, Args}]}, Value, #ctx{cmp = Cmp, path = Path}) ->
+    lists:any(Pred, Args);
+match({[{<<"$in">>, Args}]} = Expr, Value, Ctx) ->
+    match_with_failure(Expr, Value, in, [Args], Ctx);
+match({[{<<"$nin">>, []}]}, _, #ctx{verbose = false}) ->
+    true;
+match({[{<<"$nin">>, Args}]}, Values, #ctx{verbose = false} = Ctx) when is_list(Values) ->
+    not match({[{<<"$in">>, Args}]}, Values, Ctx);
+match({[{<<"$nin">>, Args}]}, Value, #ctx{verbose = false, cmp = Cmp}) ->
     Pred = fun(Arg) -> Cmp(Value, Arg) /= 0 end,
-    case lists:all(Pred, Args) of
-        true -> [];
-        _ -> [#failure{op = nin, params = [Args], path = Path}]
-    end;
+    lists:all(Pred, Args);
+match({[{<<"$nin">>, Args}]} = Expr, Value, Ctx) ->
+    match_with_failure(Expr, Value, nin, [Args], Ctx);
 % This logic is a bit subtle. Basically, if value is
 % not undefined, then it exists.
-match({[{<<"$exists">>, ShouldExist}]}, Value, Ctx) ->
-    case {ShouldExist, Value} of
-        {true, undefined} -> [#failure{op = exists, params = [ShouldExist], path = Ctx#ctx.path}];
-        {true, _} -> [];
-        {false, undefined} -> [];
-        {false, _} -> [#failure{op = exists, params = [ShouldExist], path = Ctx#ctx.path}]
-    end;
-match({[{<<"$type">>, Arg}]}, Value, Ctx) when is_binary(Arg) ->
-    case mango_json:type(Value) of
-        Arg -> [];
-        _ -> [#failure{op = type, params = [Arg], path = Ctx#ctx.path}]
-    end;
-match({[{<<"$mod">>, [D, R]}]}, Value, Ctx) when is_integer(Value) ->
-    case Value rem D of
-        R -> [];
-        _ -> [#failure{op = mod, params = [D, R], path = Ctx#ctx.path}]
-    end;
-match({[{<<"$mod">>, _}]}, Value, Ctx) ->
-    [#failure{op = mod, type = bad_value, params = [Value], path = Ctx#ctx.path}];
-match({[{<<"$beginsWith">>, Prefix}]}, Value, Ctx) when is_binary(Prefix), is_binary(Value) ->
-    case string:prefix(Value, Prefix) of
-        nomatch -> [#failure{op = beginsWith, params = [Prefix], path = Ctx#ctx.path}];
-        _ -> []
-    end;
+match({[{<<"$exists">>, ShouldExist}]}, Value, #ctx{verbose = false}) ->
+    Exists = Value /= undefined,
+    ShouldExist andalso Exists;
+match({[{<<"$exists">>, ShouldExist}]} = Expr, Value, Ctx) ->
+    match_with_failure(Expr, Value, exists, [ShouldExist], Ctx);
+match({[{<<"$type">>, Arg}]}, Value, #ctx{verbose = false}) when is_binary(Arg) ->
+    Arg == mango_json:type(Value);
+match({[{<<"$type">>, Arg}]} = Expr, Value, Ctx) ->
+    match_with_failure(Expr, Value, type, [Arg], Ctx);
+match({[{<<"$mod">>, [D, R]}]}, Value, #ctx{verbose = false}) when is_integer(Value) ->
+    Value rem D == R;
+match({[{<<"$mod">>, _}]}, _Value, #ctx{verbose = false}) ->
+    false;
+match({[{<<"$mod">>, [D, R]}]} = Expr, Value, Ctx) ->
+    match_with_failure(Expr, Value, mod, [D, R], Ctx);
+match({[{<<"$beginsWith">>, Prefix}]}, Value, #ctx{verbose = false}) when
+    is_binary(Prefix), is_binary(Value)
+->
+    string:prefix(Value, Prefix) /= nomatch;
 % When Value is not a string, do not match
-match({[{<<"$beginsWith">>, Prefix}]}, Value, Ctx) when is_binary(Prefix) ->
-    [#failure{op = beginsWith, type = bad_value, params = [Value], path = Ctx#ctx.path}];
-match({[{<<"$regex">>, Regex}]}, Value, Ctx) when is_binary(Value) ->
+match({[{<<"$beginsWith">>, Prefix}]}, _, #ctx{verbose = false}) when is_binary(Prefix) ->
+    false;
+match({[{<<"$beginsWith">>, Prefix}]} = Expr, Value, Ctx) ->
+    match_with_failure(Expr, Value, beginsWith, [Prefix], Ctx);
+match({[{<<"$regex">>, Regex}]}, Value, #ctx{verbose = false}) when is_binary(Value) ->
     try
-        case re:run(Value, Regex, [{capture, none}]) of
-            match -> [];
-            _ -> [#failure{op = regex, params = [Regex], path = Ctx#ctx.path}]
-        end
+        match == re:run(Value, Regex, [{capture, none}])
     catch
         _:_ ->
-            [#failure{op = regex, params = [Regex], path = Ctx#ctx.path}]
-    end;
-match({[{<<"$regex">>, _}]}, Value, Ctx) ->
-    [#failure{op = regex, type = bad_value, params = [Value], path = Ctx#ctx.path}];
-match({[{<<"$size">>, Arg}]}, Values, Ctx) when is_list(Values) ->
-    case length(Values) of
-        Arg -> [];
-        _ -> [#failure{op = size, params = [Arg], path = Ctx#ctx.path}]
+            false
     end;
-match({[{<<"$size">>, _}]}, Value, Ctx) ->
-    [#failure{op = size, type = bad_value, params = [Value], path = Ctx#ctx.path}];
+match({[{<<"$regex">>, _}]}, _Value, #ctx{verbose = false}) ->
+    false;
+match({[{<<"$regex">>, Regex}]} = Expr, Value, Ctx) ->
+    match_with_failure(Expr, Value, regex, [Regex], Ctx);
+match({[{<<"$size">>, Arg}]}, Values, #ctx{verbose = false}) when is_list(Values) ->
+    length(Values) == Arg;
+match({[{<<"$size">>, _}]}, _Value, #ctx{verbose = false}) ->
+    false;
+match({[{<<"$size">>, Arg}]} = Expr, Value, Ctx) ->
+    match_with_failure(Expr, Value, size, [Arg], Ctx);
 % We don't have any choice but to believe that the text
 % index returned valid matches
-match({[{<<"$default">>, _}]}, _Value, _Ctx) ->
-    [];
+match({[{<<"$default">>, _}]}, _Value, #ctx{verbose = false}) ->
+    true;
 % All other operators are internal assertion errors for
 % matching because we either should've removed them during
 % normalization or something else broke.
@@ -584,16 +671,25 @@ match({[{<<"$", _/binary>> = Op, _}]}, _, _) ->
 % We need to traverse value to find field. The call to
 % mango_doc:get_field/2 may return either not_found or
 % bad_path in which case matching fails.
-match({[{Field, Cond}]}, Value, #ctx{path = Path} = Ctx) ->
+match({[{Field, Cond}]}, Value, #ctx{verbose = Verb, path = Path} = Ctx) ->
     InnerPath = extend_path(Field, Path),
     InnerCtx = Ctx#ctx{path = InnerPath},
     case mango_doc:get_field(Value, Field) of
         not_found when Cond == {[{<<"$exists">>, false}]} ->
-            [];
+            case Verb of
+                true -> [];
+                false -> true
+            end;
         not_found ->
-            [#failure{op = field, type = not_found, path = InnerCtx#ctx.path}];
+            case Verb of
+                true -> [#failure{op = field, type = not_found, ctx = InnerCtx}];
+                false -> false
+            end;
         bad_path ->
-            [#failure{op = field, type = bad_path, path = InnerCtx#ctx.path}];
+            case Verb of
+                true -> [#failure{op = field, type = bad_path, ctx = InnerCtx}];
+                false -> false
+            end;
         SubValue when Field == <<"_id">> ->
             match(Cond, SubValue, InnerCtx#ctx{cmp = fun mango_json:cmp_raw/2});
         SubValue ->
@@ -607,10 +703,18 @@ extend_path(Field, Path) when is_binary(Field) ->
 extend_path(Field, Path) when is_list(Field) ->
     lists:foldl(fun(F, Acc) -> [F | Acc] end, Path, Field).
 
-compare(Op, Arg, Path, Cond) ->
-    case Cond of
-        true -> [];
-        _ -> [#failure{op = Op, params = [Arg], path = Path}]
+match_with_failure(Expr, Value, Op, Params, #ctx{negate = Neg} = Ctx) ->
+    case not match(Expr, Value, Ctx#ctx{verbose = false}) of
+        Neg -> [];
+        _ -> [#failure{op = Op, params = Params, ctx = Ctx}]
+    end.
+
+compare(_, _, #ctx{verbose = false}, Cond) ->
+    Cond;
+compare(Op, Arg, #ctx{negate = Neg} = Ctx, Cond) ->
+    case not Cond of
+        Neg -> [];
+        _ -> [#failure{op = Op, params = [Arg], ctx = Ctx}]
     end.
 
 % Returns true if Selector requires all
@@ -1149,10 +1253,21 @@ check_selector(Selector, Results) ->
     SelPos = normalize({[{<<"x">>, Selector}]}),
     SelNeg = normalize({[{<<"x">>, {[{<<"$not">>, Selector}]}}]}),
 
+    ListToBool = fun(List) ->
+        case List of
+            [] -> true;
+            [_ | _] -> false
+        end
+    end,
+
     Check = fun({Result, Value}) ->
         Doc = {[{<<"x">>, Value}]},
-        ?assertEqual(Result, match_int(SelPos, Doc)),
-        ?assertEqual(not Result, match_int(SelNeg, Doc))
+
+        ?assertEqual(Result, match_int(SelPos, Doc, false)),
+        ?assertEqual(Result, ListToBool(match_int(SelPos, Doc, true))),
+
+        ?assertEqual(not Result, match_int(SelNeg, Doc, false)),
+        ?assertEqual(not Result, ListToBool(match_int(SelNeg, Doc, true)))
     end,
 
     lists:foreach(Check, Results).
@@ -1785,8 +1900,8 @@ match_failures_object_test() ->
             {<<"b">>, {[{<<"c">>, 3}]}}
         ]}
     ),
-    ?assertEqual(
-        [#failure{op = eq, type = mismatch, params = [1], path = [<<"a">>]}],
+    ?assertMatch(
+        [#failure{op = eq, type = mismatch, params = [1], ctx = #ctx{path = [<<"a">>]}}],
         Fails1
     ),
 
@@ -1797,8 +1912,8 @@ match_failures_object_test() ->
             {<<"b">>, {[{<<"c">>, 4}]}}
         ]}
     ),
-    ?assertEqual(
-        [#failure{op = eq, type = mismatch, params = [3], path = [<<"c">>, <<"b">>]}],
+    ?assertMatch(
+        [#failure{op = eq, type = mismatch, params = [3], ctx = #ctx{path = [<<"c">>, <<"b">>]}}],
         Fails2
     ),
 
@@ -1809,10 +1924,10 @@ match_failures_object_test() ->
             {<<"b">>, {[{<<"c">>, 4}]}}
         ]}
     ),
-    ?assertEqual(
+    ?assertMatch(
         [
-            #failure{op = eq, type = mismatch, params = [1], path = [<<"a">>]},
-            #failure{op = eq, type = mismatch, params = [3], path = [<<"c">>, <<"b">>]}
+            #failure{op = eq, type = mismatch, params = [1], ctx = #ctx{path = [<<"a">>]}},
+            #failure{op = eq, type = mismatch, params = [3], ctx = #ctx{path = [<<"c">>, <<"b">>]}}
         ],
         Fails3
     ).
@@ -1835,18 +1950,18 @@ match_failures_elemmatch_test() ->
     Fails1 = match_failures(
         SelElemMatch, {[{<<"a">>, []}]}
     ),
-    ?assertEqual(
-        [#failure{op = elemMatch, type = empty_list, params = [], path = [<<"a">>]}],
+    ?assertMatch(
+        [#failure{op = elemMatch, type = empty_list, params = [], ctx = #ctx{path = [<<"a">>]}}],
         Fails1
     ),
 
     Fails2 = match_failures(
         SelElemMatch, {[{<<"a">>, [3, 2]}]}
     ),
-    ?assertEqual(
+    ?assertMatch(
         [
-            #failure{op = gt, type = mismatch, params = [4], path = [0, <<"a">>]},
-            #failure{op = gt, type = mismatch, params = [4], path = [1, <<"a">>]}
+            #failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [0, <<"a">>]}},
+            #failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [1, <<"a">>]}}
         ],
         Fails2
     ).
@@ -1869,18 +1984,18 @@ match_failures_allmatch_test() ->
     Fails1 = match_failures(
         SelAllMatch, {[{<<"a">>, [4]}]}
     ),
-    ?assertEqual(
-        [#failure{op = gt, type = mismatch, params = [4], path = [0, <<"a">>]}],
+    ?assertMatch(
+        [#failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [0, <<"a">>]}}],
         Fails1
     ),
 
     Fails2 = match_failures(
         SelAllMatch, {[{<<"a">>, [5, 6, 3, 7, 0]}]}
     ),
-    ?assertEqual(
+    ?assertMatch(
         [
-            #failure{op = gt, type = mismatch, params = [4], path = [2, <<"a">>]},
-            #failure{op = gt, type = mismatch, params = [4], path = [4, <<"a">>]}
+            #failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [2, <<"a">>]}},
+            #failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [4, <<"a">>]}}
         ],
         Fails2
     ).
@@ -1903,8 +2018,15 @@ match_failures_allmatch_object_test() ->
     Fails1 = match_failures(
         SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 4}]}]}]}}]}
     ),
-    ?assertEqual(
-        [#failure{op = gt, type = mismatch, params = [4], path = [<<"c">>, 0, <<"b">>, <<"a">>]}],
+    ?assertMatch(
+        [
+            #failure{
+                op = gt,
+                type = mismatch,
+                params = [4],
+                ctx = #ctx{path = [<<"c">>, 0, <<"b">>, <<"a">>]}
+            }
+        ],
         Fails1
     ),
 
@@ -1912,8 +2034,15 @@ match_failures_allmatch_object_test() ->
         SelAllMatch,
         {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 5}]}, {[{<<"c">>, 6}]}, {[{<<"c">>, 3}]}]}]}}]}
     ),
-    ?assertEqual(
-        [#failure{op = gt, type = mismatch, params = [4], path = [<<"c">>, 2, <<"b">>, <<"a">>]}],
+    ?assertMatch(
+        [
+            #failure{
+                op = gt,
+                type = mismatch,
+                params = [4],
+                ctx = #ctx{path = [<<"c">>, 2, <<"b">>, <<"a">>]}
+            }
+        ],
         Fails2
     ),
 
@@ -1921,11 +2050,19 @@ match_failures_allmatch_object_test() ->
         SelAllMatch,
         {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 1}]}, {[]}]}]}}]}
     ),
-    ?assertEqual(
+    ?assertMatch(
         [
-            #failure{op = gt, type = mismatch, params = [4], path = [<<"c">>, 0, <<"b">>, <<"a">>]},
             #failure{
-                op = field, type = not_found, params = [], path = [<<"c">>, 1, <<"b">>, <<"a">>]
+                op = gt,
+                type = mismatch,
+                params = [4],
+                ctx = #ctx{path = [<<"c">>, 0, <<"b">>, <<"a">>]}
+            },
+            #failure{
+                op = field,
+                type = not_found,
+                params = [],
+                ctx = #ctx{path = [<<"c">>, 1, <<"b">>, <<"a">>]}
             }
         ],
         Fails3

From 16bb9f1ed62741c0e36379cdfc8b82f40ece256a Mon Sep 17 00:00:00 2001
From: James Coglan <james@neighbourhood.ie>
Date: Fri, 30 Jan 2026 17:44:25 +0000
Subject: [PATCH 5/9] feat: Return VDU failure reports to the caller

Until now, document updates rejected by a Mango VDU returned an opaque
"forbidden" message to the client. This commit adds a detailed list of
failures, obtained by converting the `#failure` records returned by
`mango_selector:match/3` into human-readable messages.
---
 src/couch/src/couch_query_servers.erl         |   2 +
 src/mango/src/mango_native_proc.erl           |  11 +-
 src/mango/src/mango_selector.erl              | 424 ++++++++++++++----
 test/elixir/test/validate_doc_update_test.exs |   3 +
 4 files changed, 353 insertions(+), 87 deletions(-)

diff --git a/src/couch/src/couch_query_servers.erl b/src/couch/src/couch_query_servers.erl
index 7ab662f850..5d7e9ac1fa 100644
--- a/src/couch/src/couch_query_servers.erl
+++ b/src/couch/src/couch_query_servers.erl
@@ -490,6 +490,8 @@ validate_doc_update(Db, DDoc, EditDoc, DiskDoc, Ctx, SecObj) ->
     case Resp of
         ok ->
             ok;
+        {[{<<"forbidden">>, Message}, {<<"failures">>, Failures}]} ->
+            throw({forbidden, Message, Failures});
         {[{<<"forbidden">>, Message}]} ->
             throw({forbidden, Message});
         {[{<<"unauthorized">>, Message}]} ->
diff --git a/src/mango/src/mango_native_proc.erl b/src/mango/src/mango_native_proc.erl
index edcecd4b6f..b540047e50 100644
--- a/src/mango/src/mango_native_proc.erl
+++ b/src/mango/src/mango_native_proc.erl
@@ -115,9 +115,14 @@ handle_call({prompt, [<<"ddoc">>, DDocId, [<<"validate_doc_update">>], Args]}, _
             [NewDoc, OldDoc, _Ctx, _SecObj] = Args,
             Struct = {[{<<"newDoc">>, NewDoc}, {<<"oldDoc">>, OldDoc}]},
             Reply =
-                case mango_selector:match(Selector, Struct) of
-                    true -> true;
-                    _ -> {[{<<"forbidden">>, <<"document is not valid">>}]}
+                case mango_selector:match_failures(Selector, Struct) of
+                    [] ->
+                        true;
+                    Failures ->
+                        {[
+                            {<<"forbidden">>, <<"forbidden">>},
+                            {<<"failures">>, Failures}
+                        ]}
                 end,
             {reply, Reply, St}
     end;
diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl
index 347bd74bfc..7ba0c86b05 100644
--- a/src/mango/src/mango_selector.erl
+++ b/src/mango/src/mango_selector.erl
@@ -70,7 +70,7 @@ match(Selector, D) ->
 
 match_failures(Selector, D) ->
     couch_stats:increment_counter([mango, evaluate_selector]),
-    match_int(Selector, D, true).
+    [format_failure(F) || F <- match_int(Selector, D, true)].
 
 match_int(Selector, D) ->
     match_int(Selector, D, false).
@@ -574,8 +574,8 @@ match({[{<<"$keyMapMatch">>, Arg}]}, Value, #ctx{path = Path} = Ctx) when is_tup
         true -> [];
         false -> lists:flatten(KeyFailures)
     end;
-match({[{<<"$keyMapMatch">>, _Arg}]}, _Value, Ctx) ->
-    [#failure{op = keyMapMatch, type = bad_value, ctx = Ctx}];
+match({[{<<"$keyMapMatch">>, _Arg}]}, Value, Ctx) ->
+    [#failure{op = keyMapMatch, type = bad_value, params = [Value], ctx = Ctx}];
 % Our comparison operators are fairly straight forward
 match({[{<<"$lt">>, Arg}]}, Value, #ctx{cmp = Cmp} = Ctx) ->
     compare(lt, Arg, Ctx, Cmp(Value, Arg) < 0);
@@ -717,6 +717,90 @@ compare(Op, Arg, #ctx{negate = Neg} = Ctx, Cond) ->
         _ -> [#failure{op = Op, params = [Arg], ctx = Ctx}]
     end.
 
+format_failure(#failure{op = Op, type = Type, params = Params, ctx = Ctx}) ->
+    Path = format_path(Ctx#ctx.path),
+    Msg = format_op(Op, Ctx#ctx.negate, Type, Params),
+    {[{<<"path">>, Path}, {<<"message">>, list_to_binary(Msg)}]}.
+
+format_op(Op, _, empty_list, _) ->
+    io_lib:format("operator $~p was invoked with an empty list", [Op]);
+format_op(Op, _, bad_value, [Value]) ->
+    io_lib:format("operator $~p was invoked with a bad value: ~s", [Op, jiffy:encode(Value)]);
+format_op(_, _, not_found, []) ->
+    io_lib:format("must be present", []);
+format_op(_, _, bad_path, []) ->
+    io_lib:format("used an invalid path", []);
+format_op(eq, false, mismatch, [X]) ->
+    io_lib:format("must be equal to ~s", [jiffy:encode(X)]);
+format_op(eq, true, Type, Params) ->
+    format_op(ne, false, Type, Params);
+format_op(ne, false, mismatch, [X]) ->
+    io_lib:format("must not be equal to ~s", [jiffy:encode(X)]);
+format_op(ne, true, Type, Params) ->
+    format_op(eq, false, Type, Params);
+format_op(lt, false, mismatch, [X]) ->
+    io_lib:format("must be less than ~s", [jiffy:encode(X)]);
+format_op(lt, true, Type, Params) ->
+    format_op(gte, false, Type, Params);
+format_op(lte, false, mismatch, [X]) ->
+    io_lib:format("must be less than or equal to ~s", [jiffy:encode(X)]);
+format_op(lte, true, Type, Params) ->
+    format_op(gt, false, Type, Params);
+format_op(gt, false, mismatch, [X]) ->
+    io_lib:format("must be greater than ~s", [jiffy:encode(X)]);
+format_op(gt, true, Type, Params) ->
+    format_op(lte, false, Type, Params);
+format_op(gte, false, mismatch, [X]) ->
+    io_lib:format("must be greater than or equal to ~s", [jiffy:encode(X)]);
+format_op(gte, true, Type, Params) ->
+    format_op(le, false, Type, Params);
+format_op(in, false, mismatch, [X]) ->
+    io_lib:format("must be one of ~s", [jiffy:encode(X)]);
+format_op(in, true, Type, Params) ->
+    format_op(nin, false, Type, Params);
+format_op(nin, false, mismatch, [X]) ->
+    io_lib:format("must not be one of ~s", [jiffy:encode(X)]);
+format_op(nin, true, Type, Params) ->
+    format_op(in, false, Type, Params);
+format_op(all, false, mismatch, [X]) ->
+    io_lib:format("must contain all the values in ~s", [jiffy:encode(X)]);
+format_op(all, true, mismatch, [X]) ->
+    io_lib:format("must not contain at least one of the values in ~s", [jiffy:encode(X)]);
+format_op(exists, false, mismatch, [true]) ->
+    io_lib:format("must be present", []);
+format_op(exists, false, mismatch, [false]) ->
+    io_lib:format("must not be present", []);
+format_op(exists, true, Type, [Exist]) ->
+    format_op(exists, false, Type, [not Exist]);
+format_op(type, false, mismatch, [Type]) ->
+    io_lib:format("must be of type '~s'", [Type]);
+format_op(type, true, mismatch, [Type]) ->
+    io_lib:format("must not be of type '~s'", [Type]);
+format_op(mod, false, mismatch, [D, R]) ->
+    io_lib:format("must leave a remainder of ~p when divided by ~p", [R, D]);
+format_op(mod, true, mismatch, [D, R]) ->
+    io_lib:format("must leave a remainder other than ~p when divided by ~p", [R, D]);
+format_op(regex, false, mismatch, [P]) ->
+    io_lib:format("must match the pattern '~s'", [P]);
+format_op(regex, true, mismatch, [P]) ->
+    io_lib:format("must not match the pattern '~s'", [P]);
+format_op(beginsWith, false, mismatch, [P]) ->
+    io_lib:format("must begin with '~s'", [P]);
+format_op(beginsWith, true, mismatch, [P]) ->
+    io_lib:format("must not begin with '~s'", [P]);
+format_op(size, false, mismatch, [N]) ->
+    io_lib:format("must contain ~p items", [N]);
+format_op(size, true, mismatch, [N]) ->
+    io_lib:format("must not contain ~p items", [N]).
+
+format_path([]) ->
+    [];
+format_path([Item | Rest]) when is_binary(Item) ->
+    {ok, Path} = mango_util:parse_field(Item),
+    format_path(Rest) ++ Path;
+format_path([Item | Rest]) when is_integer(Item) ->
+    format_path(Rest) ++ [Item].
+
 % Returns true if Selector requires all
 % fields in RequiredFields to exist in any matching documents.
 
@@ -1287,7 +1371,14 @@ match_lt_test() ->
         {false, [1, 2, 3]},
         {false, [1, 2, 4]},
         {false, [1, 3]}
-    ]).
+    ]),
+
+    Sel = normalize({[{<<"x">>, {[{<<"$lt">>, 5}]}}]}),
+    Fails = match_failures(Sel, {[{<<"x">>, 6}]}),
+    ?assertEqual(
+        [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be less than 5">>}]}],
+        Fails
+    ).
 
 match_lte_test() ->
     check_selector({[{<<"$lte">>, 5}]}, [{true, 4}, {true, 5}, {false, 6}]),
@@ -1304,7 +1395,14 @@ match_lte_test() ->
         {true, [1, 2, 3]},
         {false, [1, 2, 4]},
         {false, [1, 3]}
-    ]).
+    ]),
+
+    Sel = normalize({[{<<"x">>, {[{<<"$lte">>, 5}]}}]}),
+    Fails = match_failures(Sel, {[{<<"x">>, 6}]}),
+    ?assertEqual(
+        [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be less than or equal to 5">>}]}],
+        Fails
+    ).
 
 match_gt_test() ->
     check_selector({[{<<"$gt">>, 5}]}, [{false, 4}, {false, 5}, {true, 6}]),
@@ -1321,7 +1419,14 @@ match_gt_test() ->
         {false, [1, 2, 3]},
         {true, [1, 2, 4]},
         {true, [1, 3]}
-    ]).
+    ]),
+
+    Sel = normalize({[{<<"x">>, {[{<<"$gt">>, 5}]}}]}),
+    Fails = match_failures(Sel, {[{<<"x">>, 4}]}),
+    ?assertEqual(
+        [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be greater than 5">>}]}],
+        Fails
+    ).
 
 match_gte_test() ->
     check_selector({[{<<"$gte">>, 5}]}, [{false, 4}, {true, 5}, {true, 6}]),
@@ -1338,7 +1443,14 @@ match_gte_test() ->
         {true, [1, 2, 3]},
         {true, [1, 2, 4]},
         {true, [1, 3]}
-    ]).
+    ]),
+
+    Sel = normalize({[{<<"x">>, {[{<<"$gte">>, 5}]}}]}),
+    Fails = match_failures(Sel, {[{<<"x">>, 4}]}),
+    ?assertEqual(
+        [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be greater than or equal to 5">>}]}],
+        Fails
+    ).
 
 match_eq_test() ->
     check_selector({[{<<"$eq">>, 5}]}, [{true, 5}, {false, 6}]),
@@ -1356,7 +1468,14 @@ match_eq_test() ->
         {false, {[{<<"a">>, {[{<<"b">>, {[{<<"c">>, 8}]}}]}}]}},
         {false, {[{<<"a">>, {[{<<"b">>, {[{<<"d">>, 7}]}}]}}]}},
         {false, {[{<<"a">>, {[{<<"d">>, {[{<<"c">>, 7}]}}]}}]}}
-    ]).
+    ]),
+
+    Sel = normalize({[{<<"x">>, 5}]}),
+    Fails = match_failures(Sel, {[{<<"x">>, 4}]}),
+    ?assertEqual(
+        [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be equal to 5">>}]}],
+        Fails
+    ).
 
 match_ne_test() ->
     check_selector({[{<<"$ne">>, 5}]}, [{false, 5}, {true, 6}]),
@@ -1384,7 +1503,14 @@ match_ne_test() ->
         {true, {[{<<"a">>, {[{<<"b">>, {[{<<"c">>, 8}]}}]}}]}},
         {true, {[{<<"a">>, {[{<<"b">>, {[{<<"d">>, 7}]}}]}}]}},
         {true, {[{<<"a">>, {[{<<"d">>, {[{<<"c">>, 7}]}}]}}]}}
-    ]).
+    ]),
+
+    Sel = normalize({[{<<"x">>, {[{<<"$ne">>, 5}]}}]}),
+    Fails = match_failures(Sel, {[{<<"x">>, 5}]}),
+    ?assertEqual(
+        [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must not be equal to 5">>}]}],
+        Fails
+    ).
 
 match_in_test() ->
     check_selector({[{<<"$in">>, []}]}, [
@@ -1426,6 +1552,13 @@ match_in_test() ->
             {false, [[<<"nested">>], <<"list">>]},
             {true, [0, [[<<"nested">>], <<"list">>]]}
         ]
+    ),
+
+    Sel = normalize({[{<<"x">>, {[{<<"$in">>, [42, false, [<<"bar">>]]}]}}]}),
+    Fails = match_failures(Sel, {[{<<"x">>, 5}]}),
+    ?assertEqual(
+        [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be one of [42,false,[\"bar\"]]">>}]}],
+        Fails
     ).
 
 match_nin_test() ->
@@ -1468,6 +1601,18 @@ match_nin_test() ->
             {true, [[<<"nested">>], <<"list">>]},
             {false, [0, [[<<"nested">>], <<"list">>]]}
         ]
+    ),
+
+    Sel = normalize({[{<<"x">>, {[{<<"$nin">>, [42, false, [<<"bar">>]]}]}}]}),
+    Fails = match_failures(Sel, {[{<<"x">>, false}]}),
+    ?assertEqual(
+        [
+            {[
+                {<<"path">>, [<<"x">>]},
+                {<<"message">>, <<"must not be one of [42,false,[\"bar\"]]">>}
+            ]}
+        ],
+        Fails
     ).
 
 match_all_test() ->
@@ -1518,7 +1663,31 @@ match_all_test() ->
     check_selector({[{<<"$all">>, [{[{<<"a">>, 1}]}]}]}, [
         {true, [{[{<<"a">>, 1}]}]},
         {false, [{[{<<"a">>, 1}, {<<"b">>, 2}]}]}
-    ]).
+    ]),
+
+    SelEmpty = normalize({[{<<"x">>, {[{<<"$all">>, []}]}}]}),
+    FailsEmpty = match_failures(SelEmpty, {[{<<"x">>, 0}]}),
+    ?assertEqual(
+        [
+            {[
+                {<<"path">>, [<<"x">>]},
+                {<<"message">>, <<"operator $all was invoked with an empty list">>}
+            ]}
+        ],
+        FailsEmpty
+    ),
+
+    Sel = normalize({[{<<"x">>, {[{<<"$all">>, [1, 2, 3]}]}}]}),
+    Fails = match_failures(Sel, {[{<<"x">>, [2, 3]}]}),
+    ?assertEqual(
+        [
+            {[
+                {<<"path">>, [<<"x">>]},
+                {<<"message">>, <<"must contain all the values in [1,2,3]">>}
+            ]}
+        ],
+        Fails
+    ).
 
 match_exists_test() ->
     check_selector({[{<<"x">>, {[{<<"$exists">>, true}]}}]}, [
@@ -1556,6 +1725,13 @@ match_exists_test() ->
             {false, {[{<<"x">>, 0}]}},
             {true, {[]}}
         ]
+    ),
+
+    Sel = normalize({[{<<"x">>, {[{<<"$exists">>, true}]}}]}),
+    Fails = match_failures(Sel, {[]}),
+    ?assertEqual(
+        [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be present">>}]}],
+        Fails
     ).
 
 match_type_test() ->
@@ -1598,7 +1774,14 @@ match_type_test() ->
         {true, {[{<<"a">>, 1}]}},
         {false, [{<<"a">>, 1}]},
         {false, null}
-    ]).
+    ]),
+
+    Sel = normalize({[{<<"x">>, {[{<<"$type">>, <<"number">>}]}}]}),
+    Fails = match_failures(Sel, {[{<<"x">>, true}]}),
+    ?assertEqual(
+        [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be of type 'number'">>}]}],
+        Fails
+    ).
 
 match_regex_test() ->
     check_selector({[{<<"$regex">>, <<"^[0-9a-f]+$">>}]}, [
@@ -1606,7 +1789,14 @@ match_regex_test() ->
         {true, <<"3a0df5e">>},
         {false, <<"3a0gf5e">>},
         {false, 42}
-    ]).
+    ]),
+
+    Sel = normalize({[{<<"x">>, {[{<<"$regex">>, <<"^[0-9a-f]+$">>}]}}]}),
+    Fails = match_failures(Sel, {[{<<"x">>, <<"hello">>}]}),
+    ?assertEqual(
+        [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must match the pattern '^[0-9a-f]+$'">>}]}],
+        Fails
+    ).
 
 match_beginswith_test() ->
     check_selector({[{<<"$beginsWith">>, <<"foo">>}]}, [
@@ -1616,7 +1806,14 @@ match_beginswith_test() ->
         {false, <<"more food">>},
         {false, <<"fo">>},
         {false, 42}
-    ]).
+    ]),
+
+    Sel = normalize({[{<<"x">>, {[{<<"$beginsWith">>, <<"foo">>}]}}]}),
+    Fails = match_failures(Sel, {[{<<"x">>, <<"hello">>}]}),
+    ?assertEqual(
+        [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must begin with 'foo'">>}]}],
+        Fails
+    ).
 
 match_mod_test() ->
     check_selector({[{<<"$mod">>, [28, 1]}]}, [
@@ -1625,7 +1822,19 @@ match_mod_test() ->
         {true, 57},
         {false, 58},
         {false, <<"57">>}
-    ]).
+    ]),
+
+    Sel = normalize({[{<<"x">>, {[{<<"$mod">>, [28, 1]}]}}]}),
+    Fails = match_failures(Sel, {[{<<"x">>, 27}]}),
+    ?assertEqual(
+        [
+            {[
+                {<<"path">>, [<<"x">>]},
+                {<<"message">>, <<"must leave a remainder of 1 when divided by 28">>}
+            ]}
+        ],
+        Fails
+    ).
 
 match_size_test() ->
     check_selector({[{<<"$size">>, 3}]}, [
@@ -1634,7 +1843,14 @@ match_size_test() ->
         {true, [0, 0, 0]},
         {false, [0, 0]},
         {false, [0, 0, 0, 0]}
-    ]).
+    ]),
+
+    Sel = normalize({[{<<"x">>, {[{<<"$size">>, 3}]}}]}),
+    Fails = match_failures(Sel, {[{<<"x">>, [0, 1]}]}),
+    ?assertEqual(
+        [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must contain 3 items">>}]}],
+        Fails
+    ).
 
 match_allmatch_test() ->
     % $allMatch is defined to return false for empty lists
@@ -1652,7 +1868,14 @@ match_allmatch_test() ->
         {false, [0]},
         {true, [1]},
         {true, [0, 1]}
-    ]).
+    ]),
+
+    Sel = normalize({[{<<"x">>, {[{<<"$allMatch">>, {[{<<"$eq">>, 0}]}}]}}]}),
+    Fails = match_failures(Sel, {[{<<"x">>, [0, 1]}]}),
+    ?assertEqual(
+        [{[{<<"path">>, [<<"x">>, 1]}, {<<"message">>, <<"must be equal to 0">>}]}],
+        Fails
+    ).
 
 match_elemmatch_test() ->
     check_selector({[{<<"$elemMatch">>, {[{<<"$eq">>, 0}]}}]}, [
@@ -1660,7 +1883,17 @@ match_elemmatch_test() ->
         {true, [0]},
         {false, [1]},
         {true, [0, 1]}
-    ]).
+    ]),
+
+    Sel = normalize({[{<<"x">>, {[{<<"$elemMatch">>, {[{<<"$eq">>, 0}]}}]}}]}),
+    Fails = match_failures(Sel, {[{<<"x">>, [1, 2]}]}),
+    ?assertEqual(
+        [
+            {[{<<"path">>, [<<"x">>, 0]}, {<<"message">>, <<"must be equal to 0">>}]},
+            {[{<<"path">>, [<<"x">>, 1]}, {<<"message">>, <<"must be equal to 0">>}]}
+        ],
+        Fails
+    ).
 
 match_keymapmatch_test() ->
     check_selector({[{<<"$keyMapMatch">>, {[{<<"$regex">>, <<"^[a-z]+$">>}]}}]}, [
@@ -1669,7 +1902,17 @@ match_keymapmatch_test() ->
         {true, {[{<<"a">>, 1}, {<<"b4">>, 2}]}},
         {false, {[{<<"b4">>, 2}]}},
         {false, {[]}}
-    ]).
+    ]),
+
+    Sel = normalize({[{<<"x">>, {[{<<"$keyMapMatch">>, {[{<<"$beginsWith">>, <<"a">>}]}}]}}]}),
+    Fails = match_failures(Sel, {[{<<"x">>, {[{<<"bravo">>, 0}, {<<"charlie">>, 1}]}}]}),
+    ?assertEqual(
+        [
+            {[{<<"path">>, [<<"x">>, <<"bravo">>]}, {<<"message">>, <<"must begin with 'a'">>}]},
+            {[{<<"path">>, [<<"x">>, <<"charlie">>]}, {<<"message">>, <<"must begin with 'a'">>}]}
+        ],
+        Fails
+    ).
 
 match_object_test() ->
     Doc1 = {[]},
@@ -1738,7 +1981,13 @@ match_object_test() ->
     ?assertEqual(false, match_int(SelShort, Doc2)),
     ?assertEqual(false, match_int(SelShort, Doc3)),
     ?assertEqual(true, match_int(SelShort, Doc4)),
-    ?assertEqual(false, match_int(SelShort, Doc5)).
+    ?assertEqual(false, match_int(SelShort, Doc5)),
+
+    Fails = match_failures(SelShort, Doc3),
+    ?assertEqual(
+        [{[{<<"path">>, [<<"x">>, <<"b">>]}, {<<"message">>, <<"must be present">>}]}],
+        Fails
+    ).
 
 match_and_test() ->
     % $and with an empty array matches anything
@@ -1796,7 +2045,16 @@ match_and_test() ->
     ?assertEqual(false, match_int(SelNotMulti, {[{<<"x">>, 6}]})),
     ?assertEqual(true, match_int(SelNotMulti, {[{<<"x">>, 2}]})),
     ?assertEqual(true, match_int(SelNotMulti, {[{<<"x">>, 9}]})),
-    ?assertEqual(false, match_int(SelNotMulti, {[]})).
+    ?assertEqual(false, match_int(SelNotMulti, {[]})),
+
+    Fails = match_failures(SelNotMulti, {[{<<"x">>, 6}]}),
+    ?assertEqual(
+        [
+            {[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be less than or equal to 3">>}]},
+            {[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be greater than or equal to 7">>}]}
+        ],
+        Fails
+    ).
 
 match_or_test() ->
     % $or with an empty array matches anything
@@ -1849,7 +2107,15 @@ match_or_test() ->
     ?assertEqual(true, match_int(SelNotMulti, {[{<<"x">>, 6}]})),
     ?assertEqual(false, match_int(SelNotMulti, {[{<<"x">>, 2}]})),
     ?assertEqual(false, match_int(SelNotMulti, {[{<<"x">>, 9}]})),
-    ?assertEqual(false, match_int(SelNotMulti, {[]})).
+    ?assertEqual(false, match_int(SelNotMulti, {[]})),
+
+    Fails = match_failures(SelNotMulti, {[{<<"x">>, 2}]}),
+    ?assertEqual(
+        [
+            {[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be greater than or equal to 3">>}]}
+        ],
+        Fails
+    ).
 
 match_nor_test() ->
     % $nor with an empty array matches anything
@@ -1874,7 +2140,15 @@ match_nor_test() ->
     ?assertEqual(true, match_int(SelMulti, {[{<<"x">>, 6}]})),
     ?assertEqual(false, match_int(SelMulti, {[{<<"x">>, 2}]})),
     ?assertEqual(false, match_int(SelMulti, {[{<<"x">>, 9}]})),
-    ?assertEqual(false, match_int(SelMulti, {[]})).
+    ?assertEqual(false, match_int(SelMulti, {[]})),
+
+    Fails = match_failures(SelMulti, {[{<<"x">>, 2}]}),
+    ?assertEqual(
+        [
+            {[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be greater than or equal to 3">>}]}
+        ],
+        Fails
+    ).
 
 match_failures_object_test() ->
     Selector = normalize(
@@ -1901,7 +2175,7 @@ match_failures_object_test() ->
         ]}
     ),
     ?assertMatch(
-        [#failure{op = eq, type = mismatch, params = [1], ctx = #ctx{path = [<<"a">>]}}],
+        [{[{<<"path">>, [<<"a">>]}, {<<"message">>, <<"must be equal to 1">>}]}],
         Fails1
     ),
 
@@ -1913,7 +2187,7 @@ match_failures_object_test() ->
         ]}
     ),
     ?assertMatch(
-        [#failure{op = eq, type = mismatch, params = [3], ctx = #ctx{path = [<<"c">>, <<"b">>]}}],
+        [{[{<<"path">>, [<<"b">>, <<"c">>]}, {<<"message">>, <<"must be equal to 3">>}]}],
         Fails2
     ),
 
@@ -1926,8 +2200,8 @@ match_failures_object_test() ->
     ),
     ?assertMatch(
         [
-            #failure{op = eq, type = mismatch, params = [1], ctx = #ctx{path = [<<"a">>]}},
-            #failure{op = eq, type = mismatch, params = [3], ctx = #ctx{path = [<<"c">>, <<"b">>]}}
+            {[{<<"path">>, [<<"a">>]}, {<<"message">>, <<"must be equal to 1">>}]},
+            {[{<<"path">>, [<<"b">>, <<"c">>]}, {<<"message">>, <<"must be equal to 3">>}]}
         ],
         Fails3
     ).
@@ -1942,26 +2216,25 @@ match_failures_elemmatch_test() ->
         ]}
     ),
 
-    Fails0 = match_failures(
-        SelElemMatch, {[{<<"a">>, [5, 3, 2]}]}
-    ),
+    Fails0 = match_failures(SelElemMatch, {[{<<"a">>, [5, 3, 2]}]}),
     ?assertEqual([], Fails0),
 
-    Fails1 = match_failures(
-        SelElemMatch, {[{<<"a">>, []}]}
-    ),
+    Fails1 = match_failures(SelElemMatch, {[{<<"a">>, []}]}),
     ?assertMatch(
-        [#failure{op = elemMatch, type = empty_list, params = [], ctx = #ctx{path = [<<"a">>]}}],
+        [
+            {[
+                {<<"path">>, [<<"a">>]},
+                {<<"message">>, <<"operator $elemMatch was invoked with an empty list">>}
+            ]}
+        ],
         Fails1
     ),
 
-    Fails2 = match_failures(
-        SelElemMatch, {[{<<"a">>, [3, 2]}]}
-    ),
+    Fails2 = match_failures(SelElemMatch, {[{<<"a">>, [3, 2]}]}),
     ?assertMatch(
         [
-            #failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [0, <<"a">>]}},
-            #failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [1, <<"a">>]}}
+            {[{<<"path">>, [<<"a">>, 0]}, {<<"message">>, <<"must be greater than 4">>}]},
+            {[{<<"path">>, [<<"a">>, 1]}, {<<"message">>, <<"must be greater than 4">>}]}
         ],
         Fails2
     ).
@@ -1976,26 +2249,20 @@ match_failures_allmatch_test() ->
         ]}
     ),
 
-    Fails0 = match_failures(
-        SelAllMatch, {[{<<"a">>, [5]}]}
-    ),
+    Fails0 = match_failures(SelAllMatch, {[{<<"a">>, [5]}]}),
     ?assertEqual([], Fails0),
 
-    Fails1 = match_failures(
-        SelAllMatch, {[{<<"a">>, [4]}]}
-    ),
+    Fails1 = match_failures(SelAllMatch, {[{<<"a">>, [4]}]}),
     ?assertMatch(
-        [#failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [0, <<"a">>]}}],
+        [{[{<<"path">>, [<<"a">>, 0]}, {<<"message">>, <<"must be greater than 4">>}]}],
         Fails1
     ),
 
-    Fails2 = match_failures(
-        SelAllMatch, {[{<<"a">>, [5, 6, 3, 7, 0]}]}
-    ),
+    Fails2 = match_failures(SelAllMatch, {[{<<"a">>, [5, 6, 3, 7, 0]}]}),
     ?assertMatch(
         [
-            #failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [2, <<"a">>]}},
-            #failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [4, <<"a">>]}}
+            {[{<<"path">>, [<<"a">>, 2]}, {<<"message">>, <<"must be greater than 4">>}]},
+            {[{<<"path">>, [<<"a">>, 4]}, {<<"message">>, <<"must be greater than 4">>}]}
         ],
         Fails2
     ).
@@ -2010,22 +2277,16 @@ match_failures_allmatch_object_test() ->
         ]}
     ),
 
-    Fails0 = match_failures(
-        SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 5}]}]}]}}]}
-    ),
+    Fails0 = match_failures(SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 5}]}]}]}}]}),
     ?assertEqual([], Fails0),
 
-    Fails1 = match_failures(
-        SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 4}]}]}]}}]}
-    ),
+    Fails1 = match_failures(SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 4}]}]}]}}]}),
     ?assertMatch(
         [
-            #failure{
-                op = gt,
-                type = mismatch,
-                params = [4],
-                ctx = #ctx{path = [<<"c">>, 0, <<"b">>, <<"a">>]}
-            }
+            {[
+                {<<"path">>, [<<"a">>, <<"b">>, 0, <<"c">>]},
+                {<<"message">>, <<"must be greater than 4">>}
+            ]}
         ],
         Fails1
     ),
@@ -2036,36 +2297,31 @@ match_failures_allmatch_object_test() ->
     ),
     ?assertMatch(
         [
-            #failure{
-                op = gt,
-                type = mismatch,
-                params = [4],
-                ctx = #ctx{path = [<<"c">>, 2, <<"b">>, <<"a">>]}
-            }
+            {[
+                {<<"path">>, [<<"a">>, <<"b">>, 2, <<"c">>]},
+                {<<"message">>, <<"must be greater than 4">>}
+            ]}
         ],
         Fails2
     ),
 
-    Fails3 = match_failures(
-        SelAllMatch,
-        {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 1}]}, {[]}]}]}}]}
-    ),
+    Fails3 = match_failures(SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 1}]}, {[]}]}]}}]}),
     ?assertMatch(
         [
-            #failure{
-                op = gt,
-                type = mismatch,
-                params = [4],
-                ctx = #ctx{path = [<<"c">>, 0, <<"b">>, <<"a">>]}
-            },
-            #failure{
-                op = field,
-                type = not_found,
-                params = [],
-                ctx = #ctx{path = [<<"c">>, 1, <<"b">>, <<"a">>]}
-            }
+            {[
+                {<<"path">>, [<<"a">>, <<"b">>, 0, <<"c">>]},
+                {<<"message">>, <<"must be greater than 4">>}
+            ]},
+            {[{<<"path">>, [<<"a">>, <<"b">>, 1, <<"c">>]}, {<<"message">>, <<"must be present">>}]}
         ],
         Fails3
     ).
 
+format_path_test() ->
+    ?assertEqual([], format_path([])),
+    ?assertEqual([<<"a">>], format_path([<<"a">>])),
+    ?assertEqual([<<"a">>, <<"b">>], format_path([<<"b">>, <<"a">>])),
+    ?assertEqual([<<"a">>, <<"b">>, <<"c">>], format_path([<<"b.c">>, <<"a">>])),
+    ?assertEqual([<<"a">>, 42, <<"b">>, <<"c">>], format_path([<<"b.c">>, 42, <<"a">>])).
+
 -endif.
diff --git a/test/elixir/test/validate_doc_update_test.exs b/test/elixir/test/validate_doc_update_test.exs
index 93ed8f177c..9279e1da0e 100644
--- a/test/elixir/test/validate_doc_update_test.exs
+++ b/test/elixir/test/validate_doc_update_test.exs
@@ -105,6 +105,9 @@ defmodule ValidateDocUpdateTest do
     resp = Couch.put("/#{db}/doc", body: %{"no" => "type"})
     assert resp.status_code == 403
     assert resp.body["error"] == "forbidden"
+    assert resp.body["reason"] == [
+      %{"path" => ["newDoc", "type"], "message" => "must be present"}
+    ]
   end
 
   @tag :with_db

From 0eab7adb8d76b5ac8b36385a7409cadb12a54f41 Mon Sep 17 00:00:00 2001
From: James Coglan <james@neighbourhood.ie>
Date: Mon, 26 Jan 2026 11:12:37 +0000
Subject: [PATCH 6/9] chore: Add some benchmarks for Mango selector matching

---
 src/mango/src/mango_selector.erl | 48 ++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl
index 7ba0c86b05..15e1111155 100644
--- a/src/mango/src/mango_selector.erl
+++ b/src/mango/src/mango_selector.erl
@@ -2324,4 +2324,52 @@ format_path_test() ->
     ?assertEqual([<<"a">>, <<"b">>, <<"c">>], format_path([<<"b.c">>, <<"a">>])),
     ?assertEqual([<<"a">>, 42, <<"b">>, <<"c">>], format_path([<<"b.c">>, 42, <<"a">>])).
 
+% The following benchmarks can be run by adding the following dependencies to
+% rebar.config.script:
+%
+%       {argparse, {url, "https://github.com/max-au/argparse"}, "1.2.4"},
+%       {erlperf, {url, "https://github.com/max-au/erlperf"}, "2.3.0"}
+%
+% and then uncommenting the following functions and running the unit tests for
+% this module via this command:
+%
+%       make eunit apps=mango suites=mango_selector
+%
+% bench(Name, Selector, Doc) ->
+%     Sel1 = normalize(Selector),
+%     [Normal, Verbose] = erlperf:compare(
+%         [
+%             #{runner => fun() -> match_int(Sel1, Doc, V) end}
+%          || V <- [false, true]
+%         ],
+%         #{}
+%     ),
+%     ?debugFmt("~nbench[~s: normal ] = ~p~n", [Name, Normal]),
+%     ?debugFmt("~nbench[~s: verbose] = ~p~n", [Name, Verbose]).
+%
+% bench_and_test() ->
+%     Sel =
+%         {[
+%             {<<"x">>,
+%                 {[
+%                     {<<"$and">>, [{[{<<"$gt">>, V}]} || V <- [100, 200, 300, 400, 500]]}
+%                 ]}}
+%         ]},
+%     Doc = {[{<<"x">>, 25}]},
+%     bench("$and", Sel, Doc).
+%
+% bench_allmatch_test() ->
+%     Sel =
+%         {[
+%             {<<"x">>,
+%                 {[
+%                     {<<"$allMatch">>, {[{<<"$gt">>, 10}]}}
+%                 ]}}
+%         ]},
+%     Doc =
+%         {[
+%             {<<"x">>, [0, 23, 45, 67, 89, 12, 34, 56, 78]}
+%         ]},
+%     bench("$allMatch", Sel, Doc).
+
 -endif.

From d467e70a67eff9666454da187c776c5c7aabb901 Mon Sep 17 00:00:00 2001
From: James Coglan <james@neighbourhood.ie>
Date: Wed, 8 Apr 2026 17:00:50 +0100
Subject: [PATCH 7/9] fix: Mango VDUs should omit the `oldDoc` field from the
 matching struct if the doc is newly created

---
 src/mango/src/mango_native_proc.erl | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/mango/src/mango_native_proc.erl b/src/mango/src/mango_native_proc.erl
index b540047e50..ee95707eda 100644
--- a/src/mango/src/mango_native_proc.erl
+++ b/src/mango/src/mango_native_proc.erl
@@ -113,7 +113,11 @@ handle_call({prompt, [<<"ddoc">>, DDocId, [<<"validate_doc_update">>], Args]}, _
             {stop, {invalid_call, Msg}, {invalid_call, Msg}, St};
         Selector ->
             [NewDoc, OldDoc, _Ctx, _SecObj] = Args,
-            Struct = {[{<<"newDoc">>, NewDoc}, {<<"oldDoc">>, OldDoc}]},
+            Struct =
+                case OldDoc of
+                    null -> {[{<<"newDoc">>, NewDoc}]};
+                    Doc -> {[{<<"newDoc">>, NewDoc}, {<<"oldDoc">>, Doc}]}
+                end,
             Reply =
                 case mango_selector:match_failures(Selector, Struct) of
                     [] ->

From 1d4dbb018cc6c1539e3f5e2e92972c40345e2e8d Mon Sep 17 00:00:00 2001
From: James Coglan <james@neighbourhood.ie>
Date: Wed, 8 Apr 2026 17:01:17 +0100
Subject: [PATCH 8/9] docs: More detailed explanation of the behaviour of the
 oldDocs field in Mango VDUs

---
 src/docs/src/ddocs/ddocs.rst | 72 +++++++++++++++++++++++++++++++++++-
 1 file changed, 71 insertions(+), 1 deletion(-)

diff --git a/src/docs/src/ddocs/ddocs.rst b/src/docs/src/ddocs/ddocs.rst
index 501501857c..8006d5c320 100644
--- a/src/docs/src/ddocs/ddocs.rst
+++ b/src/docs/src/ddocs/ddocs.rst
@@ -955,7 +955,8 @@ To use Mango selectors for validation, the design document must have the
 containing the following fields:
 
 * ``newDoc``: New version of document that will be stored.
-* ``oldDoc``: Previous version of document that is already stored.
+* ``oldDoc``: Previous version of document that is already stored; this field is
+  absent if the doc is being created for the first time.
 
 For example, to check that all docs contain a ``title`` which is a string, and a
 ``year`` which is a number:
@@ -1012,3 +1013,72 @@ this design document:
         }
       }
     }
+
+By using the ``oldDoc`` field, we can create rules that say a document can only
+be updated if it is currently in a certain state. For example, this rule would
+enforce that only documents describing actors can be updated:
+
+.. code-block:: json
+
+    {
+      "language": "query",
+
+      "validate_doc_update": {
+        "oldDoc": { "type": "actor" }
+      }
+    }
+
+This also makes it so that no new documents can be created, because a write is
+only accepted if a previous version of the doc already exists. To relax this
+constraint, allow ``oldDoc`` not to exist:
+
+.. code-block:: json
+
+    {
+      "language": "query",
+
+      "validate_doc_update": {
+        "oldDoc": {
+          "$or": [
+            { "$exists": false },
+            { "type": "actor" }
+          ]
+        }
+      }
+    }
+
+This validator will allow any new document creation, and updates to docs where
+the ``type`` field is ``"actor"``. We can also have multiple rules for new
+document states that depend on the current state, by combining ``$or`` with
+several sets of ``{ oldDoc, newDoc }`` rules:
+
+.. code-block:: json
+
+    {
+      "language": "query",
+
+      "validate_doc_update": {
+        "$or": [
+          // allow creation of docs with an acceptable type
+          {
+            "oldDoc": { "$exists": false },
+            "newDoc": {
+              "type": { "$in": ["movie", "actor"] }
+            }
+          },
+          // if a doc currently has "type": "actor", make sure its "movies"
+          // field is a non-empty list of strings
+          {
+            "oldDoc": { "type": "actor" },
+            "newDoc": {
+              "movies": {
+                "$type": "array",
+                "$not": { "$size": 0 },
+                "$allMatch": { "$type": "string" }
+              }
+            }
+          },
+          // etc.
+        ]
+      }
+    }

From ba11d389b1ec9cbbe337e8a6837027b217c4e9cc Mon Sep 17 00:00:00 2001
From: James Coglan <james@neighbourhood.ie>
Date: Thu, 9 Apr 2026 12:02:24 +0100
Subject: [PATCH 9/9] fix: Raise an error if a Mango VDU contains invalid
 top-level fields

---
 src/mango/src/mango_native_proc.erl           | 41 ++++++++-------
 src/mango/src/mango_selector.erl              | 51 +++++++++++++++++++
 test/elixir/test/config/suite.elixir          |  1 +
 test/elixir/test/validate_doc_update_test.exs | 18 +++++++
 4 files changed, 94 insertions(+), 17 deletions(-)

diff --git a/src/mango/src/mango_native_proc.erl b/src/mango/src/mango_native_proc.erl
index ee95707eda..c6d7890deb 100644
--- a/src/mango/src/mango_native_proc.erl
+++ b/src/mango/src/mango_native_proc.erl
@@ -112,23 +112,30 @@ handle_call({prompt, [<<"ddoc">>, DDocId, [<<"validate_doc_update">>], Args]}, _
             Msg = [<<"validate_doc_update">>, DDocId],
             {stop, {invalid_call, Msg}, {invalid_call, Msg}, St};
         Selector ->
-            [NewDoc, OldDoc, _Ctx, _SecObj] = Args,
-            Struct =
-                case OldDoc of
-                    null -> {[{<<"newDoc">>, NewDoc}]};
-                    Doc -> {[{<<"newDoc">>, NewDoc}, {<<"oldDoc">>, Doc}]}
-                end,
-            Reply =
-                case mango_selector:match_failures(Selector, Struct) of
-                    [] ->
-                        true;
-                    Failures ->
-                        {[
-                            {<<"forbidden">>, <<"forbidden">>},
-                            {<<"failures">>, Failures}
-                        ]}
-                end,
-            {reply, Reply, St}
+            case mango_selector:has_allowed_fields(Selector, [<<"newDoc">>, <<"oldDoc">>]) of
+                false ->
+                    Msg =
+                        <<"'validate_doc_update' may only contain 'newDoc' and 'oldDoc' as top-level fields">>,
+                    {stop, {invalid_call, Msg}, {invalid_call, Msg}, St};
+                true ->
+                    [NewDoc, OldDoc, _Ctx, _SecObj] = Args,
+                    Struct =
+                        case OldDoc of
+                            null -> {[{<<"newDoc">>, NewDoc}]};
+                            Doc -> {[{<<"newDoc">>, NewDoc}, {<<"oldDoc">>, Doc}]}
+                        end,
+                    Reply =
+                        case mango_selector:match_failures(Selector, Struct) of
+                            [] ->
+                                true;
+                            Failures ->
+                                {[
+                                    {<<"forbidden">>, <<"forbidden">>},
+                                    {<<"failures">>, Failures}
+                                ]}
+                        end,
+                    {reply, Reply, St}
+            end
     end;
 handle_call(Msg, _From, St) ->
     {stop, {invalid_call, Msg}, {invalid_call, Msg}, St}.
diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl
index 15e1111155..d0da96b69a 100644
--- a/src/mango/src/mango_selector.erl
+++ b/src/mango/src/mango_selector.erl
@@ -17,6 +17,7 @@
     match/2,
     match_failures/2,
     has_required_fields/2,
+    has_allowed_fields/2,
     is_constant_field/2,
     fields/1
 ]).
@@ -876,6 +877,39 @@ has_required_fields_int([{[{Field, Cond}]} | Rest], RequiredFields) ->
             has_required_fields_int(Rest, lists:delete(Field, RequiredFields))
     end.
 
+has_allowed_fields(Selector, AllowedFields) ->
+    Paths = lists:map(
+        fun(Field) ->
+            {ok, Path} = mango_util:parse_field(Field),
+            Path
+        end,
+        AllowedFields
+    ),
+    has_allowed_fields_int(Selector, Paths).
+
+has_allowed_fields_int({[{Field, Cond}]}, Paths) when is_list(Field) ->
+    Stemmed = [match_prefix(Field, Path) || Path <- Paths],
+    Matched = [Path || Path <- Stemmed, Path /= nil],
+    case Matched of
+        [] -> false;
+        M -> has_allowed_fields_int(Cond, M)
+    end;
+has_allowed_fields_int({[{_Op, Conds}]}, Paths) when is_list(Conds) ->
+    lists:all(fun(Cond) -> has_allowed_fields_int(Cond, Paths) end, Conds);
+has_allowed_fields_int({[{_Op, Cond}]}, Paths) ->
+    has_allowed_fields_int(Cond, Paths);
+has_allowed_fields_int(_, _) ->
+    true.
+
+match_prefix([A | Rest1], [A | Rest2]) ->
+    match_prefix(Rest1, Rest2);
+match_prefix([], Rest) ->
+    Rest;
+match_prefix(_, []) ->
+    [];
+match_prefix(_, _) ->
+    nil.
+
 % Returns true if a field in the selector is a constant value e.g. {a: {$eq: 1}}
 is_constant_field(Selector, Field) when not is_list(Field) ->
     {ok, Path} = mango_util:parse_field(Field),
@@ -1255,6 +1289,23 @@ has_required_fields_or_nested_or_false_test() ->
     Normalized = normalize(Selector),
     ?assertEqual(false, has_required_fields(Normalized, RequiredFields)).
 
+has_allowed_fields_test() ->
+    Sel1 = normalize({[{<<"a">>, 1}]}),
+    ?assertEqual(has_allowed_fields(Sel1, [<<"a">>]), true),
+    ?assertEqual(has_allowed_fields(Sel1, [<<"a">>, <<"b">>]), true),
+    ?assertEqual(has_allowed_fields(Sel1, [<<"b">>]), false),
+
+    Sel2 = normalize({[{<<"$or">>, [{[{<<"a.b">>, 1}]}, {[{<<"c.d">>, 2}]}]}]}),
+    ?assertEqual(has_allowed_fields(Sel2, [<<"a">>, <<"c">>]), true),
+    ?assertEqual(has_allowed_fields(Sel2, [<<"a.b">>, <<"c.d">>]), true),
+    ?assertEqual(has_allowed_fields(Sel2, [<<"a">>]), false),
+
+    Sel3 = normalize({[{<<"a">>, {[{<<"$or">>, [{[{<<"b">>, 1}]}, {[{<<"c">>, 2}]}]}]}}]}),
+    ?assertEqual(has_allowed_fields(Sel3, [<<"a">>]), true),
+    ?assertEqual(has_allowed_fields(Sel3, [<<"a.b">>, <<"a.c">>]), true),
+    ?assertEqual(has_allowed_fields(Sel3, [<<"a.c">>]), false),
+    ?assertEqual(has_allowed_fields(Sel3, [<<"b">>]), false).
+
 check_match(Selector) ->
     % Call match_int/2 to avoid ERROR for missing metric; this is confusing
     % in the middle of test output.
diff --git a/test/elixir/test/config/suite.elixir b/test/elixir/test/config/suite.elixir
index 21bfe3dc03..57cf3334fe 100644
--- a/test/elixir/test/config/suite.elixir
+++ b/test/elixir/test/config/suite.elixir
@@ -532,6 +532,7 @@
     "converting a Mango VDU to JavaScript updates its effects",
     "deleting a Mango VDU removes its effects",
     "Mango VDU rejects a doc if any existing ddoc fails to match",
+    "Mango VDU rejects a design doc if it contains unknown fields",
   ],
   "SecurityValidationTest": [
     "Author presence and user security",
diff --git a/test/elixir/test/validate_doc_update_test.exs b/test/elixir/test/validate_doc_update_test.exs
index 9279e1da0e..333e6ec7e9 100644
--- a/test/elixir/test/validate_doc_update_test.exs
+++ b/test/elixir/test/validate_doc_update_test.exs
@@ -212,4 +212,22 @@ defmodule ValidateDocUpdateTest do
     assert resp.status_code == 403
     assert resp.body["error"] == "forbidden"
   end
+
+  @tag :with_db
+  test "Mango VDU rejects a design doc if it contains unknown fields", context do
+    db = context[:db_name]
+
+    ddoc = %{
+      language: "query",
+
+      validate_doc_update: %{
+        "wrongField" => %{"year" => %{"$lt" => 2026}}
+      }
+    }
+    resp = Couch.put("/#{db}/_design/mango-test-2", body: ddoc)
+    assert resp.status_code == 201
+
+    resp = Couch.put("/#{db}/doc", body: %{"year" => 1994})
+    assert resp.status_code == 500
+  end
 end