From b58e7176820d42024a4c32ee34629e400d8c59bc Mon Sep 17 00:00:00 2001 From: James Coglan Date: Tue, 13 Jan 2026 14:28:28 +0000 Subject: [PATCH 1/9] feat: Make mango_selector:match/2 return a list of failures Rather than returning a boolean to indicate just success or failure, `mango_selector:match/2` now returns a list of "failures" describing the ways in which the selector failed to match the input. If this list is empty, the match was a success. --- src/mango/src/mango_selector.erl | 256 +++++++++++++++++-------------- 1 file changed, 144 insertions(+), 112 deletions(-) diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl index e53820c4a0..925fedfeed 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -23,6 +23,12 @@ -include_lib("couch/include/couch_db.hrl"). -include("mango.hrl"). +-record(failure, { + op, + type = mismatch, + params = [] +}). + % Validate and normalize each operator. This translates % every selector operator into a consistent version that % we can then rely on for all other selector functions. @@ -53,12 +59,19 @@ match(Selector, D) -> couch_stats:increment_counter([mango, evaluate_selector]), match_int(Selector, D). +match_int(Selector, D) -> + case match_failures(Selector, D) of + [] -> true; + [_ | _] -> false; + Other -> Other + end. + % An empty selector matches any value. -match_int({[]}, _) -> - true; -match_int(Selector, #doc{body = Body}) -> +match_failures({[]}, _) -> + []; +match_failures(Selector, #doc{body = Body}) -> match(Selector, Body, fun mango_json:cmp/2); -match_int(Selector, {Props}) -> +match_failures(Selector, {Props}) -> match(Selector, {Props}, fun mango_json:cmp/2). % Convert each operator into a normalized version as well @@ -364,35 +377,45 @@ negate({[{Field, Cond}]}) -> % We need to treat an empty array as always true. This will be applied % for $or, $in, $all, $nin as well. match({[{<<"$and">>, []}]}, _, _) -> - true; + []; match({[{<<"$and">>, Args}]}, Value, Cmp) -> - Pred = fun(SubSel) -> match(SubSel, Value, Cmp) end, - lists:all(Pred, Args); + MatchSubSel = fun(SubSel) -> match(SubSel, Value, Cmp) end, + lists:flatmap(MatchSubSel, Args); match({[{<<"$or">>, []}]}, _, _) -> - true; + []; match({[{<<"$or">>, Args}]}, Value, Cmp) -> - Pred = fun(SubSel) -> match(SubSel, Value, Cmp) end, - lists:any(Pred, Args); + SubSelFailures = [match(A, Value, Cmp) || A <- Args], + case lists:member([], SubSelFailures) of + true -> []; + _ -> lists:flatten(SubSelFailures) + end; +% TODO: producing good failure messages requires that normalize/1 fully removes +% $not from the tree by pushing it to the leaves. match({[{<<"$not">>, Arg}]}, Value, Cmp) -> - not match(Arg, Value, Cmp); -match({[{<<"$all">>, []}]}, _, _) -> - false; + case match(Arg, Value, Cmp) of + [] -> [#failure{op = 'not'}]; + _ -> [] + end; % All of the values in Args must exist in Values or % Values == hd(Args) if Args is a single element list % that contains a list. +match({[{<<"$all">>, []}]}, _Values, _Cmp) -> + % { "$all": [] } is defined to eval to false, so return a failure + [#failure{op = all, params = [[]]}]; +match({[{<<"$all">>, [A]}]}, Values, _Cmp) when is_list(A), A == Values -> + []; match({[{<<"$all">>, Args}]}, Values, _Cmp) when is_list(Values) -> - Pred = fun(A) -> lists:member(A, Values) end, - HasArgs = lists:all(Pred, Args), - IsArgs = - case Args of - [A] when is_list(A) -> - A == Values; - _ -> - false + lists:flatmap( + fun(Arg) -> + case lists:member(Arg, Values) of + true -> []; + _ -> [#failure{op = all, params = [Arg]}] + end end, - HasArgs orelse IsArgs; -match({[{<<"$all">>, _Args}]}, _Values, _Cmp) -> - false; + Args + ); +match({[{<<"$all">>, _}]}, Value, _Cmp) -> + [#failure{op = all, type = bad_value, params = [Value]}]; %% This is for $elemMatch, $allMatch, and possibly $in because of our normalizer. %% A selector such as {"field_name": {"$elemMatch": {"$gte": 80, "$lt": 85}}} %% gets normalized to: @@ -409,83 +432,48 @@ match({[{<<>>, Arg}]}, Values, Cmp) -> match(Arg, Values, Cmp); % Matches when any element in values matches the % sub-selector Arg. +match({[{<<"$elemMatch">>, _Arg}]}, [], _Cmp) -> + [#failure{op = elemMatch, type = empty_list}]; match({[{<<"$elemMatch">>, Arg}]}, Values, Cmp) when is_list(Values) -> - try - lists:foreach( - fun(V) -> - case match(Arg, V, Cmp) of - true -> throw(matched); - _ -> ok - end - end, - Values - ), - false - catch - throw:matched -> - true; - _:_ -> - false + ValueFailures = [match(Arg, V, Cmp) || V <- Values], + case lists:member([], ValueFailures) of + true -> []; + _ -> lists:flatten(ValueFailures) end; -match({[{<<"$elemMatch">>, _Arg}]}, _Value, _Cmp) -> - false; +match({[{<<"$elemMatch">>, _}]}, Value, _Cmp) -> + [#failure{op = elemMatch, type = bad_value, params = [Value]}]; % Matches when all elements in values match the % sub-selector Arg. match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, Cmp) -> - try - lists:foreach( - fun(V) -> - case match(Arg, V, Cmp) of - false -> throw(unmatched); - _ -> ok - end - end, - Values - ), - true - catch - _:_ -> - false - end; -match({[{<<"$allMatch">>, _Arg}]}, _Value, _Cmp) -> - false; + MatchValue = fun(Value) -> match(Arg, Value, Cmp) end, + lists:flatmap(MatchValue, Values); +match({[{<<"$allMatch">>, _}]}, Value, _Cmp) -> + [#failure{op = allMatch, type = bad_value, params = [Value]}]; % Matches when any key in the map value matches the % sub-selector Arg. -match({[{<<"$keyMapMatch">>, Arg}]}, Value, Cmp) when is_tuple(Value) -> - try - lists:foreach( - fun(V) -> - case match(Arg, V, Cmp) of - true -> throw(matched); - _ -> ok - end - end, - [Key || {Key, _} <- element(1, Value)] - ), - false - catch - throw:matched -> - true; - _:_ -> - false +match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, _Cmp) -> + [#failure{op = keyMapMatch, type = empty_list}]; +match({[{<<"$keyMapMatch">>, Arg}]}, {Value}, Cmp) when is_list(Value) -> + KeyFailures = [match(Arg, K, Cmp) || {K, _} <- Value], + case lists:member([], KeyFailures) of + true -> []; + _ -> lists:flatten(KeyFailures) end; -match({[{<<"$keyMapMatch">>, _Arg}]}, _Value, _Cmp) -> - false; +match({[{<<"$keyMapMatch">>, _}]}, Value, _Cmp) -> + [#failure{op = keyMapMatch, type = bad_value, params = [Value]}]; % Our comparison operators are fairly straight forward match({[{<<"$lt">>, Arg}]}, Value, Cmp) -> - Cmp(Value, Arg) < 0; + compare(lt, Arg, Cmp(Value, Arg) < 0); match({[{<<"$lte">>, Arg}]}, Value, Cmp) -> - Cmp(Value, Arg) =< 0; + compare(lte, Arg, Cmp(Value, Arg) =< 0); match({[{<<"$eq">>, Arg}]}, Value, Cmp) -> - Cmp(Value, Arg) == 0; + compare(eq, Arg, Cmp(Value, Arg) == 0); match({[{<<"$ne">>, Arg}]}, Value, Cmp) -> - Cmp(Value, Arg) /= 0; + compare(ne, Arg, Cmp(Value, Arg) /= 0); match({[{<<"$gte">>, Arg}]}, Value, Cmp) -> - Cmp(Value, Arg) >= 0; + compare(gte, Arg, Cmp(Value, Arg) >= 0); match({[{<<"$gt">>, Arg}]}, Value, Cmp) -> - Cmp(Value, Arg) > 0; -match({[{<<"$in">>, []}]}, _, _) -> - false; + compare(gt, Arg, Cmp(Value, Arg) > 0); match({[{<<"$in">>, Args}]}, Values, Cmp) when is_list(Values) -> Pred = fun(Arg) -> lists:foldl( @@ -496,50 +484,88 @@ match({[{<<"$in">>, Args}]}, Values, Cmp) when is_list(Values) -> Values ) end, - lists:any(Pred, Args); + case lists:any(Pred, Args) of + true -> []; + _ -> [#failure{op = in, params = [Args]}] + end; match({[{<<"$in">>, Args}]}, Value, Cmp) -> Pred = fun(Arg) -> Cmp(Value, Arg) == 0 end, - lists:any(Pred, Args); -match({[{<<"$nin">>, []}]}, _, _) -> - true; + case lists:any(Pred, Args) of + true -> []; + _ -> [#failure{op = in, params = [Args]}] + end; match({[{<<"$nin">>, Args}]}, Values, Cmp) when is_list(Values) -> - not match({[{<<"$in">>, Args}]}, Values, Cmp); + Pred = fun(Arg) -> + lists:foldl( + fun(Value, Match) -> + (Cmp(Value, Arg) /= 0) and Match + end, + true, + Values + ) + end, + case lists:all(Pred, Args) of + true -> []; + _ -> [#failure{op = nin, params = [Args]}] + end; match({[{<<"$nin">>, Args}]}, Value, Cmp) -> Pred = fun(Arg) -> Cmp(Value, Arg) /= 0 end, - lists:all(Pred, Args); + case lists:all(Pred, Args) of + true -> []; + _ -> [#failure{op = nin, params = [Args]}] + end; % This logic is a bit subtle. Basically, if value is % not undefined, then it exists. match({[{<<"$exists">>, ShouldExist}]}, Value, _Cmp) -> - Exists = Value /= undefined, - ShouldExist andalso Exists; + case {ShouldExist, Value} of + {true, undefined} -> [#failure{op = exists, params = [ShouldExist]}]; + {true, _} -> []; + {false, undefined} -> []; + {false, _} -> [#failure{op = exists, params = [ShouldExist]}] + end; match({[{<<"$type">>, Arg}]}, Value, _Cmp) when is_binary(Arg) -> - Arg == mango_json:type(Value); + case mango_json:type(Value) of + Arg -> []; + _ -> [#failure{op = type, params = [Arg]}] + end; match({[{<<"$mod">>, [D, R]}]}, Value, _Cmp) when is_integer(Value) -> - Value rem D == R; -match({[{<<"$mod">>, _}]}, _Value, _Cmp) -> - false; + case Value rem D of + R -> []; + _ -> [#failure{op = mod, params = [D, R]}] + end; +match({[{<<"$mod">>, _}]}, Value, _Cmp) -> + [#failure{op = mod, type = bad_value, params = [Value]}]; match({[{<<"$beginsWith">>, Prefix}]}, Value, _Cmp) when is_binary(Prefix), is_binary(Value) -> - string:prefix(Value, Prefix) /= nomatch; + case string:prefix(Value, Prefix) of + nomatch -> [#failure{op = beginsWith, params = [Prefix]}]; + _ -> [] + end; % When Value is not a string, do not match -match({[{<<"$beginsWith">>, Prefix}]}, _, _Cmp) when is_binary(Prefix) -> - false; +match({[{<<"$beginsWith">>, Prefix}]}, Value, _Cmp) when is_binary(Prefix) -> + [#failure{op = beginsWith, type = bad_value, params = [Value]}]; match({[{<<"$regex">>, Regex}]}, Value, _Cmp) when is_binary(Value) -> try - match == re:run(Value, Regex, [{capture, none}]) + case re:run(Value, Regex, [{capture, none}]) of + match -> []; + _ -> [#failure{op = regex, params = [Regex]}] + end catch _:_ -> - false + [#failure{op = regex, params = [Regex]}] end; -match({[{<<"$regex">>, _}]}, _Value, _Cmp) -> - false; +match({[{<<"$regex">>, _}]}, Value, _Cmp) -> + [#failure{op = regex, type = bad_value, params = [Value]}]; match({[{<<"$size">>, Arg}]}, Values, _Cmp) when is_list(Values) -> - length(Values) == Arg; -match({[{<<"$size">>, _}]}, _Value, _Cmp) -> - false; + case length(Values) of + Arg -> []; + _ -> [#failure{op = size, params = [Arg]}] + end; +match({[{<<"$size">>, _}]}, Value, _Cmp) -> + [#failure{op = size, type = bad_value, params = [Value]}]; % We don't have any choice but to believe that the text % index returned valid matches match({[{<<"$default">>, _}]}, _Value, _Cmp) -> - true; + []; % All other operators are internal assertion errors for % matching because we either should've removed them during % normalization or something else broke. @@ -551,11 +577,11 @@ match({[{<<"$", _/binary>> = Op, _}]}, _, _) -> match({[{Field, Cond}]}, Value, Cmp) -> case mango_doc:get_field(Value, Field) of not_found when Cond == {[{<<"$exists">>, false}]} -> - true; + []; not_found -> - false; + [#failure{op = '$'}]; bad_path -> - false; + [#failure{op = '$'}]; SubValue when Field == <<"_id">> -> match(Cond, SubValue, fun mango_json:cmp_raw/2); SubValue -> @@ -564,6 +590,12 @@ match({[{Field, Cond}]}, Value, Cmp) -> match({[_, _ | _] = _Props} = Sel, _Value, _Cmp) -> error({unnormalized_selector, Sel}). +compare(Op, Arg, Cond) -> + case Cond of + true -> []; + _ -> [#failure{op = Op, params = [Arg]}] + end. + % Returns true if Selector requires all % fields in RequiredFields to exist in any matching documents. From 09b3ed5ae9353056b6ee857bbe1b254571cb704f Mon Sep 17 00:00:00 2001 From: James Coglan Date: Thu, 15 Jan 2026 15:13:30 +0000 Subject: [PATCH 2/9] chore: Replace `Cmp` argument to `mango_selector:match/3` with a record We will need to pass other things around between `match` calls as well the current `Cmp` function, so here we replace this argument with a `#ctx` record that intially just contains a `cmp` field. --- src/mango/src/mango_selector.erl | 106 ++++++++++++++++--------------- 1 file changed, 55 insertions(+), 51 deletions(-) diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl index 925fedfeed..8582f093ec 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -23,6 +23,10 @@ -include_lib("couch/include/couch_db.hrl"). -include("mango.hrl"). +-record(ctx, { + cmp +}). + -record(failure, { op, type = mismatch, @@ -70,9 +74,9 @@ match_int(Selector, D) -> match_failures({[]}, _) -> []; match_failures(Selector, #doc{body = Body}) -> - match(Selector, Body, fun mango_json:cmp/2); + match_failures(Selector, Body); match_failures(Selector, {Props}) -> - match(Selector, {Props}, fun mango_json:cmp/2). + match(Selector, {Props}, #ctx{cmp = fun mango_json:cmp/2}). % Convert each operator into a normalized version as well % as convert an implicit operators into their explicit @@ -378,33 +382,33 @@ negate({[{Field, Cond}]}) -> % for $or, $in, $all, $nin as well. match({[{<<"$and">>, []}]}, _, _) -> []; -match({[{<<"$and">>, Args}]}, Value, Cmp) -> - MatchSubSel = fun(SubSel) -> match(SubSel, Value, Cmp) end, +match({[{<<"$and">>, Args}]}, Value, Ctx) -> + MatchSubSel = fun(SubSel) -> match(SubSel, Value, Ctx) end, lists:flatmap(MatchSubSel, Args); match({[{<<"$or">>, []}]}, _, _) -> []; -match({[{<<"$or">>, Args}]}, Value, Cmp) -> - SubSelFailures = [match(A, Value, Cmp) || A <- Args], - case lists:member([], SubSelFailures) of +match({[{<<"$or">>, Args}]}, Value, Ctx) -> + SubSelFailures = [match(A, Value, Ctx) || A <- Args], + case lists:any(fun(Res) -> Res == [] end, SubSelFailures) of true -> []; _ -> lists:flatten(SubSelFailures) end; % TODO: producing good failure messages requires that normalize/1 fully removes % $not from the tree by pushing it to the leaves. -match({[{<<"$not">>, Arg}]}, Value, Cmp) -> - case match(Arg, Value, Cmp) of +match({[{<<"$not">>, Arg}]}, Value, Ctx) -> + case match(Arg, Value, Ctx) of [] -> [#failure{op = 'not'}]; _ -> [] end; % All of the values in Args must exist in Values or % Values == hd(Args) if Args is a single element list % that contains a list. -match({[{<<"$all">>, []}]}, _Values, _Cmp) -> +match({[{<<"$all">>, []}]}, _Values, _Ctx) -> % { "$all": [] } is defined to eval to false, so return a failure [#failure{op = all, params = [[]]}]; -match({[{<<"$all">>, [A]}]}, Values, _Cmp) when is_list(A), A == Values -> +match({[{<<"$all">>, [A]}]}, Values, _Ctx) when is_list(A), A == Values -> []; -match({[{<<"$all">>, Args}]}, Values, _Cmp) when is_list(Values) -> +match({[{<<"$all">>, Args}]}, Values, _Ctx) when is_list(Values) -> lists:flatmap( fun(Arg) -> case lists:member(Arg, Values) of @@ -414,7 +418,7 @@ match({[{<<"$all">>, Args}]}, Values, _Cmp) when is_list(Values) -> end, Args ); -match({[{<<"$all">>, _}]}, Value, _Cmp) -> +match({[{<<"$all">>, _}]}, Value, _Ctx) -> [#failure{op = all, type = bad_value, params = [Value]}]; %% This is for $elemMatch, $allMatch, and possibly $in because of our normalizer. %% A selector such as {"field_name": {"$elemMatch": {"$gte": 80, "$lt": 85}}} @@ -428,53 +432,53 @@ match({[{<<"$all">>, _}]}, Value, _Cmp) -> %% }]} %% }]}. %% So we filter out the <<>>. -match({[{<<>>, Arg}]}, Values, Cmp) -> - match(Arg, Values, Cmp); +match({[{<<>>, Arg}]}, Values, Ctx) -> + match(Arg, Values, Ctx); % Matches when any element in values matches the % sub-selector Arg. -match({[{<<"$elemMatch">>, _Arg}]}, [], _Cmp) -> +match({[{<<"$elemMatch">>, _Arg}]}, [], _Ctx) -> [#failure{op = elemMatch, type = empty_list}]; -match({[{<<"$elemMatch">>, Arg}]}, Values, Cmp) when is_list(Values) -> - ValueFailures = [match(Arg, V, Cmp) || V <- Values], +match({[{<<"$elemMatch">>, Arg}]}, Values, Ctx) when is_list(Values) -> + ValueFailures = [match(Arg, V, Ctx) || V <- Values], case lists:member([], ValueFailures) of true -> []; _ -> lists:flatten(ValueFailures) end; -match({[{<<"$elemMatch">>, _}]}, Value, _Cmp) -> +match({[{<<"$elemMatch">>, _}]}, Value, _Ctx) -> [#failure{op = elemMatch, type = bad_value, params = [Value]}]; % Matches when all elements in values match the % sub-selector Arg. -match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, Cmp) -> - MatchValue = fun(Value) -> match(Arg, Value, Cmp) end, +match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, Ctx) -> + MatchValue = fun(Value) -> match(Arg, Value, Ctx) end, lists:flatmap(MatchValue, Values); -match({[{<<"$allMatch">>, _}]}, Value, _Cmp) -> +match({[{<<"$allMatch">>, _}]}, Value, _Ctx) -> [#failure{op = allMatch, type = bad_value, params = [Value]}]; % Matches when any key in the map value matches the % sub-selector Arg. -match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, _Cmp) -> +match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, _Ctx) -> [#failure{op = keyMapMatch, type = empty_list}]; -match({[{<<"$keyMapMatch">>, Arg}]}, {Value}, Cmp) when is_list(Value) -> - KeyFailures = [match(Arg, K, Cmp) || {K, _} <- Value], +match({[{<<"$keyMapMatch">>, Arg}]}, {Value}, Ctx) when is_list(Value) -> + KeyFailures = [match(Arg, K, Ctx) || {K, _} <- Value], case lists:member([], KeyFailures) of true -> []; _ -> lists:flatten(KeyFailures) end; -match({[{<<"$keyMapMatch">>, _}]}, Value, _Cmp) -> +match({[{<<"$keyMapMatch">>, _}]}, Value, _Ctx) -> [#failure{op = keyMapMatch, type = bad_value, params = [Value]}]; % Our comparison operators are fairly straight forward -match({[{<<"$lt">>, Arg}]}, Value, Cmp) -> +match({[{<<"$lt">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> compare(lt, Arg, Cmp(Value, Arg) < 0); -match({[{<<"$lte">>, Arg}]}, Value, Cmp) -> +match({[{<<"$lte">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> compare(lte, Arg, Cmp(Value, Arg) =< 0); -match({[{<<"$eq">>, Arg}]}, Value, Cmp) -> +match({[{<<"$eq">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> compare(eq, Arg, Cmp(Value, Arg) == 0); -match({[{<<"$ne">>, Arg}]}, Value, Cmp) -> +match({[{<<"$ne">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> compare(ne, Arg, Cmp(Value, Arg) /= 0); -match({[{<<"$gte">>, Arg}]}, Value, Cmp) -> +match({[{<<"$gte">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> compare(gte, Arg, Cmp(Value, Arg) >= 0); -match({[{<<"$gt">>, Arg}]}, Value, Cmp) -> +match({[{<<"$gt">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> compare(gt, Arg, Cmp(Value, Arg) > 0); -match({[{<<"$in">>, Args}]}, Values, Cmp) when is_list(Values) -> +match({[{<<"$in">>, Args}]}, Values, #ctx{cmp = Cmp}) when is_list(Values) -> Pred = fun(Arg) -> lists:foldl( fun(Value, Match) -> @@ -488,13 +492,13 @@ match({[{<<"$in">>, Args}]}, Values, Cmp) when is_list(Values) -> true -> []; _ -> [#failure{op = in, params = [Args]}] end; -match({[{<<"$in">>, Args}]}, Value, Cmp) -> +match({[{<<"$in">>, Args}]}, Value, #ctx{cmp = Cmp}) -> Pred = fun(Arg) -> Cmp(Value, Arg) == 0 end, case lists:any(Pred, Args) of true -> []; _ -> [#failure{op = in, params = [Args]}] end; -match({[{<<"$nin">>, Args}]}, Values, Cmp) when is_list(Values) -> +match({[{<<"$nin">>, Args}]}, Values, #ctx{cmp = Cmp}) when is_list(Values) -> Pred = fun(Arg) -> lists:foldl( fun(Value, Match) -> @@ -508,7 +512,7 @@ match({[{<<"$nin">>, Args}]}, Values, Cmp) when is_list(Values) -> true -> []; _ -> [#failure{op = nin, params = [Args]}] end; -match({[{<<"$nin">>, Args}]}, Value, Cmp) -> +match({[{<<"$nin">>, Args}]}, Value, #ctx{cmp = Cmp}) -> Pred = fun(Arg) -> Cmp(Value, Arg) /= 0 end, case lists:all(Pred, Args) of true -> []; @@ -516,34 +520,34 @@ match({[{<<"$nin">>, Args}]}, Value, Cmp) -> end; % This logic is a bit subtle. Basically, if value is % not undefined, then it exists. -match({[{<<"$exists">>, ShouldExist}]}, Value, _Cmp) -> +match({[{<<"$exists">>, ShouldExist}]}, Value, _Ctx) -> case {ShouldExist, Value} of {true, undefined} -> [#failure{op = exists, params = [ShouldExist]}]; {true, _} -> []; {false, undefined} -> []; {false, _} -> [#failure{op = exists, params = [ShouldExist]}] end; -match({[{<<"$type">>, Arg}]}, Value, _Cmp) when is_binary(Arg) -> +match({[{<<"$type">>, Arg}]}, Value, _Ctx) when is_binary(Arg) -> case mango_json:type(Value) of Arg -> []; _ -> [#failure{op = type, params = [Arg]}] end; -match({[{<<"$mod">>, [D, R]}]}, Value, _Cmp) when is_integer(Value) -> +match({[{<<"$mod">>, [D, R]}]}, Value, _Ctx) when is_integer(Value) -> case Value rem D of R -> []; _ -> [#failure{op = mod, params = [D, R]}] end; -match({[{<<"$mod">>, _}]}, Value, _Cmp) -> +match({[{<<"$mod">>, _}]}, Value, _Ctx) -> [#failure{op = mod, type = bad_value, params = [Value]}]; -match({[{<<"$beginsWith">>, Prefix}]}, Value, _Cmp) when is_binary(Prefix), is_binary(Value) -> +match({[{<<"$beginsWith">>, Prefix}]}, Value, _Ctx) when is_binary(Prefix), is_binary(Value) -> case string:prefix(Value, Prefix) of nomatch -> [#failure{op = beginsWith, params = [Prefix]}]; _ -> [] end; % When Value is not a string, do not match -match({[{<<"$beginsWith">>, Prefix}]}, Value, _Cmp) when is_binary(Prefix) -> +match({[{<<"$beginsWith">>, Prefix}]}, Value, _Ctx) when is_binary(Prefix) -> [#failure{op = beginsWith, type = bad_value, params = [Value]}]; -match({[{<<"$regex">>, Regex}]}, Value, _Cmp) when is_binary(Value) -> +match({[{<<"$regex">>, Regex}]}, Value, _Ctx) when is_binary(Value) -> try case re:run(Value, Regex, [{capture, none}]) of match -> []; @@ -553,18 +557,18 @@ match({[{<<"$regex">>, Regex}]}, Value, _Cmp) when is_binary(Value) -> _:_ -> [#failure{op = regex, params = [Regex]}] end; -match({[{<<"$regex">>, _}]}, Value, _Cmp) -> +match({[{<<"$regex">>, _}]}, Value, _Ctx) -> [#failure{op = regex, type = bad_value, params = [Value]}]; -match({[{<<"$size">>, Arg}]}, Values, _Cmp) when is_list(Values) -> +match({[{<<"$size">>, Arg}]}, Values, _Ctx) when is_list(Values) -> case length(Values) of Arg -> []; _ -> [#failure{op = size, params = [Arg]}] end; -match({[{<<"$size">>, _}]}, Value, _Cmp) -> +match({[{<<"$size">>, _}]}, Value, _Ctx) -> [#failure{op = size, type = bad_value, params = [Value]}]; % We don't have any choice but to believe that the text % index returned valid matches -match({[{<<"$default">>, _}]}, _Value, _Cmp) -> +match({[{<<"$default">>, _}]}, _Value, _Ctx) -> []; % All other operators are internal assertion errors for % matching because we either should've removed them during @@ -574,7 +578,7 @@ match({[{<<"$", _/binary>> = Op, _}]}, _, _) -> % We need to traverse value to find field. The call to % mango_doc:get_field/2 may return either not_found or % bad_path in which case matching fails. -match({[{Field, Cond}]}, Value, Cmp) -> +match({[{Field, Cond}]}, Value, Ctx) -> case mango_doc:get_field(Value, Field) of not_found when Cond == {[{<<"$exists">>, false}]} -> []; @@ -583,11 +587,11 @@ match({[{Field, Cond}]}, Value, Cmp) -> bad_path -> [#failure{op = '$'}]; SubValue when Field == <<"_id">> -> - match(Cond, SubValue, fun mango_json:cmp_raw/2); + match(Cond, SubValue, Ctx#ctx{cmp = fun mango_json:cmp_raw/2}); SubValue -> - match(Cond, SubValue, Cmp) + match(Cond, SubValue, Ctx) end; -match({[_, _ | _] = _Props} = Sel, _Value, _Cmp) -> +match({[_, _ | _] = _Props} = Sel, _Value, _Ctx) -> error({unnormalized_selector, Sel}). compare(Op, Arg, Cond) -> From 412bb3e67ee9e972135f24a0d444a69ae47f7da4 Mon Sep 17 00:00:00 2001 From: James Coglan Date: Thu, 15 Jan 2026 16:57:29 +0000 Subject: [PATCH 3/9] feat: Record the paths where selectors fail to match To give detailed feedback to the caller, the `#ctx` argument to `mango_selector:match/3` now records the path that was taken to reach each value, and this path is added to the `#failure` records. Each path segment is either a binary, if it represents an object property, or an integer if it represents an array index. Items are pushed on the front of `#ctx.path` as this is faster than pushing onto the back of a list. This list can then be reversed once the final list of failures has been generated, before the failures are presented to the caller. --- src/mango/src/mango_selector.erl | 329 ++++++++++++++++++++++++------- 1 file changed, 256 insertions(+), 73 deletions(-) diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl index 8582f093ec..e532a6b9a9 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -24,13 +24,15 @@ -include("mango.hrl"). -record(ctx, { - cmp + cmp, + path = [] }). -record(failure, { op, type = mismatch, - params = [] + params = [], + path = [] }). % Validate and normalize each operator. This translates @@ -397,29 +399,29 @@ match({[{<<"$or">>, Args}]}, Value, Ctx) -> % $not from the tree by pushing it to the leaves. match({[{<<"$not">>, Arg}]}, Value, Ctx) -> case match(Arg, Value, Ctx) of - [] -> [#failure{op = 'not'}]; + [] -> [#failure{op = 'not', path = Ctx#ctx.path}]; _ -> [] end; % All of the values in Args must exist in Values or % Values == hd(Args) if Args is a single element list % that contains a list. -match({[{<<"$all">>, []}]}, _Values, _Ctx) -> +match({[{<<"$all">>, []}]}, _Values, Ctx) -> % { "$all": [] } is defined to eval to false, so return a failure - [#failure{op = all, params = [[]]}]; + [#failure{op = all, params = [[]], path = Ctx#ctx.path}]; match({[{<<"$all">>, [A]}]}, Values, _Ctx) when is_list(A), A == Values -> []; -match({[{<<"$all">>, Args}]}, Values, _Ctx) when is_list(Values) -> +match({[{<<"$all">>, Args}]}, Values, Ctx) when is_list(Values) -> lists:flatmap( fun(Arg) -> case lists:member(Arg, Values) of true -> []; - _ -> [#failure{op = all, params = [Arg]}] + _ -> [#failure{op = all, params = [Arg], path = Ctx#ctx.path}] end end, Args ); -match({[{<<"$all">>, _}]}, Value, _Ctx) -> - [#failure{op = all, type = bad_value, params = [Value]}]; +match({[{<<"$all">>, _}]}, Value, Ctx) -> + [#failure{op = all, type = bad_value, params = [Value], path = Ctx#ctx.path}]; %% This is for $elemMatch, $allMatch, and possibly $in because of our normalizer. %% A selector such as {"field_name": {"$elemMatch": {"$gte": 80, "$lt": 85}}} %% gets normalized to: @@ -436,49 +438,53 @@ match({[{<<>>, Arg}]}, Values, Ctx) -> match(Arg, Values, Ctx); % Matches when any element in values matches the % sub-selector Arg. -match({[{<<"$elemMatch">>, _Arg}]}, [], _Ctx) -> - [#failure{op = elemMatch, type = empty_list}]; -match({[{<<"$elemMatch">>, Arg}]}, Values, Ctx) when is_list(Values) -> - ValueFailures = [match(Arg, V, Ctx) || V <- Values], +match({[{<<"$elemMatch">>, _Arg}]}, [], Ctx) -> + [#failure{op = elemMatch, type = empty_list, path = Ctx#ctx.path}]; +match({[{<<"$elemMatch">>, Arg}]}, Values, #ctx{path = Path} = Ctx) when is_list(Values) -> + ValueFailures = [ + match(Arg, V, Ctx#ctx{path = [Idx | Path]}) + || {Idx, V} <- lists:enumerate(0, Values) + ], case lists:member([], ValueFailures) of true -> []; _ -> lists:flatten(ValueFailures) end; -match({[{<<"$elemMatch">>, _}]}, Value, _Ctx) -> - [#failure{op = elemMatch, type = bad_value, params = [Value]}]; +match({[{<<"$elemMatch">>, _}]}, Value, Ctx) -> + [#failure{op = elemMatch, type = bad_value, params = [Value], path = Ctx#ctx.path}]; % Matches when all elements in values match the % sub-selector Arg. -match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, Ctx) -> - MatchValue = fun(Value) -> match(Arg, Value, Ctx) end, - lists:flatmap(MatchValue, Values); -match({[{<<"$allMatch">>, _}]}, Value, _Ctx) -> - [#failure{op = allMatch, type = bad_value, params = [Value]}]; +match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, #ctx{path = Path} = Ctx) -> + EnumValues = lists:enumerate(0, Values), + MatchValue = fun({Idx, Value}) -> match(Arg, Value, Ctx#ctx{path = [Idx | Path]}) end, + lists:flatmap(MatchValue, EnumValues); +match({[{<<"$allMatch">>, _}]}, Value, Ctx) -> + [#failure{op = allMatch, type = bad_value, params = [Value], path = Ctx#ctx.path}]; % Matches when any key in the map value matches the % sub-selector Arg. -match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, _Ctx) -> - [#failure{op = keyMapMatch, type = empty_list}]; +match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, Ctx) -> + [#failure{op = keyMapMatch, type = empty_list, path = Ctx#ctx.path}]; match({[{<<"$keyMapMatch">>, Arg}]}, {Value}, Ctx) when is_list(Value) -> KeyFailures = [match(Arg, K, Ctx) || {K, _} <- Value], case lists:member([], KeyFailures) of true -> []; _ -> lists:flatten(KeyFailures) end; -match({[{<<"$keyMapMatch">>, _}]}, Value, _Ctx) -> - [#failure{op = keyMapMatch, type = bad_value, params = [Value]}]; +match({[{<<"$keyMapMatch">>, _}]}, Value, Ctx) -> + [#failure{op = keyMapMatch, type = bad_value, params = [Value], path = Ctx#ctx.path}]; % Our comparison operators are fairly straight forward -match({[{<<"$lt">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> - compare(lt, Arg, Cmp(Value, Arg) < 0); -match({[{<<"$lte">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> - compare(lte, Arg, Cmp(Value, Arg) =< 0); -match({[{<<"$eq">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> - compare(eq, Arg, Cmp(Value, Arg) == 0); -match({[{<<"$ne">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> - compare(ne, Arg, Cmp(Value, Arg) /= 0); -match({[{<<"$gte">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> - compare(gte, Arg, Cmp(Value, Arg) >= 0); -match({[{<<"$gt">>, Arg}]}, Value, #ctx{cmp = Cmp}) -> - compare(gt, Arg, Cmp(Value, Arg) > 0); -match({[{<<"$in">>, Args}]}, Values, #ctx{cmp = Cmp}) when is_list(Values) -> +match({[{<<"$lt">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> + compare(lt, Arg, Path, Cmp(Value, Arg) < 0); +match({[{<<"$lte">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> + compare(lte, Arg, Path, Cmp(Value, Arg) =< 0); +match({[{<<"$eq">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> + compare(eq, Arg, Path, Cmp(Value, Arg) == 0); +match({[{<<"$ne">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> + compare(ne, Arg, Path, Cmp(Value, Arg) /= 0); +match({[{<<"$gte">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> + compare(gte, Arg, Path, Cmp(Value, Arg) >= 0); +match({[{<<"$gt">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> + compare(gt, Arg, Path, Cmp(Value, Arg) > 0); +match({[{<<"$in">>, Args}]}, Values, #ctx{cmp = Cmp, path = Path}) when is_list(Values) -> Pred = fun(Arg) -> lists:foldl( fun(Value, Match) -> @@ -490,15 +496,15 @@ match({[{<<"$in">>, Args}]}, Values, #ctx{cmp = Cmp}) when is_list(Values) -> end, case lists:any(Pred, Args) of true -> []; - _ -> [#failure{op = in, params = [Args]}] + _ -> [#failure{op = in, params = [Args], path = Path}] end; -match({[{<<"$in">>, Args}]}, Value, #ctx{cmp = Cmp}) -> +match({[{<<"$in">>, Args}]}, Value, #ctx{cmp = Cmp, path = Path}) -> Pred = fun(Arg) -> Cmp(Value, Arg) == 0 end, case lists:any(Pred, Args) of true -> []; - _ -> [#failure{op = in, params = [Args]}] + _ -> [#failure{op = in, params = [Args], path = Path}] end; -match({[{<<"$nin">>, Args}]}, Values, #ctx{cmp = Cmp}) when is_list(Values) -> +match({[{<<"$nin">>, Args}]}, Values, #ctx{cmp = Cmp, path = Path}) when is_list(Values) -> Pred = fun(Arg) -> lists:foldl( fun(Value, Match) -> @@ -510,62 +516,62 @@ match({[{<<"$nin">>, Args}]}, Values, #ctx{cmp = Cmp}) when is_list(Values) -> end, case lists:all(Pred, Args) of true -> []; - _ -> [#failure{op = nin, params = [Args]}] + _ -> [#failure{op = nin, params = [Args], path = Path}] end; -match({[{<<"$nin">>, Args}]}, Value, #ctx{cmp = Cmp}) -> +match({[{<<"$nin">>, Args}]}, Value, #ctx{cmp = Cmp, path = Path}) -> Pred = fun(Arg) -> Cmp(Value, Arg) /= 0 end, case lists:all(Pred, Args) of true -> []; - _ -> [#failure{op = nin, params = [Args]}] + _ -> [#failure{op = nin, params = [Args], path = Path}] end; % This logic is a bit subtle. Basically, if value is % not undefined, then it exists. -match({[{<<"$exists">>, ShouldExist}]}, Value, _Ctx) -> +match({[{<<"$exists">>, ShouldExist}]}, Value, Ctx) -> case {ShouldExist, Value} of - {true, undefined} -> [#failure{op = exists, params = [ShouldExist]}]; + {true, undefined} -> [#failure{op = exists, params = [ShouldExist], path = Ctx#ctx.path}]; {true, _} -> []; {false, undefined} -> []; - {false, _} -> [#failure{op = exists, params = [ShouldExist]}] + {false, _} -> [#failure{op = exists, params = [ShouldExist], path = Ctx#ctx.path}] end; -match({[{<<"$type">>, Arg}]}, Value, _Ctx) when is_binary(Arg) -> +match({[{<<"$type">>, Arg}]}, Value, Ctx) when is_binary(Arg) -> case mango_json:type(Value) of Arg -> []; - _ -> [#failure{op = type, params = [Arg]}] + _ -> [#failure{op = type, params = [Arg], path = Ctx#ctx.path}] end; -match({[{<<"$mod">>, [D, R]}]}, Value, _Ctx) when is_integer(Value) -> +match({[{<<"$mod">>, [D, R]}]}, Value, Ctx) when is_integer(Value) -> case Value rem D of R -> []; - _ -> [#failure{op = mod, params = [D, R]}] + _ -> [#failure{op = mod, params = [D, R], path = Ctx#ctx.path}] end; -match({[{<<"$mod">>, _}]}, Value, _Ctx) -> - [#failure{op = mod, type = bad_value, params = [Value]}]; -match({[{<<"$beginsWith">>, Prefix}]}, Value, _Ctx) when is_binary(Prefix), is_binary(Value) -> +match({[{<<"$mod">>, _}]}, Value, Ctx) -> + [#failure{op = mod, type = bad_value, params = [Value], path = Ctx#ctx.path}]; +match({[{<<"$beginsWith">>, Prefix}]}, Value, Ctx) when is_binary(Prefix), is_binary(Value) -> case string:prefix(Value, Prefix) of - nomatch -> [#failure{op = beginsWith, params = [Prefix]}]; + nomatch -> [#failure{op = beginsWith, params = [Prefix], path = Ctx#ctx.path}]; _ -> [] end; % When Value is not a string, do not match -match({[{<<"$beginsWith">>, Prefix}]}, Value, _Ctx) when is_binary(Prefix) -> - [#failure{op = beginsWith, type = bad_value, params = [Value]}]; -match({[{<<"$regex">>, Regex}]}, Value, _Ctx) when is_binary(Value) -> +match({[{<<"$beginsWith">>, Prefix}]}, Value, Ctx) when is_binary(Prefix) -> + [#failure{op = beginsWith, type = bad_value, params = [Value], path = Ctx#ctx.path}]; +match({[{<<"$regex">>, Regex}]}, Value, Ctx) when is_binary(Value) -> try case re:run(Value, Regex, [{capture, none}]) of match -> []; - _ -> [#failure{op = regex, params = [Regex]}] + _ -> [#failure{op = regex, params = [Regex], path = Ctx#ctx.path}] end catch _:_ -> - [#failure{op = regex, params = [Regex]}] + [#failure{op = regex, params = [Regex], path = Ctx#ctx.path}] end; -match({[{<<"$regex">>, _}]}, Value, _Ctx) -> - [#failure{op = regex, type = bad_value, params = [Value]}]; -match({[{<<"$size">>, Arg}]}, Values, _Ctx) when is_list(Values) -> +match({[{<<"$regex">>, _}]}, Value, Ctx) -> + [#failure{op = regex, type = bad_value, params = [Value], path = Ctx#ctx.path}]; +match({[{<<"$size">>, Arg}]}, Values, Ctx) when is_list(Values) -> case length(Values) of Arg -> []; - _ -> [#failure{op = size, params = [Arg]}] + _ -> [#failure{op = size, params = [Arg], path = Ctx#ctx.path}] end; -match({[{<<"$size">>, _}]}, Value, _Ctx) -> - [#failure{op = size, type = bad_value, params = [Value]}]; +match({[{<<"$size">>, _}]}, Value, Ctx) -> + [#failure{op = size, type = bad_value, params = [Value], path = Ctx#ctx.path}]; % We don't have any choice but to believe that the text % index returned valid matches match({[{<<"$default">>, _}]}, _Value, _Ctx) -> @@ -578,26 +584,33 @@ match({[{<<"$", _/binary>> = Op, _}]}, _, _) -> % We need to traverse value to find field. The call to % mango_doc:get_field/2 may return either not_found or % bad_path in which case matching fails. -match({[{Field, Cond}]}, Value, Ctx) -> +match({[{Field, Cond}]}, Value, #ctx{path = Path} = Ctx) -> + InnerPath = extend_path(Field, Path), + InnerCtx = Ctx#ctx{path = InnerPath}, case mango_doc:get_field(Value, Field) of not_found when Cond == {[{<<"$exists">>, false}]} -> []; not_found -> - [#failure{op = '$'}]; + [#failure{op = field, type = not_found, path = InnerCtx#ctx.path}]; bad_path -> - [#failure{op = '$'}]; + [#failure{op = field, type = bad_path, path = InnerCtx#ctx.path}]; SubValue when Field == <<"_id">> -> - match(Cond, SubValue, Ctx#ctx{cmp = fun mango_json:cmp_raw/2}); + match(Cond, SubValue, InnerCtx#ctx{cmp = fun mango_json:cmp_raw/2}); SubValue -> - match(Cond, SubValue, Ctx) + match(Cond, SubValue, InnerCtx) end; match({[_, _ | _] = _Props} = Sel, _Value, _Ctx) -> error({unnormalized_selector, Sel}). -compare(Op, Arg, Cond) -> +extend_path(Field, Path) when is_binary(Field) -> + [Field | Path]; +extend_path(Field, Path) when is_list(Field) -> + lists:foldl(fun(F, Acc) -> [F | Acc] end, Path, Field). + +compare(Op, Arg, Path, Cond) -> case Cond of true -> []; - _ -> [#failure{op = Op, params = [Arg]}] + _ -> [#failure{op = Op, params = [Arg], path = Path}] end. % Returns true if Selector requires all @@ -1748,4 +1761,174 @@ match_nor_test() -> ?assertEqual(false, match_int(SelMulti, {[{<<"x">>, 9}]})), ?assertEqual(false, match_int(SelMulti, {[]})). +match_failures_object_test() -> + Selector = normalize( + {[ + {<<"a">>, 1}, + {<<"b">>, {[{<<"c">>, 3}]}} + ]} + ), + + Fails0 = match_failures( + Selector, + {[ + {<<"a">>, 1}, + {<<"b">>, {[{<<"c">>, 3}]}} + ]} + ), + ?assertEqual([], Fails0), + + Fails1 = match_failures( + Selector, + {[ + {<<"a">>, 0}, + {<<"b">>, {[{<<"c">>, 3}]}} + ]} + ), + ?assertEqual( + [#failure{op = eq, type = mismatch, params = [1], path = [<<"a">>]}], + Fails1 + ), + + Fails2 = match_failures( + Selector, + {[ + {<<"a">>, 1}, + {<<"b">>, {[{<<"c">>, 4}]}} + ]} + ), + ?assertEqual( + [#failure{op = eq, type = mismatch, params = [3], path = [<<"c">>, <<"b">>]}], + Fails2 + ), + + Fails3 = match_failures( + Selector, + {[ + {<<"a">>, 2}, + {<<"b">>, {[{<<"c">>, 4}]}} + ]} + ), + ?assertEqual( + [ + #failure{op = eq, type = mismatch, params = [1], path = [<<"a">>]}, + #failure{op = eq, type = mismatch, params = [3], path = [<<"c">>, <<"b">>]} + ], + Fails3 + ). + +match_failures_elemmatch_test() -> + SelElemMatch = normalize( + {[ + {<<"a">>, + {[ + {<<"$elemMatch">>, {[{<<"$gt">>, 4}]}} + ]}} + ]} + ), + + Fails0 = match_failures( + SelElemMatch, {[{<<"a">>, [5, 3, 2]}]} + ), + ?assertEqual([], Fails0), + + Fails1 = match_failures( + SelElemMatch, {[{<<"a">>, []}]} + ), + ?assertEqual( + [#failure{op = elemMatch, type = empty_list, params = [], path = [<<"a">>]}], + Fails1 + ), + + Fails2 = match_failures( + SelElemMatch, {[{<<"a">>, [3, 2]}]} + ), + ?assertEqual( + [ + #failure{op = gt, type = mismatch, params = [4], path = [0, <<"a">>]}, + #failure{op = gt, type = mismatch, params = [4], path = [1, <<"a">>]} + ], + Fails2 + ). + +match_failures_allmatch_test() -> + SelAllMatch = normalize( + {[ + {<<"a">>, + {[ + {<<"$allMatch">>, {[{<<"$gt">>, 4}]}} + ]}} + ]} + ), + + Fails0 = match_failures( + SelAllMatch, {[{<<"a">>, [5]}]} + ), + ?assertEqual([], Fails0), + + Fails1 = match_failures( + SelAllMatch, {[{<<"a">>, [4]}]} + ), + ?assertEqual( + [#failure{op = gt, type = mismatch, params = [4], path = [0, <<"a">>]}], + Fails1 + ), + + Fails2 = match_failures( + SelAllMatch, {[{<<"a">>, [5, 6, 3, 7, 0]}]} + ), + ?assertEqual( + [ + #failure{op = gt, type = mismatch, params = [4], path = [2, <<"a">>]}, + #failure{op = gt, type = mismatch, params = [4], path = [4, <<"a">>]} + ], + Fails2 + ). + +match_failures_allmatch_object_test() -> + SelAllMatch = normalize( + {[ + {<<"a.b">>, + {[ + {<<"$allMatch">>, {[{<<"c">>, {[{<<"$gt">>, 4}]}}]}} + ]}} + ]} + ), + + Fails0 = match_failures( + SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 5}]}]}]}}]} + ), + ?assertEqual([], Fails0), + + Fails1 = match_failures( + SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 4}]}]}]}}]} + ), + ?assertEqual( + [#failure{op = gt, type = mismatch, params = [4], path = [<<"c">>, 0, <<"b">>, <<"a">>]}], + Fails1 + ), + + Fails2 = match_failures( + SelAllMatch, + {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 5}]}, {[{<<"c">>, 6}]}, {[{<<"c">>, 3}]}]}]}}]} + ), + ?assertEqual( + [#failure{op = gt, type = mismatch, params = [4], path = [<<"c">>, 2, <<"b">>, <<"a">>]}], + Fails2 + ), + + Fails3 = match_failures( + SelAllMatch, + {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 1}]}, {[]}]}]}}]} + ), + ?assertEqual( + [ + #failure{op = gt, type = mismatch, params = [4], path = [<<"c">>, 0, <<"b">>, <<"a">>]}, + #failure{ + op = field, type = not_found, params = [], path = [<<"c">>, 1, <<"b">>, <<"a">>] + } + ], + Fails3 + ). + -endif. From d05fbe29fe29ece0e1f102283afa673bda8538c6 Mon Sep 17 00:00:00 2001 From: James Coglan Date: Fri, 16 Jan 2026 15:41:43 +0000 Subject: [PATCH 4/9] feat: "verbose" mode for `mango_selector:match/3` Collecting detailed `#failure` records rather than a boolean true/false when evaluating selectors imposes a performance penalty, so we would like to only do this when a selector is used for a VDU, not when it is used for indexing/filtering. To this end we introduce "verbose" mode signalled via the `#ctx.verbose` field, and each branch of `mango_selector:match/3` now has 3 distinct versions: - `#ctx{verbose = false}`: this is the original version that returns true/false, taken when a selector is used for Mango queries. - `#ctx{verbose = true, negate = false}`: verbose mode, when the operator is not negated by an enclosing `$not` operator. Returns a list of `#failure` records which may be empty. - `#ctx{verbose = true, negate = true}`: verbose mode, when the operator is negated by an enclosing `$not` operator. Returns a list of `#failure` records. The different negation modes are needed because, in order to generate meaningful failure messages, we need to record whether an operator was negated. The behaviour of combinators like `$and`, `$or`, `$allMatch` and `$elemMatch` means not all `$not` operators can be normalized out of the selector before evaluation. Instead, when we encounter a `$not` during evaluation, we flip the `#ctx.negate` field before evaluating the inner operator. --- src/mango/src/mango_selector.erl | 487 ++++++++++++++++++++----------- 1 file changed, 312 insertions(+), 175 deletions(-) diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl index e532a6b9a9..347bd74bfc 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -15,6 +15,7 @@ -export([ normalize/1, match/2, + match_failures/2, has_required_fields/2, is_constant_field/2, fields/1 @@ -25,6 +26,8 @@ -record(ctx, { cmp, + verbose = false, + negate = false, path = [] }). @@ -32,7 +35,7 @@ op, type = mismatch, params = [], - path = [] + ctx }). % Validate and normalize each operator. This translates @@ -65,20 +68,19 @@ match(Selector, D) -> couch_stats:increment_counter([mango, evaluate_selector]), match_int(Selector, D). +match_failures(Selector, D) -> + couch_stats:increment_counter([mango, evaluate_selector]), + match_int(Selector, D, true). + match_int(Selector, D) -> - case match_failures(Selector, D) of - [] -> true; - [_ | _] -> false; - Other -> Other - end. + match_int(Selector, D, false). -% An empty selector matches any value. -match_failures({[]}, _) -> - []; -match_failures(Selector, #doc{body = Body}) -> - match_failures(Selector, Body); -match_failures(Selector, {Props}) -> - match(Selector, {Props}, #ctx{cmp = fun mango_json:cmp/2}). +match_int(Selector, D, Verbose) -> + Ctx = #ctx{cmp = fun mango_json:cmp/2, verbose = Verbose}, + case D of + #doc{body = Body} -> match(Selector, Body, Ctx); + Other -> match(Selector, Other, Ctx) + end. % Convert each operator into a normalized version as well % as convert an implicit operators into their explicit @@ -380,48 +382,76 @@ negate({[{<<"$", _/binary>>, _}]} = Cond) -> negate({[{Field, Cond}]}) -> {[{Field, negate(Cond)}]}. +% An empty selector matches any value. +match({[]}, _, #ctx{verbose = false}) -> + true; +match({[]}, _, #ctx{verbose = true}) -> + []; % We need to treat an empty array as always true. This will be applied % for $or, $in, $all, $nin as well. -match({[{<<"$and">>, []}]}, _, _) -> +match({[{<<"$and">>, []}]}, _, #ctx{verbose = false}) -> + true; +match({[{<<"$and">>, []}]}, _, #ctx{negate = false}) -> []; +match({[{<<"$and">>, []}]}, _, Ctx) -> + [#failure{op = 'and', type = empty_list, params = [[]], ctx = Ctx}]; +match({[{<<"$and">>, Args}]}, Value, #ctx{verbose = false} = Ctx) -> + Pred = fun(SubSel) -> match(SubSel, Value, Ctx) end, + lists:all(Pred, Args); +match({[{<<"$and">>, Args}]}, Value, #ctx{negate = true} = Ctx) -> + NotArgs = [{[{<<"$not">>, A}]} || A <- Args], + PosCtx = Ctx#ctx{negate = false}, + match({[{<<"$or">>, NotArgs}]}, Value, PosCtx); match({[{<<"$and">>, Args}]}, Value, Ctx) -> MatchSubSel = fun(SubSel) -> match(SubSel, Value, Ctx) end, lists:flatmap(MatchSubSel, Args); -match({[{<<"$or">>, []}]}, _, _) -> +match({[{<<"$or">>, []}]}, _, #ctx{verbose = false}) -> + true; +match({[{<<"$or">>, []}]}, _, #ctx{negate = false}) -> []; +match({[{<<"$or">>, []}]}, _, Ctx) -> + [#failure{op = 'or', type = empty_list, params = [[]], ctx = Ctx}]; +match({[{<<"$or">>, Args}]}, Value, #ctx{verbose = false} = Ctx) -> + Pred = fun(SubSel) -> match(SubSel, Value, Ctx) end, + lists:any(Pred, Args); +match({[{<<"$or">>, Args}]}, Value, #ctx{negate = true} = Ctx) -> + NotArgs = [{[{<<"$not">>, A}]} || A <- Args], + PosCtx = Ctx#ctx{negate = false}, + match({[{<<"$and">>, NotArgs}]}, Value, PosCtx); match({[{<<"$or">>, Args}]}, Value, Ctx) -> SubSelFailures = [match(A, Value, Ctx) || A <- Args], - case lists:any(fun(Res) -> Res == [] end, SubSelFailures) of + case lists:member([], SubSelFailures) of true -> []; - _ -> lists:flatten(SubSelFailures) - end; -% TODO: producing good failure messages requires that normalize/1 fully removes -% $not from the tree by pushing it to the leaves. -match({[{<<"$not">>, Arg}]}, Value, Ctx) -> - case match(Arg, Value, Ctx) of - [] -> [#failure{op = 'not', path = Ctx#ctx.path}]; - _ -> [] + false -> lists:flatten(SubSelFailures) end; +match({[{<<"$not">>, Arg}]}, Value, #ctx{verbose = false} = Ctx) -> + not match(Arg, Value, Ctx); +match({[{<<"$not">>, Arg}]}, Value, #ctx{negate = Neg} = Ctx) -> + match(Arg, Value, Ctx#ctx{negate = not Neg}); +match({[{<<"$all">>, []}]}, _, #ctx{verbose = false}) -> + false; +match({[{<<"$all">>, []}]}, _, #ctx{negate = false} = Ctx) -> + [#failure{op = all, type = empty_list, params = [[]], ctx = Ctx}]; +match({[{<<"$all">>, []}]}, _, #ctx{negate = true}) -> + []; % All of the values in Args must exist in Values or % Values == hd(Args) if Args is a single element list % that contains a list. -match({[{<<"$all">>, []}]}, _Values, Ctx) -> - % { "$all": [] } is defined to eval to false, so return a failure - [#failure{op = all, params = [[]], path = Ctx#ctx.path}]; -match({[{<<"$all">>, [A]}]}, Values, _Ctx) when is_list(A), A == Values -> - []; -match({[{<<"$all">>, Args}]}, Values, Ctx) when is_list(Values) -> - lists:flatmap( - fun(Arg) -> - case lists:member(Arg, Values) of - true -> []; - _ -> [#failure{op = all, params = [Arg], path = Ctx#ctx.path}] - end +match({[{<<"$all">>, Args}]}, Values, #ctx{verbose = false}) when is_list(Values) -> + Pred = fun(A) -> lists:member(A, Values) end, + HasArgs = lists:all(Pred, Args), + IsArgs = + case Args of + [A] when is_list(A) -> + A == Values; + _ -> + false end, - Args - ); -match({[{<<"$all">>, _}]}, Value, Ctx) -> - [#failure{op = all, type = bad_value, params = [Value], path = Ctx#ctx.path}]; + HasArgs orelse IsArgs; +match({[{<<"$all">>, _Args}]}, _Values, #ctx{verbose = false}) -> + false; +match({[{<<"$all">>, Args}]} = Expr, Values, Ctx) -> + match_with_failure(Expr, Values, all, [Args], Ctx); %% This is for $elemMatch, $allMatch, and possibly $in because of our normalizer. %% A selector such as {"field_name": {"$elemMatch": {"$gte": 80, "$lt": 85}}} %% gets normalized to: @@ -438,53 +468,130 @@ match({[{<<>>, Arg}]}, Values, Ctx) -> match(Arg, Values, Ctx); % Matches when any element in values matches the % sub-selector Arg. -match({[{<<"$elemMatch">>, _Arg}]}, [], Ctx) -> - [#failure{op = elemMatch, type = empty_list, path = Ctx#ctx.path}]; -match({[{<<"$elemMatch">>, Arg}]}, Values, #ctx{path = Path} = Ctx) when is_list(Values) -> +match({[{<<"$elemMatch">>, Arg}]}, Values, #ctx{verbose = false} = Ctx) when is_list(Values) -> + try + lists:foreach( + fun(V) -> + case match(Arg, V, Ctx) of + true -> throw(matched); + _ -> ok + end + end, + Values + ), + false + catch + throw:matched -> + true; + _:_ -> + false + end; +match({[{<<"$elemMatch">>, _Arg}]}, _Value, #ctx{verbose = false}) -> + false; +match({[{<<"$elemMatch">>, _Arg}]}, [], #ctx{negate = false} = Ctx) -> + [#failure{op = elemMatch, type = empty_list, ctx = Ctx}]; +match({[{<<"$elemMatch">>, _Arg}]}, [], #ctx{negate = true}) -> + []; +match({[{<<"$elemMatch">>, Arg}]}, Values, #ctx{negate = true} = Ctx) -> + PosCtx = Ctx#ctx{negate = false}, + match({[{<<"$allMatch">>, {[{<<"$not">>, Arg}]}}]}, Values, PosCtx); +match({[{<<"$elemMatch">>, Arg}]}, Values, #ctx{path = Path} = Ctx) -> ValueFailures = [ match(Arg, V, Ctx#ctx{path = [Idx | Path]}) || {Idx, V} <- lists:enumerate(0, Values) ], case lists:member([], ValueFailures) of true -> []; - _ -> lists:flatten(ValueFailures) + false -> lists:flatten(ValueFailures) end; -match({[{<<"$elemMatch">>, _}]}, Value, Ctx) -> - [#failure{op = elemMatch, type = bad_value, params = [Value], path = Ctx#ctx.path}]; % Matches when all elements in values match the % sub-selector Arg. -match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, #ctx{path = Path} = Ctx) -> +match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, #ctx{verbose = false} = Ctx) -> + try + lists:foreach( + fun(V) -> + case match(Arg, V, Ctx) of + false -> throw(unmatched); + _ -> ok + end + end, + Values + ), + true + catch + _:_ -> + false + end; +match({[{<<"$allMatch">>, _Arg}]}, _Value, #ctx{verbose = false}) -> + false; +match({[{<<"$allMatch">>, _Arg}]}, [], #ctx{negate = false} = Ctx) -> + [#failure{op = allMatch, type = empty_list, ctx = Ctx}]; +match({[{<<"$allMatch">>, _Arg}]}, [], #ctx{negate = true}) -> + []; +match({[{<<"$allMatch">>, Arg}]}, Values, #ctx{negate = true} = Ctx) -> + PosCtx = Ctx#ctx{negate = false}, + match({[{<<"$elemMatch">>, {[{<<"$not">>, Arg}]}}]}, Values, PosCtx); +match({[{<<"$allMatch">>, Arg}]}, Values, #ctx{path = Path} = Ctx) -> + MatchValue = fun({Idx, V}) -> match(Arg, V, Ctx#ctx{path = [Idx | Path]}) end, EnumValues = lists:enumerate(0, Values), - MatchValue = fun({Idx, Value}) -> match(Arg, Value, Ctx#ctx{path = [Idx | Path]}) end, lists:flatmap(MatchValue, EnumValues); -match({[{<<"$allMatch">>, _}]}, Value, Ctx) -> - [#failure{op = allMatch, type = bad_value, params = [Value], path = Ctx#ctx.path}]; % Matches when any key in the map value matches the % sub-selector Arg. -match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, Ctx) -> - [#failure{op = keyMapMatch, type = empty_list, path = Ctx#ctx.path}]; -match({[{<<"$keyMapMatch">>, Arg}]}, {Value}, Ctx) when is_list(Value) -> - KeyFailures = [match(Arg, K, Ctx) || {K, _} <- Value], +match({[{<<"$keyMapMatch">>, Arg}]}, Value, #ctx{verbose = false} = Ctx) when is_tuple(Value) -> + try + lists:foreach( + fun(V) -> + case match(Arg, V, Ctx) of + true -> throw(matched); + _ -> ok + end + end, + [Key || {Key, _} <- element(1, Value)] + ), + false + catch + throw:matched -> + true; + _:_ -> + false + end; +match({[{<<"$keyMapMatch">>, _Arg}]}, _Value, #ctx{verbose = false}) -> + false; +match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, #ctx{negate = false} = Ctx) -> + [#failure{op = keyMapMatch, type = empty_list, ctx = Ctx}]; +match({[{<<"$keyMapMatch">>, _Arg}]}, {[]}, #ctx{negate = true}) -> + []; +match({[{<<"$keyMapMatch">>, Arg}]}, Value, #ctx{negate = true, path = Path} = Ctx) when + is_tuple(Value) +-> + Keys = [Key || {Key, _} <- element(1, Value)], + MatchKey = fun(K) -> match(Arg, K, Ctx#ctx{path = [K | Path]}) end, + lists:flatmap(MatchKey, Keys); +match({[{<<"$keyMapMatch">>, Arg}]}, Value, #ctx{path = Path} = Ctx) when is_tuple(Value) -> + Keys = [Key || {Key, _} <- element(1, Value)], + KeyFailures = [match(Arg, K, Ctx#ctx{path = [K | Path]}) || K <- Keys], case lists:member([], KeyFailures) of true -> []; - _ -> lists:flatten(KeyFailures) + false -> lists:flatten(KeyFailures) end; -match({[{<<"$keyMapMatch">>, _}]}, Value, Ctx) -> - [#failure{op = keyMapMatch, type = bad_value, params = [Value], path = Ctx#ctx.path}]; +match({[{<<"$keyMapMatch">>, _Arg}]}, _Value, Ctx) -> + [#failure{op = keyMapMatch, type = bad_value, ctx = Ctx}]; % Our comparison operators are fairly straight forward -match({[{<<"$lt">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> - compare(lt, Arg, Path, Cmp(Value, Arg) < 0); -match({[{<<"$lte">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> - compare(lte, Arg, Path, Cmp(Value, Arg) =< 0); -match({[{<<"$eq">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> - compare(eq, Arg, Path, Cmp(Value, Arg) == 0); -match({[{<<"$ne">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> - compare(ne, Arg, Path, Cmp(Value, Arg) /= 0); -match({[{<<"$gte">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> - compare(gte, Arg, Path, Cmp(Value, Arg) >= 0); -match({[{<<"$gt">>, Arg}]}, Value, #ctx{cmp = Cmp, path = Path}) -> - compare(gt, Arg, Path, Cmp(Value, Arg) > 0); -match({[{<<"$in">>, Args}]}, Values, #ctx{cmp = Cmp, path = Path}) when is_list(Values) -> +match({[{<<"$lt">>, Arg}]}, Value, #ctx{cmp = Cmp} = Ctx) -> + compare(lt, Arg, Ctx, Cmp(Value, Arg) < 0); +match({[{<<"$lte">>, Arg}]}, Value, #ctx{cmp = Cmp} = Ctx) -> + compare(lte, Arg, Ctx, Cmp(Value, Arg) =< 0); +match({[{<<"$eq">>, Arg}]}, Value, #ctx{cmp = Cmp} = Ctx) -> + compare(eq, Arg, Ctx, Cmp(Value, Arg) == 0); +match({[{<<"$ne">>, Arg}]}, Value, #ctx{cmp = Cmp} = Ctx) -> + compare(ne, Arg, Ctx, Cmp(Value, Arg) /= 0); +match({[{<<"$gte">>, Arg}]}, Value, #ctx{cmp = Cmp} = Ctx) -> + compare(gte, Arg, Ctx, Cmp(Value, Arg) >= 0); +match({[{<<"$gt">>, Arg}]}, Value, #ctx{cmp = Cmp} = Ctx) -> + compare(gt, Arg, Ctx, Cmp(Value, Arg) > 0); +match({[{<<"$in">>, []}]}, _, #ctx{verbose = false}) -> + false; +match({[{<<"$in">>, Args}]}, Values, #ctx{verbose = false, cmp = Cmp}) when is_list(Values) -> Pred = fun(Arg) -> lists:foldl( fun(Value, Match) -> @@ -494,88 +601,68 @@ match({[{<<"$in">>, Args}]}, Values, #ctx{cmp = Cmp, path = Path}) when is_list( Values ) end, - case lists:any(Pred, Args) of - true -> []; - _ -> [#failure{op = in, params = [Args], path = Path}] - end; -match({[{<<"$in">>, Args}]}, Value, #ctx{cmp = Cmp, path = Path}) -> + lists:any(Pred, Args); +match({[{<<"$in">>, Args}]}, Value, #ctx{verbose = false, cmp = Cmp}) -> Pred = fun(Arg) -> Cmp(Value, Arg) == 0 end, - case lists:any(Pred, Args) of - true -> []; - _ -> [#failure{op = in, params = [Args], path = Path}] - end; -match({[{<<"$nin">>, Args}]}, Values, #ctx{cmp = Cmp, path = Path}) when is_list(Values) -> - Pred = fun(Arg) -> - lists:foldl( - fun(Value, Match) -> - (Cmp(Value, Arg) /= 0) and Match - end, - true, - Values - ) - end, - case lists:all(Pred, Args) of - true -> []; - _ -> [#failure{op = nin, params = [Args], path = Path}] - end; -match({[{<<"$nin">>, Args}]}, Value, #ctx{cmp = Cmp, path = Path}) -> + lists:any(Pred, Args); +match({[{<<"$in">>, Args}]} = Expr, Value, Ctx) -> + match_with_failure(Expr, Value, in, [Args], Ctx); +match({[{<<"$nin">>, []}]}, _, #ctx{verbose = false}) -> + true; +match({[{<<"$nin">>, Args}]}, Values, #ctx{verbose = false} = Ctx) when is_list(Values) -> + not match({[{<<"$in">>, Args}]}, Values, Ctx); +match({[{<<"$nin">>, Args}]}, Value, #ctx{verbose = false, cmp = Cmp}) -> Pred = fun(Arg) -> Cmp(Value, Arg) /= 0 end, - case lists:all(Pred, Args) of - true -> []; - _ -> [#failure{op = nin, params = [Args], path = Path}] - end; + lists:all(Pred, Args); +match({[{<<"$nin">>, Args}]} = Expr, Value, Ctx) -> + match_with_failure(Expr, Value, nin, [Args], Ctx); % This logic is a bit subtle. Basically, if value is % not undefined, then it exists. -match({[{<<"$exists">>, ShouldExist}]}, Value, Ctx) -> - case {ShouldExist, Value} of - {true, undefined} -> [#failure{op = exists, params = [ShouldExist], path = Ctx#ctx.path}]; - {true, _} -> []; - {false, undefined} -> []; - {false, _} -> [#failure{op = exists, params = [ShouldExist], path = Ctx#ctx.path}] - end; -match({[{<<"$type">>, Arg}]}, Value, Ctx) when is_binary(Arg) -> - case mango_json:type(Value) of - Arg -> []; - _ -> [#failure{op = type, params = [Arg], path = Ctx#ctx.path}] - end; -match({[{<<"$mod">>, [D, R]}]}, Value, Ctx) when is_integer(Value) -> - case Value rem D of - R -> []; - _ -> [#failure{op = mod, params = [D, R], path = Ctx#ctx.path}] - end; -match({[{<<"$mod">>, _}]}, Value, Ctx) -> - [#failure{op = mod, type = bad_value, params = [Value], path = Ctx#ctx.path}]; -match({[{<<"$beginsWith">>, Prefix}]}, Value, Ctx) when is_binary(Prefix), is_binary(Value) -> - case string:prefix(Value, Prefix) of - nomatch -> [#failure{op = beginsWith, params = [Prefix], path = Ctx#ctx.path}]; - _ -> [] - end; +match({[{<<"$exists">>, ShouldExist}]}, Value, #ctx{verbose = false}) -> + Exists = Value /= undefined, + ShouldExist andalso Exists; +match({[{<<"$exists">>, ShouldExist}]} = Expr, Value, Ctx) -> + match_with_failure(Expr, Value, exists, [ShouldExist], Ctx); +match({[{<<"$type">>, Arg}]}, Value, #ctx{verbose = false}) when is_binary(Arg) -> + Arg == mango_json:type(Value); +match({[{<<"$type">>, Arg}]} = Expr, Value, Ctx) -> + match_with_failure(Expr, Value, type, [Arg], Ctx); +match({[{<<"$mod">>, [D, R]}]}, Value, #ctx{verbose = false}) when is_integer(Value) -> + Value rem D == R; +match({[{<<"$mod">>, _}]}, _Value, #ctx{verbose = false}) -> + false; +match({[{<<"$mod">>, [D, R]}]} = Expr, Value, Ctx) -> + match_with_failure(Expr, Value, mod, [D, R], Ctx); +match({[{<<"$beginsWith">>, Prefix}]}, Value, #ctx{verbose = false}) when + is_binary(Prefix), is_binary(Value) +-> + string:prefix(Value, Prefix) /= nomatch; % When Value is not a string, do not match -match({[{<<"$beginsWith">>, Prefix}]}, Value, Ctx) when is_binary(Prefix) -> - [#failure{op = beginsWith, type = bad_value, params = [Value], path = Ctx#ctx.path}]; -match({[{<<"$regex">>, Regex}]}, Value, Ctx) when is_binary(Value) -> +match({[{<<"$beginsWith">>, Prefix}]}, _, #ctx{verbose = false}) when is_binary(Prefix) -> + false; +match({[{<<"$beginsWith">>, Prefix}]} = Expr, Value, Ctx) -> + match_with_failure(Expr, Value, beginsWith, [Prefix], Ctx); +match({[{<<"$regex">>, Regex}]}, Value, #ctx{verbose = false}) when is_binary(Value) -> try - case re:run(Value, Regex, [{capture, none}]) of - match -> []; - _ -> [#failure{op = regex, params = [Regex], path = Ctx#ctx.path}] - end + match == re:run(Value, Regex, [{capture, none}]) catch _:_ -> - [#failure{op = regex, params = [Regex], path = Ctx#ctx.path}] - end; -match({[{<<"$regex">>, _}]}, Value, Ctx) -> - [#failure{op = regex, type = bad_value, params = [Value], path = Ctx#ctx.path}]; -match({[{<<"$size">>, Arg}]}, Values, Ctx) when is_list(Values) -> - case length(Values) of - Arg -> []; - _ -> [#failure{op = size, params = [Arg], path = Ctx#ctx.path}] + false end; -match({[{<<"$size">>, _}]}, Value, Ctx) -> - [#failure{op = size, type = bad_value, params = [Value], path = Ctx#ctx.path}]; +match({[{<<"$regex">>, _}]}, _Value, #ctx{verbose = false}) -> + false; +match({[{<<"$regex">>, Regex}]} = Expr, Value, Ctx) -> + match_with_failure(Expr, Value, regex, [Regex], Ctx); +match({[{<<"$size">>, Arg}]}, Values, #ctx{verbose = false}) when is_list(Values) -> + length(Values) == Arg; +match({[{<<"$size">>, _}]}, _Value, #ctx{verbose = false}) -> + false; +match({[{<<"$size">>, Arg}]} = Expr, Value, Ctx) -> + match_with_failure(Expr, Value, size, [Arg], Ctx); % We don't have any choice but to believe that the text % index returned valid matches -match({[{<<"$default">>, _}]}, _Value, _Ctx) -> - []; +match({[{<<"$default">>, _}]}, _Value, #ctx{verbose = false}) -> + true; % All other operators are internal assertion errors for % matching because we either should've removed them during % normalization or something else broke. @@ -584,16 +671,25 @@ match({[{<<"$", _/binary>> = Op, _}]}, _, _) -> % We need to traverse value to find field. The call to % mango_doc:get_field/2 may return either not_found or % bad_path in which case matching fails. -match({[{Field, Cond}]}, Value, #ctx{path = Path} = Ctx) -> +match({[{Field, Cond}]}, Value, #ctx{verbose = Verb, path = Path} = Ctx) -> InnerPath = extend_path(Field, Path), InnerCtx = Ctx#ctx{path = InnerPath}, case mango_doc:get_field(Value, Field) of not_found when Cond == {[{<<"$exists">>, false}]} -> - []; + case Verb of + true -> []; + false -> true + end; not_found -> - [#failure{op = field, type = not_found, path = InnerCtx#ctx.path}]; + case Verb of + true -> [#failure{op = field, type = not_found, ctx = InnerCtx}]; + false -> false + end; bad_path -> - [#failure{op = field, type = bad_path, path = InnerCtx#ctx.path}]; + case Verb of + true -> [#failure{op = field, type = bad_path, ctx = InnerCtx}]; + false -> false + end; SubValue when Field == <<"_id">> -> match(Cond, SubValue, InnerCtx#ctx{cmp = fun mango_json:cmp_raw/2}); SubValue -> @@ -607,10 +703,18 @@ extend_path(Field, Path) when is_binary(Field) -> extend_path(Field, Path) when is_list(Field) -> lists:foldl(fun(F, Acc) -> [F | Acc] end, Path, Field). -compare(Op, Arg, Path, Cond) -> - case Cond of - true -> []; - _ -> [#failure{op = Op, params = [Arg], path = Path}] +match_with_failure(Expr, Value, Op, Params, #ctx{negate = Neg} = Ctx) -> + case not match(Expr, Value, Ctx#ctx{verbose = false}) of + Neg -> []; + _ -> [#failure{op = Op, params = Params, ctx = Ctx}] + end. + +compare(_, _, #ctx{verbose = false}, Cond) -> + Cond; +compare(Op, Arg, #ctx{negate = Neg} = Ctx, Cond) -> + case not Cond of + Neg -> []; + _ -> [#failure{op = Op, params = [Arg], ctx = Ctx}] end. % Returns true if Selector requires all @@ -1149,10 +1253,21 @@ check_selector(Selector, Results) -> SelPos = normalize({[{<<"x">>, Selector}]}), SelNeg = normalize({[{<<"x">>, {[{<<"$not">>, Selector}]}}]}), + ListToBool = fun(List) -> + case List of + [] -> true; + [_ | _] -> false + end + end, + Check = fun({Result, Value}) -> Doc = {[{<<"x">>, Value}]}, - ?assertEqual(Result, match_int(SelPos, Doc)), - ?assertEqual(not Result, match_int(SelNeg, Doc)) + + ?assertEqual(Result, match_int(SelPos, Doc, false)), + ?assertEqual(Result, ListToBool(match_int(SelPos, Doc, true))), + + ?assertEqual(not Result, match_int(SelNeg, Doc, false)), + ?assertEqual(not Result, ListToBool(match_int(SelNeg, Doc, true))) end, lists:foreach(Check, Results). @@ -1785,8 +1900,8 @@ match_failures_object_test() -> {<<"b">>, {[{<<"c">>, 3}]}} ]} ), - ?assertEqual( - [#failure{op = eq, type = mismatch, params = [1], path = [<<"a">>]}], + ?assertMatch( + [#failure{op = eq, type = mismatch, params = [1], ctx = #ctx{path = [<<"a">>]}}], Fails1 ), @@ -1797,8 +1912,8 @@ match_failures_object_test() -> {<<"b">>, {[{<<"c">>, 4}]}} ]} ), - ?assertEqual( - [#failure{op = eq, type = mismatch, params = [3], path = [<<"c">>, <<"b">>]}], + ?assertMatch( + [#failure{op = eq, type = mismatch, params = [3], ctx = #ctx{path = [<<"c">>, <<"b">>]}}], Fails2 ), @@ -1809,10 +1924,10 @@ match_failures_object_test() -> {<<"b">>, {[{<<"c">>, 4}]}} ]} ), - ?assertEqual( + ?assertMatch( [ - #failure{op = eq, type = mismatch, params = [1], path = [<<"a">>]}, - #failure{op = eq, type = mismatch, params = [3], path = [<<"c">>, <<"b">>]} + #failure{op = eq, type = mismatch, params = [1], ctx = #ctx{path = [<<"a">>]}}, + #failure{op = eq, type = mismatch, params = [3], ctx = #ctx{path = [<<"c">>, <<"b">>]}} ], Fails3 ). @@ -1835,18 +1950,18 @@ match_failures_elemmatch_test() -> Fails1 = match_failures( SelElemMatch, {[{<<"a">>, []}]} ), - ?assertEqual( - [#failure{op = elemMatch, type = empty_list, params = [], path = [<<"a">>]}], + ?assertMatch( + [#failure{op = elemMatch, type = empty_list, params = [], ctx = #ctx{path = [<<"a">>]}}], Fails1 ), Fails2 = match_failures( SelElemMatch, {[{<<"a">>, [3, 2]}]} ), - ?assertEqual( + ?assertMatch( [ - #failure{op = gt, type = mismatch, params = [4], path = [0, <<"a">>]}, - #failure{op = gt, type = mismatch, params = [4], path = [1, <<"a">>]} + #failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [0, <<"a">>]}}, + #failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [1, <<"a">>]}} ], Fails2 ). @@ -1869,18 +1984,18 @@ match_failures_allmatch_test() -> Fails1 = match_failures( SelAllMatch, {[{<<"a">>, [4]}]} ), - ?assertEqual( - [#failure{op = gt, type = mismatch, params = [4], path = [0, <<"a">>]}], + ?assertMatch( + [#failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [0, <<"a">>]}}], Fails1 ), Fails2 = match_failures( SelAllMatch, {[{<<"a">>, [5, 6, 3, 7, 0]}]} ), - ?assertEqual( + ?assertMatch( [ - #failure{op = gt, type = mismatch, params = [4], path = [2, <<"a">>]}, - #failure{op = gt, type = mismatch, params = [4], path = [4, <<"a">>]} + #failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [2, <<"a">>]}}, + #failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [4, <<"a">>]}} ], Fails2 ). @@ -1903,8 +2018,15 @@ match_failures_allmatch_object_test() -> Fails1 = match_failures( SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 4}]}]}]}}]} ), - ?assertEqual( - [#failure{op = gt, type = mismatch, params = [4], path = [<<"c">>, 0, <<"b">>, <<"a">>]}], + ?assertMatch( + [ + #failure{ + op = gt, + type = mismatch, + params = [4], + ctx = #ctx{path = [<<"c">>, 0, <<"b">>, <<"a">>]} + } + ], Fails1 ), @@ -1912,8 +2034,15 @@ match_failures_allmatch_object_test() -> SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 5}]}, {[{<<"c">>, 6}]}, {[{<<"c">>, 3}]}]}]}}]} ), - ?assertEqual( - [#failure{op = gt, type = mismatch, params = [4], path = [<<"c">>, 2, <<"b">>, <<"a">>]}], + ?assertMatch( + [ + #failure{ + op = gt, + type = mismatch, + params = [4], + ctx = #ctx{path = [<<"c">>, 2, <<"b">>, <<"a">>]} + } + ], Fails2 ), @@ -1921,11 +2050,19 @@ match_failures_allmatch_object_test() -> SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 1}]}, {[]}]}]}}]} ), - ?assertEqual( + ?assertMatch( [ - #failure{op = gt, type = mismatch, params = [4], path = [<<"c">>, 0, <<"b">>, <<"a">>]}, #failure{ - op = field, type = not_found, params = [], path = [<<"c">>, 1, <<"b">>, <<"a">>] + op = gt, + type = mismatch, + params = [4], + ctx = #ctx{path = [<<"c">>, 0, <<"b">>, <<"a">>]} + }, + #failure{ + op = field, + type = not_found, + params = [], + ctx = #ctx{path = [<<"c">>, 1, <<"b">>, <<"a">>]} } ], Fails3 From 16bb9f1ed62741c0e36379cdfc8b82f40ece256a Mon Sep 17 00:00:00 2001 From: James Coglan Date: Fri, 30 Jan 2026 17:44:25 +0000 Subject: [PATCH 5/9] feat: Return VDU failure reports to the caller Until now, document updates rejected by a Mango VDU returned an opaque "forbidden" message to the client. This commit adds a detailed list of failures, obtained by converting the `#failure` records returned by `mango_selector:match/3` into human-readable messages. --- src/couch/src/couch_query_servers.erl | 2 + src/mango/src/mango_native_proc.erl | 11 +- src/mango/src/mango_selector.erl | 424 ++++++++++++++---- test/elixir/test/validate_doc_update_test.exs | 3 + 4 files changed, 353 insertions(+), 87 deletions(-) diff --git a/src/couch/src/couch_query_servers.erl b/src/couch/src/couch_query_servers.erl index 7ab662f850..5d7e9ac1fa 100644 --- a/src/couch/src/couch_query_servers.erl +++ b/src/couch/src/couch_query_servers.erl @@ -490,6 +490,8 @@ validate_doc_update(Db, DDoc, EditDoc, DiskDoc, Ctx, SecObj) -> case Resp of ok -> ok; + {[{<<"forbidden">>, Message}, {<<"failures">>, Failures}]} -> + throw({forbidden, Message, Failures}); {[{<<"forbidden">>, Message}]} -> throw({forbidden, Message}); {[{<<"unauthorized">>, Message}]} -> diff --git a/src/mango/src/mango_native_proc.erl b/src/mango/src/mango_native_proc.erl index edcecd4b6f..b540047e50 100644 --- a/src/mango/src/mango_native_proc.erl +++ b/src/mango/src/mango_native_proc.erl @@ -115,9 +115,14 @@ handle_call({prompt, [<<"ddoc">>, DDocId, [<<"validate_doc_update">>], Args]}, _ [NewDoc, OldDoc, _Ctx, _SecObj] = Args, Struct = {[{<<"newDoc">>, NewDoc}, {<<"oldDoc">>, OldDoc}]}, Reply = - case mango_selector:match(Selector, Struct) of - true -> true; - _ -> {[{<<"forbidden">>, <<"document is not valid">>}]} + case mango_selector:match_failures(Selector, Struct) of + [] -> + true; + Failures -> + {[ + {<<"forbidden">>, <<"forbidden">>}, + {<<"failures">>, Failures} + ]} end, {reply, Reply, St} end; diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl index 347bd74bfc..7ba0c86b05 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -70,7 +70,7 @@ match(Selector, D) -> match_failures(Selector, D) -> couch_stats:increment_counter([mango, evaluate_selector]), - match_int(Selector, D, true). + [format_failure(F) || F <- match_int(Selector, D, true)]. match_int(Selector, D) -> match_int(Selector, D, false). @@ -574,8 +574,8 @@ match({[{<<"$keyMapMatch">>, Arg}]}, Value, #ctx{path = Path} = Ctx) when is_tup true -> []; false -> lists:flatten(KeyFailures) end; -match({[{<<"$keyMapMatch">>, _Arg}]}, _Value, Ctx) -> - [#failure{op = keyMapMatch, type = bad_value, ctx = Ctx}]; +match({[{<<"$keyMapMatch">>, _Arg}]}, Value, Ctx) -> + [#failure{op = keyMapMatch, type = bad_value, params = [Value], ctx = Ctx}]; % Our comparison operators are fairly straight forward match({[{<<"$lt">>, Arg}]}, Value, #ctx{cmp = Cmp} = Ctx) -> compare(lt, Arg, Ctx, Cmp(Value, Arg) < 0); @@ -717,6 +717,90 @@ compare(Op, Arg, #ctx{negate = Neg} = Ctx, Cond) -> _ -> [#failure{op = Op, params = [Arg], ctx = Ctx}] end. +format_failure(#failure{op = Op, type = Type, params = Params, ctx = Ctx}) -> + Path = format_path(Ctx#ctx.path), + Msg = format_op(Op, Ctx#ctx.negate, Type, Params), + {[{<<"path">>, Path}, {<<"message">>, list_to_binary(Msg)}]}. + +format_op(Op, _, empty_list, _) -> + io_lib:format("operator $~p was invoked with an empty list", [Op]); +format_op(Op, _, bad_value, [Value]) -> + io_lib:format("operator $~p was invoked with a bad value: ~s", [Op, jiffy:encode(Value)]); +format_op(_, _, not_found, []) -> + io_lib:format("must be present", []); +format_op(_, _, bad_path, []) -> + io_lib:format("used an invalid path", []); +format_op(eq, false, mismatch, [X]) -> + io_lib:format("must be equal to ~s", [jiffy:encode(X)]); +format_op(eq, true, Type, Params) -> + format_op(ne, false, Type, Params); +format_op(ne, false, mismatch, [X]) -> + io_lib:format("must not be equal to ~s", [jiffy:encode(X)]); +format_op(ne, true, Type, Params) -> + format_op(eq, false, Type, Params); +format_op(lt, false, mismatch, [X]) -> + io_lib:format("must be less than ~s", [jiffy:encode(X)]); +format_op(lt, true, Type, Params) -> + format_op(gte, false, Type, Params); +format_op(lte, false, mismatch, [X]) -> + io_lib:format("must be less than or equal to ~s", [jiffy:encode(X)]); +format_op(lte, true, Type, Params) -> + format_op(gt, false, Type, Params); +format_op(gt, false, mismatch, [X]) -> + io_lib:format("must be greater than ~s", [jiffy:encode(X)]); +format_op(gt, true, Type, Params) -> + format_op(lte, false, Type, Params); +format_op(gte, false, mismatch, [X]) -> + io_lib:format("must be greater than or equal to ~s", [jiffy:encode(X)]); +format_op(gte, true, Type, Params) -> + format_op(le, false, Type, Params); +format_op(in, false, mismatch, [X]) -> + io_lib:format("must be one of ~s", [jiffy:encode(X)]); +format_op(in, true, Type, Params) -> + format_op(nin, false, Type, Params); +format_op(nin, false, mismatch, [X]) -> + io_lib:format("must not be one of ~s", [jiffy:encode(X)]); +format_op(nin, true, Type, Params) -> + format_op(in, false, Type, Params); +format_op(all, false, mismatch, [X]) -> + io_lib:format("must contain all the values in ~s", [jiffy:encode(X)]); +format_op(all, true, mismatch, [X]) -> + io_lib:format("must not contain at least one of the values in ~s", [jiffy:encode(X)]); +format_op(exists, false, mismatch, [true]) -> + io_lib:format("must be present", []); +format_op(exists, false, mismatch, [false]) -> + io_lib:format("must not be present", []); +format_op(exists, true, Type, [Exist]) -> + format_op(exists, false, Type, [not Exist]); +format_op(type, false, mismatch, [Type]) -> + io_lib:format("must be of type '~s'", [Type]); +format_op(type, true, mismatch, [Type]) -> + io_lib:format("must not be of type '~s'", [Type]); +format_op(mod, false, mismatch, [D, R]) -> + io_lib:format("must leave a remainder of ~p when divided by ~p", [R, D]); +format_op(mod, true, mismatch, [D, R]) -> + io_lib:format("must leave a remainder other than ~p when divided by ~p", [R, D]); +format_op(regex, false, mismatch, [P]) -> + io_lib:format("must match the pattern '~s'", [P]); +format_op(regex, true, mismatch, [P]) -> + io_lib:format("must not match the pattern '~s'", [P]); +format_op(beginsWith, false, mismatch, [P]) -> + io_lib:format("must begin with '~s'", [P]); +format_op(beginsWith, true, mismatch, [P]) -> + io_lib:format("must not begin with '~s'", [P]); +format_op(size, false, mismatch, [N]) -> + io_lib:format("must contain ~p items", [N]); +format_op(size, true, mismatch, [N]) -> + io_lib:format("must not contain ~p items", [N]). + +format_path([]) -> + []; +format_path([Item | Rest]) when is_binary(Item) -> + {ok, Path} = mango_util:parse_field(Item), + format_path(Rest) ++ Path; +format_path([Item | Rest]) when is_integer(Item) -> + format_path(Rest) ++ [Item]. + % Returns true if Selector requires all % fields in RequiredFields to exist in any matching documents. @@ -1287,7 +1371,14 @@ match_lt_test() -> {false, [1, 2, 3]}, {false, [1, 2, 4]}, {false, [1, 3]} - ]). + ]), + + Sel = normalize({[{<<"x">>, {[{<<"$lt">>, 5}]}}]}), + Fails = match_failures(Sel, {[{<<"x">>, 6}]}), + ?assertEqual( + [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be less than 5">>}]}], + Fails + ). match_lte_test() -> check_selector({[{<<"$lte">>, 5}]}, [{true, 4}, {true, 5}, {false, 6}]), @@ -1304,7 +1395,14 @@ match_lte_test() -> {true, [1, 2, 3]}, {false, [1, 2, 4]}, {false, [1, 3]} - ]). + ]), + + Sel = normalize({[{<<"x">>, {[{<<"$lte">>, 5}]}}]}), + Fails = match_failures(Sel, {[{<<"x">>, 6}]}), + ?assertEqual( + [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be less than or equal to 5">>}]}], + Fails + ). match_gt_test() -> check_selector({[{<<"$gt">>, 5}]}, [{false, 4}, {false, 5}, {true, 6}]), @@ -1321,7 +1419,14 @@ match_gt_test() -> {false, [1, 2, 3]}, {true, [1, 2, 4]}, {true, [1, 3]} - ]). + ]), + + Sel = normalize({[{<<"x">>, {[{<<"$gt">>, 5}]}}]}), + Fails = match_failures(Sel, {[{<<"x">>, 4}]}), + ?assertEqual( + [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be greater than 5">>}]}], + Fails + ). match_gte_test() -> check_selector({[{<<"$gte">>, 5}]}, [{false, 4}, {true, 5}, {true, 6}]), @@ -1338,7 +1443,14 @@ match_gte_test() -> {true, [1, 2, 3]}, {true, [1, 2, 4]}, {true, [1, 3]} - ]). + ]), + + Sel = normalize({[{<<"x">>, {[{<<"$gte">>, 5}]}}]}), + Fails = match_failures(Sel, {[{<<"x">>, 4}]}), + ?assertEqual( + [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be greater than or equal to 5">>}]}], + Fails + ). match_eq_test() -> check_selector({[{<<"$eq">>, 5}]}, [{true, 5}, {false, 6}]), @@ -1356,7 +1468,14 @@ match_eq_test() -> {false, {[{<<"a">>, {[{<<"b">>, {[{<<"c">>, 8}]}}]}}]}}, {false, {[{<<"a">>, {[{<<"b">>, {[{<<"d">>, 7}]}}]}}]}}, {false, {[{<<"a">>, {[{<<"d">>, {[{<<"c">>, 7}]}}]}}]}} - ]). + ]), + + Sel = normalize({[{<<"x">>, 5}]}), + Fails = match_failures(Sel, {[{<<"x">>, 4}]}), + ?assertEqual( + [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be equal to 5">>}]}], + Fails + ). match_ne_test() -> check_selector({[{<<"$ne">>, 5}]}, [{false, 5}, {true, 6}]), @@ -1384,7 +1503,14 @@ match_ne_test() -> {true, {[{<<"a">>, {[{<<"b">>, {[{<<"c">>, 8}]}}]}}]}}, {true, {[{<<"a">>, {[{<<"b">>, {[{<<"d">>, 7}]}}]}}]}}, {true, {[{<<"a">>, {[{<<"d">>, {[{<<"c">>, 7}]}}]}}]}} - ]). + ]), + + Sel = normalize({[{<<"x">>, {[{<<"$ne">>, 5}]}}]}), + Fails = match_failures(Sel, {[{<<"x">>, 5}]}), + ?assertEqual( + [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must not be equal to 5">>}]}], + Fails + ). match_in_test() -> check_selector({[{<<"$in">>, []}]}, [ @@ -1426,6 +1552,13 @@ match_in_test() -> {false, [[<<"nested">>], <<"list">>]}, {true, [0, [[<<"nested">>], <<"list">>]]} ] + ), + + Sel = normalize({[{<<"x">>, {[{<<"$in">>, [42, false, [<<"bar">>]]}]}}]}), + Fails = match_failures(Sel, {[{<<"x">>, 5}]}), + ?assertEqual( + [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be one of [42,false,[\"bar\"]]">>}]}], + Fails ). match_nin_test() -> @@ -1468,6 +1601,18 @@ match_nin_test() -> {true, [[<<"nested">>], <<"list">>]}, {false, [0, [[<<"nested">>], <<"list">>]]} ] + ), + + Sel = normalize({[{<<"x">>, {[{<<"$nin">>, [42, false, [<<"bar">>]]}]}}]}), + Fails = match_failures(Sel, {[{<<"x">>, false}]}), + ?assertEqual( + [ + {[ + {<<"path">>, [<<"x">>]}, + {<<"message">>, <<"must not be one of [42,false,[\"bar\"]]">>} + ]} + ], + Fails ). match_all_test() -> @@ -1518,7 +1663,31 @@ match_all_test() -> check_selector({[{<<"$all">>, [{[{<<"a">>, 1}]}]}]}, [ {true, [{[{<<"a">>, 1}]}]}, {false, [{[{<<"a">>, 1}, {<<"b">>, 2}]}]} - ]). + ]), + + SelEmpty = normalize({[{<<"x">>, {[{<<"$all">>, []}]}}]}), + FailsEmpty = match_failures(SelEmpty, {[{<<"x">>, 0}]}), + ?assertEqual( + [ + {[ + {<<"path">>, [<<"x">>]}, + {<<"message">>, <<"operator $all was invoked with an empty list">>} + ]} + ], + FailsEmpty + ), + + Sel = normalize({[{<<"x">>, {[{<<"$all">>, [1, 2, 3]}]}}]}), + Fails = match_failures(Sel, {[{<<"x">>, [2, 3]}]}), + ?assertEqual( + [ + {[ + {<<"path">>, [<<"x">>]}, + {<<"message">>, <<"must contain all the values in [1,2,3]">>} + ]} + ], + Fails + ). match_exists_test() -> check_selector({[{<<"x">>, {[{<<"$exists">>, true}]}}]}, [ @@ -1556,6 +1725,13 @@ match_exists_test() -> {false, {[{<<"x">>, 0}]}}, {true, {[]}} ] + ), + + Sel = normalize({[{<<"x">>, {[{<<"$exists">>, true}]}}]}), + Fails = match_failures(Sel, {[]}), + ?assertEqual( + [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be present">>}]}], + Fails ). match_type_test() -> @@ -1598,7 +1774,14 @@ match_type_test() -> {true, {[{<<"a">>, 1}]}}, {false, [{<<"a">>, 1}]}, {false, null} - ]). + ]), + + Sel = normalize({[{<<"x">>, {[{<<"$type">>, <<"number">>}]}}]}), + Fails = match_failures(Sel, {[{<<"x">>, true}]}), + ?assertEqual( + [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be of type 'number'">>}]}], + Fails + ). match_regex_test() -> check_selector({[{<<"$regex">>, <<"^[0-9a-f]+$">>}]}, [ @@ -1606,7 +1789,14 @@ match_regex_test() -> {true, <<"3a0df5e">>}, {false, <<"3a0gf5e">>}, {false, 42} - ]). + ]), + + Sel = normalize({[{<<"x">>, {[{<<"$regex">>, <<"^[0-9a-f]+$">>}]}}]}), + Fails = match_failures(Sel, {[{<<"x">>, <<"hello">>}]}), + ?assertEqual( + [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must match the pattern '^[0-9a-f]+$'">>}]}], + Fails + ). match_beginswith_test() -> check_selector({[{<<"$beginsWith">>, <<"foo">>}]}, [ @@ -1616,7 +1806,14 @@ match_beginswith_test() -> {false, <<"more food">>}, {false, <<"fo">>}, {false, 42} - ]). + ]), + + Sel = normalize({[{<<"x">>, {[{<<"$beginsWith">>, <<"foo">>}]}}]}), + Fails = match_failures(Sel, {[{<<"x">>, <<"hello">>}]}), + ?assertEqual( + [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must begin with 'foo'">>}]}], + Fails + ). match_mod_test() -> check_selector({[{<<"$mod">>, [28, 1]}]}, [ @@ -1625,7 +1822,19 @@ match_mod_test() -> {true, 57}, {false, 58}, {false, <<"57">>} - ]). + ]), + + Sel = normalize({[{<<"x">>, {[{<<"$mod">>, [28, 1]}]}}]}), + Fails = match_failures(Sel, {[{<<"x">>, 27}]}), + ?assertEqual( + [ + {[ + {<<"path">>, [<<"x">>]}, + {<<"message">>, <<"must leave a remainder of 1 when divided by 28">>} + ]} + ], + Fails + ). match_size_test() -> check_selector({[{<<"$size">>, 3}]}, [ @@ -1634,7 +1843,14 @@ match_size_test() -> {true, [0, 0, 0]}, {false, [0, 0]}, {false, [0, 0, 0, 0]} - ]). + ]), + + Sel = normalize({[{<<"x">>, {[{<<"$size">>, 3}]}}]}), + Fails = match_failures(Sel, {[{<<"x">>, [0, 1]}]}), + ?assertEqual( + [{[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must contain 3 items">>}]}], + Fails + ). match_allmatch_test() -> % $allMatch is defined to return false for empty lists @@ -1652,7 +1868,14 @@ match_allmatch_test() -> {false, [0]}, {true, [1]}, {true, [0, 1]} - ]). + ]), + + Sel = normalize({[{<<"x">>, {[{<<"$allMatch">>, {[{<<"$eq">>, 0}]}}]}}]}), + Fails = match_failures(Sel, {[{<<"x">>, [0, 1]}]}), + ?assertEqual( + [{[{<<"path">>, [<<"x">>, 1]}, {<<"message">>, <<"must be equal to 0">>}]}], + Fails + ). match_elemmatch_test() -> check_selector({[{<<"$elemMatch">>, {[{<<"$eq">>, 0}]}}]}, [ @@ -1660,7 +1883,17 @@ match_elemmatch_test() -> {true, [0]}, {false, [1]}, {true, [0, 1]} - ]). + ]), + + Sel = normalize({[{<<"x">>, {[{<<"$elemMatch">>, {[{<<"$eq">>, 0}]}}]}}]}), + Fails = match_failures(Sel, {[{<<"x">>, [1, 2]}]}), + ?assertEqual( + [ + {[{<<"path">>, [<<"x">>, 0]}, {<<"message">>, <<"must be equal to 0">>}]}, + {[{<<"path">>, [<<"x">>, 1]}, {<<"message">>, <<"must be equal to 0">>}]} + ], + Fails + ). match_keymapmatch_test() -> check_selector({[{<<"$keyMapMatch">>, {[{<<"$regex">>, <<"^[a-z]+$">>}]}}]}, [ @@ -1669,7 +1902,17 @@ match_keymapmatch_test() -> {true, {[{<<"a">>, 1}, {<<"b4">>, 2}]}}, {false, {[{<<"b4">>, 2}]}}, {false, {[]}} - ]). + ]), + + Sel = normalize({[{<<"x">>, {[{<<"$keyMapMatch">>, {[{<<"$beginsWith">>, <<"a">>}]}}]}}]}), + Fails = match_failures(Sel, {[{<<"x">>, {[{<<"bravo">>, 0}, {<<"charlie">>, 1}]}}]}), + ?assertEqual( + [ + {[{<<"path">>, [<<"x">>, <<"bravo">>]}, {<<"message">>, <<"must begin with 'a'">>}]}, + {[{<<"path">>, [<<"x">>, <<"charlie">>]}, {<<"message">>, <<"must begin with 'a'">>}]} + ], + Fails + ). match_object_test() -> Doc1 = {[]}, @@ -1738,7 +1981,13 @@ match_object_test() -> ?assertEqual(false, match_int(SelShort, Doc2)), ?assertEqual(false, match_int(SelShort, Doc3)), ?assertEqual(true, match_int(SelShort, Doc4)), - ?assertEqual(false, match_int(SelShort, Doc5)). + ?assertEqual(false, match_int(SelShort, Doc5)), + + Fails = match_failures(SelShort, Doc3), + ?assertEqual( + [{[{<<"path">>, [<<"x">>, <<"b">>]}, {<<"message">>, <<"must be present">>}]}], + Fails + ). match_and_test() -> % $and with an empty array matches anything @@ -1796,7 +2045,16 @@ match_and_test() -> ?assertEqual(false, match_int(SelNotMulti, {[{<<"x">>, 6}]})), ?assertEqual(true, match_int(SelNotMulti, {[{<<"x">>, 2}]})), ?assertEqual(true, match_int(SelNotMulti, {[{<<"x">>, 9}]})), - ?assertEqual(false, match_int(SelNotMulti, {[]})). + ?assertEqual(false, match_int(SelNotMulti, {[]})), + + Fails = match_failures(SelNotMulti, {[{<<"x">>, 6}]}), + ?assertEqual( + [ + {[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be less than or equal to 3">>}]}, + {[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be greater than or equal to 7">>}]} + ], + Fails + ). match_or_test() -> % $or with an empty array matches anything @@ -1849,7 +2107,15 @@ match_or_test() -> ?assertEqual(true, match_int(SelNotMulti, {[{<<"x">>, 6}]})), ?assertEqual(false, match_int(SelNotMulti, {[{<<"x">>, 2}]})), ?assertEqual(false, match_int(SelNotMulti, {[{<<"x">>, 9}]})), - ?assertEqual(false, match_int(SelNotMulti, {[]})). + ?assertEqual(false, match_int(SelNotMulti, {[]})), + + Fails = match_failures(SelNotMulti, {[{<<"x">>, 2}]}), + ?assertEqual( + [ + {[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be greater than or equal to 3">>}]} + ], + Fails + ). match_nor_test() -> % $nor with an empty array matches anything @@ -1874,7 +2140,15 @@ match_nor_test() -> ?assertEqual(true, match_int(SelMulti, {[{<<"x">>, 6}]})), ?assertEqual(false, match_int(SelMulti, {[{<<"x">>, 2}]})), ?assertEqual(false, match_int(SelMulti, {[{<<"x">>, 9}]})), - ?assertEqual(false, match_int(SelMulti, {[]})). + ?assertEqual(false, match_int(SelMulti, {[]})), + + Fails = match_failures(SelMulti, {[{<<"x">>, 2}]}), + ?assertEqual( + [ + {[{<<"path">>, [<<"x">>]}, {<<"message">>, <<"must be greater than or equal to 3">>}]} + ], + Fails + ). match_failures_object_test() -> Selector = normalize( @@ -1901,7 +2175,7 @@ match_failures_object_test() -> ]} ), ?assertMatch( - [#failure{op = eq, type = mismatch, params = [1], ctx = #ctx{path = [<<"a">>]}}], + [{[{<<"path">>, [<<"a">>]}, {<<"message">>, <<"must be equal to 1">>}]}], Fails1 ), @@ -1913,7 +2187,7 @@ match_failures_object_test() -> ]} ), ?assertMatch( - [#failure{op = eq, type = mismatch, params = [3], ctx = #ctx{path = [<<"c">>, <<"b">>]}}], + [{[{<<"path">>, [<<"b">>, <<"c">>]}, {<<"message">>, <<"must be equal to 3">>}]}], Fails2 ), @@ -1926,8 +2200,8 @@ match_failures_object_test() -> ), ?assertMatch( [ - #failure{op = eq, type = mismatch, params = [1], ctx = #ctx{path = [<<"a">>]}}, - #failure{op = eq, type = mismatch, params = [3], ctx = #ctx{path = [<<"c">>, <<"b">>]}} + {[{<<"path">>, [<<"a">>]}, {<<"message">>, <<"must be equal to 1">>}]}, + {[{<<"path">>, [<<"b">>, <<"c">>]}, {<<"message">>, <<"must be equal to 3">>}]} ], Fails3 ). @@ -1942,26 +2216,25 @@ match_failures_elemmatch_test() -> ]} ), - Fails0 = match_failures( - SelElemMatch, {[{<<"a">>, [5, 3, 2]}]} - ), + Fails0 = match_failures(SelElemMatch, {[{<<"a">>, [5, 3, 2]}]}), ?assertEqual([], Fails0), - Fails1 = match_failures( - SelElemMatch, {[{<<"a">>, []}]} - ), + Fails1 = match_failures(SelElemMatch, {[{<<"a">>, []}]}), ?assertMatch( - [#failure{op = elemMatch, type = empty_list, params = [], ctx = #ctx{path = [<<"a">>]}}], + [ + {[ + {<<"path">>, [<<"a">>]}, + {<<"message">>, <<"operator $elemMatch was invoked with an empty list">>} + ]} + ], Fails1 ), - Fails2 = match_failures( - SelElemMatch, {[{<<"a">>, [3, 2]}]} - ), + Fails2 = match_failures(SelElemMatch, {[{<<"a">>, [3, 2]}]}), ?assertMatch( [ - #failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [0, <<"a">>]}}, - #failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [1, <<"a">>]}} + {[{<<"path">>, [<<"a">>, 0]}, {<<"message">>, <<"must be greater than 4">>}]}, + {[{<<"path">>, [<<"a">>, 1]}, {<<"message">>, <<"must be greater than 4">>}]} ], Fails2 ). @@ -1976,26 +2249,20 @@ match_failures_allmatch_test() -> ]} ), - Fails0 = match_failures( - SelAllMatch, {[{<<"a">>, [5]}]} - ), + Fails0 = match_failures(SelAllMatch, {[{<<"a">>, [5]}]}), ?assertEqual([], Fails0), - Fails1 = match_failures( - SelAllMatch, {[{<<"a">>, [4]}]} - ), + Fails1 = match_failures(SelAllMatch, {[{<<"a">>, [4]}]}), ?assertMatch( - [#failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [0, <<"a">>]}}], + [{[{<<"path">>, [<<"a">>, 0]}, {<<"message">>, <<"must be greater than 4">>}]}], Fails1 ), - Fails2 = match_failures( - SelAllMatch, {[{<<"a">>, [5, 6, 3, 7, 0]}]} - ), + Fails2 = match_failures(SelAllMatch, {[{<<"a">>, [5, 6, 3, 7, 0]}]}), ?assertMatch( [ - #failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [2, <<"a">>]}}, - #failure{op = gt, type = mismatch, params = [4], ctx = #ctx{path = [4, <<"a">>]}} + {[{<<"path">>, [<<"a">>, 2]}, {<<"message">>, <<"must be greater than 4">>}]}, + {[{<<"path">>, [<<"a">>, 4]}, {<<"message">>, <<"must be greater than 4">>}]} ], Fails2 ). @@ -2010,22 +2277,16 @@ match_failures_allmatch_object_test() -> ]} ), - Fails0 = match_failures( - SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 5}]}]}]}}]} - ), + Fails0 = match_failures(SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 5}]}]}]}}]}), ?assertEqual([], Fails0), - Fails1 = match_failures( - SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 4}]}]}]}}]} - ), + Fails1 = match_failures(SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 4}]}]}]}}]}), ?assertMatch( [ - #failure{ - op = gt, - type = mismatch, - params = [4], - ctx = #ctx{path = [<<"c">>, 0, <<"b">>, <<"a">>]} - } + {[ + {<<"path">>, [<<"a">>, <<"b">>, 0, <<"c">>]}, + {<<"message">>, <<"must be greater than 4">>} + ]} ], Fails1 ), @@ -2036,36 +2297,31 @@ match_failures_allmatch_object_test() -> ), ?assertMatch( [ - #failure{ - op = gt, - type = mismatch, - params = [4], - ctx = #ctx{path = [<<"c">>, 2, <<"b">>, <<"a">>]} - } + {[ + {<<"path">>, [<<"a">>, <<"b">>, 2, <<"c">>]}, + {<<"message">>, <<"must be greater than 4">>} + ]} ], Fails2 ), - Fails3 = match_failures( - SelAllMatch, - {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 1}]}, {[]}]}]}}]} - ), + Fails3 = match_failures(SelAllMatch, {[{<<"a">>, {[{<<"b">>, [{[{<<"c">>, 1}]}, {[]}]}]}}]}), ?assertMatch( [ - #failure{ - op = gt, - type = mismatch, - params = [4], - ctx = #ctx{path = [<<"c">>, 0, <<"b">>, <<"a">>]} - }, - #failure{ - op = field, - type = not_found, - params = [], - ctx = #ctx{path = [<<"c">>, 1, <<"b">>, <<"a">>]} - } + {[ + {<<"path">>, [<<"a">>, <<"b">>, 0, <<"c">>]}, + {<<"message">>, <<"must be greater than 4">>} + ]}, + {[{<<"path">>, [<<"a">>, <<"b">>, 1, <<"c">>]}, {<<"message">>, <<"must be present">>}]} ], Fails3 ). +format_path_test() -> + ?assertEqual([], format_path([])), + ?assertEqual([<<"a">>], format_path([<<"a">>])), + ?assertEqual([<<"a">>, <<"b">>], format_path([<<"b">>, <<"a">>])), + ?assertEqual([<<"a">>, <<"b">>, <<"c">>], format_path([<<"b.c">>, <<"a">>])), + ?assertEqual([<<"a">>, 42, <<"b">>, <<"c">>], format_path([<<"b.c">>, 42, <<"a">>])). + -endif. diff --git a/test/elixir/test/validate_doc_update_test.exs b/test/elixir/test/validate_doc_update_test.exs index 93ed8f177c..9279e1da0e 100644 --- a/test/elixir/test/validate_doc_update_test.exs +++ b/test/elixir/test/validate_doc_update_test.exs @@ -105,6 +105,9 @@ defmodule ValidateDocUpdateTest do resp = Couch.put("/#{db}/doc", body: %{"no" => "type"}) assert resp.status_code == 403 assert resp.body["error"] == "forbidden" + assert resp.body["reason"] == [ + %{"path" => ["newDoc", "type"], "message" => "must be present"} + ] end @tag :with_db From 0eab7adb8d76b5ac8b36385a7409cadb12a54f41 Mon Sep 17 00:00:00 2001 From: James Coglan Date: Mon, 26 Jan 2026 11:12:37 +0000 Subject: [PATCH 6/9] chore: Add some benchmarks for Mango selector matching --- src/mango/src/mango_selector.erl | 48 ++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl index 7ba0c86b05..15e1111155 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -2324,4 +2324,52 @@ format_path_test() -> ?assertEqual([<<"a">>, <<"b">>, <<"c">>], format_path([<<"b.c">>, <<"a">>])), ?assertEqual([<<"a">>, 42, <<"b">>, <<"c">>], format_path([<<"b.c">>, 42, <<"a">>])). +% The following benchmarks can be run by adding the following dependencies to +% rebar.config.script: +% +% {argparse, {url, "https://github.com/max-au/argparse"}, "1.2.4"}, +% {erlperf, {url, "https://github.com/max-au/erlperf"}, "2.3.0"} +% +% and then uncommenting the following functions and running the unit tests for +% this module via this command: +% +% make eunit apps=mango suites=mango_selector +% +% bench(Name, Selector, Doc) -> +% Sel1 = normalize(Selector), +% [Normal, Verbose] = erlperf:compare( +% [ +% #{runner => fun() -> match_int(Sel1, Doc, V) end} +% || V <- [false, true] +% ], +% #{} +% ), +% ?debugFmt("~nbench[~s: normal ] = ~p~n", [Name, Normal]), +% ?debugFmt("~nbench[~s: verbose] = ~p~n", [Name, Verbose]). +% +% bench_and_test() -> +% Sel = +% {[ +% {<<"x">>, +% {[ +% {<<"$and">>, [{[{<<"$gt">>, V}]} || V <- [100, 200, 300, 400, 500]]} +% ]}} +% ]}, +% Doc = {[{<<"x">>, 25}]}, +% bench("$and", Sel, Doc). +% +% bench_allmatch_test() -> +% Sel = +% {[ +% {<<"x">>, +% {[ +% {<<"$allMatch">>, {[{<<"$gt">>, 10}]}} +% ]}} +% ]}, +% Doc = +% {[ +% {<<"x">>, [0, 23, 45, 67, 89, 12, 34, 56, 78]} +% ]}, +% bench("$allMatch", Sel, Doc). + -endif. From d467e70a67eff9666454da187c776c5c7aabb901 Mon Sep 17 00:00:00 2001 From: James Coglan Date: Wed, 8 Apr 2026 17:00:50 +0100 Subject: [PATCH 7/9] fix: Mango VDUs should omit the `oldDoc` field from the matching struct if the doc is newly created --- src/mango/src/mango_native_proc.erl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/mango/src/mango_native_proc.erl b/src/mango/src/mango_native_proc.erl index b540047e50..ee95707eda 100644 --- a/src/mango/src/mango_native_proc.erl +++ b/src/mango/src/mango_native_proc.erl @@ -113,7 +113,11 @@ handle_call({prompt, [<<"ddoc">>, DDocId, [<<"validate_doc_update">>], Args]}, _ {stop, {invalid_call, Msg}, {invalid_call, Msg}, St}; Selector -> [NewDoc, OldDoc, _Ctx, _SecObj] = Args, - Struct = {[{<<"newDoc">>, NewDoc}, {<<"oldDoc">>, OldDoc}]}, + Struct = + case OldDoc of + null -> {[{<<"newDoc">>, NewDoc}]}; + Doc -> {[{<<"newDoc">>, NewDoc}, {<<"oldDoc">>, Doc}]} + end, Reply = case mango_selector:match_failures(Selector, Struct) of [] -> From 1d4dbb018cc6c1539e3f5e2e92972c40345e2e8d Mon Sep 17 00:00:00 2001 From: James Coglan Date: Wed, 8 Apr 2026 17:01:17 +0100 Subject: [PATCH 8/9] docs: More detailed explanation of the behaviour of the oldDocs field in Mango VDUs --- src/docs/src/ddocs/ddocs.rst | 72 +++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/src/docs/src/ddocs/ddocs.rst b/src/docs/src/ddocs/ddocs.rst index 501501857c..8006d5c320 100644 --- a/src/docs/src/ddocs/ddocs.rst +++ b/src/docs/src/ddocs/ddocs.rst @@ -955,7 +955,8 @@ To use Mango selectors for validation, the design document must have the containing the following fields: * ``newDoc``: New version of document that will be stored. -* ``oldDoc``: Previous version of document that is already stored. +* ``oldDoc``: Previous version of document that is already stored; this field is + absent if the doc is being created for the first time. For example, to check that all docs contain a ``title`` which is a string, and a ``year`` which is a number: @@ -1012,3 +1013,72 @@ this design document: } } } + +By using the ``oldDoc`` field, we can create rules that say a document can only +be updated if it is currently in a certain state. For example, this rule would +enforce that only documents describing actors can be updated: + +.. code-block:: json + + { + "language": "query", + + "validate_doc_update": { + "oldDoc": { "type": "actor" } + } + } + +This also makes it so that no new documents can be created, because a write is +only accepted if a previous version of the doc already exists. To relax this +constraint, allow ``oldDoc`` not to exist: + +.. code-block:: json + + { + "language": "query", + + "validate_doc_update": { + "oldDoc": { + "$or": [ + { "$exists": false }, + { "type": "actor" } + ] + } + } + } + +This validator will allow any new document creation, and updates to docs where +the ``type`` field is ``"actor"``. We can also have multiple rules for new +document states that depend on the current state, by combining ``$or`` with +several sets of ``{ oldDoc, newDoc }`` rules: + +.. code-block:: json + + { + "language": "query", + + "validate_doc_update": { + "$or": [ + // allow creation of docs with an acceptable type + { + "oldDoc": { "$exists": false }, + "newDoc": { + "type": { "$in": ["movie", "actor"] } + } + }, + // if a doc currently has "type": "actor", make sure its "movies" + // field is a non-empty list of strings + { + "oldDoc": { "type": "actor" }, + "newDoc": { + "movies": { + "$type": "array", + "$not": { "$size": 0 }, + "$allMatch": { "$type": "string" } + } + } + }, + // etc. + ] + } + } From ba11d389b1ec9cbbe337e8a6837027b217c4e9cc Mon Sep 17 00:00:00 2001 From: James Coglan Date: Thu, 9 Apr 2026 12:02:24 +0100 Subject: [PATCH 9/9] fix: Raise an error if a Mango VDU contains invalid top-level fields --- src/mango/src/mango_native_proc.erl | 41 ++++++++------- src/mango/src/mango_selector.erl | 51 +++++++++++++++++++ test/elixir/test/config/suite.elixir | 1 + test/elixir/test/validate_doc_update_test.exs | 18 +++++++ 4 files changed, 94 insertions(+), 17 deletions(-) diff --git a/src/mango/src/mango_native_proc.erl b/src/mango/src/mango_native_proc.erl index ee95707eda..c6d7890deb 100644 --- a/src/mango/src/mango_native_proc.erl +++ b/src/mango/src/mango_native_proc.erl @@ -112,23 +112,30 @@ handle_call({prompt, [<<"ddoc">>, DDocId, [<<"validate_doc_update">>], Args]}, _ Msg = [<<"validate_doc_update">>, DDocId], {stop, {invalid_call, Msg}, {invalid_call, Msg}, St}; Selector -> - [NewDoc, OldDoc, _Ctx, _SecObj] = Args, - Struct = - case OldDoc of - null -> {[{<<"newDoc">>, NewDoc}]}; - Doc -> {[{<<"newDoc">>, NewDoc}, {<<"oldDoc">>, Doc}]} - end, - Reply = - case mango_selector:match_failures(Selector, Struct) of - [] -> - true; - Failures -> - {[ - {<<"forbidden">>, <<"forbidden">>}, - {<<"failures">>, Failures} - ]} - end, - {reply, Reply, St} + case mango_selector:has_allowed_fields(Selector, [<<"newDoc">>, <<"oldDoc">>]) of + false -> + Msg = + <<"'validate_doc_update' may only contain 'newDoc' and 'oldDoc' as top-level fields">>, + {stop, {invalid_call, Msg}, {invalid_call, Msg}, St}; + true -> + [NewDoc, OldDoc, _Ctx, _SecObj] = Args, + Struct = + case OldDoc of + null -> {[{<<"newDoc">>, NewDoc}]}; + Doc -> {[{<<"newDoc">>, NewDoc}, {<<"oldDoc">>, Doc}]} + end, + Reply = + case mango_selector:match_failures(Selector, Struct) of + [] -> + true; + Failures -> + {[ + {<<"forbidden">>, <<"forbidden">>}, + {<<"failures">>, Failures} + ]} + end, + {reply, Reply, St} + end end; handle_call(Msg, _From, St) -> {stop, {invalid_call, Msg}, {invalid_call, Msg}, St}. diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl index 15e1111155..d0da96b69a 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -17,6 +17,7 @@ match/2, match_failures/2, has_required_fields/2, + has_allowed_fields/2, is_constant_field/2, fields/1 ]). @@ -876,6 +877,39 @@ has_required_fields_int([{[{Field, Cond}]} | Rest], RequiredFields) -> has_required_fields_int(Rest, lists:delete(Field, RequiredFields)) end. +has_allowed_fields(Selector, AllowedFields) -> + Paths = lists:map( + fun(Field) -> + {ok, Path} = mango_util:parse_field(Field), + Path + end, + AllowedFields + ), + has_allowed_fields_int(Selector, Paths). + +has_allowed_fields_int({[{Field, Cond}]}, Paths) when is_list(Field) -> + Stemmed = [match_prefix(Field, Path) || Path <- Paths], + Matched = [Path || Path <- Stemmed, Path /= nil], + case Matched of + [] -> false; + M -> has_allowed_fields_int(Cond, M) + end; +has_allowed_fields_int({[{_Op, Conds}]}, Paths) when is_list(Conds) -> + lists:all(fun(Cond) -> has_allowed_fields_int(Cond, Paths) end, Conds); +has_allowed_fields_int({[{_Op, Cond}]}, Paths) -> + has_allowed_fields_int(Cond, Paths); +has_allowed_fields_int(_, _) -> + true. + +match_prefix([A | Rest1], [A | Rest2]) -> + match_prefix(Rest1, Rest2); +match_prefix([], Rest) -> + Rest; +match_prefix(_, []) -> + []; +match_prefix(_, _) -> + nil. + % Returns true if a field in the selector is a constant value e.g. {a: {$eq: 1}} is_constant_field(Selector, Field) when not is_list(Field) -> {ok, Path} = mango_util:parse_field(Field), @@ -1255,6 +1289,23 @@ has_required_fields_or_nested_or_false_test() -> Normalized = normalize(Selector), ?assertEqual(false, has_required_fields(Normalized, RequiredFields)). +has_allowed_fields_test() -> + Sel1 = normalize({[{<<"a">>, 1}]}), + ?assertEqual(has_allowed_fields(Sel1, [<<"a">>]), true), + ?assertEqual(has_allowed_fields(Sel1, [<<"a">>, <<"b">>]), true), + ?assertEqual(has_allowed_fields(Sel1, [<<"b">>]), false), + + Sel2 = normalize({[{<<"$or">>, [{[{<<"a.b">>, 1}]}, {[{<<"c.d">>, 2}]}]}]}), + ?assertEqual(has_allowed_fields(Sel2, [<<"a">>, <<"c">>]), true), + ?assertEqual(has_allowed_fields(Sel2, [<<"a.b">>, <<"c.d">>]), true), + ?assertEqual(has_allowed_fields(Sel2, [<<"a">>]), false), + + Sel3 = normalize({[{<<"a">>, {[{<<"$or">>, [{[{<<"b">>, 1}]}, {[{<<"c">>, 2}]}]}]}}]}), + ?assertEqual(has_allowed_fields(Sel3, [<<"a">>]), true), + ?assertEqual(has_allowed_fields(Sel3, [<<"a.b">>, <<"a.c">>]), true), + ?assertEqual(has_allowed_fields(Sel3, [<<"a.c">>]), false), + ?assertEqual(has_allowed_fields(Sel3, [<<"b">>]), false). + check_match(Selector) -> % Call match_int/2 to avoid ERROR for missing metric; this is confusing % in the middle of test output. diff --git a/test/elixir/test/config/suite.elixir b/test/elixir/test/config/suite.elixir index 21bfe3dc03..57cf3334fe 100644 --- a/test/elixir/test/config/suite.elixir +++ b/test/elixir/test/config/suite.elixir @@ -532,6 +532,7 @@ "converting a Mango VDU to JavaScript updates its effects", "deleting a Mango VDU removes its effects", "Mango VDU rejects a doc if any existing ddoc fails to match", + "Mango VDU rejects a design doc if it contains unknown fields", ], "SecurityValidationTest": [ "Author presence and user security", diff --git a/test/elixir/test/validate_doc_update_test.exs b/test/elixir/test/validate_doc_update_test.exs index 9279e1da0e..333e6ec7e9 100644 --- a/test/elixir/test/validate_doc_update_test.exs +++ b/test/elixir/test/validate_doc_update_test.exs @@ -212,4 +212,22 @@ defmodule ValidateDocUpdateTest do assert resp.status_code == 403 assert resp.body["error"] == "forbidden" end + + @tag :with_db + test "Mango VDU rejects a design doc if it contains unknown fields", context do + db = context[:db_name] + + ddoc = %{ + language: "query", + + validate_doc_update: %{ + "wrongField" => %{"year" => %{"$lt" => 2026}} + } + } + resp = Couch.put("/#{db}/_design/mango-test-2", body: ddoc) + assert resp.status_code == 201 + + resp = Couch.put("/#{db}/doc", body: %{"year" => 1994}) + assert resp.status_code == 500 + end end