Skip to content

Commit c842cf7

Browse files
authored
Merge pull request #586 from biojppm/fix/test_suite
2 parents fa9fb1f + 824c8b1 commit c842cf7

11 files changed

Lines changed: 1158 additions & 974 deletions

changelog/current.md

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
- Ensure parse errors for invalid YAML cases:
2-
- [PR#585](https://github.com/biojppm/rapidyaml/pull/585): colon on newline at top level ([play](https://play.yaml.com/?show=xd#c2NhbGFyCjogYmFkCi0tLQpbc2VxXQo6IGJhZAotLS0KW21hcF0KOiBiYWQK)):
2+
- colon on newline at top level ([PR#585](https://github.com/biojppm/rapidyaml/pull/585)):
33
```yaml
44
scalar
55
: bad
@@ -10,18 +10,32 @@
1010
{map: }
1111
: bad
1212
```
13-
- [PR#586](https://github.com/biojppm/rapidyaml/pull/586): tokens after explicit document endcolon on newline:
13+
- colon on newline generally in block containers ([PR#585](https://github.com/biojppm/rapidyaml/pull/585)):
1414
```yaml
1515
bad cases:
1616
scalar
17-
: bad
17+
: bad colon
1818
[seq]
19-
: bad
19+
: bad colon
2020
{map: }
21-
: bad
21+
: bad colon
2222
```
23-
- [PR#586](https://github.com/biojppm/rapidyaml/pull/586): tokens after explicit document end:
23+
- colon on newline in flow sequences ([PR#586](https://github.com/biojppm/rapidyaml/pull/586)):
24+
```yaml
25+
[a
26+
:
27+
b]
28+
```
29+
- tokens after explicit document end ([PR#585](https://github.com/biojppm/rapidyaml/pull/585)):
2430
```yaml
2531
foo: bar
2632
... bad tokens
2733
```
34+
- comments directly after comma in flow containers ([PR#586](https://github.com/biojppm/rapidyaml/pull/586)):
35+
```yaml
36+
[a,b,# bad comment
37+
]
38+
---
39+
{a: b,# bad comment
40+
}
41+
```

src/c4/yml/parse_engine.def.hpp

Lines changed: 60 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -95,26 +95,6 @@ inline bool _is_doc_end_token(csubstr s)
9595

9696
inline bool _is_doc_token(csubstr s) noexcept
9797
{
98-
//
99-
// NOTE: this function was failing under some scenarios when
100-
// compiled with gcc -O2 (but not -O3 or -O1 or -O0), likely
101-
// related to optimizer assumptions on the input string and
102-
// possibly caused from UB around assignment to that string (the
103-
// call site was in _scan_block()). For more details see:
104-
//
105-
// https://github.com/biojppm/rapidyaml/issues/440
106-
//
107-
// The current version does not suffer this problem, but it may
108-
// appear again.
109-
//
110-
//
111-
// UPDATE. The problem appeared again in gcc12 and gcc13 with -Os
112-
// (but not any other optimization level, nor any other compiler
113-
// or version), because the assignment to s is being hoisted out
114-
// of the loop which calls this function. Then the length doesn't
115-
// enter the s.len >= 3 when it should. Adding a
116-
// C4_DONT_OPTIMIZE(var) makes the problem go away.
117-
//
11898
if(s.len >= 3)
11999
{
120100
switch(s.str[0])
@@ -162,7 +142,7 @@ C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following)
162142
}
163143

164144
//! look for the next newline chars, and jump to the right of those
165-
inline substr from_next_line(substr rem)
145+
inline substr _from_next_line(substr rem)
166146
{
167147
size_t nlpos = rem.first_of("\r\n");
168148
if(nlpos == csubstr::npos)
@@ -268,6 +248,7 @@ ParseEngine<EventHandler>::ParseEngine(EventHandler *evt_handler, ParserOptions
268248
, m_pending_tags()
269249
, m_doc_empty(false)
270250
, m_prev_colon(npos)
251+
, m_prev_val_end(npos)
271252
, m_encoding(NOBOM)
272253
, m_newline_offsets()
273254
, m_newline_offsets_size(0)
@@ -287,6 +268,7 @@ ParseEngine<EventHandler>::ParseEngine(ParseEngine &&that) noexcept
287268
, m_pending_tags(that.m_pending_tags)
288269
, m_doc_empty(false)
289270
, m_prev_colon(npos)
271+
, m_prev_val_end(npos)
290272
, m_encoding(NOBOM)
291273
, m_newline_offsets(that.m_newline_offsets)
292274
, m_newline_offsets_size(that.m_newline_offsets_size)
@@ -306,6 +288,7 @@ ParseEngine<EventHandler>::ParseEngine(ParseEngine const& that)
306288
, m_pending_tags(that.m_pending_tags)
307289
, m_doc_empty(false)
308290
, m_prev_colon(npos)
291+
, m_prev_val_end(npos)
309292
, m_encoding(NOBOM)
310293
, m_newline_offsets()
311294
, m_newline_offsets_size()
@@ -333,6 +316,7 @@ ParseEngine<EventHandler>& ParseEngine<EventHandler>::operator=(ParseEngine &&th
333316
m_pending_tags = that.m_pending_tags;
334317
m_doc_empty = that.m_doc_empty;
335318
m_prev_colon = that.m_prev_colon;
319+
m_prev_val_end = that.m_prev_val_end;
336320
m_encoding = that.m_encoding;
337321
m_newline_offsets = (that.m_newline_offsets);
338322
m_newline_offsets_size = (that.m_newline_offsets_size);
@@ -356,6 +340,7 @@ ParseEngine<EventHandler>& ParseEngine<EventHandler>::operator=(ParseEngine cons
356340
m_pending_tags = that.m_pending_tags;
357341
m_doc_empty = that.m_doc_empty;
358342
m_prev_colon = that.m_prev_colon;
343+
m_prev_val_end = that.m_prev_val_end;
359344
m_encoding = that.m_encoding;
360345
if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
361346
_resize_locations(that.m_newline_offsets_capacity);
@@ -379,6 +364,7 @@ void ParseEngine<EventHandler>::_clr()
379364
m_pending_tags = {};
380365
m_doc_empty = true;
381366
m_prev_colon = npos;
367+
m_prev_val_end = npos;
382368
m_encoding = NOBOM;
383369
m_newline_offsets = {};
384370
m_newline_offsets_size = {};
@@ -409,6 +395,7 @@ void ParseEngine<EventHandler>::_reset()
409395
m_pending_tags = {};
410396
m_doc_empty = true;
411397
m_prev_colon = npos;
398+
m_prev_val_end = npos;
412399
m_bom_len = 0;
413400
m_encoding = NOBOM;
414401
m_bom_line = 0;
@@ -896,8 +883,7 @@ bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RE
896883

897884
bool needs_filter = false;
898885
size_t col = 0; // zero-based column
899-
size_t offs = 0;
900-
size_t offsp1;
886+
size_t offs = 0; // offset
901887
for( ; offs < s.len; ++offs, ++col)
902888
{
903889
const char c = s.str[offs];
@@ -910,11 +896,10 @@ bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RE
910896
goto ended_scalar;
911897
case '\n':
912898
_c4dbgpf("found newline. offs={} col={}", offs, col);
913-
offsp1 = offs + 1;
914-
if(s.len > offsp1)
899+
if(s.len > offs + 1)
915900
{
916-
csubstr next_line = s.sub(offsp1).triml(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
917-
if(next_line.begins_with_any(",]#")) // any of the characters we're interested in
901+
csubstr next_line = s.sub(offs + 1).triml(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
902+
if(next_line.begins_with_any(",]#:")) // any of the characters we're interested in
918903
{
919904
_c4dbgpf("found terminating character beginning next line: '{}'", next_line.str[0]);
920905
goto ended_scalar;
@@ -932,14 +917,13 @@ bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RE
932917
break;
933918
case ':':
934919
_c4dbgp("found suspicious ':'");
935-
offsp1 = offs + 1;
936-
if(s.len > offsp1)
920+
if(s.len > offs + 1)
937921
{
938-
char next = s.str[offsp1];
922+
char next = s.str[offs + 1];
939923
_c4dbgpf("next char is '{}'", _c4prc(next));
940924
if(next == '\r')
941925
{
942-
csubstr after = s.sub(offsp1).triml('\r');
926+
csubstr after = s.sub(offs + 1).triml('\r');
943927
if(after.len)
944928
{
945929
next = after.str[0];
@@ -959,7 +943,7 @@ bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RE
959943
}
960944
else
961945
{
962-
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len == offsp1, m_evt_handler->m_curr->pos);
946+
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len == offs + 1, m_evt_handler->m_curr->pos);
963947
_line_progressed(col);
964948
_c4err("missing termination: '{}'", c); // noreturn
965949
}
@@ -1461,7 +1445,7 @@ substr ParseEngine<EventHandler>::_peek_next_line(size_t pos) const
14611445
goto next_is_empty;
14621446

14631447
// look for the next newline chars, and jump to the right of those
1464-
rem = from_next_line(m_buf.sub(pos));
1448+
rem = _from_next_line(m_buf.sub(pos));
14651449
if(rem.empty())
14661450
goto next_is_empty;
14671451

@@ -1539,7 +1523,7 @@ void ParseEngine<EventHandler>::_line_ended_undo()
15391523

15401524
//-----------------------------------------------------------------------------
15411525
template<class EventHandler>
1542-
void ParseEngine<EventHandler>::_set_indentation(size_t indentation)
1526+
void ParseEngine<EventHandler>::_set_indentation(size_t indentation) noexcept
15431527
{
15441528
m_evt_handler->m_curr->indref = indentation;
15451529
_c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
@@ -1553,6 +1537,13 @@ void ParseEngine<EventHandler>::_save_indentation()
15531537
_c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
15541538
}
15551539

1540+
template<class EventHandler>
1541+
void ParseEngine<EventHandler>::_mark_seqflow_val_end() noexcept
1542+
{
1543+
_c4dbgpf("SEQFLOW. mark val end at line={}", m_evt_handler->m_curr->pos.line);
1544+
m_prev_val_end = m_evt_handler->m_curr->pos.line;
1545+
}
1546+
15561547

15571548
//-----------------------------------------------------------------------------
15581549

@@ -1589,6 +1580,11 @@ void ParseEngine<EventHandler>::_end_flow_container(size_t orig_indent)
15891580
_c4dbgp("flow container: end map as key!");
15901581
}
15911582
}
1583+
else if(has_any(RSEQ))
1584+
{
1585+
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1586+
_mark_seqflow_val_end();
1587+
}
15921588
}
15931589

15941590
template<class EventHandler>
@@ -5095,6 +5091,7 @@ void ParseEngine<EventHandler>::_handle_seq_flow()
50955091
csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
50965092
m_evt_handler->set_val_scalar_squoted(maybe_filtered);
50975093
addrem_flags(RNXT, RVAL);
5094+
_mark_seqflow_val_end();
50985095
}
50995096
else if(first == '"')
51005097
{
@@ -5103,6 +5100,7 @@ void ParseEngine<EventHandler>::_handle_seq_flow()
51035100
csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
51045101
m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
51055102
addrem_flags(RNXT, RVAL);
5103+
_mark_seqflow_val_end();
51065104
}
51075105
// block scalars (ie | and >) cannot appear in flow containers
51085106
else if(_scan_scalar_plain_seq_flow(&sc))
@@ -5111,6 +5109,7 @@ void ParseEngine<EventHandler>::_handle_seq_flow()
51115109
csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
51125110
m_evt_handler->set_val_scalar_plain(maybe_filtered);
51135111
addrem_flags(RNXT, RVAL);
5112+
_mark_seqflow_val_end();
51145113
}
51155114
else if(first == '[')
51165115
{
@@ -5208,6 +5207,11 @@ void ParseEngine<EventHandler>::_handle_seq_flow()
52085207
addrem_flags(RVAL, RNXT);
52095208
m_evt_handler->add_sibling();
52105209
_line_progressed(1);
5210+
if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
5211+
{
5212+
_c4err("parse error: invalid comment after comma");
5213+
}
5214+
_mark_seqflow_val_end();
52115215
}
52125216
else if(first == ']')
52135217
{
@@ -5218,12 +5222,20 @@ void ParseEngine<EventHandler>::_handle_seq_flow()
52185222
}
52195223
else if(first == ':')
52205224
{
5221-
_c4dbgpf("seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5222-
m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5223-
_set_indentation(m_evt_handler->m_parent->indref);
5224-
_line_progressed(1);
5225-
addrem_flags(RSEQIMAP|RVAL, RNXT);
5226-
goto seqflow_finish;
5225+
_c4dbgpf("seqflow[RNXT]: line@valend={} line@now={}", m_prev_val_end, m_evt_handler->m_curr->pos.line);
5226+
if(m_prev_val_end != NONE && m_evt_handler->m_curr->pos.line == m_prev_val_end)
5227+
{
5228+
_c4dbgpf("seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5229+
m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5230+
_set_indentation(m_evt_handler->m_parent->indref);
5231+
_line_progressed(1);
5232+
addrem_flags(RSEQIMAP|RVAL, RNXT);
5233+
goto seqflow_finish;
5234+
}
5235+
else
5236+
{
5237+
_c4err("parse error");
5238+
}
52275239
}
52285240
else
52295241
{
@@ -5415,6 +5427,10 @@ void ParseEngine<EventHandler>::_handle_map_flow()
54155427
m_evt_handler->add_sibling();
54165428
addrem_flags(RKEY, RKCL);
54175429
_line_progressed(1);
5430+
if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
5431+
{
5432+
_c4err("parse error: invalid comment after comma");
5433+
}
54185434
}
54195435
else
54205436
{
@@ -5527,6 +5543,10 @@ void ParseEngine<EventHandler>::_handle_map_flow()
55275543
m_evt_handler->add_sibling();
55285544
addrem_flags(RKEY, RNXT);
55295545
_line_progressed(1);
5546+
if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
5547+
{
5548+
_c4err("parse error: invalid comment after comma");
5549+
}
55305550
}
55315551
else if(rem.begins_with('}'))
55325552
{

src/c4/yml/parse_engine.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -526,8 +526,9 @@ class ParseEngine
526526
void _end_doc_suddenly__pop();
527527
void _end_stream();
528528

529-
void _set_indentation(size_t indentation);
529+
void _set_indentation(size_t indentation) noexcept;
530530
void _save_indentation();
531+
void _mark_seqflow_val_end() noexcept;
531532
void _handle_indentation_pop_from_block_seq();
532533
void _handle_indentation_pop_from_block_map();
533534
void _handle_indentation_pop(ParserState const* dst);
@@ -694,6 +695,7 @@ class ParseEngine
694695

695696
bool m_doc_empty = true;
696697
size_t m_prev_colon = npos;
698+
size_t m_prev_val_end = npos;
697699

698700
private:
699701

test/CMakeLists.txt

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,10 @@ endfunction()
9292

9393

9494
ryml_add_test(engine_1_doc)
95-
ryml_add_test(engine_2_seq)
96-
ryml_add_test(engine_3_map_1)
97-
ryml_add_test(engine_3_map_2)
95+
ryml_add_test(engine_2_seq_block)
96+
ryml_add_test(engine_2_seq_flow)
97+
ryml_add_test(engine_3_map_block)
98+
ryml_add_test(engine_3_map_flow)
9899
ryml_add_test(engine_4_anchor)
99100
ryml_add_test(engine_5_tag)
100101
ryml_add_test(engine_6_qmrk_1)
@@ -163,9 +164,10 @@ endif()
163164
# MSVC bigobj
164165
if(MSVC)
165166
target_compile_options(ryml-test-engine_1_doc PRIVATE /bigobj)
166-
target_compile_options(ryml-test-engine_2_seq PRIVATE /bigobj)
167-
target_compile_options(ryml-test-engine_3_map_1 PRIVATE /bigobj)
168-
target_compile_options(ryml-test-engine_3_map_2 PRIVATE /bigobj)
167+
target_compile_options(ryml-test-engine_2_seq_block PRIVATE /bigobj)
168+
target_compile_options(ryml-test-engine_2_seq_flow PRIVATE /bigobj)
169+
target_compile_options(ryml-test-engine_3_map_block PRIVATE /bigobj)
170+
target_compile_options(ryml-test-engine_3_map_flow PRIVATE /bigobj)
169171
target_compile_options(ryml-test-engine_4_anchor PRIVATE /bigobj)
170172
target_compile_options(ryml-test-engine_5_tag PRIVATE /bigobj)
171173
target_compile_options(ryml-test-engine_6_qmrk_1 PRIVATE /bigobj)

0 commit comments

Comments
 (0)