Skip to content

Commit b92ddf1

Browse files
committed
Explicit keys: fix parsing
1 parent e7ad0cd commit b92ddf1

5 files changed

Lines changed: 132 additions & 40 deletions

File tree

changelog/current.md

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,16 @@
1+
## Parsing fixes
2+
3+
- [PR#583](https://github.com/biojppm/rapidyaml/pull/583): Fix corner cases of container keys. E.g., parsing of [explicit keys forming valid YAML](https://play.yaml.com/?show=xd#PwogID8gICMgd2FzIGNhdXNpbmcgYSBwYXJzZSBlcnJvcgo/ICAgICMgcG9wcGluZyB3YXMgYWxzbyBjYXVzaW5nIGEgcGFyc2UgZXJyb3IKLS0tCj8gW2E6IGJdOiB4CjogeQo=) like:
4+
```yaml
5+
?
6+
? # was causing a parse error
7+
? # popping was also causing a parse error
8+
---
9+
? [a: b]: x
10+
: y
11+
```
12+
13+
114
## Fixes
215

316
- [PR#580](https://github.com/biojppm/rapidyaml/pull/580): fix compilation error when `RYML_NO_DEFAULT_CALLBACKS` is defined (thanks @toge)
@@ -7,8 +20,7 @@
720

821
## Python
922

10-
- [PR#579](https://github.com/biojppm/rapidyaml/pull/579): python packaging files and CI infrastructure was moved to a different repo [biojppm/rapidyaml-python](https://github.com/biojppm/rapidyaml-python). This was done because python packaging is notoriously hard and has
11-
always posed trouble in the CI, standing in the way of C++ development and releases.
23+
- [PR#579](https://github.com/biojppm/rapidyaml/pull/579): python packaging files and CI infrastructure were moved to a different repo [biojppm/rapidyaml-python](https://github.com/biojppm/rapidyaml-python). This was done because python packaging is notoriously hard and has always posed trouble in the CI, standing in the way of C++ development and releases.
1224

1325

1426
## Thanks

src/c4/yml/parse_engine.def.hpp

Lines changed: 76 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,23 @@ C4_NO_INLINE void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn) const
495495
_dbg_dump(std::forward<DumpFn>(dumpfn), "top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
496496
}
497497
}
498+
499+
/** Debug helper: when debug output is enabled (_dbg_enabled()), print
 * one line for every entry of the event handler's state stack, showing
 * that entry's level, reference indentation (indref), node id and flags.
 *
 * @param buf scratch buffer handed to detail::_parser_flags_to_str(),
 *        which uses it to render each entry's flags as text.
 *
 * Prints nothing when debug output is disabled. */
template<class EventHandler>
500+
void ParseEngine<EventHandler>::_print_state_stack(substr buf) const
501+
{
502+
if(_dbg_enabled())
503+
{
504+
for(typename EventHandler::state const& s : m_evt_handler->m_stack)
505+
_dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(buf, s.flags));
506+
}
507+
}
508+
509+
/** Convenience overload of _print_state_stack(substr): supplies a local
 * 128-char scratch buffer for formatting the flags, for callers that do
 * not already have one. */
template<class EventHandler>
510+
void ParseEngine<EventHandler>::_print_state_stack() const
511+
{
512+
char buf[128];
513+
_print_state_stack(buf);
514+
}
498515
#endif
499516

500517

@@ -661,9 +678,15 @@ bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
661678
}
662679
if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ':'))
663680
{
664-
_c4dbgp("found ':' colon next");
665-
_line_progressed(1);
666-
return true;
681+
if(m_evt_handler->m_curr->line_contents.rem.len == 1
682+
|| m_evt_handler->m_curr->line_contents.rem.str[1] == ' '
683+
_RYML_WITH_TAB_TOKENS(|| m_evt_handler->m_curr->line_contents.rem.str[1] == '\t')
684+
)
685+
{
686+
_c4dbgp("found ':' colon next");
687+
_line_progressed(1);
688+
return true;
689+
}
667690
}
668691
}
669692
return false;
@@ -1540,20 +1563,39 @@ void ParseEngine<EventHandler>::_save_indentation()
15401563

15411564
//-----------------------------------------------------------------------------
15421565

1566+
/** Check whether a flow container that was just closed is actually the
 * key of a block map, and if so switch the parser into block-map state.
 *
 * If _maybe_scan_following_colon() reports a ':' after the container,
 * this (1) tells the event handler that the value just emitted is the
 * first key of a new block map, (2) updates the parser flags via
 * addrem_flags(RMAP|RVAL|RBLCK, RKCL|RUNK) -- presumably (add, remove);
 * confirm against addrem_flags() -- (3) sets the indentation to
 * orig_indent, and (4) skips any following whitespace tokens.
 * When no colon follows, nothing is changed.
 *
 * @param orig_indent indentation to restore; the callers in this file
 *        pass the current state's indref as read before the flow
 *        container was ended. */
template<class EventHandler>
1567+
void ParseEngine<EventHandler>::_flow_container_was_a_key(size_t orig_indent)
1568+
{
1569+
if(_maybe_scan_following_colon())
1570+
{
1571+
_c4dbgpf("flow container is followed by colon! orig_indent={}", orig_indent);
1572+
m_evt_handler->actually_val_is_first_key_of_new_map_block();
1573+
addrem_flags(RMAP|RVAL|RBLCK, RKCL|RUNK);
1574+
_set_indentation(orig_indent);
1575+
_maybe_skip_whitespace_tokens();
1576+
}
1577+
}
1578+
15431579
/** Finish the current flow map.
 *
 * The map is reported as multiline when the detect_flow_ml() option is
 * on and the parent state's line precedes the current state's line.
 *
 * The current state's indref is read *before* calling
 * m_evt_handler->end_map_flow(), because m_curr is used again afterwards
 * (presumably end_map_flow() changes the current state -- confirm in the
 * event handler). After ending, if the parser is no longer in flow mode
 * (no RFLOW) and is either in RUNK/RSEQ state or was inside an explicit
 * key (m_was_inside_qmrk), _flow_container_was_a_key() checks whether
 * the map was really the key of a block map. */
template<class EventHandler>
15441580
void ParseEngine<EventHandler>::_end_map_flow()
15451581
{
15461582
bool multiline = m_options.detect_flow_ml() && m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1583+
size_t orig_indent = m_evt_handler->m_curr->indref;
15471584
_c4dbgpf("mapflow: end, multiline={}", multiline);
15481585
m_evt_handler->end_map_flow(multiline);
1586+
if(has_none(RFLOW) && (has_any(RUNK|RSEQ) || m_was_inside_qmrk))
1587+
_flow_container_was_a_key(orig_indent);
15491588
}
15501589

15511590
/** Finish the current flow sequence.
 *
 * The sequence is reported as multiline when the detect_flow_ml() option
 * is on and the parent state's line precedes the current state's line.
 *
 * The current state's indref is read *before* calling
 * m_evt_handler->end_seq_flow(), because m_curr is used again afterwards
 * (presumably end_seq_flow() changes the current state -- confirm in the
 * event handler). After ending, if the parser is no longer in flow mode
 * (no RFLOW) and is either in RUNK/RSEQ state or was inside an explicit
 * key (m_was_inside_qmrk), _flow_container_was_a_key() checks whether
 * the sequence was really the key of a block map. */
template<class EventHandler>
15521591
void ParseEngine<EventHandler>::_end_seq_flow()
15531592
{
15541593
bool multiline = m_options.detect_flow_ml() && m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1594+
size_t orig_indent = m_evt_handler->m_curr->indref;
15551595
_c4dbgpf("seqflow: end, multiline={}", multiline);
15561596
m_evt_handler->end_seq_flow(multiline);
1597+
if(has_none(RFLOW) && (has_any(RUNK|RSEQ) || m_was_inside_qmrk))
1598+
_flow_container_was_a_key(orig_indent);
15571599
}
15581600

15591601
template<class EventHandler>
@@ -1806,12 +1848,7 @@ void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
18061848
_RYML_ASSERT_BASIC_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
18071849
const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
18081850
#ifdef RYML_DBG
1809-
if(_dbg_enabled())
1810-
{
1811-
char flagbuf_[128];
1812-
for(state_type const& s : stack)
1813-
_dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
1814-
}
1851+
_print_state_stack();
18151852
#endif
18161853
for(state_type const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
18171854
{
@@ -1842,11 +1879,7 @@ void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
18421879
state_type const* popto = nullptr;
18431880
#ifdef RYML_DBG
18441881
char flagbuf_[128];
1845-
if(_dbg_enabled())
1846-
{
1847-
for(state_type const& s : stack)
1848-
_dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
1849-
}
1882+
_print_state_stack(flagbuf_);
18501883
#endif
18511884
for(state_type const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s) // never go to the stack bottom. that's the root
18521885
{
@@ -5152,8 +5185,8 @@ void ParseEngine<EventHandler>::_handle_seq_flow()
51525185
else if(first == ']')
51535186
{
51545187
_c4dbgp("seqflow[RNXT]: end!");
5155-
_end_seq_flow();
51565188
_line_progressed(1);
5189+
_end_seq_flow();
51575190
goto seqflow_finish;
51585191
}
51595192
else if(first == ':')
@@ -5270,8 +5303,8 @@ void ParseEngine<EventHandler>::_handle_map_flow()
52705303
else if(first == '}') // this happens on a trailing comma like ", }"
52715304
{
52725305
_c4dbgp("mapflow[RKEY]: end!");
5273-
_end_map_flow();
52745306
_line_progressed(1);
5307+
_end_map_flow();
52755308
goto mapflow_finish;
52765309
}
52775310
else if(first == '&')
@@ -5344,8 +5377,8 @@ void ParseEngine<EventHandler>::_handle_map_flow()
53445377
_c4dbgp("mapflow[RKCL]: end with missing val!");
53455378
addrem_flags(RVAL, RKCL);
53465379
m_evt_handler->set_val_scalar_plain_empty();
5347-
_end_map_flow();
53485380
_line_progressed(1);
5381+
_end_map_flow();
53495382
goto mapflow_finish;
53505383
}
53515384
else if(first == ',')
@@ -5418,8 +5451,8 @@ void ParseEngine<EventHandler>::_handle_map_flow()
54185451
{
54195452
_c4dbgp("mapflow[RVAL]: end!");
54205453
m_evt_handler->set_val_scalar_plain_empty();
5421-
_end_map_flow();
54225454
_line_progressed(1);
5455+
_end_map_flow();
54235456
goto mapflow_finish;
54245457
}
54255458
else if(first == ',')
@@ -5471,8 +5504,8 @@ void ParseEngine<EventHandler>::_handle_map_flow()
54715504
else if(rem.begins_with('}'))
54725505
{
54735506
_c4dbgp("mapflow[RNXT]: end!");
5474-
_end_map_flow();
54755507
_line_progressed(1);
5508+
_end_map_flow();
54765509
goto mapflow_finish;
54775510
}
54785511
else
@@ -6199,11 +6232,11 @@ void ParseEngine<EventHandler>::_handle_map_block()
61996232
// appear in an explicit QMRK scope (ie, after the ? token),
62006233
else if(C4_UNLIKELY(first == '|'))
62016234
{
6202-
_c4err("block keys must be enclosed in '?'");
6235+
_c4err("block map: literal keys must be enclosed in '?'");
62036236
}
62046237
else if(C4_UNLIKELY(first == '>'))
62056238
{
6206-
_c4err("block keys must be enclosed in '?'");
6239+
_c4err("block map: folded keys must be enclosed in '?'");
62076240
}
62086241
else if(_scan_scalar_plain_map_blck(&sc))
62096242
{
@@ -6982,10 +7015,7 @@ void ParseEngine<EventHandler>::_handle_map_block()
69827015
if(has_all(RMAP|RBLCK))
69837016
{
69847017
_c4dbgp("mapblck[QMRK]: still mapblck!");
6985-
_RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(QMRK));
6986-
rem = m_evt_handler->m_curr->line_contents.rem;
6987-
if(!rem.len)
6988-
goto mapblck_again;
7018+
goto mapblck_again;
69897019
}
69907020
else
69917021
{
@@ -7199,7 +7229,7 @@ void ParseEngine<EventHandler>::_handle_map_block()
71997229
addrem_flags(RKCL, RKEY|QMRK);
72007230
m_evt_handler->begin_seq_key_flow();
72017231
addrem_flags(RVAL|RSEQ|RFLOW, RMAP|RKCL|QMRK|RBLCK);
7202-
_set_indentation(m_evt_handler->m_parent->indref);
7232+
_set_indentation(startindent);
72037233
_line_progressed(1);
72047234
goto mapblck_finish;
72057235
}
@@ -7209,17 +7239,32 @@ void ParseEngine<EventHandler>::_handle_map_block()
72097239
addrem_flags(RKCL, RKEY|QMRK);
72107240
m_evt_handler->begin_map_key_flow();
72117241
addrem_flags(RKEY|RFLOW, RVAL|RKCL|QMRK|RBLCK);
7212-
_set_indentation(m_evt_handler->m_parent->indref);
7242+
_set_indentation(startindent);
72137243
_line_progressed(1);
72147244
goto mapblck_finish;
72157245
}
72167246
else if(first == '?')
72177247
{
72187248
_c4dbgp("mapblck[QMRK]: another QMRK '?'");
7219-
m_evt_handler->set_key_scalar_plain_empty();
7220-
m_evt_handler->set_val_scalar_plain_empty();
7221-
m_evt_handler->add_sibling();
7249+
if(m_evt_handler->m_curr->indentation_eq())
7250+
{
7251+
_c4dbgp("mapblck[QMRK]: ? indent eq - prev ? was for an empty keyval");
7252+
m_evt_handler->set_key_scalar_plain_empty();
7253+
m_evt_handler->set_val_scalar_plain_empty();
7254+
m_evt_handler->add_sibling();
7255+
}
7256+
else
7257+
{
7258+
_RYML_ASSERT_BASIC_(callbacks(), m_evt_handler->m_curr->indentation_gt());
7259+
_c4dbgp("mapblck[QMRK]: ? indent gt - start child mapblck (!)");
7260+
addrem_flags(RKCL, RKEY|QMRK);
7261+
m_evt_handler->begin_map_key_block();
7262+
addrem_flags(RBLCK|QMRK, RVAL|RKCL);
7263+
_set_indentation(startindent);
7264+
}
7265+
// indentation_lt() should be handled elsewhere
72227266
_line_progressed(1);
7267+
_maybe_skip_whitespace_tokens();
72237268
}
72247269
else if(first == '.')
72257270
{
@@ -7549,7 +7594,7 @@ void ParseEngine<EventHandler>::_handle_unk()
75497594
_maybe_begin_doc();
75507595
_handle_annotations_before_blck_val_scalar();
75517596
m_evt_handler->begin_map_val_block();
7552-
addrem_flags(RMAP|RBLCK|QMRK, RKEY|RVAL|RTOP|RUNK);
7597+
addrem_flags(RMAP|RBLCK|QMRK, RKEY|RVAL|RTOP|RUNK|RDOC);
75537598
m_doc_empty = false;
75547599
m_was_inside_qmrk = true;
75557600
_set_indentation(remindent); //_save_indentation();

src/c4/yml/parse_engine.hpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,7 @@ class ParseEngine
507507
void _end_seq_blck();
508508
void _end2_map();
509509
void _end2_seq();
510+
void _flow_container_was_a_key(size_t orig_indent);
510511

511512
void _begin2_doc();
512513
void _begin2_doc_expl();
@@ -630,12 +631,14 @@ class ParseEngine
630631
void _free();
631632
void _clr();
632633

634+
template<class ...Args> C4_NORETURN C4_NO_INLINE void _err(Location const& cpploc, const char *fmt, Args const& ...args) const;
635+
template<class ...Args> C4_NORETURN C4_NO_INLINE void _err(Location const& cpploc, Location const& ymlloc, const char *fmt, Args const& ...args) const;
633636
#ifdef RYML_DBG
634637
template<class ...Args> C4_NO_INLINE void _dbg(csubstr fmt, Args const& ...args) const;
635638
template<class DumpFn> C4_NO_INLINE void _fmt_msg(DumpFn &&dumpfn) const;
639+
C4_NO_INLINE void _print_state_stack() const;
640+
C4_NO_INLINE void _print_state_stack(substr buf) const;
636641
#endif
637-
template<class ...Args> C4_NORETURN C4_NO_INLINE void _err(Location const& cpploc, const char *fmt, Args const& ...args) const;
638-
template<class ...Args> C4_NORETURN C4_NO_INLINE void _err(Location const& cpploc, Location const& ymlloc, const char *fmt, Args const& ...args) const;
639642

640643

641644
private:

src_extra/c4/yml/extra/event_handler_ints.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -842,6 +842,7 @@ struct EventHandlerInts : public c4::yml::EventHandlerStack<EventHandlerInts, Ev
842842
*/
843843
void actually_val_is_first_key_of_new_map_block()
844844
{
845+
_c4dbgpf("{}/{}: prev={} actually_val_is_first_key_of_new_map_block", m_evt_pos, m_evt_size, m_evt_prev);
845846
if(m_evt_prev < m_evt_size)
846847
{
847848
// interpolate BMAP|VAL|BLCK after the last BDOC

src_extra/c4/yml/extra/event_handler_testsuite.hpp

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,29 @@ struct EventHandlerTestSuite : public EventHandlerStack<EventHandlerTestSuite, E
7272
#define _enable_(bits) _enable__<bits>()
7373
#define _disable_(bits) _disable__<bits>()
7474
#define _has_any_(bits) _has_any__<bits>()
75+
76+
/** Debug helper: dump the per-level event buffers.
 *
 * For every index i below m_stack.size(), prints a "[i]" header and then
 * the contents of _buf_(i) split on '\n', one printed line per piece.
 * The header and each line are preceded by i repetitions of the indent
 * string, so deeper levels appear further to the right.
 *
 * The whole body is compiled only when RYML_DBG is defined; otherwise
 * the function is empty. */
void _dbg_print() const
77+
{
78+
#ifdef RYML_DBG
79+
auto indent = [](id_type n){
80+
for(id_type level = 0; level < n; ++level)
81+
{
82+
_dbg_printf(" ");
83+
}
84+
};
85+
for(id_type i = 0; i < m_stack.size(); ++i)
86+
{
87+
csubstr const& str = _buf_(i);
88+
indent(i);
89+
_dbg_printf("[{}]\n", i);
90+
for(csubstr ln : str.split('\n'))
91+
{
92+
indent(i);
93+
_dbg_printf("{}\n", ln);
94+
}
95+
}
96+
#endif
97+
}
7598
/** @endcond */
7699

77100
public:
@@ -349,6 +372,7 @@ struct EventHandlerTestSuite : public EventHandlerStack<EventHandlerTestSuite, E
349372
*/
350373
void actually_val_is_first_key_of_new_map_flow()
351374
{
375+
_c4dbgpf("node[{}]: actually_val_is_first_key_of_new_map_flow", m_curr->node_id);
352376
// ensure we have a temporary buffer to save the current val
353377
const id_type tmp = m_curr->level + id_type(2);
354378
_buf_ensure_(tmp + id_type(2));
@@ -371,8 +395,9 @@ struct EventHandlerTestSuite : public EventHandlerStack<EventHandlerTestSuite, E
371395
*/
372396
void actually_val_is_first_key_of_new_map_block()
373397
{
398+
_c4dbgpf("node[{}]: actually_val_is_first_key_of_new_map_block", m_curr->node_id);
374399
EventSink &sink = _buf_();
375-
substr full = sink;(void)full;
400+
csubstr full = sink;(void)full;
376401
// interpolate +MAP\n after the last +DOC\n
377402
_RYML_ASSERT_BASIC_(m_stack.m_callbacks, full.len);
378403
_RYML_ASSERT_BASIC_(m_stack.m_callbacks, !full.count('\r'));
@@ -387,10 +412,16 @@ struct EventHandlerTestSuite : public EventHandlerStack<EventHandlerTestSuite, E
387412
{
388413
// ... or interpolate +MAP\n after the last +DOC ---\n
389414
docpos = sink.find_last("+DOC ---\n");
390-
_RYML_ASSERT_BASIC_(m_stack.m_callbacks, docpos != npos);
391-
_RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_stack.size() == 1u) ? (docpos >= 5u) : (docpos == 0u));
392-
_RYML_ASSERT_BASIC_(m_stack.m_callbacks, docpos + 9u < full.len);
393-
sink.insert("+MAP\n", docpos + 9u);
415+
if(docpos != npos)
416+
{
417+
_RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_stack.size() == 1u) ? (docpos >= 5u) : (docpos == 0u));
418+
_RYML_ASSERT_BASIC_(m_stack.m_callbacks, docpos + 9u < full.len);
419+
sink.insert("+MAP\n", docpos + 9u);
420+
}
421+
else
422+
{
423+
sink.insert("+MAP\n", 0u);
424+
}
394425
}
395426
_push();
396427
}

0 commit comments

Comments
 (0)