Skip to content

Commit c1fcee8

Browse files
committed
Parser: fix ambiguity of tags/anchors in ? mode
1 parent f537bdf commit c1fcee8

2 files changed

Lines changed: 90 additions & 28 deletions

File tree

changelog/current.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,12 @@
11
- Fix parsing of **valid** YAML corner cases:
2+
- Add missing an
3+
- Ambiguity of tags/anchors in ? mode ([PR#587](https://github.com/biojppm/rapidyaml/pull/587)):
4+
```yaml
5+
? &mapanchor
6+
key: val
7+
?
8+
&keyanchor key: val
9+
```
210
- flow tags/anchors with omitted plain scalar ([PR#587](https://github.com/biojppm/rapidyaml/pull/587)):
311
```yaml
412
# ... likewise for !tag
@@ -8,6 +16,8 @@
816
- {&anchor :,&anchor :}
917
- [: &anchor,: &anchor]
1018
- {: &anchor,: &anchor}
19+
---
20+
? anchor
1121
```
1222
- flow tags/anchors terminating with `:` (the colon is part of the tag/anchor) ([PR#587](https://github.com/biojppm/rapidyaml/pull/587)):
1323
```yaml
@@ -18,6 +28,8 @@
1828
- {&anchor: :,&anchor: :}
1929
- [: &anchor:,: &anchor:]
2030
- {: &anchor:,: &anchor:}
31+
---
32+
? anchor
2133
```
2234
-----
2335
- Ensure parse errors for **invalid** YAML cases, and improve reported location:
@@ -97,3 +109,9 @@
97109
- ,foo
98110
- ,
99111
```
112+
- references with anchors or tags ([PR#587](https://github.com/biojppm/rapidyaml/pull/587)):
113+
```yaml
114+
all invalid:
115+
- &anchor *ref
116+
- !tag *ref
117+
```

src/c4/yml/parse_engine.def.hpp

Lines changed: 72 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4511,7 +4511,7 @@ void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(size_t
45114511
}
45124512
else if(m_pending_tags.num_entries == 1)
45134513
{
4514-
_c4dbgpf("1 tag. line={}, curr={}", m_pending_tags.annotations[0].line);
4514+
_c4dbgpf("1 tag. line={}, curr={}", m_pending_tags.annotations[0].line, current_line);
45154515
if(m_pending_tags.annotations[0].line < current_line)
45164516
{
45174517
_c4dbgp("...tag is for the map. setting it.");
@@ -4527,7 +4527,7 @@ void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(size_t
45274527
}
45284528
else if(m_pending_anchors.num_entries == 1)
45294529
{
4530-
_c4dbgpf("1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line);
4530+
_c4dbgpf("1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line, current_line);
45314531
if(m_pending_anchors.annotations[0].line < current_line)
45324532
{
45334533
_c4dbgp("...anchor is for the map. setting it.");
@@ -4541,13 +4541,37 @@ template<class EventHandler>
45414541
void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
45424542
{
45434543
_c4dbgp("annotations_before_start_mapblck_as_key");
4544-
if(m_pending_tags.num_entries == 2)
4544+
switch(m_pending_tags.num_entries)
45454545
{
4546+
case 1u:
4547+
_c4dbgpf("annotations_after_start_mapblck_as_key: 1 tag={} line={} currline=", m_pending_tags.annotations[0].str, m_pending_tags.annotations[0].line, m_evt_handler->m_curr->pos.line);
4548+
if(m_pending_tags.annotations[0].line != m_evt_handler->m_curr->pos.line)
4549+
{
4550+
_c4dbgp("annotations_after_start_mapblck_as_key: is map tag");
4551+
m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4552+
_clear_annotations(&m_pending_tags);
4553+
}
4554+
break;
4555+
case 2u:
4556+
_c4dbgpf("annotations_after_start_mapblck_as_key: 2 tags: {} -> {}", m_pending_tags.annotations[0].str, m_pending_tags.annotations[1].str);
45464557
m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4558+
break;
45474559
}
4548-
if(m_pending_anchors.num_entries == 2)
4560+
switch(m_pending_anchors.num_entries)
45494561
{
4562+
case 1u:
4563+
_c4dbgpf("annotations_after_start_mapblck_as_key: 1 anchor={} line={} currline=", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[0].line, m_evt_handler->m_curr->pos.line);
4564+
if(m_pending_anchors.annotations[0].line != m_evt_handler->m_curr->pos.line)
4565+
{
4566+
_c4dbgp("annotations_after_start_mapblck_as_key: is map anchor");
4567+
m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4568+
_clear_annotations(&m_pending_anchors);
4569+
}
4570+
break;
4571+
case 2u:
4572+
_c4dbgpf("annotations_after_start_mapblck_as_key: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
45504573
m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4574+
break;
45514575
}
45524576
}
45534577

@@ -4563,21 +4587,25 @@ void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_
45634587
switch(m_pending_tags.num_entries)
45644588
{
45654589
case 1u:
4590+
_c4dbgpf("annotations_after_start_mapblck: 1 tag: {}", m_pending_tags.annotations[0].str);
45664591
m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
45674592
_clear_annotations(&m_pending_tags);
45684593
break;
45694594
case 2u:
4595+
_c4dbgpf("annotations_after_start_mapblck: 2 tags: {} -> {}", m_pending_tags.annotations[0].str, m_pending_tags.annotations[1].str);
45704596
m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
45714597
_clear_annotations(&m_pending_tags);
45724598
break;
45734599
}
45744600
switch(m_pending_anchors.num_entries)
45754601
{
45764602
case 1u:
4603+
_c4dbgpf("annotations_after_start_mapblck: 1 anchors: {} -> {}", m_pending_anchors.annotations[0].str);
45774604
m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
45784605
_clear_annotations(&m_pending_anchors);
45794606
break;
45804607
case 2u:
4608+
_c4dbgpf("annotations_after_start_mapblck: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
45814609
m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
45824610
_clear_annotations(&m_pending_anchors);
45834611
break;
@@ -7281,37 +7309,54 @@ bool ParseEngine<EventHandler>::_handle_map_block_qmrk()
72817309
//
72827310
if(m_evt_handler->m_curr->at_line_beginning())
72837311
{
7312+
_c4dbgpf("mapblck[QMRK]: at line beginning. ind={} indref={}", m_evt_handler->m_curr->line_contents.indentation, m_evt_handler->m_curr->indref);
72847313
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation != npos, m_evt_handler->m_curr->pos);
7285-
if(m_evt_handler->m_curr->indentation_eq())
7314+
if(m_evt_handler->m_curr->indentation_eq_extra())
72867315
{
7287-
_c4dbgpf("mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref);
7288-
_line_progressed(m_evt_handler->m_curr->indref);
7316+
_c4dbgpf("mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref + 1);
7317+
_line_progressed(m_evt_handler->m_curr->indref + 1);
72897318
if(!m_evt_handler->m_curr->line_contents.rem.len)
72907319
return true; // go again
72917320
}
7292-
else if(m_evt_handler->m_curr->indentation_lt())
7321+
// indentation can be larger in QMRK state
7322+
else if(m_evt_handler->m_curr->indentation_gt_extra())
72937323
{
7294-
_c4dbgp("mapblck[QMRK]: smaller indentation!");
7295-
_handle_indentation_pop_from_block_map();
7324+
_c4dbgp("mapblck[QMRK]: larger indentation !");
72967325
_line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7297-
if(has_all(RMAP|RBLCK))
7298-
{
7299-
_c4dbgp("mapblck[QMRK]: still mapblck!");
7326+
if(!m_evt_handler->m_curr->line_contents.rem.len)
73007327
return true; // go again
7301-
}
7302-
else
7303-
{
7304-
_c4dbgp("mapblck[QMRK]: no longer mapblck!");
7305-
return false; // finish mapblck
7306-
}
73077328
}
7308-
// indentation can be larger in QMRK state
73097329
else
73107330
{
7311-
_c4dbgp("mapblck[QMRK]: larger indentation !");
7312-
_line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7313-
if(!m_evt_handler->m_curr->line_contents.rem.len)
7331+
_c4dbgp("mapblck[QMRK]: smaller indentation!");
7332+
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt_extra(), m_evt_handler->m_curr->pos);
7333+
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
7334+
if(m_evt_handler->m_curr->indentation_eq()
7335+
// defend against docs or indentless seqs
7336+
&& m_evt_handler->m_curr->line_contents.rem.str[0] != '-')
7337+
{
7338+
_c4dbgp("mapblck[QMRK]: QMRK finished!");
7339+
_handle_annotations_before_blck_key_scalar();
7340+
m_evt_handler->set_key_scalar_plain_empty();
7341+
addrem_flags(RKCL, QMRK);
73147342
return true; // go again
7343+
}
7344+
else if(m_evt_handler->m_curr->indentation_lt())
7345+
{
7346+
_c4dbgp("mapblck[QMRK]: indentation pop!");
7347+
_handle_indentation_pop_from_block_map();
7348+
_line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7349+
if(has_all(RMAP|RBLCK))
7350+
{
7351+
_c4dbgp("mapblck[QMRK]: still mapblck!");
7352+
return true; // go again
7353+
}
7354+
else
7355+
{
7356+
_c4dbgp("mapblck[QMRK]: no longer mapblck!");
7357+
return false; // finish mapblck
7358+
}
7359+
}
73157360
}
73167361
}
73177362
//
@@ -7480,8 +7525,7 @@ bool ParseEngine<EventHandler>::_handle_map_block_qmrk()
74807525
else if(first == '-')
74817526
{
74827527
_c4dbgp("mapblck[QMRK]: maybe doc?");
7483-
csubstr rs = m_evt_handler->m_curr->line_contents.rem.sub(1);
7484-
if(rs == "--" || rs.begins_with("-- "))
7528+
if(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
74857529
{
74867530
_c4dbgp("mapblck[QMRK]: end+start doc");
74877531
_start_doc_suddenly();
@@ -7524,8 +7568,8 @@ bool ParseEngine<EventHandler>::_handle_map_block_qmrk()
75247568
}
75257569
else if(first == '?')
75267570
{
7527-
_c4dbgp("mapblck[QMRK]: another QMRK '?'");
7528-
if(m_evt_handler->m_curr->indentation_eq())
7571+
_c4dbgpf("mapblck[QMRK]: another QMRK '?'. ind={} indref={}", startindent, m_evt_handler->m_curr->indref);
7572+
if(startindent == m_evt_handler->m_curr->indref)
75297573
{
75307574
_c4dbgp("mapblck[QMRK]: ? indent eq - prev ? was for an empty keyval");
75317575
_handle_annotations_before_blck_key_scalar();
@@ -7535,7 +7579,7 @@ bool ParseEngine<EventHandler>::_handle_map_block_qmrk()
75357579
}
75367580
else
75377581
{
7538-
_RYML_ASSERT_PARSE_(callbacks(), m_evt_handler->m_curr->indentation_gt(), m_evt_handler->m_curr->pos);
7582+
_RYML_ASSERT_PARSE_(callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
75397583
_c4dbgp("mapblck[QMRK]: ? indent gt - start child mapblck (!)");
75407584
addrem_flags(RKCL, RKEY|QMRK);
75417585
_handle_annotations_before_blck_key_scalar();

0 commit comments

Comments
 (0)