Skip to content

Commit b70182e

Browse files
committed
cov
1 parent 89666ee commit b70182e

2 files changed

Lines changed: 55 additions & 146 deletions

File tree

src/c4/yml/parse_engine.def.hpp

Lines changed: 50 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,13 @@ C4_HOT C4_ALWAYS_INLINE void _set_first(csubstr &C4_RESTRICT subject, size_t pos
8181
C4_HOT C4_ALWAYS_INLINE void _set_first_strict(substr &C4_RESTRICT subject, size_t pos) RYML_NOEXCEPT
8282
{
8383
// avoids reassigning the ptr in substr
84-
_RYML_ASSERT_BASIC(pos != npos);
84+
_RYML_ASSERT_BASIC(pos != npos); // LCOV_EXCL_LINE
8585
subject.len = pos;
8686
}
8787
C4_HOT C4_ALWAYS_INLINE void _set_first_strict(csubstr &C4_RESTRICT subject, size_t pos) RYML_NOEXCEPT
8888
{
8989
// avoids reassigning the ptr in substr
90-
_RYML_ASSERT_BASIC(pos != npos);
90+
_RYML_ASSERT_BASIC(pos != npos); // LCOV_EXCL_LINE
9191
subject.len = pos;
9292
}
9393

@@ -1824,15 +1824,9 @@ void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
18241824
if(m_evt_handler->m_curr->level != 0)
18251825
_handle_indentation_pop(&m_evt_handler->m_stack[0]);
18261826
}
1827-
else if((m_evt_handler->m_stack.size() > 1) && (m_evt_handler->m_stack[1].flags & RDOC))
1828-
{
1829-
_c4dbgp("root is STREAM");
1830-
if(m_evt_handler->m_curr->level != 1)
1831-
_handle_indentation_pop(&m_evt_handler->m_stack[1]);
1832-
}
18331827
else
18341828
{
1835-
_c4err("internal error");
1829+
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !((m_evt_handler->m_stack.size() > 1) && (m_evt_handler->m_stack[1].flags & RDOC)), m_evt_handler->m_curr->pos);
18361830
}
18371831
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RDOC), m_evt_handler->m_curr->pos);
18381832
}
@@ -2038,31 +2032,17 @@ typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_sc
20382032
// quoted scalars can spread over multiple lines!
20392033
// nice explanation here: http://yaml-multiline.info/
20402034

2041-
// a span to the end of the file
2042-
size_t b = m_evt_handler->m_curr->pos.offset;
2043-
substr s = m_buf.sub(b);
2044-
if(s.begins_with(' '))
2045-
{
2046-
s = s.triml(' ');
2047-
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s), m_evt_handler->m_curr->pos);
2048-
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin(), m_evt_handler->m_curr->pos);
2049-
_line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
2050-
}
2051-
b = m_evt_handler->m_curr->pos.offset; // take this into account
2052-
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('\''), m_evt_handler->m_curr->pos);
2035+
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_buf.sub(m_evt_handler->m_curr->pos.offset).begins_with('\''), m_evt_handler->m_curr->pos);
20532036

2054-
// skip the opening quote
2055-
_line_progressed(1);
2056-
s = s.sub(1);
2037+
// a span to the end of the file, skipping the opening quote
2038+
substr s = m_buf.sub(m_evt_handler->m_curr->pos.offset + 1);
2039+
_line_progressed(1); // advance over the opening quote
20572040

20582041
bool needs_filter = false;
2059-
2060-
size_t numlines = 1; // we already have one line
20612042
size_t pos = npos; // find the pos of the matching quote
20622043
while( ! _finished_file())
20632044
{
20642045
const csubstr line = m_evt_handler->m_curr->line_contents.rem;
2065-
bool line_is_blank = true;
20662046
_c4dbgpf("scanning single quoted scalar @ line[{}]: {}", m_evt_handler->m_curr->pos.line, _prs(line));
20672047
if(C4_UNLIKELY(_is_doc_token(line)))
20682048
_c4err("token can not appear at line begin");
@@ -2074,57 +2054,35 @@ typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_sc
20742054
const char next = i+1 < line.len ? line.str[i+1] : '~';
20752055
if(next != '\'') // so just look for the first quote
20762056
{ // without another after it
2077-
pos = i;
2078-
break;
2057+
_line_progressed(i + 1); // progress beyond the quote
2058+
pos = i + (size_t)(line.str - s.str); // set pos to before the quote
2059+
goto found_close;
20792060
}
20802061
else
20812062
{
20822063
needs_filter = true; // needs filter to remove escaped quotes
20832064
++i; // skip the escaped quote
20842065
}
20852066
}
2086-
else if(curr != ' ')
2087-
{
2088-
line_is_blank = false;
2089-
}
2090-
}
2091-
2092-
// leading whitespace also needs filtering
2093-
needs_filter = needs_filter
2094-
|| (numlines > 1)
2095-
|| line_is_blank
2096-
|| (m_evt_handler->m_curr->at_line_beginning() && line.begins_with(' '));
2097-
2098-
if(pos == npos)
2099-
{
2100-
_line_progressed(line.len);
2101-
++numlines;
2102-
}
2103-
else
2104-
{
2105-
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len, m_evt_handler->m_curr->pos);
2106-
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] == '\'', m_evt_handler->m_curr->pos);
2107-
_line_progressed(pos + 1); // progress beyond the quote
2108-
pos = m_evt_handler->m_curr->pos.offset - b - 1; // but we stop before it
2109-
break;
21102067
}
21112068

2069+
needs_filter = true;
2070+
_line_progressed(line.len);
21122071
_line_ended();
21132072
_scan_line();
21142073
_check_valid_newline_in_quoted_scalar();
21152074
}
21162075

2117-
if(pos == npos)
2118-
{
2119-
_c4err("reached end of file while looking for closing quote");
2120-
}
2121-
else
2122-
{
2123-
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos > 0, m_evt_handler->m_curr->pos);
2124-
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end(), m_evt_handler->m_curr->pos);
2125-
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '\'', m_evt_handler->m_curr->pos);
2126-
s = s.sub(0, pos-1);
2127-
}
2076+
_c4err("reached end of file while looking for closing quote");
2077+
2078+
found_close:
2079+
2080+
_c4dbgpf("found closing quote at: {}", pos);
2081+
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos != npos, m_evt_handler->m_curr->pos);
2082+
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2083+
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end(), m_evt_handler->m_curr->pos);
2084+
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '\'', m_evt_handler->m_curr->pos);
2085+
_set_first_strict(s, pos);
21282086

21292087
_c4prscalar("scanned squoted scalar", s, /*keep_newlines*/true);
21302088

@@ -2139,30 +2097,16 @@ typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_sc
21392097
// quoted scalars can spread over multiple lines!
21402098
// nice explanation here: http://yaml-multiline.info/
21412099

2142-
// a span to the end of the file
2143-
size_t b = m_evt_handler->m_curr->pos.offset;
2144-
substr s = m_buf.sub(b);
2145-
if(s.begins_with(' '))
2146-
{
2147-
s = s.triml(' ');
2148-
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s), m_evt_handler->m_curr->pos);
2149-
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin(), m_evt_handler->m_curr->pos);
2150-
_line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
2151-
}
2152-
b = m_evt_handler->m_curr->pos.offset; // take this into account
2153-
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('"'), m_evt_handler->m_curr->pos);
2100+
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_buf.sub(m_evt_handler->m_curr->pos.offset).begins_with('"'), m_evt_handler->m_curr->pos);
21542101

2155-
// skip the opening quote
2156-
_line_progressed(1);
2157-
s = s.sub(1);
2102+
// a span to the end of the file, skipping the opening quote
2103+
substr s = m_buf.sub(m_evt_handler->m_curr->pos.offset + 1);
2104+
_line_progressed(1); // advance over the opening quote
21582105

21592106
bool needs_filter = false;
2160-
2161-
size_t numlines = 1; // we already have one line
21622107
size_t pos = npos; // find the pos of the matching quote
21632108
while( ! _finished_file())
21642109
{
2165-
bool line_is_blank = true;
21662110
#if defined(__GNUC__) && (/*__GNUC__ == 12 || */__GNUC__ == 13)
21672111
C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem); // prevent hoisting
21682112
#endif
@@ -2173,8 +2117,6 @@ typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_sc
21732117
for(size_t i = 0; i < rem.len; ++i)
21742118
{
21752119
const char curr = rem.str[i];
2176-
if(curr != ' ')
2177-
line_is_blank = false;
21782120
// every \ is an escape
21792121
if(curr == '\\')
21802122
{
@@ -2185,48 +2127,30 @@ typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_sc
21852127
}
21862128
else if(curr == '"')
21872129
{
2188-
pos = i;
2189-
break;
2130+
_line_progressed(i + 1); // progress beyond the quote
2131+
pos = i + (size_t)(rem.str - s.str); // set pos to before the quote
2132+
goto found_close;
21902133
}
21912134
}
21922135

21932136
// no closing quote on this line: the scalar continues on the next line, so it needs filtering
2194-
needs_filter = needs_filter
2195-
|| (numlines > 1)
2196-
|| line_is_blank
2197-
|| (m_evt_handler->m_curr->at_line_beginning() && rem.begins_with(' '));
2198-
2199-
if(pos == npos)
2200-
{
2201-
_line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2202-
++numlines;
2203-
}
2204-
else
2205-
{
2206-
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len, m_evt_handler->m_curr->pos);
2207-
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] == '"', m_evt_handler->m_curr->pos);
2208-
_line_progressed(pos + 1); // progress beyond the quote
2209-
pos = m_evt_handler->m_curr->pos.offset - b - 1; // but we stop before it
2210-
break;
2211-
}
2212-
2137+
needs_filter = true;
2138+
_line_progressed(rem.len);
22132139
_line_ended();
22142140
_scan_line();
2215-
22162141
_check_valid_newline_in_quoted_scalar();
22172142
}
22182143

2219-
if(pos == npos)
2220-
{
2221-
_c4err("reached end of file looking for closing quote");
2222-
}
2223-
else
2224-
{
2225-
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos > 0, m_evt_handler->m_curr->pos);
2226-
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '"', m_evt_handler->m_curr->pos);
2227-
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end(), m_evt_handler->m_curr->pos);
2228-
s = s.sub(0, pos-1);
2229-
}
2144+
_c4err("reached end of file while looking for closing quote");
2145+
2146+
found_close:
2147+
2148+
_c4dbgpf("found closing quote at: {}", pos);
2149+
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos != npos, m_evt_handler->m_curr->pos);
2150+
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2151+
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end(), m_evt_handler->m_curr->pos);
2152+
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '"', m_evt_handler->m_curr->pos);
2153+
_set_first_strict(s, pos);
22302154

22312155
_c4prscalar("scanned dquoted scalar", s, /*keep_newlines*/true);
22322156

@@ -2243,22 +2167,13 @@ void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t
22432167

22442168
// nice explanation here: http://yaml-multiline.info/
22452169
csubstr s = m_evt_handler->m_curr->line_contents.rem;
2246-
csubstr trimmed = s.triml(' ');
2247-
if(trimmed.str > s.str)
2248-
{
2249-
_c4dbgp("skipping whitespace");
2250-
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, trimmed.str >= s.str, m_evt_handler->m_curr->pos);
2251-
_line_progressed(static_cast<size_t>(trimmed.str - s.str));
2252-
s = trimmed;
2253-
}
22542170
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>'), m_evt_handler->m_curr->pos);
22552171

22562172
_c4dbgpf("blck: specs={}", _prs(s));
22572173

22582174
// parse the spec
22592175
BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used
22602176
size_t indentation = npos; // have to find out if no spec is given
2261-
csubstr digits;
22622177
if(s.len > 1)
22632178
{
22642179
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any("|>"), m_evt_handler->m_curr->pos);
@@ -2279,7 +2194,8 @@ void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t
22792194
t = t.first(pos);
22802195
}
22812196
// from here to the end, only digits are considered
2282-
digits = t.left_of(t.first_not_of("0123456789"));
2197+
pos = t.first_not_of("0123456789");
2198+
csubstr digits = t.first(pos);
22832199
if( ! digits.empty())
22842200
{
22852201
if(C4_UNLIKELY(digits.len > 1))
@@ -2292,6 +2208,11 @@ void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t
22922208
_c4dbgpf("blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
22932209
indentation += m_evt_handler->m_curr->indref;
22942210
}
2211+
else
2212+
{
2213+
if(C4_UNLIKELY(t.len && (!t.begins_with_any(" \t") || !t.sub(pos).triml(" \t").begins_with('#'))))
2214+
_c4err("parse error: invalid token");
2215+
}
22952216
}
22962217

22972218
_c4dbgpf("blck: style={} chomp={} indentation={}", s.begins_with('>') ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation);
@@ -6728,16 +6649,8 @@ void ParseEngine<EventHandler>::_handle_map_block()
67286649
{
67296650
_c4dbgpf("mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
67306651
m_evt_handler->m_curr->more_indented = false;
6731-
if(m_evt_handler->m_curr->indref == npos)
6732-
{
6733-
// FIXME is this needed?
6734-
_c4dbgpf("mapblck[RVAL]: setting indentation={}", m_evt_handler->m_parent->indref);
6735-
_set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6736-
_line_progressed(m_evt_handler->m_curr->indref);
6737-
if(!m_evt_handler->m_curr->line_contents.rem.len)
6738-
goto mapblck_again;
6739-
}
6740-
else if(m_evt_handler->m_curr->indentation_eq_extra())
6652+
_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
6653+
if(m_evt_handler->m_curr->indentation_eq_extra())
67416654
{
67426655
_c4dbgp("mapblck[RVAL]: skip indentation!");
67436656
_line_progressed(m_evt_handler->m_curr->indref + 1);

test/testsuite/testsuite_parts.cpp

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,22 +30,15 @@ const AllowedFailure allowed_failures[] = {
3030

3131
// These invalid YAML cases should produce parse errors, but currently don't.
3232

33-
// flow seq
34-
_("Y79Y_004-error" , "cannot use tab for indentation of block entry"),
35-
3633
// block seq
3734
_("SY6V-error" , "should not accept - after anchor"),
3835

3936
// block maps
4037
_("5U3A-error" , "should not accept opening a sequence on same line as block key"),
4138
_("C2SP-error" , "should not accept flow sequence key with terminating ] on the next line"),
42-
_("Y79Y_006-error" , "should not accept tab after ?"),
43-
_("Y79Y_008-error" , "should not accept tab after ?"),
4439

4540
// block scalars
46-
_("S4GJ-error" , "should not accept text after block scalar indicator"),
4741
_("S98Z-error" , "should not accept block scalar with more spaces than first content line"),
48-
_("X4QW-error" , "should not accept comment without whitespace after block scalar indicator"),
4942

5043
// docs
5144
_("CXX2-error" , "should not accept map with anchor after document start token"), // same for scalars/seqs
@@ -54,11 +47,14 @@ const AllowedFailure allowed_failures[] = {
5447
_("H7TQ-error" , "should not accept extra words after directive"),
5548
_("QLJ7-error" , "tag directives should apply only to the next doc (?)"),
5649

57-
// issue only with tabs
50+
// issues with tabs
5851
#ifdef RYML_WITH_TAB_TOKENS
59-
_("Y79Y_005-error" , "cannot use tab for indentation of block entry"),
6052
_("Y79Y_007-error" , "should not accept tab after : succeeding ?"),
6153
_("Y79Y_009-error" , "should not accept tab after : succeeding ?"),
54+
#else
55+
_("Y79Y_004-error" , "cannot use tab for indentation of block entry"),
56+
_("Y79Y_006-error" , "should not accept tab after ?"),
57+
_("Y79Y_008-error" , "should not accept tab after ?"),
6258
#endif
6359

6460

0 commit comments

Comments
 (0)