@@ -81,13 +81,13 @@ C4_HOT C4_ALWAYS_INLINE void _set_first(csubstr &C4_RESTRICT subject, size_t pos
8181C4_HOT C4_ALWAYS_INLINE void _set_first_strict (substr &C4_RESTRICT subject, size_t pos) RYML_NOEXCEPT
8282{
8383 // Truncates subject in place to its first pos characters by writing only subject.len; this avoids reassigning the ptr in substr. pos must not be npos (asserted below).
84- _RYML_ASSERT_BASIC (pos != npos);
84+ _RYML_ASSERT_BASIC (pos != npos); // LCOV_EXCL_LINE
8585 subject.len = pos; // NOTE(review): pos is not checked against the current length here -- presumably callers guarantee pos <= subject.len; confirm
8686}
8787C4_HOT C4_ALWAYS_INLINE void _set_first_strict (csubstr &C4_RESTRICT subject, size_t pos) RYML_NOEXCEPT
8888{
8989 // Overload for csubstr: truncates subject in place to its first pos characters by writing only subject.len; this avoids reassigning the ptr in csubstr. pos must not be npos (asserted below).
90- _RYML_ASSERT_BASIC (pos != npos);
90+ _RYML_ASSERT_BASIC (pos != npos); // LCOV_EXCL_LINE
9191 subject.len = pos; // NOTE(review): pos is not checked against the current length here -- presumably callers guarantee pos <= subject.len; confirm
9292}
9393
@@ -1824,15 +1824,9 @@ void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
18241824 if (m_evt_handler->m_curr ->level != 0 )
18251825 _handle_indentation_pop (&m_evt_handler->m_stack [0 ]);
18261826 }
1827- else if ((m_evt_handler->m_stack .size () > 1 ) && (m_evt_handler->m_stack [1 ].flags & RDOC))
1828- {
1829- _c4dbgp (" root is STREAM" );
1830- if (m_evt_handler->m_curr ->level != 1 )
1831- _handle_indentation_pop (&m_evt_handler->m_stack [1 ]);
1832- }
18331827 else
18341828 {
1835- _c4err ( " internal error " );
1829+ _RYML_ASSERT_PARSE_ (m_evt_handler-> m_stack . m_callbacks , !((m_evt_handler-> m_stack . size () > 1 ) && (m_evt_handler-> m_stack [ 1 ]. flags & RDOC)), m_evt_handler-> m_curr -> pos );
18361830 }
18371831 _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , has_any (RDOC), m_evt_handler->m_curr ->pos );
18381832}
@@ -2038,31 +2032,17 @@ typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_sc
20382032 // quoted scalars can spread over multiple lines!
20392033 // nice explanation here: http://yaml-multiline.info/
20402034
2041- // a span to the end of the file
2042- size_t b = m_evt_handler->m_curr ->pos .offset ;
2043- substr s = m_buf.sub (b);
2044- if (s.begins_with (' ' ))
2045- {
2046- s = s.triml (' ' );
2047- _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , m_buf.sub (b).is_super (s), m_evt_handler->m_curr ->pos );
2048- _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , s.begin () >= m_buf.sub (b).begin (), m_evt_handler->m_curr ->pos );
2049- _line_progressed ((size_t )(s.begin () - m_buf.sub (b).begin ()));
2050- }
2051- b = m_evt_handler->m_curr ->pos .offset ; // take this into account
2052- _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , s.begins_with (' \' ' ), m_evt_handler->m_curr ->pos );
2035+ _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , m_buf.sub (m_evt_handler->m_curr ->pos .offset ).begins_with (' \' ' ), m_evt_handler->m_curr ->pos );
20532036
2054- // skip the opening quote
2055- _line_progressed ( 1 );
2056- s = s. sub (1 );
2037+ // a span to the end of the file, skipping the opening quote
2038+ substr s = m_buf. sub (m_evt_handler-> m_curr -> pos . offset + 1 );
2039+ _line_progressed (1 ); // advance over the opening quote
20572040
20582041 bool needs_filter = false ;
2059-
2060- size_t numlines = 1 ; // we already have one line
20612042 size_t pos = npos; // find the pos of the matching quote
20622043 while ( ! _finished_file ())
20632044 {
20642045 const csubstr line = m_evt_handler->m_curr ->line_contents .rem ;
2065- bool line_is_blank = true ;
20662046 _c4dbgpf (" scanning single quoted scalar @ line[{}]: {}" , m_evt_handler->m_curr ->pos .line , _prs (line));
20672047 if (C4_UNLIKELY (_is_doc_token (line)))
20682048 _c4err (" token can not appear at line begin" );
@@ -2074,57 +2054,35 @@ typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_sc
20742054 const char next = i+1 < line.len ? line.str [i+1 ] : ' ~' ;
20752055 if (next != ' \' ' ) // so just look for the first quote
20762056 { // without another after it
2077- pos = i;
2078- break ;
2057+ _line_progressed (i + 1 ); // progress beyond the quote
2058+ pos = i + (size_t )(line.str - s.str ); // set pos to before the quote
2059+ goto found_close;
20792060 }
20802061 else
20812062 {
20822063 needs_filter = true ; // needs filter to remove escaped quotes
20832064 ++i; // skip the escaped quote
20842065 }
20852066 }
2086- else if (curr != ' ' )
2087- {
2088- line_is_blank = false ;
2089- }
2090- }
2091-
2092- // leading whitespace also needs filtering
2093- needs_filter = needs_filter
2094- || (numlines > 1 )
2095- || line_is_blank
2096- || (m_evt_handler->m_curr ->at_line_beginning () && line.begins_with (' ' ));
2097-
2098- if (pos == npos)
2099- {
2100- _line_progressed (line.len );
2101- ++numlines;
2102- }
2103- else
2104- {
2105- _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , pos >= 0 && pos < m_buf.len , m_evt_handler->m_curr ->pos );
2106- _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , m_buf[m_evt_handler->m_curr ->pos .offset + pos] == ' \' ' , m_evt_handler->m_curr ->pos );
2107- _line_progressed (pos + 1 ); // progress beyond the quote
2108- pos = m_evt_handler->m_curr ->pos .offset - b - 1 ; // but we stop before it
2109- break ;
21102067 }
21112068
2069+ needs_filter = true ;
2070+ _line_progressed (line.len );
21122071 _line_ended ();
21132072 _scan_line ();
21142073 _check_valid_newline_in_quoted_scalar ();
21152074 }
21162075
2117- if (pos == npos)
2118- {
2119- _c4err (" reached end of file while looking for closing quote" );
2120- }
2121- else
2122- {
2123- _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , pos > 0 , m_evt_handler->m_curr ->pos );
2124- _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , s.end () >= m_buf.begin () && s.end () <= m_buf.end (), m_evt_handler->m_curr ->pos );
2125- _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , s.end () == m_buf.end () || *s.end () == ' \' ' , m_evt_handler->m_curr ->pos );
2126- s = s.sub (0 , pos-1 );
2127- }
2076+ _c4err (" reached end of file while looking for closing quote" );
2077+
2078+ found_close:
2079+
2080+ _c4dbgpf (" found closing quote at: {}" , pos);
2081+ _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , pos != npos, m_evt_handler->m_curr ->pos );
2082+ _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , pos >= 0 , m_evt_handler->m_curr ->pos );
2083+ _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , s.end () >= m_buf.begin () && s.end () <= m_buf.end (), m_evt_handler->m_curr ->pos );
2084+ _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , s.end () == m_buf.end () || *s.end () == ' \' ' , m_evt_handler->m_curr ->pos );
2085+ _set_first_strict (s, pos);
21282086
21292087 _c4prscalar (" scanned squoted scalar" , s, /* keep_newlines*/ true );
21302088
@@ -2139,30 +2097,16 @@ typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_sc
21392097 // quoted scalars can spread over multiple lines!
21402098 // nice explanation here: http://yaml-multiline.info/
21412099
2142- // a span to the end of the file
2143- size_t b = m_evt_handler->m_curr ->pos .offset ;
2144- substr s = m_buf.sub (b);
2145- if (s.begins_with (' ' ))
2146- {
2147- s = s.triml (' ' );
2148- _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , m_buf.sub (b).is_super (s), m_evt_handler->m_curr ->pos );
2149- _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , s.begin () >= m_buf.sub (b).begin (), m_evt_handler->m_curr ->pos );
2150- _line_progressed ((size_t )(s.begin () - m_buf.sub (b).begin ()));
2151- }
2152- b = m_evt_handler->m_curr ->pos .offset ; // take this into account
2153- _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , s.begins_with (' "' ), m_evt_handler->m_curr ->pos );
2100+ _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , m_buf.sub (m_evt_handler->m_curr ->pos .offset ).begins_with (' "' ), m_evt_handler->m_curr ->pos );
21542101
2155- // skip the opening quote
2156- _line_progressed ( 1 );
2157- s = s. sub (1 );
2102+ // a span to the end of the file, skipping the opening quote
2103+ substr s = m_buf. sub (m_evt_handler-> m_curr -> pos . offset + 1 );
2104+ _line_progressed (1 ); // advance over the opening quote
21582105
21592106 bool needs_filter = false ;
2160-
2161- size_t numlines = 1 ; // we already have one line
21622107 size_t pos = npos; // find the pos of the matching quote
21632108 while ( ! _finished_file ())
21642109 {
2165- bool line_is_blank = true ;
21662110 #if defined(__GNUC__) && (/* __GNUC__ == 12 || */ __GNUC__ == 13 )
21672111 C4_DONT_OPTIMIZE (m_evt_handler->m_curr ->line_contents .rem ); // prevent hoisting
21682112 #endif
@@ -2173,8 +2117,6 @@ typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_sc
21732117 for (size_t i = 0 ; i < rem.len ; ++i)
21742118 {
21752119 const char curr = rem.str [i];
2176- if (curr != ' ' )
2177- line_is_blank = false ;
21782120 // every \ is an escape
21792121 if (curr == ' \\ ' )
21802122 {
@@ -2185,48 +2127,30 @@ typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_sc
21852127 }
21862128 else if (curr == ' "' )
21872129 {
2188- pos = i;
2189- break ;
2130+ _line_progressed (i + 1 ); // progress beyond the quote
2131+ pos = i + (size_t )(rem.str - s.str ); // set pos to before the quote
2132+ goto found_close;
21902133 }
21912134 }
21922135
21932136 // no closing quote was found on this line: the scalar continues on the next line, so it needs filtering
2194- needs_filter = needs_filter
2195- || (numlines > 1 )
2196- || line_is_blank
2197- || (m_evt_handler->m_curr ->at_line_beginning () && rem.begins_with (' ' ));
2198-
2199- if (pos == npos)
2200- {
2201- _line_progressed (m_evt_handler->m_curr ->line_contents .rem .len );
2202- ++numlines;
2203- }
2204- else
2205- {
2206- _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , pos >= 0 && pos < m_buf.len , m_evt_handler->m_curr ->pos );
2207- _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , m_buf[m_evt_handler->m_curr ->pos .offset + pos] == ' "' , m_evt_handler->m_curr ->pos );
2208- _line_progressed (pos + 1 ); // progress beyond the quote
2209- pos = m_evt_handler->m_curr ->pos .offset - b - 1 ; // but we stop before it
2210- break ;
2211- }
2212-
2137+ needs_filter = true ;
2138+ _line_progressed (rem.len );
22132139 _line_ended ();
22142140 _scan_line ();
2215-
22162141 _check_valid_newline_in_quoted_scalar ();
22172142 }
22182143
2219- if (pos == npos)
2220- {
2221- _c4err (" reached end of file looking for closing quote" );
2222- }
2223- else
2224- {
2225- _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , pos > 0 , m_evt_handler->m_curr ->pos );
2226- _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , s.end () == m_buf.end () || *s.end () == ' "' , m_evt_handler->m_curr ->pos );
2227- _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , s.end () >= m_buf.begin () && s.end () <= m_buf.end (), m_evt_handler->m_curr ->pos );
2228- s = s.sub (0 , pos-1 );
2229- }
2144+ _c4err (" reached end of file while looking for closing quote" );
2145+
2146+ found_close:
2147+
2148+ _c4dbgpf (" found closing quote at: {}" , pos);
2149+ _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , pos != npos, m_evt_handler->m_curr ->pos );
2150+ _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , pos >= 0 , m_evt_handler->m_curr ->pos );
2151+ _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , s.end () >= m_buf.begin () && s.end () <= m_buf.end (), m_evt_handler->m_curr ->pos );
2152+ _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , s.end () == m_buf.end () || *s.end () == ' "' , m_evt_handler->m_curr ->pos );
2153+ _set_first_strict (s, pos);
22302154
22312155 _c4prscalar (" scanned dquoted scalar" , s, /* keep_newlines*/ true );
22322156
@@ -2243,22 +2167,13 @@ void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t
22432167
22442168 // nice explanation here: http://yaml-multiline.info/
22452169 csubstr s = m_evt_handler->m_curr ->line_contents .rem ;
2246- csubstr trimmed = s.triml (' ' );
2247- if (trimmed.str > s.str )
2248- {
2249- _c4dbgp (" skipping whitespace" );
2250- _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , trimmed.str >= s.str , m_evt_handler->m_curr ->pos );
2251- _line_progressed (static_cast <size_t >(trimmed.str - s.str ));
2252- s = trimmed;
2253- }
22542170 _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , s.begins_with (' |' ) || s.begins_with (' >' ), m_evt_handler->m_curr ->pos );
22552171
22562172 _c4dbgpf (" blck: specs={}" , _prs (s));
22572173
22582174 // parse the spec
22592175 BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used
22602176 size_t indentation = npos; // have to find out if no spec is given
2261- csubstr digits;
22622177 if (s.len > 1 )
22632178 {
22642179 _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , s.begins_with_any (" |>" ), m_evt_handler->m_curr ->pos );
@@ -2279,7 +2194,8 @@ void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t
22792194 t = t.first (pos);
22802195 }
22812196 // from here to the end, only digits are considered
2282- digits = t.left_of (t.first_not_of (" 0123456789" ));
2197+ pos = t.first_not_of (" 0123456789" );
2198+ csubstr digits = t.first (pos);
22832199 if ( ! digits.empty ())
22842200 {
22852201 if (C4_UNLIKELY (digits.len > 1 ))
@@ -2292,6 +2208,11 @@ void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t
22922208 _c4dbgpf (" blck: indentation specified: {}. add {} from curr state -> {}" , indentation, m_evt_handler->m_curr ->indref , indentation+indref);
22932209 indentation += m_evt_handler->m_curr ->indref ;
22942210 }
2211+ else
2212+ {
2213+ if (C4_UNLIKELY (t.len && (!t.begins_with_any (" \t " ) || !t.sub (pos).triml (" \t " ).begins_with (' #' ))))
2214+ _c4err (" parse error: invalid token" );
2215+ }
22952216 }
22962217
22972218 _c4dbgpf (" blck: style={} chomp={} indentation={}" , s.begins_with (' >' ) ? " fold" : " literal" , chomp==CHOMP_CLIP ? " clip" : (chomp==CHOMP_STRIP ? " strip" : " keep" ), indentation);
@@ -6728,16 +6649,8 @@ void ParseEngine<EventHandler>::_handle_map_block()
67286649 {
67296650 _c4dbgpf (" mapblck[RVAL]: indref={} indentation={}" , m_evt_handler->m_curr ->indref +1 , m_evt_handler->m_curr ->line_contents .indentation );
67306651 m_evt_handler->m_curr ->more_indented = false ;
6731- if (m_evt_handler->m_curr ->indref == npos)
6732- {
6733- // FIXME is this needed?
6734- _c4dbgpf (" mapblck[RVAL]: setting indentation={}" , m_evt_handler->m_parent ->indref );
6735- _set_indentation (m_evt_handler->m_curr ->line_contents .indentation );
6736- _line_progressed (m_evt_handler->m_curr ->indref );
6737- if (!m_evt_handler->m_curr ->line_contents .rem .len )
6738- goto mapblck_again;
6739- }
6740- else if (m_evt_handler->m_curr ->indentation_eq_extra ())
6652+ _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , m_evt_handler->m_curr ->indref != npos, m_evt_handler->m_curr ->pos );
6653+ if (m_evt_handler->m_curr ->indentation_eq_extra ())
67416654 {
67426655 _c4dbgp (" mapblck[RVAL]: skip indentation!" );
67436656 _line_progressed (m_evt_handler->m_curr ->indref + 1 );
0 commit comments