@@ -95,26 +95,6 @@ inline bool _is_doc_end_token(csubstr s)
9595
9696inline bool _is_doc_token (csubstr s) noexcept
9797{
98- //
99- // NOTE: this function was failing under some scenarios when
100- // compiled with gcc -O2 (but not -O3 or -O1 or -O0), likely
101- // related to optimizer assumptions on the input string and
102- // possibly caused from UB around assignment to that string (the
103- // call site was in _scan_block()). For more details see:
104- //
105- // https://github.com/biojppm/rapidyaml/issues/440
106- //
107- // The current version does not suffer this problem, but it may
108- // appear again.
109- //
110- //
111- // UPDATE. The problem appeared again in gcc12 and gcc13 with -Os
112- // (but not any other optimization level, nor any other compiler
113- // or version), because the assignment to s is being hoisted out
114- // of the loop which calls this function. Then the length doesn't
115- // enter the s.len >= 3 when it should. Adding a
116- // C4_DONT_OPTIMIZE(var) makes the problem go away.
117- //
11898 if (s.len >= 3 )
11999 {
120100 switch (s.str [0 ])
@@ -162,7 +142,7 @@ C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following)
162142}
163143
164144// ! look for the next newline chars, and jump to the right of those
165- inline substr from_next_line (substr rem)
145+ inline substr _from_next_line (substr rem)
166146{
167147 size_t nlpos = rem.first_of (" \r\n " );
168148 if (nlpos == csubstr::npos)
@@ -268,6 +248,7 @@ ParseEngine<EventHandler>::ParseEngine(EventHandler *evt_handler, ParserOptions
268248 , m_pending_tags()
269249 , m_doc_empty(false )
270250 , m_prev_colon(npos)
251+ , m_prev_val_end(npos)
271252 , m_encoding(NOBOM)
272253 , m_newline_offsets()
273254 , m_newline_offsets_size(0 )
@@ -287,6 +268,7 @@ ParseEngine<EventHandler>::ParseEngine(ParseEngine &&that) noexcept
287268 , m_pending_tags(that.m_pending_tags)
288269 , m_doc_empty(false )
289270 , m_prev_colon(npos)
271+ , m_prev_val_end(npos)
290272 , m_encoding(NOBOM)
291273 , m_newline_offsets(that.m_newline_offsets)
292274 , m_newline_offsets_size(that.m_newline_offsets_size)
@@ -306,6 +288,7 @@ ParseEngine<EventHandler>::ParseEngine(ParseEngine const& that)
306288 , m_pending_tags(that.m_pending_tags)
307289 , m_doc_empty(false )
308290 , m_prev_colon(npos)
291+ , m_prev_val_end(npos)
309292 , m_encoding(NOBOM)
310293 , m_newline_offsets()
311294 , m_newline_offsets_size()
@@ -333,6 +316,7 @@ ParseEngine<EventHandler>& ParseEngine<EventHandler>::operator=(ParseEngine &&th
333316 m_pending_tags = that.m_pending_tags ;
334317 m_doc_empty = that.m_doc_empty ;
335318 m_prev_colon = that.m_prev_colon ;
319+ m_prev_val_end = that.m_prev_val_end ;
336320 m_encoding = that.m_encoding ;
337321 m_newline_offsets = (that.m_newline_offsets );
338322 m_newline_offsets_size = (that.m_newline_offsets_size );
@@ -356,6 +340,7 @@ ParseEngine<EventHandler>& ParseEngine<EventHandler>::operator=(ParseEngine cons
356340 m_pending_tags = that.m_pending_tags ;
357341 m_doc_empty = that.m_doc_empty ;
358342 m_prev_colon = that.m_prev_colon ;
343+ m_prev_val_end = that.m_prev_val_end ;
359344 m_encoding = that.m_encoding ;
360345 if (that.m_newline_offsets_capacity > m_newline_offsets_capacity)
361346 _resize_locations (that.m_newline_offsets_capacity );
@@ -379,6 +364,7 @@ void ParseEngine<EventHandler>::_clr()
379364 m_pending_tags = {};
380365 m_doc_empty = true ;
381366 m_prev_colon = npos;
367+ m_prev_val_end = npos;
382368 m_encoding = NOBOM;
383369 m_newline_offsets = {};
384370 m_newline_offsets_size = {};
@@ -409,6 +395,7 @@ void ParseEngine<EventHandler>::_reset()
409395 m_pending_tags = {};
410396 m_doc_empty = true ;
411397 m_prev_colon = npos;
398+ m_prev_val_end = npos;
412399 m_bom_len = 0 ;
413400 m_encoding = NOBOM;
414401 m_bom_line = 0 ;
@@ -896,8 +883,7 @@ bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RE
896883
897884 bool needs_filter = false ;
898885 size_t col = 0 ; // zero-based column
899- size_t offs = 0 ;
900- size_t offsp1;
886+ size_t offs = 0 ; // offset
901887 for ( ; offs < s.len ; ++offs, ++col)
902888 {
903889 const char c = s.str [offs];
@@ -910,11 +896,10 @@ bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RE
910896 goto ended_scalar;
911897 case ' \n ' :
912898 _c4dbgpf (" found newline. offs={} col={}" , offs, col);
913- offsp1 = offs + 1 ;
914- if (s.len > offsp1)
899+ if (s.len > offs + 1 )
915900 {
916- csubstr next_line = s.sub (offsp1 ).triml (_RYML_WITH_OR_WITHOUT_TAB_TOKENS (" \t " , ' ' ));
917- if (next_line.begins_with_any (" ,]#" )) // any of the characters we're interested in
901+ csubstr next_line = s.sub (offs + 1 ).triml (_RYML_WITH_OR_WITHOUT_TAB_TOKENS (" \t " , ' ' ));
902+ if (next_line.begins_with_any (" ,]#: " )) // any of the characters we're interested in
918903 {
919904 _c4dbgpf (" found terminating character beginning next line: '{}'" , next_line.str [0 ]);
920905 goto ended_scalar;
@@ -932,14 +917,13 @@ bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RE
932917 break ;
933918 case ' :' :
934919 _c4dbgp (" found suspicious ':'" );
935- offsp1 = offs + 1 ;
936- if (s.len > offsp1)
920+ if (s.len > offs + 1 )
937921 {
938- char next = s.str [offsp1 ];
922+ char next = s.str [offs + 1 ];
939923 _c4dbgpf (" next char is '{}'" , _c4prc (next));
940924 if (next == ' \r ' )
941925 {
942- csubstr after = s.sub (offsp1 ).triml (' \r ' );
926+ csubstr after = s.sub (offs + 1 ).triml (' \r ' );
943927 if (after.len )
944928 {
945929 next = after.str [0 ];
@@ -959,7 +943,7 @@ bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RE
959943 }
960944 else
961945 {
962- _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , s.len == offsp1 , m_evt_handler->m_curr ->pos );
946+ _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , s.len == offs + 1 , m_evt_handler->m_curr ->pos );
963947 _line_progressed (col);
964948 _c4err (" missing termination: '{}'" , c); // noreturn
965949 }
@@ -1461,7 +1445,7 @@ substr ParseEngine<EventHandler>::_peek_next_line(size_t pos) const
14611445 goto next_is_empty;
14621446
14631447 // look for the next newline chars, and jump to the right of those
1464- rem = from_next_line (m_buf.sub (pos));
1448+ rem = _from_next_line (m_buf.sub (pos));
14651449 if (rem.empty ())
14661450 goto next_is_empty;
14671451
@@ -1539,7 +1523,7 @@ void ParseEngine<EventHandler>::_line_ended_undo()
15391523
15401524// -----------------------------------------------------------------------------
15411525template <class EventHandler >
1542- void ParseEngine<EventHandler>::_set_indentation(size_t indentation)
1526+ void ParseEngine<EventHandler>::_set_indentation(size_t indentation) noexcept
15431527{
15441528 m_evt_handler->m_curr ->indref = indentation;
15451529 _c4dbgpf (" state[{}]: saving indentation: {}" , m_evt_handler->m_curr ->level , m_evt_handler->m_curr ->indref );
@@ -1553,6 +1537,13 @@ void ParseEngine<EventHandler>::_save_indentation()
15531537 _c4dbgpf (" state[{}]: saving indentation: {}" , m_evt_handler->m_curr ->level , m_evt_handler->m_curr ->indref );
15541538}
15551539
1540+ template <class EventHandler >
1541+ void ParseEngine<EventHandler>::_mark_seqflow_val_end() noexcept
1542+ {
1543+ _c4dbgpf (" SEQFLOW. mark val end at line={}" , m_evt_handler->m_curr ->pos .line );
1544+ m_prev_val_end = m_evt_handler->m_curr ->pos .line ;
1545+ }
1546+
15561547
15571548// -----------------------------------------------------------------------------
15581549
@@ -1589,6 +1580,11 @@ void ParseEngine<EventHandler>::_end_flow_container(size_t orig_indent)
15891580 _c4dbgp (" flow container: end map as key!" );
15901581 }
15911582 }
1583+ else if (has_any (RSEQ))
1584+ {
1585+ _RYML_ASSERT_PARSE_ (m_evt_handler->m_stack .m_callbacks , has_any (RFLOW), m_evt_handler->m_curr ->pos );
1586+ _mark_seqflow_val_end ();
1587+ }
15921588}
15931589
15941590template <class EventHandler >
@@ -5095,6 +5091,7 @@ void ParseEngine<EventHandler>::_handle_seq_flow()
50955091 csubstr maybe_filtered = _maybe_filter_val_scalar_squot (sc);
50965092 m_evt_handler->set_val_scalar_squoted (maybe_filtered);
50975093 addrem_flags (RNXT, RVAL);
5094+ _mark_seqflow_val_end ();
50985095 }
50995096 else if (first == ' "' )
51005097 {
@@ -5103,6 +5100,7 @@ void ParseEngine<EventHandler>::_handle_seq_flow()
51035100 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot (sc);
51045101 m_evt_handler->set_val_scalar_dquoted (maybe_filtered);
51055102 addrem_flags (RNXT, RVAL);
5103+ _mark_seqflow_val_end ();
51065104 }
51075105 // block scalars (ie | and >) cannot appear in flow containers
51085106 else if (_scan_scalar_plain_seq_flow (&sc))
@@ -5111,6 +5109,7 @@ void ParseEngine<EventHandler>::_handle_seq_flow()
51115109 csubstr maybe_filtered = _maybe_filter_val_scalar_plain (sc, m_evt_handler->m_curr ->indref );
51125110 m_evt_handler->set_val_scalar_plain (maybe_filtered);
51135111 addrem_flags (RNXT, RVAL);
5112+ _mark_seqflow_val_end ();
51145113 }
51155114 else if (first == ' [' )
51165115 {
@@ -5208,6 +5207,11 @@ void ParseEngine<EventHandler>::_handle_seq_flow()
52085207 addrem_flags (RVAL, RNXT);
52095208 m_evt_handler->add_sibling ();
52105209 _line_progressed (1 );
5210+ if (m_evt_handler->m_curr ->line_contents .rem .begins_with (' #' ))
5211+ {
5212+ _c4err (" parse error: invalid comment after comma" );
5213+ }
5214+ _mark_seqflow_val_end ();
52115215 }
52125216 else if (first == ' ]' )
52135217 {
@@ -5218,12 +5222,20 @@ void ParseEngine<EventHandler>::_handle_seq_flow()
52185222 }
52195223 else if (first == ' :' )
52205224 {
5221- _c4dbgpf (" seqflow[RNXT]: actually seqimap at node[{}]" , m_evt_handler->m_curr ->node_id );
5222- m_evt_handler->actually_val_is_first_key_of_new_map_flow ();
5223- _set_indentation (m_evt_handler->m_parent ->indref );
5224- _line_progressed (1 );
5225- addrem_flags (RSEQIMAP|RVAL, RNXT);
5226- goto seqflow_finish;
5225+ _c4dbgpf (" seqflow[RNXT]: line@valend={} line@now={}" , m_prev_val_end, m_evt_handler->m_curr ->pos .line );
5226+ if (m_prev_val_end != NONE && m_evt_handler->m_curr ->pos .line == m_prev_val_end)
5227+ {
5228+ _c4dbgpf (" seqflow[RNXT]: actually seqimap at node[{}]" , m_evt_handler->m_curr ->node_id );
5229+ m_evt_handler->actually_val_is_first_key_of_new_map_flow ();
5230+ _set_indentation (m_evt_handler->m_parent ->indref );
5231+ _line_progressed (1 );
5232+ addrem_flags (RSEQIMAP|RVAL, RNXT);
5233+ goto seqflow_finish;
5234+ }
5235+ else
5236+ {
5237+ _c4err (" parse error" );
5238+ }
52275239 }
52285240 else
52295241 {
@@ -5415,6 +5427,10 @@ void ParseEngine<EventHandler>::_handle_map_flow()
54155427 m_evt_handler->add_sibling ();
54165428 addrem_flags (RKEY, RKCL);
54175429 _line_progressed (1 );
5430+ if (m_evt_handler->m_curr ->line_contents .rem .begins_with (' #' ))
5431+ {
5432+ _c4err (" parse error: invalid comment after comma" );
5433+ }
54185434 }
54195435 else
54205436 {
@@ -5527,6 +5543,10 @@ void ParseEngine<EventHandler>::_handle_map_flow()
55275543 m_evt_handler->add_sibling ();
55285544 addrem_flags (RKEY, RNXT);
55295545 _line_progressed (1 );
5546+ if (m_evt_handler->m_curr ->line_contents .rem .begins_with (' #' ))
5547+ {
5548+ _c4err (" parse error: invalid comment after comma" );
5549+ }
55305550 }
55315551 else if (rem.begins_with (' }' ))
55325552 {
0 commit comments