@@ -47,7 +47,7 @@ namespace yml {
4747 * ```
4848 */
4949template <class Fn >
50- void escape_scalar_fn (Fn &&fn, csubstr scalar, bool keep_newlines=false )
50+ C4_NO_INLINE void escape_scalar_fn (Fn &&fn, csubstr scalar, bool keep_newlines=false )
5151{
5252 size_t prev = 0 ; // the last position that was flushed
5353 size_t skip = 0 ; // how much to add to prev
@@ -161,10 +161,72 @@ void escape_scalar_fn(Fn &&fn, csubstr scalar, bool keep_newlines=false)
161161
162162
// Pushes a GCC warning-suppression scope for -Wattributes covering the
// declarations that follow (presumably needed because they combine
// `inline` with a noinline attribute -- TODO confirm). The matching pop
// is not visible in this part of the file.
163163C4_SUPPRESS_WARNING_GCC_WITH_PUSH (" -Wattributes" )
164+
165+ /* * Adjust a position in a scalar, increasing it to account for any
166+ * escaped characters.
167+ *
168+ * @note This is a utility/debugging function, so it is provided in
169+ * this optional header. For this reason, we inline it to obey to the
170+ * One Definition Rule. But then we set the noinline attribute to
171+ * ensure they are not inlined in calling code. */
172+ inline C4_NO_INLINE size_t adjust_pos_with_escapes (csubstr scalar, size_t pos, bool keep_newlines=false )
173+ {
174+ // cast to u8 to avoid having to deal with negative
175+ // signed chars (which are present in some platforms)
176+ uint8_t const * C4_RESTRICT s = reinterpret_cast <uint8_t const *>(scalar.str ); // NOLINT(*-reinterpret-cast)
177+ pos = pos < scalar.len ? pos : scalar.len ;
178+ const size_t newbump = keep_newlines ? 2 : 1 ;
179+ for (size_t i = 0 ; i < pos; ++i)
180+ {
181+ switch (s[i])
182+ {
183+ case UINT8_C (0x5c ): // '\\'
184+ case UINT8_C (0x09 ): // \t
185+ case UINT8_C (0x0d ): // \r
186+ case UINT8_C (0x00 ): // \0
187+ case UINT8_C (0x0c ): // \f (form feed)
188+ case UINT8_C (0x08 ): // \b (backspace)
189+ case UINT8_C (0x07 ): // \a (bell)
190+ case UINT8_C (0x0b ): // \v (vertical tab)
191+ case UINT8_C (0x1b ): // \e (escape)
192+ ++pos;
193+ break ;
194+ case UINT8_C (0x0a ): // \n
195+ pos += newbump;
196+ break ;
197+ case UINT8_C (0xc2 ): // AKA -0x3e
198+ if (i+1 < scalar.len )
199+ {
200+ if (s[i+1 ] == UINT8_C (0xa0 ) // AKA -0x60 -> \_
201+ ||
202+ s[i+1 ] == UINT8_C (0x85 )) // AKA -0x7b -> \N
203+ ++pos;
204+ }
205+ break ;
206+ case UINT8_C (0xe2 ): // AKA -0x1e
207+ if (i+2 < scalar.len )
208+ {
209+ if (s[i+1 ] == UINT8_C (0x80 )) // AKA -0x80
210+ {
211+ if (s[i+2 ] == UINT8_C (0xa8 ) // AKA -0x58 -> \L
212+ ||
213+ s[i+2 ] == UINT8_C (0xa9 )) // AKA -0x57 -> \P
214+ ++pos;
215+ }
216+ }
217+ break ;
218+ default :
219+ break ;
220+ }
221+ }
222+ return pos;
223+ }
224+
225+
164226/* * Escape a scalar to an existing buffer, using @ref escape_scalar_fn
165227 *
166- * @note This is a utility/debugging function, so it is provided in this
167- * ( optional) header. For this reason, we inline it to obey to the
228+ * @note This is a utility/debugging function, so it is provided in
229+ * this optional header. For this reason, we inline it to obey the
168230 * One Definition Rule. But then we set the noinline attribute to
169231 * ensure it is not inlined in calling code. */
170232inline C4_NO_INLINE size_t escape_scalar (substr buffer, csubstr scalar, bool keep_newlines=false )
0 commit comments