@@ -476,14 +476,30 @@ void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_
476476 }
477477 _ryml_relocate (m_evt_handler->m_src );
478478 for (size_t i = 0 ; i < m_pending_tags.num_entries ; ++i)
479- _ryml_relocate (m_pending_tags.annotations [i].str ); // LCOV_EXCL_LINE
479+ {
480+ _ryml_relocate (m_pending_tags.annotations [i].str ); // LCOV_EXCL_LINE
481+ _ryml_relocate (m_pending_tags.annotations [i].orig ); // LCOV_EXCL_LINE
482+ }
480483 for (size_t i = 0 ; i < m_pending_anchors.num_entries ; ++i)
481- _ryml_relocate (m_pending_anchors.annotations [i].str ); // LCOV_EXCL_LINE
482- TagDirectives &tds = m_evt_handler->tag_directives ();
483- for (size_t i = 0 , sz = tds.size (); i < sz; ++i)
484484 {
485- _ryml_relocate (tds.m_directives [i].handle ); // LCOV_EXCL_LINE
486- _ryml_relocate (tds.m_directives [i].prefix ); // LCOV_EXCL_LINE
485+ _ryml_relocate (m_pending_anchors.annotations [i].str ); // LCOV_EXCL_LINE
486+ _ryml_relocate (m_pending_anchors.annotations [i].orig ); // LCOV_EXCL_LINE
487+ }
488+ {
489+ TagDirectives &tds = m_evt_handler->tag_directives ();
490+ for (size_t i = 0 , sz = tds.size (); i < sz; ++i)
491+ {
492+ _ryml_relocate (tds.m_directives [i].handle ); // LCOV_EXCL_LINE
493+ _ryml_relocate (tds.m_directives [i].prefix ); // LCOV_EXCL_LINE
494+ }
495+ }
496+ {
497+ TagCache &tch = m_evt_handler->tag_cache ();
498+ for (id_type i = 0 , sz = tch.m_entries .size (); i < sz; ++i)
499+ {
500+ _ryml_relocate (tch.m_entries [i].tag ); // LCOV_EXCL_LINE
501+ _ryml_relocate (tch.m_entries [i].resolved ); // LCOV_EXCL_LINE
502+ }
487503 }
488504 if (other)
489505 _ryml_relocate (*other); // LCOV_EXCL_LINE
@@ -4608,27 +4624,63 @@ template<class EventHandler>
46084624csubstr ParseEngine<EventHandler>::_resolve_tag(csubstr tag)
46094625{
46104626 _c4dbgpf (" resolving tag: {} curr_doc={}" , _prs (tag), m_evt_handler->m_curr_doc );
4627+ _c4assert (tag.is_sub (_buf ()));
4628+ TagCache::LookupResult ret = m_evt_handler->tag_cache ().find (tag, m_evt_handler->m_curr_doc );
4629+ if (ret)
4630+ {
4631+ _c4dbgpf (" resolving tag: found in cache[{}]: {}" , ret.pos , _prs (ret.resolved ));
4632+ return ret.resolved ;
4633+ }
4634+ _c4dbgpf (" resolving tag: not in cache: {} curr_doc={}" , _prs (tag), m_evt_handler->m_curr_doc );
46114635 size_t bufsz = 0 ;
46124636 substr buf = m_evt_handler->arena_rem ();
46134637 TagDirectives const & C4_RESTRICT tds = m_evt_handler->tag_directives ();
46144638 csubstr ttag = tds.resolve (buf, &bufsz, tag, m_evt_handler->m_curr_doc ,
46154639 m_evt_handler->m_curr ->pos ,
46164640 m_evt_handler->m_stack .m_callbacks );
4617- _c4dbgpf (" resolving tag: bufsz={}" , bufsz);
4618- if (bufsz)
4641+ _c4dbgpf (" resolving tag: bufsz={} ttag.len={} !!ttag.str={}" , bufsz, ttag.len , !!ttag.str );
4642+ _c4assert ((bufsz > buf.len ) == (!ttag.str ));
4643+ _c4assert (!!bufsz == (ttag.len == bufsz));
4644+ // try again if the arena size was not enough
4645+ if (!ttag.str )
4646+ {
4647+ _c4dbgpf (" tag requires arena, but it was small. arena.len={} arena.slack={} tag.required={}" , m_evt_handler->arena_rem ().len , m_evt_handler->arena ().len , ttag.len );
4648+ _c4assert (ttag.len == bufsz);
4649+ buf = _alloc_arena (bufsz, &tag);
4650+ if (buf.str ) // the alloc may fail eg with the ints handler
4651+ {
4652+ ttag = tds.resolve (buf, &bufsz, tag, m_evt_handler->m_curr_doc ,
4653+ m_evt_handler->m_curr ->pos ,
4654+ m_evt_handler->m_stack .m_callbacks );
4655+ }
4656+ _c4assert (ttag.len == bufsz);
4657+ _c4assert (!ttag.str || ttag.is_sub (m_evt_handler->arena ()));
4658+ }
4659+ else if (bufsz) // if we succeeded writing into the arena, grow it as needed
46194660 {
4620- substr bufretry = _alloc_arena (bufsz, &tag);
4621- if (C4_UNLIKELY (bufsz > buf.len ))
4661+ _c4dbgp (" tag required arena. update size" );
4662+ _c4assert (ttag.len == bufsz);
4663+ _c4assert (ttag.is_sub (buf));
4664+ (void )_alloc_arena (bufsz);
4665+ }
4666+ if C4_IF_CONSTEXPR (EventHandler::requires_strings_on_buffers) // NOLINT
4667+ {
4668+ _c4dbgpf (" handler requires tags in buffers. !!ttag.str={} in_arena={} in_src={}" , !!ttag.str , ttag.is_sub (m_evt_handler->arena ()), ttag.is_sub (_buf ()));
4669+ // is the resolved tag not in any of those buffers?
4670+ if (ttag.str && !ttag.is_sub (m_evt_handler->arena ()) && !ttag.is_sub (_buf ()))
46224671 {
4623- if (bufretry. str ) // some handlers may be just counting the required arena size
4624- {
4625- ttag = tds. resolve (bufretry, &bufsz, tag, m_evt_handler-> m_curr_doc ,
4626- m_evt_handler-> m_curr -> pos ,
4627- m_evt_handler-> m_stack . m_callbacks );
4628- }
4672+ _c4dbgpf ( " copying resolved tag to arena: slack={} required={} " , m_evt_handler-> arena_rem (). len , ttag. len );
4673+ buf = _alloc_arena (ttag. len , &tag);
4674+ if (buf. str ) // the alloc may fail eg with the ints handler
4675+ memcpy (buf. str , ttag. str , ttag. len );
4676+ ttag. str = buf. str ; // keep the current len!
4677+ _c4assert (!ttag. str || ttag. is_sub (m_evt_handler-> arena ()));
46294678 }
46304679 }
4631- _c4dbgpf (" resolving tag: {} --> {}" , _prs (tag), _prs (ttag.str ? ttag : csubstr (" (arena full)" )));
4680+ _c4dbgpf (" resolved tag: {} --> [{}]~~~{}~~~" , _prs (tag), ttag.len , ttag.str ? ttag : csubstr (" (out of size)" ));
4681+ _c4assert (ttag.len > 0 );
4682+ // cache the hard-earned result!
4683+ m_evt_handler->tag_cache ().add (tag, ttag, m_evt_handler->m_curr_doc , ret.pos );
46324684 return ttag;
46334685}
46344686
@@ -8248,18 +8300,19 @@ uint32_t ParseEngine<EventHandler>::_get_annotations_same_line(csubstr token_sou
82488300 _c4dbgpf (" second annotation: {} indent={} line={}" , second->str , second->indentation , second->line );
82498301 }
82508302 auto extract_string = [&](EntryPtr e){
8251- if (e->str .begins_with_any (" !<" ))
8303+ // tags can be null when the arena ran out of space
8304+ if (!e->str .str || e->str .begins_with_any (" !<" ))
82528305 {
82538306 csubstr tag = e->orig ;
82548307 _c4assert (tag.str );
82558308 _c4assert (tag.len );
82568309 _c4assert (tag.is_sub (token_soup));
8257- _c4dbgpf (" tag: {} -> {}" , e->str , tag);
8310+ _c4dbgpf (" tag: {} -> {}" , e->str . str ? e-> str : csubstr ( " (out of size) " ) , tag);
82588311 return tag;
82598312 }
82608313 csubstr anchor = e->str ;
8261- _c4assert (anchor.str );
82628314 _c4assert (anchor.len );
8315+ _c4assert (anchor.str );
82638316 _c4assert (anchor.is_sub (token_soup));
82648317 _c4assert (!anchor.begins_with (' &' ));
82658318 _c4assert (anchor.str - token_soup.str > 0 );
0 commit comments