Skip to content

Commit 8e82ec3

Browse files
committed
Add TagCache
1 parent 078cc31 commit 8e82ec3

8 files changed

Lines changed: 344 additions & 37 deletions

File tree

src/c4/yml/event_handler_tree.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle
3737
* @{ */
3838

3939
using state = EventHandlerTreeState;
40+
enum { requires_strings_on_buffers = false };
4041

4142
/** @} */
4243

@@ -45,6 +46,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle
4546
/** @cond dev */
4647
Tree *C4_RESTRICT m_tree;
4748
id_type m_curr_doc;
49+
TagCache m_tag_cache;
4850

4951
#ifdef RYML_DBG
5052
#define _enable_(bits) _enable__<bits>(); _c4dbgpf("node[{}]: enable {}", m_curr->node_id, #bits)
@@ -91,11 +93,13 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle
9193
_reset_parser_state(m_curr, id, id);
9294
}
9395
m_curr_doc = m_tree->ancestor_doc(id);
96+
m_tag_cache.clear();
9497
}
9598

9699
Callbacks const& callbacks() const { return m_stack.m_callbacks; }
97100

98101
C4_ALWAYS_INLINE TagDirectives& tag_directives() { return m_tree->m_tag_directives; } // NOLINT(readability-make-member-function-const)
102+
C4_ALWAYS_INLINE TagCache &tag_cache() { return m_tag_cache; }
99103

100104
/** @} */
101105

src/c4/yml/parse_engine.def.hpp

Lines changed: 72 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -476,14 +476,30 @@ void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_
476476
}
477477
_ryml_relocate(m_evt_handler->m_src);
478478
for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
479-
_ryml_relocate(m_pending_tags.annotations[i].str); // LCOV_EXCL_LINE
479+
{
480+
_ryml_relocate(m_pending_tags.annotations[i].str); // LCOV_EXCL_LINE
481+
_ryml_relocate(m_pending_tags.annotations[i].orig); // LCOV_EXCL_LINE
482+
}
480483
for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
481-
_ryml_relocate(m_pending_anchors.annotations[i].str); // LCOV_EXCL_LINE
482-
TagDirectives &tds = m_evt_handler->tag_directives();
483-
for(size_t i = 0, sz = tds.size(); i < sz; ++i)
484484
{
485-
_ryml_relocate(tds.m_directives[i].handle); // LCOV_EXCL_LINE
486-
_ryml_relocate(tds.m_directives[i].prefix); // LCOV_EXCL_LINE
485+
_ryml_relocate(m_pending_anchors.annotations[i].str); // LCOV_EXCL_LINE
486+
_ryml_relocate(m_pending_anchors.annotations[i].orig); // LCOV_EXCL_LINE
487+
}
488+
{
489+
TagDirectives &tds = m_evt_handler->tag_directives();
490+
for(size_t i = 0, sz = tds.size(); i < sz; ++i)
491+
{
492+
_ryml_relocate(tds.m_directives[i].handle); // LCOV_EXCL_LINE
493+
_ryml_relocate(tds.m_directives[i].prefix); // LCOV_EXCL_LINE
494+
}
495+
}
496+
{
497+
TagCache &tch = m_evt_handler->tag_cache();
498+
for(id_type i = 0, sz = tch.m_entries.size(); i < sz; ++i)
499+
{
500+
_ryml_relocate(tch.m_entries[i].tag); // LCOV_EXCL_LINE
501+
_ryml_relocate(tch.m_entries[i].resolved); // LCOV_EXCL_LINE
502+
}
487503
}
488504
if(other)
489505
_ryml_relocate(*other); // LCOV_EXCL_LINE
@@ -4608,27 +4624,62 @@ template<class EventHandler>
46084624
/** Resolve @p tag for the current document.
 *
 * The handler's tag cache is consulted first. On a miss, the tag is
 * resolved through the handler's tag directives (writing into the
 * arena when the resolution needs storage), and the result is added
 * to the cache at the position reported by the failed lookup.
 *
 * @param tag the tag to resolve; asserted to be inside the source buffer
 * @return the resolved tag. Its `str` may be null when the arena
 *         allocation did not succeed (eg with handlers that do not
 *         provide an arena). */
csubstr ParseEngine<EventHandler>::_resolve_tag(csubstr tag)
{
    _c4dbgpf("resolving tag: {} curr_doc={}", _prs(tag), m_evt_handler->m_curr_doc);
    _c4assert(tag.is_sub(_buf()));
    TagCache::LookupResult ret = m_evt_handler->tag_cache().find(tag, m_evt_handler->m_curr_doc);
    if(ret)
    {
        _c4dbgpf("resolving tag: found in cache[{}]: {}", ret.pos, _prs(ret.resolved));
        return ret.resolved;
    }
    _c4dbgpf("resolving tag: not in cache: {} curr_doc={}", _prs(tag), m_evt_handler->m_curr_doc);
    // first attempt: resolve directly into the unused part of the arena
    size_t bufsz = 0;
    substr buf = m_evt_handler->arena_rem();
    TagDirectives const& C4_RESTRICT tds = m_evt_handler->tag_directives();
    csubstr ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
                               m_evt_handler->m_curr->pos,
                               m_evt_handler->m_stack.m_callbacks);
    _c4dbgpf("resolving tag: bufsz={} ttag.len={} !!ttag.str={}", bufsz, ttag.len, !!ttag.str);
    _c4assert((bufsz > buf.len) == (!ttag.str));
    _c4assert(!!bufsz == (ttag.len == bufsz));
    // try again if the arena size was not enough
    if(!ttag.str)
    {
        // arena.len is the full arena, arena.slack is what remains unused
        _c4dbgpf("tag requires arena, but it was small. arena.len={} arena.slack={} tag.required={}", m_evt_handler->arena().len, m_evt_handler->arena_rem().len, ttag.len);
        _c4assert(ttag.len == bufsz);
        // tag is passed along so that it can be adjusted by the allocation
        buf = _alloc_arena(bufsz, &tag);
        if(buf.str) // the alloc may fail eg with the ints handler
        {
            ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
                               m_evt_handler->m_curr->pos,
                               m_evt_handler->m_stack.m_callbacks);
        }
        _c4assert(ttag.len == bufsz);
        _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
    }
    else if(bufsz) // if we succeeded writing into the arena, grow it as needed
    {
        _c4dbgp("tag required arena. update size");
        _c4assert(ttag.len == bufsz);
        _c4assert(ttag.is_sub(buf));
        // the bytes are already written; this only claims them
        (void)_alloc_arena(bufsz);
    }
    if C4_IF_CONSTEXPR (EventHandler::requires_strings_on_buffers) // NOLINT
    {
        _c4dbgpf("handler requires tags in buffers. !!ttag.str={} in_arena={} in_src={}", !!ttag.str, ttag.is_sub(m_evt_handler->arena()), ttag.is_sub(_buf()));
        // is the resolved tag not in any of those buffers?
        if(ttag.str && !ttag.is_sub(m_evt_handler->arena()) && !ttag.is_sub(_buf()))
        {
            _c4dbgpf("copying resolved tag to arena: slack={} required={}", m_evt_handler->arena_rem().len, ttag.len);
            buf = _alloc_arena(ttag.len, &tag);
            if(buf.str) // the alloc may fail eg with the ints handler
                memcpy(buf.str, ttag.str, ttag.len);
            ttag = buf;
            _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
        }
    }
    _c4dbgpf("resolved tag: {} --> [{}]~~~{}~~~", _prs(tag), ttag.len, ttag.str ? ttag : csubstr("(out of size)"));
    // cache the hard-earned result!
    m_evt_handler->tag_cache().add(tag, ttag, m_evt_handler->m_curr_doc, ret.pos);
    return ttag;
}
46344685

@@ -8248,18 +8299,19 @@ uint32_t ParseEngine<EventHandler>::_get_annotations_same_line(csubstr token_sou
82488299
_c4dbgpf("second annotation: {} indent={} line={}", second->str, second->indentation, second->line);
82498300
}
82508301
auto extract_string = [&](EntryPtr e){
8251-
if(e->str.begins_with_any("!<"))
8302+
// tags can be null when the arena ran out of space
8303+
if(!e->str.str || e->str.begins_with_any("!<"))
82528304
{
82538305
csubstr tag = e->orig;
82548306
_c4assert(tag.str);
82558307
_c4assert(tag.len);
82568308
_c4assert(tag.is_sub(token_soup));
8257-
_c4dbgpf("tag: {} -> {}", e->str, tag);
8309+
_c4dbgpf("tag: {} -> {}", e->str.str ? e->str : csubstr("(out of size)"), tag);
82588310
return tag;
82598311
}
82608312
csubstr anchor = e->str;
8261-
_c4assert(anchor.str);
82628313
_c4assert(anchor.len);
8314+
_c4assert(anchor.str);
82638315
_c4assert(anchor.is_sub(token_soup));
82648316
_c4assert(!anchor.begins_with('&'));
82658317
_c4assert(anchor.str - token_soup.str > 0);

src/c4/yml/tag.cpp

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,5 +532,81 @@ csubstr TagDirectives::resolve(substr buf, size_t *bufsz, csubstr tag, id_type i
532532
}
533533
}
534534

535+
536+
//-----------------------------------------------------------------------------
537+
/** Look up the resolution cached for @p tag in document @p doc_id.
 *
 * Entries are kept ordered by tag text and then by doc id, which is
 * the order asserted by add(). Equality is decided with the same
 * content comparison as the ordering, so that the linear and the
 * binary search agree with each other and with add().
 *
 * @param tag the tag to look for
 * @param doc_id the id of the document where the tag appears
 * @param linear_threshold with fewer entries than this, the search is
 *        linear; otherwise it is a binary search
 * @return on a hit, the cached resolution and the index of its entry.
 *         On a miss, an empty resolution and the index where the new
 *         entry must be inserted to keep the entries ordered. */
TagCache::LookupResult TagCache::find(csubstr tag, id_type doc_id, id_type linear_threshold) const noexcept
{
    LookupResult ret = {};
    const id_type sz = m_entries.size();
    // true for entries ordered strictly before the searched (tag, doc_id)
    auto is_before_key = [&](Entry const& e) noexcept {
        return e.tag < tag || (e.tag == tag && e.doc_id < doc_id);
    };
    // find the first entry which is not ordered before the key
    id_type first = 0;
    if(sz < linear_threshold) // do a linear search on small size
    {
        while(first < sz && is_before_key(m_entries[first]))
            ++first;
    }
    else // do a binary search on larger size
    {
        id_type count = sz;
        while(count)
        {
            id_type halfsz = count / id_type(2);
            id_type mid = first + halfsz;
            _RYML_ASSERT_BASIC_(m_entries.m_callbacks, mid < sz);
            if(is_before_key(m_entries[mid]))
            {
                first = mid + 1;
                _RYML_ASSERT_BASIC_(m_entries.m_callbacks, count >= halfsz + 1);
                count -= halfsz + 1;
            }
            else
            {
                count = halfsz;
            }
        }
    }
    // that entry is either the hit, or marks the insertion position
    ret.pos = first;
    if(first < sz)
    {
        Entry const& e = m_entries[first];
        if(e.doc_id == doc_id && e.tag == tag)
            ret.resolved = e.resolved;
    }
    return ret;
}
595+
596+
void TagCache::add(csubstr tag, csubstr resolved, id_type doc_id, const_iterator pos)
597+
{
598+
id_type sz = m_entries.size();
599+
_RYML_ASSERT_BASIC_(m_entries.m_callbacks, pos <= sz);
600+
_RYML_ASSERT_BASIC_(m_entries.m_callbacks, pos == sz || tag < m_entries[pos].tag || (tag == m_entries[pos].tag && doc_id < m_entries[pos].doc_id));
601+
m_entries.resize(sz + 1);
602+
Entry *C4_RESTRICT ptr = m_entries.m_stack;
603+
if(pos < sz)
604+
memmove(ptr + pos + 1, ptr + pos, (sz - pos) * sizeof(Entry));
605+
ptr[pos].tag = tag;
606+
ptr[pos].resolved = resolved;
607+
ptr[pos].doc_id = doc_id;
608+
_c4dbgpf("tagcache: add entry @pos={}: docid={} {}->{}", pos, doc_id, tag, (resolved.str ? resolved : csubstr("(out of size)")));
609+
}
610+
535611
} // namespace yml
536612
} // namespace c4

src/c4/yml/tag.hpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
#ifndef _C4_YML_TAG_HPP_
22
#define _C4_YML_TAG_HPP_
33

4+
#ifndef _C4_YML_COMMON_HPP_
45
#include <c4/yml/common.hpp>
6+
#endif
7+
#ifndef _C4_YML_DETAIL_STACK_HPP_
8+
#include <c4/yml/detail/stack.hpp>
9+
#endif
510

611
namespace c4 {
712
namespace yml {
@@ -55,6 +60,46 @@ RYML_EXPORT csubstr normalize_tag_long(csubstr tag, substr output);
5560
RYML_EXPORT bool is_custom_tag(csubstr tag);
5661
RYML_EXPORT bool is_valid_tag_handle(csubstr handle);
5762

63+
64+
//-----------------------------------------------------------------------------
65+
66+
67+
struct RYML_EXPORT TagCache
68+
{
69+
struct Entry
70+
{
71+
csubstr tag;
72+
csubstr resolved;
73+
id_type doc_id;
74+
};
75+
using Entries = detail::stack<Entry>;
76+
using const_iterator = id_type;
77+
struct LookupResult
78+
{
79+
csubstr resolved;
80+
const_iterator pos;
81+
operator bool() const noexcept { return resolved.len > 0; }
82+
};
83+
84+
public:
85+
86+
TagCache() noexcept : m_entries() {}
87+
LookupResult find(csubstr tag, id_type doc_id, id_type linear_threshold=Entries::sso_size) const noexcept;
88+
void add(csubstr tag, csubstr resolved, id_type doc_id, const_iterator pos);
89+
90+
void clear() { m_entries.clear(); }
91+
92+
public:
93+
94+
/** @cond dev */
95+
Entries m_entries;
96+
/** @endcond */
97+
98+
};
99+
100+
101+
//-----------------------------------------------------------------------------
102+
58103
struct RYML_EXPORT TagDirective
59104
{
60105
/** Eg <pre>!e!</pre> in <pre>%TAG !e! tag:example.com,2000:app/</pre> */

src/c4/yml/tree.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1528,8 +1528,9 @@ void _normalize_tags_long(Tree *t, id_type node)
15281528
}
15291529
} // namespace
15301530

1531-
void Tree::resolve_tags()
1531+
void Tree::resolve_tags(TagCache &cache)
15321532
{
1533+
(void)cache;
15331534
if(empty())
15341535
return;
15351536
size_t needed_size = 0;

src/c4/yml/tree.hpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,7 @@ class RYML_EXPORT Tree
656656
/** @name tag directives */
657657
/** @{ */
658658

659-
void resolve_tags();
659+
void resolve_tags(TagCache &cache);
660660
void normalize_tags();
661661
void normalize_tags_long();
662662

@@ -1324,6 +1324,11 @@ class RYML_EXPORT Tree
13241324
Callbacks m_callbacks;
13251325

13261326
TagDirectives m_tag_directives;
1327+
1328+
public:
1329+
/** @cond dev */
1330+
/*RYML_DEPRECATED("use Tree::resolve_tags(TagCache&)")*/ void resolve_tags() { TagCache cache; resolve_tags(cache); }
1331+
/** @endcond */
13271332
};
13281333

13291334

0 commit comments

Comments
 (0)