From 1aedd1bcd07f878637cabd34bfdec2e16a1697f9 Mon Sep 17 00:00:00 2001
From: Srinidhi Hegde
Date: Mon, 8 May 2023 12:09:38 -0400
Subject: [PATCH 1/2] Update bbhash.pyx to add memory info

---
 bbhash.pyx | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/bbhash.pyx b/bbhash.pyx
index c6cfc74..9a055d4 100644
--- a/bbhash.pyx
+++ b/bbhash.pyx
@@ -45,6 +45,7 @@ cdef extern from "BooPHF.h" namespace "boomphf":
     cdef cppclass mphf[T,U]:
         mphf(unsigned long long, vector[T], int, float, bool, bool) except +
         uint64_t lookup(uint64_t)
+        uint64_t totalBitSize()
         void save(ofstream)
         void load(ifstream)
 
@@ -82,6 +83,10 @@ cdef class PyMPHF:
 
         return mp_hashes
 
+    def get_mem(self):
+        "Return the size of the underlying MPHF in bits (totalBitSize())."
+        return deref(self.c_mphf).totalBitSize()
+
     def save(self, str filename):
         cdef ofstream* outputter
         outputter = new ofstream(filename.encode(), binary)

From 92ecd8e36619e72e039f8082c8ac84590de7128a Mon Sep 17 00:00:00 2001
From: Srinidhi Hegde
Date: Mon, 8 May 2023 12:10:42 -0400
Subject: [PATCH 2/2] Update bbhash_table.pyx to add memory info

---
 bbhash_table.pyx | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/bbhash_table.pyx b/bbhash_table.pyx
index 7ae4111..7eef6fd 100644
--- a/bbhash_table.pyx
+++ b/bbhash_table.pyx
@@ -54,6 +54,16 @@ class BBHashTable(object):
 
         self.mphf_to_value[mp_hash] = value
 
+    def get_mem(self):
+        "Get memory usage, in bytes."
+        # self.mphf.get_mem() is expected to report bits (it forwards
+        # totalBitSize()), whereas .nbytes counts bytes. Round the bit
+        # count up to whole bytes so all three terms share one unit.
+        mphf_nbytes = (self.mphf.get_mem() + 7) // 8
+        return mphf_nbytes + \
+            self.mphf_to_hash.nbytes + \
+            self.mphf_to_value.nbytes
+
     def get_unique_values(self, hashes, require_exist=False):
         "Retrieve unique values for item."
         values = defaultdict(int)