3535from pypegasus .replication .ttypes import query_cfg_request
3636from pypegasus .rrdb import *
3737from pypegasus .rrdb .ttypes import scan_request , get_scanner_request , update_request , key_value , multi_put_request , \
38- multi_get_request , multi_remove_request
38+ multi_get_request , multi_remove_request , filter_type
3939from pypegasus .transport .protocol import *
4040from pypegasus .utils .tools import restore_key , get_ttl , bytes_cmp , ScanOptions
4141
@@ -504,6 +504,10 @@ def start_scan(self):
504504 request .stop_inclusive = self ._scan_options .stop_inclusive
505505 request .batch_size = self ._scan_options .batch_size
506506 request .need_check_hash = self ._check_hash
507+ request .sort_key_filter_type = self ._scan_options .sortkey_filter_type
508+ request .sort_key_filter_pattern = blob (self ._scan_options .sortkey_filter_pattern )
509+ request .hash_key_filter_type = self ._scan_options .hashkey_filter_type
510+ request .hash_key_filter_pattern = blob (self ._scan_options .hashkey_filter_pattern )
507511
508512 op = RrdbGetScannerOperator (self ._gpid , request , self ._partition_hash )
509513 session = self ._table .get_session (self ._gpid )
@@ -605,6 +609,9 @@ def generate_key(cls, hash_key, sort_key):
605609 hash_key_len = len (hash_key )
606610 sort_key_len = len (sort_key )
607611
612+ if hash_key_len >= 0xFFFF :
613+ raise ValueError ("hash_key length must be less than 65535" )
614+
608615 if sort_key_len > 0 :
609616 values = (hash_key_len , hash_key , sort_key )
610617 s = struct .Struct ('>H' + str (hash_key_len )+ 's' + str (sort_key_len )+ 's' )
@@ -619,24 +626,50 @@ def generate_key(cls, hash_key, sort_key):
619626
@classmethod
def generate_next_bytes(cls, buff):
    """
    Return a copy of `buff` with its last non-0xFF byte incremented by one.

    The buffer is scanned from the end towards the start. The first byte
    found that is not 0xFF is incremented; all other bytes, including any
    trailing 0xFF bytes, are left as they are. If every byte is 0xFF (or the
    buffer is empty), a single 0x00 byte is appended instead.

    The input is never modified. The result has the same kind of type as the
    input: `str` in -> `str` out, `bytearray` in -> new `bytearray` out,
    anything else accepted by `bytearray(...)` -> `bytes` out.

    If `buff` is a string, it is assumed to be encoded with 'latin-1' to
    ensure a 1:1 mapping between characters and bytes.

    :param buff: the byte sequence (or latin-1 representable string) to advance.
    :return: the advanced sequence, as described above.
    :raises UnicodeEncodeError: if `buff` is a string containing characters
        outside the 0-255 range.
    """
    is_str = isinstance(buff, str)
    if is_str:
        arr = bytearray(buff.encode('latin-1'))
    else:
        # bytearray(...) always copies, so a caller-owned bytearray is not
        # changed behind the caller's back.
        arr = bytearray(buff)

    for pos in range(len(arr) - 1, -1, -1):
        if arr[pos] != 0xFF:
            arr[pos] += 1
            break
    else:
        # Nothing could be incremented (empty or all 0xFF): extend by a NUL byte.
        arr.append(0x00)

    if is_str:
        return arr.decode('latin-1')
    if isinstance(buff, bytearray):
        return arr
    return bytes(arr)
633661
@classmethod
def generate_next_key(cls, hash_key, stop_sort_key):
    """
    Build the key for (`hash_key`, `stop_sort_key`) and return its successor.

    The combined key is produced by `generate_key`; its raw bytes are then
    advanced with `generate_next_bytes` and wrapped in a `blob`.

    :param hash_key: hash key passed through to `generate_key`.
    :param stop_sort_key: sort key passed through to `generate_key`.
    :return: a `blob` holding the advanced key bytes.
    :raises ValueError: propagated from `generate_key` when the hash key is
        too long.
    """
    # NOTE(review): assumes the object returned by generate_key exposes raw()
    # yielding bytes/str accepted by generate_next_bytes - confirm against blob.
    key = cls.generate_key(hash_key, stop_sort_key)
    return blob(cls.generate_next_bytes(key.raw()))
666+
@classmethod
def generate_stop_key(cls, hash_key, stop_sort_key):
    """
    Compute the stop key of a scan and whether that key is inclusive.

    :param hash_key: hash key being scanned.
    :param stop_sort_key: optional upper sort key; any falsy value (None,
        empty) means "no explicit stop".
    :return: a `(stop_key, stop_inclusive)` tuple. With a stop sort key the
        result is `(generate_key(hash_key, stop_sort_key), True)`; without
        one it is `(blob(generate_next_bytes(hash_key)), False)`.
    """
    if stop_sort_key:
        return cls.generate_key(hash_key, stop_sort_key), True

    # No explicit stop: use the bytes following hash_key as an exclusive bound.
    # Wrapped in blob so both branches hand back the same kind of object
    # (presumably what generate_key returns - verify against generate_key).
    return blob(cls.generate_next_bytes(hash_key)), False
640673
641674 def __init__ (self , meta_addrs = None , table_name = '' ,
642675 timeout = DEFAULT_TIMEOUT ):
@@ -1012,6 +1045,24 @@ def get_scanner(self, hash_key,
10121045 stop_key , stop_inclusive = self .generate_stop_key (hash_key , stop_sort_key )
10131046 if not stop_inclusive :
10141047 scan_options .stop_inclusive = stop_inclusive
1048+
1049+ # limit key range by prefix filter
1050+ if scan_options .sortkey_filter_type == filter_type .FT_MATCH_PREFIX and \
1051+ len (scan_options .sortkey_filter_pattern ) > 0 :
1052+ prefix_start = self .generate_key (hash_key , scan_options .sortkey_filter_pattern )
1053+ # If the prefix start is after the current start_key, move the scan start to the prefix.
1054+ if bytes_cmp (prefix_start .data , start_key .data ) > 0 :
1055+ start_key = prefix_start
1056+ scan_options .start_inclusive = True
1057+
1058+ prefix_stop = self .generate_next_key (hash_key , scan_options .sortkey_filter_pattern )
1059+ # If the prefix stop is before or equal to the current stop_key, move the scan stop to the prefix stop.
1060+ # The prefix stop represents the next key after hash_key and sortkey_filter_pattern,
1061+ # so stop_inclusive should be False.
1062+ if bytes_cmp (prefix_stop .data , stop_key .data ) <= 0 :
1063+ stop_key = prefix_stop
1064+ scan_options .stop_inclusive = False
1065+
10151066 gpid_list = []
10161067 hash_list = []
10171068 r = bytes_cmp (start_key .data , stop_key .data )
@@ -1041,10 +1092,6 @@ def get_unordered_scanners(self, max_split_count, scan_options):
10411092 size = count // split
10421093 more = count % split
10431094
1044- opt = ScanOptions ()
1045- opt .timeout_millis = scan_options .timeout_millis
1046- opt .batch_size = scan_options .batch_size
1047- opt .snapshot = scan_options .snapshot
10481095 scanner_list = []
10491096 for i in range (split ):
10501097 gpid_list = []
@@ -1056,6 +1103,6 @@ def get_unordered_scanners(self, max_split_count, scan_options):
10561103 gpid_list .append (all_gpid_list [count ])
10571104 hash_list .append (int (count ))
10581105
1059- scanner_list .append (PegasusScanner (self .table , gpid_list , opt , hash_list , True ))
1106+ scanner_list .append (PegasusScanner (self .table , gpid_list , scan_options , hash_list , True ))
10601107
10611108 return scanner_list
0 commit comments