Source code for probables.hashes

""" Probables Hashing Utilities """

from functools import wraps
from hashlib import md5, sha256
from struct import unpack
from typing import Callable, List, Union

from probables.constants import UINT32_T_MAX, UINT64_T_MAX

KeyT = Union[str, bytes]
SimpleHashT = Callable[[KeyT, int], int]
HashResultsT = List[int]
HashFuncT = Callable[[KeyT, int], HashResultsT]
HashFuncBytesT = Callable[[KeyT, int], bytes]


[docs] def hash_with_depth_bytes(func: HashFuncBytesT) -> HashFuncT: """Decorator to turns a function taking a single key and hashes it to bytes. Wraps functions to be used in Bloom filters and Count-Min sketch data structures. Args: key (str): The element to be hashed depth (int): The number of hash permutations to compute Returns: list(int): 64-bit hashed representation of key Note: Arguments shown are as it will be after decorated""" @wraps(func) def hashing_func(key, depth=1): """wrapper function""" res = [] tmp = key if not isinstance(key, str) else key.encode("utf-8") for idx in range(depth): tmp = func(tmp, idx) res.append(unpack("Q", tmp[:8])[0]) # turn into 64 bit number return res return hashing_func
[docs] def hash_with_depth_int(func: HashFuncT) -> HashFuncT: """Decorator to turn a function that takes a single key and hashes it to an int. Wraps functions to be used in Bloom filters and Count-Min sketch data structures. Args: key (str): The element to be hashed depth (int): The number of hash permutations to compute Returns: list(int): 64-bit hashed representation of key Note: Arguments shown are as it will be after decorated""" @wraps(func) def hashing_func(key, depth=1): """wrapper function""" res = [] tmp = func(key, 0) res.append(tmp) for idx in range(1, depth): tmp = func(f"{tmp:x}", idx) res.append(tmp) return res return hashing_func
[docs] def default_fnv_1a(key: KeyT, depth: int = 1) -> List[int]: """The default fnv-1a hashing routine Args: key (str): The element to be hashed depth (int): The number of hash permutations to compute Returns: list(int): List of size depth hashes""" res = [] for idx in range(depth): res.append(fnv_1a(key, idx)) return res
[docs] def fnv_1a(key: KeyT, seed: int = 0) -> int: """Pure python implementation of the 64 bit fnv-1a hash Args: key (str): The element to be hashed seed (int): Add a seed to the initial starting point (0 means no seed) Returns: int: 64-bit hashed representation of key Note: Uses the lower 64 bits when overflows occur""" hval = (14695981039346656037 + (31 * seed)) & UINT64_T_MAX fnv_64_prime = 1099511628211 tmp = list(key) if not isinstance(key, str) else list(map(ord, key)) for t_str in tmp: hval ^= t_str hval *= fnv_64_prime hval &= UINT64_T_MAX return hval
[docs] def fnv_1a_32(key: KeyT, seed: int = 0) -> int: """Pure python implementation of the 32 bit fnv-1a hash Args: key (str): The element to be hashed seed (int): Add a seed to the initial starting point (0 means no seed) Returns: int: 32-bit hashed representation of key Note: Uses the lower 32 bits when overflows occur""" hval = (0x811C9DC5 + (31 * seed)) & UINT32_T_MAX fnv_32_prime = 0x01000193 tmp = list(key) if not isinstance(key, str) else list(map(ord, key)) for t_str in tmp: hval ^= t_str hval *= fnv_32_prime hval &= UINT32_T_MAX return hval
[docs] @hash_with_depth_bytes def default_md5(key: KeyT, *args, **kwargs) -> bytes: """The default md5 hashing routine Args: key (str): The element to be hashed depth (int): The number of hash permutations to compute Returns: list(int): List of 64-bit hashed representation of key hashes Note: Returns the upper-most 64 bits""" return md5(key).digest() # type: ignore
[docs] @hash_with_depth_bytes def default_sha256(key: KeyT, *args, **kwargs) -> bytes: """The default sha256 hashing routine Args: key (str): The element to be hashed depth (int): The number of hash permutations to compute Returns: list(int): List of 64-bit hashed representation of key hashes Note: Returns the upper-most 64 bits""" return sha256(key).digest() # type: ignore