"""Calculate the hash of files."""from__future__importannotationsfrompathlibimportPathfromtypingimportTYPE_CHECKING,ClassVarifTYPE_CHECKING:importhashlibfromcollections.abcimportIterablefrom_typeshedimportStrPath
class FileHash:
    """Wrapper for hashlib to easily calculate file hashes.

    File contents are fed to the wrapped hash object in fixed-size chunks so
    that large files do not need to be loaded into memory all at once.

    Attributes:
        DEFAULT_CHUNK_SIZE: Default chunk size if not defined.

    Note:
        Does not support algorithms with variable length digests (e.g. SHAKE).

    """

    # 10 MiB - number of bytes in each read operation
    DEFAULT_CHUNK_SIZE: ClassVar[int] = 10 * 1024 * 1024

    def __init__(
        self, hash_alg: hashlib._Hash, *, chunk_size: int = DEFAULT_CHUNK_SIZE
    ) -> None:
        """Instantiate class.

        Args:
            hash_alg: Instance of a hashlib algorithm.
            chunk_size: When reading a file, it will be read this many bytes
                at a time. Larger values are more time efficient while smaller
                values are more memory efficient.

        """
        self._hash = hash_alg  # protected to discourage direct access
        self.chunk_size = chunk_size

    @property
    def digest(self) -> bytes:
        """Digest of the data hashed so far.

        Returns:
            This is a bytes object of size ``digest_size`` which may contain
            bytes in the whole range from 0 to 255.

        """
        return self._hash.digest()

    @property
    def digest_size(self) -> int:
        """Size of the resulting hash in bytes."""
        return self._hash.digest_size

    @property
    def hexdigest(self) -> str:
        """Digest of the data hashed so far.

        Returns:
            String object that is double the length of ``digest`` and contains
            only hexadecimal digits.

        """
        return self._hash.hexdigest()

    def add_file(self, file_path: StrPath) -> None:
        """Add file contents to the hash.

        The file is opened in binary mode and consumed ``chunk_size`` bytes at
        a time until an empty read signals the end of the file.

        Args:
            file_path: Path of the file to add.

        """
        with Path(file_path).open("rb") as stream:
            # The assignment expression performs the one and only read per
            # iteration; reading again inside the body would discard every
            # other chunk.
            while chunk := stream.read(self.chunk_size):
                self._hash.update(chunk)

    def add_file_name(
        self,
        file_path: StrPath,
        *,
        end_character: str = "\0",
        relative_to: StrPath | None = None,
    ) -> None:
        """Add file name to the hash. This includes the path.

        Args:
            file_path: Path of the file to add.
                The full path (or relative) is included when adding it to the
                hash. This is not resolved prior to use. It is used as-is
                unless another argument acts upon it.
            end_character: Character that will be added to the end of the
                file_path. This can be an empty string.
            relative_to: Optionally, convert the file_path to path relative to
                this one. It is recommended that both paths be absolute.

        """
        path = Path(file_path)
        if relative_to:
            path = path.relative_to(relative_to)
        self._hash.update((str(path) + end_character).encode())

    def add_files(
        self,
        file_paths: Iterable[StrPath],
        *,
        relative_to: StrPath | None = None,
    ) -> None:
        """Add files to the hash.

        For each file, its name, its contents, and a trailing NUL byte are
        added to the hash, in that order.

        Args:
            file_paths: Paths of the files to add.
                The full path (or relative) is included when adding it to the
                hash. This is not resolved prior to use. It is used as-is
                unless another argument acts upon it.
            relative_to: Optionally, convert the file_path to path relative to
                this one. It is recommended that both paths be absolute.

        """
        for fp in file_paths:
            self.add_file_name(fp, relative_to=relative_to)
            self.add_file(fp)
            # end of file contents; only necessary with multiple files
            self._hash.update(b"\0")