Source code for f_lib.utils._file_hash
"""Calculate the hash of files."""
from __future__ import annotations
from pathlib import Path
from typing import TYPE_CHECKING, ClassVar
if TYPE_CHECKING:
import hashlib
from collections.abc import Iterable
from _typeshed import StrPath
[docs]
class FileHash:
    """Wrapper for hashlib to easily calculate file hashes.

    The wrapped hash object is updated in place by the ``add_*`` methods;
    the ``digest``/``hexdigest`` properties expose the result for everything
    that has been added so far.

    Attributes:
        DEFAULT_CHUNK_SIZE: Default chunk size if not defined.

    Note:
        Does not support algorithms with variable length digests (e.g. SHAKE).

    """

    # 10 MiB - number of bytes in each read operation. The chunk size only
    # bounds memory use per read; it has no effect on the resulting digest.
    DEFAULT_CHUNK_SIZE: ClassVar[int] = 10 * 1024 * 1024

    def __init__(self, hash_alg: hashlib._Hash, *, chunk_size: int = DEFAULT_CHUNK_SIZE) -> None:
        """Instantiate class.

        Args:
            hash_alg: Instance of a hashlib algorithm.
            chunk_size: When reading a file, it will be read this many bytes at
                a time. Larger values are more time efficient while smaller
                values are more memory efficient.

        """
        self._hash = hash_alg  # protected to discourage direct access
        self.chunk_size = chunk_size

    @property
    def digest(self) -> bytes:
        """Digest of the data hashed so far.

        Returns:
            This is a bytes object of size ``digest_size`` which may contain
            bytes in the whole range from 0 to 255.

        """
        return self._hash.digest()

    @property
    def digest_size(self) -> int:
        """Size of the resulting hash in bytes."""
        return self._hash.digest_size

    @property
    def hexdigest(self) -> str:
        """Digest of the data hashed so far.

        Returns:
            String object that is double the length of ``digest`` and contains
            only hexadecimal digits.

        """
        return self._hash.hexdigest()

    def add_file(self, file_path: StrPath) -> None:
        """Add file contents to the hash.

        The file is read in binary mode, ``chunk_size`` bytes at a time, so
        the whole file never needs to be held in memory at once.

        Args:
            file_path: Path of the file to add.

        """
        with Path(file_path).open("rb") as stream:
            # The walrus expression performs the one and only read per
            # iteration. Reading again inside the loop body would discard
            # every other chunk and produce a wrong digest.
            while chunk := stream.read(self.chunk_size):
                self._hash.update(chunk)

    def add_file_name(
        self,
        file_path: StrPath,
        *,
        end_character: str = "\0",
        relative_to: StrPath | None = None,
    ) -> None:
        """Add file name to the hash. This includes the path.

        Args:
            file_path: Path of the file to add. The full path (or relative) is
                included when adding it to the hash. This is not resolved prior
                to use. It is used as-is unless another argument acts upon it.
            end_character: Character that will be added to the end of the file_path.
                This can be an empty string.
            relative_to: Optionally, convert the file_path to path relative to
                this one. It is recommended that both paths be absolute.

        """
        path = Path(file_path)
        # A falsy ``relative_to`` (``None`` or an empty string) leaves the
        # path untouched.
        if relative_to:
            path = path.relative_to(relative_to)
        self._hash.update((str(path) + end_character).encode())

    def add_files(
        self,
        file_paths: Iterable[StrPath],
        *,
        relative_to: StrPath | None = None,
    ) -> None:
        """Add files to the hash.

        For each path, the file name is added first, then the file contents,
        then a null byte that separates one file from the next.

        Args:
            file_paths: Paths of the files to add. The full path (or relative) is
                included when adding it to the hash. This is not resolved prior
                to use. It is used as-is unless another argument acts upon it.
            relative_to: Optionally, convert the file_path to path relative to
                this one. It is recommended that both paths be absolute.

        """
        for fp in file_paths:
            self.add_file_name(fp, relative_to=relative_to)
            self.add_file(fp)
            # end of file contents; only necessary with multiple files
            self._hash.update(b"\0")