import collections import hashlib import os from datetime import datetime from util import Progress def hash_block(data): return hashlib.sha256(data).digest()[-HashTree.HASH_LEN:] class HashTree: HASH_LEN = 16 # bytes BLOCK_SIZE = 4096 # bytes ## Prepares a tree containing leaf_count leaves. def __init__(self, leaf_count): self.store = [b""]*(leaf_count*2-1) self.leaf_start = leaf_count-1 self.leaf_count = leaf_count self._index = self.leaf_start @classmethod def from_file(cls, filename): with open(filename, "rb") as f: stat = os.fstat(f.fileno()) size = stat.st_size # !! symlinks leaf_count = (size-1)//HashTree.BLOCK_SIZE+1 # number of leaf blocks res = cls(leaf_count) print(datetime.now(), "hashing file:") progress = Progress(leaf_count) for i in range(leaf_count): data = f.read(HashTree.BLOCK_SIZE) res.insert_leaf(hash_block(data)) progress.p(i) progress.done() res.build_tree() return res @classmethod def load(cls, filename): with open(filename, "rb") as f: stat = os.fstat(f.fileno()) size = stat.st_size node_count = size//HashTree.HASH_LEN res = cls((node_count+1)//2) for i in range(node_count): res.store[i] = f.read(HashTree.HASH_LEN) return res def save(self, filename): with open(filename, "wb") as f: for h in self.store: f.write(h) ## Inserts a leaf at the first empty position. # # Useful and used only during the tree construction. def insert_leaf(self, h): self.store[self._index] = h self._index += 1 ## Updates a hash stored in the leaf. def update_leaf(self, index, h): if index=0: self.store[index] = hash_block(self.store[index*2+1] + self.store[index*2+2]) index = (index-1)//2 ## Fast construction of the tree over the leaves. O(n). def build_tree(self): print(datetime.now(), "building tree:") progress = Progress(-1, self.leaf_start-1) for i in range(self.leaf_start-1, -1, -1): self.store[i] = hash_block(self.store[i*2+1] + self.store[i*2+2]) progress.p(i) progress.done() ## Update faster than repeated insertLeaf. def batch_update(self, keys_hashes): queue = collections.deque() for (k, v) in sorted(keys_hashes): self.store[k] = v parent_k = (k-1)//2 if len(queue)==0 or queue[-1]!=parent_k: queue.append(parent_k) while len(queue)>0: k = queue.pop() self.store[k] = hash_block(self.store[k*2+1] + self.store[k*2+2]) parent_k = (k-1)//2 if (len(queue)==0 or queue[0]!=parent_k) and k!=0: queue.appendleft(parent_k)