import collections import hashlib import os from datetime import datetime from util import Progress def hashBlock(data): return hashlib.sha256(data).digest()[-HashTree.HASH_LEN:] class HashTree: HASH_LEN = 16 # bytes BLOCK_SIZE = 4096 # bytes ## Prepares a tree containing leafCount leaves. def __init__(self, leafCount): self.store = [b""]*(leafCount*2-1) self.leafStart = leafCount-1 self.leafCount = leafCount self._index = self.leafStart @classmethod def fromFile(cls, filename): with open(filename, "rb") as f: stat = os.fstat(f.fileno()) size = stat.st_size # !! symlinks leafCount = (size-1)//HashTree.BLOCK_SIZE+1 # number of leaf blocks res = cls(leafCount) print(datetime.now(), "hashing file:") progress = Progress(leafCount) for i in range(leafCount): data = f.read(HashTree.BLOCK_SIZE) res.insertLeaf(hashBlock(data)) progress.p(i) progress.done() res.buildTree() return res @classmethod def load(cls, filename): with open(filename, "rb") as f: stat = os.fstat(f.fileno()) size = stat.st_size nodeCount = size//HashTree.HASH_LEN res = cls((nodeCount+1)//2) for i in range(nodeCount): res.store[i] = f.read(HashTree.HASH_LEN) return res def save(self, filename): with open(filename, "wb") as f: for h in self.store: f.write(h) ## Inserts a leaf at the first empty position. # # Useful and used only during the tree construction. def insertLeaf(self,h): self.store[self._index] = h self._index += 1 ## Updates a hash stored in the leaf. def updateLeaf(self, index, h): if index=0: self.store[index] = hashBlock(self.store[index*2+1]+self.store[index*2+2]) index = (index-1)//2 ## Fast construction of the tree over the leaves. O(n). def buildTree(self): print(datetime.now(), "building tree:") progress=Progress(-1, self.leafStart-1) for i in range(self.leafStart-1, -1, -1): self.store[i] = hashBlock(self.store[i*2+1]+self.store[i*2+2]) progress.p(i) progress.done() ## Update faster than repeated insertLeaf. def batchUpdate(self, keysHashes): queue = collections.deque() for (k, v) in sorted(keysHashes): self.store[k] = v parentK = (k-1)//2 if len(queue)==0 or queue[-1]!=parentK: queue.append(parentK) while len(queue)>0: k = queue.pop() self.store[k] = hashBlock(self.store[k*2+1]+self.store[k*2+2]) parentK = (k-1)//2 if (len(queue)==0 or queue[0]!=parentK) and k!=0: queue.appendleft(parentK)