import collections import hashlib import os from datetime import datetime from util import Progress def hashBlock(data): return hashlib.sha256(data).digest()[-HashTree.HASH_LEN:] class HashTree: HASH_LEN=16 # bytes BLOCK_SIZE=4096 # bytes ## Prepares a tree containing leafCount leaves. def __init__(self,leafCount): self.store=[b""]*(leafCount*2-1) self.leafStart=leafCount-1 self.leafCount=leafCount self._index=self.leafStart @classmethod def fromFile(cls,filename): with open(filename,"rb") as f: stat=os.fstat(f.fileno()) size=stat.st_size # !! symlinks leafCount=(size-1)//HashTree.BLOCK_SIZE+1 # number of leaf blocks res=cls(leafCount) print(datetime.now(), "hashing file:") progress=Progress(leafCount) for i in range(leafCount): data=f.read(HashTree.BLOCK_SIZE) res.insertLeaf(hashBlock(data)) progress.p(i) progress.done() res.buildTree() return res @classmethod def load(cls,filename): with open(filename,"rb") as f: stat=os.fstat(f.fileno()) size=stat.st_size nodeCount=size//HashTree.HASH_LEN res=cls((nodeCount+1)//2) for i in range(nodeCount): res.store[i]=f.read(HashTree.HASH_LEN) return res def save(self,filename): with open(filename,"wb") as f: for h in self.store: f.write(h) ## Inserts a leaf at the first empty position. # # Useful and used only during the tree construction. def insertLeaf(self,h): self.store[self._index]=h self._index+=1 ## Updates a hash stored in the leaf. def updateLeaf(self,index,h): if index=0: self.store[index]=hashBlock(self.store[index*2+1]+self.store[index*2+2]) index=(index-1)//2 ## Fast construction of the tree over the leaves. O(n). def buildTree(self): print(datetime.now(), "building tree:") progress=Progress(-1, self.leafStart-1) for i in range(self.leafStart-1,-1,-1): self.store[i]=hashBlock(self.store[i*2+1]+self.store[i*2+2]) progress.p(i) progress.done() ## Update faster than repeated insertLeaf. def batchUpdate(self,keysHashes): queue=collections.deque() for (k,v) in sorted(keysHashes): self.store[k]=v parentK=(k-1)//2 if len(queue)==0 or queue[-1]!=parentK: queue.append(parentK) while len(queue)>0: k=queue.pop() self.store[k]=hashBlock(self.store[k*2+1]+self.store[k*2+2]) parentK=(k-1)//2 if (len(queue)==0 or queue[0]!=parentK) and k!=0: queue.appendleft(parentK)