import hashlib
import os
from datetime import datetime
from util import Progress
class HashTree:
    ## Binary hash tree over fixed-size file blocks, kept in one flat list.
    #
    # The node at index i has its children at 2*i+1 and 2*i+2. The leaf hashes
    # occupy the last leafCount slots of store, beginning at leafStart; every
    # inner node holds the hash of its two children's hashes concatenated.

    HASH_LEN = 16  # bytes kept from each SHA-256 digest
    BLOCK_SIZE = 4096  # bytes of file data covered by one leaf

    ## Prepares a tree containing leafCount leaves.
    #
    # All 2*leafCount-1 slots start out as empty byte strings; leaves are then
    # filled in order with insertLeaf().
    #
    # @param leafCount number of leaves the tree will hold
    def __init__(self, leafCount):
        self.store = [b""] * (leafCount * 2 - 1)
        self.leafStart = leafCount - 1
        self.leafCount = leafCount
        self._index = self.leafStart  # next free leaf slot for insertLeaf()

    ## Hashes data with SHA-256 and keeps the trailing HASH_LEN bytes.
    #
    # Single definition of the node hash, so every stored value has exactly
    # the HASH_LEN bytes that load() reads back per node.
    #
    # @param data bytes to hash
    # @return the last HASH_LEN bytes of the SHA-256 digest
    @staticmethod
    def _hash(data):
        return hashlib.sha256(data).digest()[-HashTree.HASH_LEN:]

    ## Builds a complete tree from the contents of a file.
    #
    # The file is read in BLOCK_SIZE chunks (the last one may be shorter), one
    # leaf per chunk, and the inner nodes are then computed by buildTree().
    # A zero-length file yields a tree with no nodes.
    #
    # @param filename path of the file to hash
    # @return the populated HashTree
    @classmethod
    def fromFile(cls, filename):
        with open(filename, "rb") as f:
            stat = os.fstat(f.fileno())
            size = stat.st_size  # !! symlinks
            # Ceiling division: number of leaf blocks needed to cover the file.
            leafCount = (size - 1) // HashTree.BLOCK_SIZE + 1
            res = cls(leafCount)
            print(datetime.now(), "hashing file:")
            progress = Progress(leafCount)
            for i in range(leafCount):
                data = f.read(HashTree.BLOCK_SIZE)
                res.insertLeaf(cls._hash(data))
                progress.p(i)
            progress.done()
        res.buildTree()
        return res

    ## Restores a tree previously written by save().
    #
    # The node count is derived from the file size; a trailing fragment
    # shorter than HASH_LEN is ignored. A saved tree always holds an odd
    # number of nodes; a non-zero even count does not fit the allocated
    # store and raises IndexError.
    #
    # @param filename path of the file holding the concatenated node hashes
    # @return the restored HashTree
    @classmethod
    def load(cls, filename):
        with open(filename, "rb") as f:
            stat = os.fstat(f.fileno())
            size = stat.st_size
            nodeCount = size // HashTree.HASH_LEN
            res = cls((nodeCount + 1) // 2)
            for i in range(nodeCount):
                res.store[i] = f.read(HashTree.HASH_LEN)
        return res

    ## Writes all node hashes to a file, in store order, with no separators.
    #
    # @param filename path of the file to (over)write
    def save(self, filename):
        with open(filename, "wb") as f:
            # One batched call instead of a separate write per hash.
            f.writelines(self.store)

    ## Inserts a leaf at the first empty position.
    #
    # Useful and used only during the tree construction.
    #
    # @param h hash to store in the next free leaf slot
    def insertLeaf(self, h):
        self.store[self._index] = h
        self._index += 1

    ## Updates a hash stored in the leaf and rehashes all its ancestors.
    #
    # @param index position of the leaf within store (not the leaf's ordinal)
    # @param h the new leaf hash
    # @throws IndexError if index lies before the first leaf
    def updateLeaf(self, index, h):
        if index < self.leafStart:
            raise IndexError()
        self.store[index] = h
        self.updateNode((index - 1) // 2)

    ## Updates the node at index and all its ancestors.
    #
    # @param index position of an inner node within store; a negative value
    #        makes the call a no-op
    def updateNode(self, index):
        while index >= 0:
            self.store[index] = self._hash(
                self.store[index * 2 + 1] + self.store[index * 2 + 2])
            # Parent of the root evaluates to -1, which ends the loop.
            index = (index - 1) // 2

    ## Fast construction of the tree over the leaves. O(n).
    #
    # Walks the inner nodes from the last one down to the root, so both
    # children of a node are already final when the node itself is hashed.
    def buildTree(self):
        print(datetime.now(), "building tree:")
        progress = Progress(-1, self.leafStart - 1)
        for i in range(self.leafStart - 1, -1, -1):
            self.store[i] = self._hash(
                self.store[i * 2 + 1] + self.store[i * 2 + 2])
            progress.p(i)
        progress.done()
# Manual check: hash two files and print their trees side by side, one node per
# line, together with whether the two hashes at that position match.
if __name__=="__main__":
    # fromFile() opens the path itself, so it must be given the file name, not
    # an already opened file object (open() rejects that with a TypeError).
    f1=HashTree.fromFile("serverFile.txt")
    f2=HashTree.fromFile("clientFile.txt")
    # zip() stops at the shorter tree, so only positions present in both print.
    for i,(h1,h2) in enumerate(zip(f1.store,f2.store)):
        print("{0:2}".format(i),h1.hex(),h2.hex(),h1==h2)