Files @ e6f9a4843e49
Branch filter:

Location: Morevna/src/hashtree.py

Laman
configurable log path
import collections
import hashlib
import os
from datetime import datetime

from util import Progress


def hashBlock(data):
	return hashlib.sha256(data).digest()[-HashTree.HASH_LEN:]


class HashTree:
	HASH_LEN=16 # bytes
	BLOCK_SIZE=4096 # bytes
	
	## Prepares a tree containing leafCount leaves.
	def __init__(self,leafCount):
		self.store=[b""]*(leafCount*2-1)
		self.leafStart=leafCount-1
		self.leafCount=leafCount
		self._index=self.leafStart
		
	@classmethod
	def fromFile(cls,filename):
		with open(filename,"rb") as f:
			stat=os.fstat(f.fileno())
			size=stat.st_size # !! symlinks
			leafCount=(size-1)//HashTree.BLOCK_SIZE+1 # number of leaf blocks
			res=cls(leafCount)
			print(datetime.now(), "hashing file:")

			progress=Progress(leafCount)
			for i in range(leafCount):
				data=f.read(HashTree.BLOCK_SIZE)
				res.insertLeaf(hashBlock(data))

				progress.p(i)
			progress.done()
		res.buildTree()
		
		return res

	@classmethod
	def load(cls,filename):
		with open(filename,"rb") as f:
			stat=os.fstat(f.fileno())
			size=stat.st_size
			nodeCount=size//HashTree.HASH_LEN
			res=cls((nodeCount+1)//2)

			for i in range(nodeCount):
				res.store[i]=f.read(HashTree.HASH_LEN)
		return res

	def save(self,filename):
		with open(filename,"wb") as f:
			for h in self.store:
				f.write(h)
		
	## Inserts a leaf at the first empty position.
	#
	#	Useful and used only during the tree construction.
	def insertLeaf(self,h):
		self.store[self._index]=h
		self._index+=1
		
	## Updates a hash stored in the leaf.
	def updateLeaf(self,index,h):
		if index<self.leafStart: raise IndexError()
		
		self.store[index]=h
		self.updateNode((index-1)//2)
	
	## Updates the node at index and all its ancestors.
	def updateNode(self,index):
		while index>=0:
			self.store[index]=hashBlock(self.store[index*2+1]+self.store[index*2+2])
			index=(index-1)//2
			
	## Fast construction of the tree over the leaves. O(n).
	def buildTree(self):
		print(datetime.now(), "building tree:")
		progress=Progress(-1, self.leafStart-1)
		for i in range(self.leafStart-1,-1,-1):
			self.store[i]=hashBlock(self.store[i*2+1]+self.store[i*2+2])
			progress.p(i)
		progress.done()

	## Update faster than repeated insertLeaf.
	def batchUpdate(self,keysHashes):
		queue=collections.deque()
		for (k,v) in sorted(keysHashes):
			self.store[k]=v
			parentK=(k-1)//2
			if len(queue)==0 or queue[-1]!=parentK:
				queue.append(parentK)

		while len(queue)>0:
			k=queue.pop()
			self.store[k]=hashBlock(self.store[k*2+1]+self.store[k*2+2])
			parentK=(k-1)//2
			if (len(queue)==0 or queue[0]!=parentK) and k!=0:
				queue.appendleft(parentK)