Files @ b0515ceb502d
Branch filter:

Location: Morevna/src/hashtree.py

Laman
actually flushing the files
import collections
import hashlib
import os
from datetime import datetime

from util import Progress


def hash_block(data):
	return hashlib.sha256(data).digest()[-HashTree.HASH_LEN:]


class HashTree:
	HASH_LEN = 16  # bytes
	BLOCK_SIZE = 4096  # bytes
	
	## Prepares a tree containing leaf_count leaves.
	def __init__(self, leaf_count):
		self.store = [b""]*(leaf_count*2-1)
		self.leaf_start = leaf_count-1
		self.leaf_count = leaf_count
		self._index = self.leaf_start
		
	@classmethod
	def from_file(cls, filename):
		with open(filename, "rb") as f:
			stat = os.fstat(f.fileno())
			size = stat.st_size  # !! symlinks
			leaf_count = (size-1)//HashTree.BLOCK_SIZE+1  # number of leaf blocks
			res = cls(leaf_count)
			print(datetime.now(), "hashing file:")

			progress = Progress(leaf_count)
			for i in range(leaf_count):
				data = f.read(HashTree.BLOCK_SIZE)
				res.insert_leaf(hash_block(data))

				progress.p(i)
			progress.done()
		res.build_tree()
		
		return res

	@classmethod
	def load(cls, filename):
		with open(filename, "rb") as f:
			stat = os.fstat(f.fileno())
			size = stat.st_size
			node_count = size//HashTree.HASH_LEN
			res = cls((node_count+1)//2)

			for i in range(node_count):
				res.store[i] = f.read(HashTree.HASH_LEN)
		return res

	def save(self, filename):
		with open(filename, "wb") as f:
			for h in self.store:
				f.write(h)
		
	## Inserts a leaf at the first empty position.
	#
	#	Useful and used only during the tree construction.
	def insert_leaf(self, h):
		self.store[self._index] = h
		self._index += 1
		
	## Updates a hash stored in the leaf.
	def update_leaf(self, index, h):
		if index<self.leaf_start: raise IndexError()
		
		self.store[index] = h
		self.update_node((index-1)//2)
	
	## Updates the node at index and all its ancestors.
	def update_node(self, index):
		while index>=0:
			self.store[index] = hash_block(self.store[index*2+1] + self.store[index*2+2])
			index = (index-1)//2
			
	## Fast construction of the tree over the leaves. O(n).
	def build_tree(self):
		print(datetime.now(), "building tree:")
		progress = Progress(-1, self.leaf_start-1)
		for i in range(self.leaf_start-1, -1, -1):
			self.store[i] = hash_block(self.store[i*2+1] + self.store[i*2+2])
			progress.p(i)
		progress.done()

	## Update faster than repeated insertLeaf.
	def batch_update(self, keys_hashes):
		queue = collections.deque()
		for (k, v) in sorted(keys_hashes):
			self.store[k] = v
			parent_k = (k-1)//2
			if len(queue)==0 or queue[-1]!=parent_k:
				queue.append(parent_k)

		while len(queue)>0:
			k = queue.pop()
			self.store[k] = hash_block(self.store[k*2+1] + self.store[k*2+2])
			parent_k = (k-1)//2
			if (len(queue)==0 or queue[0]!=parent_k) and k!=0:
				queue.appendleft(parent_k)