Changeset - bacd3b2d37aa
[Not reviewed]
default
0 4 1
Laman - 7 years ago 2017-11-01 23:50:10

optimized hashtree batch update
5 files changed with 68 insertions and 11 deletions:
0 comments (0 inline, 0 general)
src/hashtree.py
Show inline comments
 
import hashlib
 
import collections
 
import hashlib
 
import os
 
from datetime import datetime
 

	
 
from util import Progress
 

	
 

	
 
def hashBlock(data):
 
	return hashlib.sha256(data).digest()[-HashTree.HASH_LEN:]
 

	
 

	
 
class HashTree:
 
	HASH_LEN=16 # bytes
 
@@ -76,12 +77,28 @@ class HashTree:
 
		while index>=0:
 
			self.store[index]=hashBlock(self.store[index*2+1]+self.store[index*2+2])
 
			index=(index-1)//2
 
			
 
	## Fast construction of the tree over the leaves. O(n).
 
	def buildTree(self):
 
		print(datetime.now(), "building tree:")
 
		progress=Progress(-1, self.leafStart-1)
 
		for i in range(self.leafStart-1,-1,-1):
 
			self.store[i]=hashBlock(self.store[i*2+1]+self.store[i*2+2])
 
			progress.p(i)
 
		progress.done()
 

	
 
	## Update faster than repeated insertLeaf.
 
	def batchUpdate(self,keysHashes):
 
		queue=collections.deque()
 
		for (k,v) in sorted(keysHashes):
 
			self.store[k]=v
 
			parentK=(k-1)//2
 
			if len(queue)==0 or queue[-1]!=parentK:
 
				queue.append(parentK)
 

	
 
		while len(queue)>0:
 
			k=queue.pop()
 
			self.store[k]=hashBlock(self.store[k*2+1]+self.store[k*2+2])
 
			parentK=(k-1)//2
 
			if (len(queue)==0 or queue[0]!=parentK) and k!=0:
 
				queue.appendleft(parentK)
src/netnode.py
Show inline comments
 
@@ -40,15 +40,14 @@ class NetNode:
 
		self._filename=filename
 
		self._treeFile=treeFile
 

	
 
		if treeFile:
 
			self._tree=HashTree.load(treeFile)
 
		else:
 
			self._tree=HashTree.fromFile(filename)
 

	
 
		self._newLeaves=dict()
 

	
 
	def _updateTree(self):
 
		log.info("updating hash tree...")
 
		for (k,v) in self._newLeaves.items():
 
			self._tree.updateLeaf(k, v)
 
		self._tree.batchUpdate(self._newLeaves.items())
 
		self._tree.save(self._treeFile)
src/tests/__init__.py
Show inline comments
 
import sys
 

	
 

	
 
class RedirectedOutput():
 
	_stdout=None
 

	
 
	@classmethod
 
	def setUpClass(cls):
 
		cls._stdout=sys.stdout
 
		sys.stdout=open("/tmp/morevna-stdout.log",mode="a")
 

	
 
	@classmethod
 
	def tearDownClass(cls):
 
		sys.stdout.close()
 
		sys.stdout=cls._stdout
src/tests/test_hashtree.py
Show inline comments
 
new file 100644
 
import random
 
from unittest import TestCase
 

	
 
from hashtree import HashTree
 
from . import RedirectedOutput
 

	
 

	
 
random.seed(17)
 

	
 

	
 
def buildTree(leaves):
 
	tree=HashTree(len(leaves))
 
	for l in leaves:
 
		tree.insertLeaf(l)
 
	tree.buildTree()
 
	return tree
 

	
 

	
 
class TestMorevna(RedirectedOutput,TestCase):
 
	def test_batchUpdate(self):
 
		leaves=[b"a" for i in range(8)]
 
		t1=buildTree(leaves)
 
		keys=list(range(8))
 

	
 
		for i in range(8):
 
			random.shuffle(keys)
 
			for k in keys[:i+1]:
 
				leaves[k]=bytes([random.randrange(256)])
 
			t2=buildTree(leaves)
 
			t1.batchUpdate((k+t1.leafStart,leaves[k]) for k in keys[:i+1])
 
			self.assertEqual(t1.store,t2.store)
src/tests/test_overall.py
Show inline comments
 
import os
 
import sys
 
import shutil
 
import hashlib
 
import multiprocessing
 
from logging import FileHandler
 
from unittest import TestCase
 

	
 
import config
 
from hashtree import HashTree
 
from client import Client, Connection as ClientConnection
 
from server import Miniserver
 
from . import RedirectedOutput
 

	
 

	
 
config.logger.removeHandler(config.handler)
 
handler=FileHandler("/tmp/morevna.log")
 
handler.setFormatter(config.formatter)
 
config.logger.addHandler(handler)
 

	
 
config.batchSize=8
 

	
 
dataDir=os.path.join(config.directory,"src/tests/data")
 
filename=os.path.join(dataDir,"test.img")
 

	
 

	
 
def compareFiles(f1,f2):
 
	with open(f1,mode="rb") as f:
 
		h2=hashlib.sha256(f.read()).hexdigest()
 
	with open(f2,mode="rb") as f:
 
		h=hashlib.sha256(f.read()).hexdigest()
 
	return (h,h2)
 

	
 

	
 
class TestMorevna(TestCase):
 
class TestMorevna(RedirectedOutput,TestCase):
 
	_stdout=None
 

	
 
	@classmethod
 
	def setUpClass(cls):
 
		cls._stdout=sys.stdout
 
		sys.stdout=open("/tmp/morevna-stdout.log",mode="a")
 

	
 
	def setUp(self):
 
		src=os.path.join(dataDir,"test1.img")
 
		shutil.copyfile(src,filename)
 

	
 
	@classmethod
 
	def tearDownClass(cls):
 
		super().tearDownClass()
 
		os.remove(filename)
 
		sys.stdout.close()
 
		sys.stdout=cls._stdout
 

	
 
	def test_build(self):
 
		treeFile=os.path.join(dataDir,"test.bin")
 
		refFile=os.path.join(dataDir,"test1.bin")
 

	
 
		tree=HashTree.fromFile(os.path.join(dataDir,"test1.img"))
 
		tree.save(treeFile)
 

	
 
		self.assertEqual(*compareFiles(refFile,treeFile))
 

	
 
		os.remove(treeFile)
 

	
0 comments (0 inline, 0 general)