Changeset - 8bb6a904d50b
[Not reviewed]
default
0 4 0
Laman - 7 years ago 2017-10-19 13:26:42

client too can now use saved hash tree
4 files changed with 37 insertions and 21 deletions:
0 comments (0 inline, 0 general)
src/client.py
Show inline comments
 
import collections
 
import socket
 
import ssl
 
import logging as log
 
from datetime import datetime
 

	
 
import config as conf
 
import stats
 
from util import Progress
 
from hashtree import HashTree
 
from hashtree import HashTree,hashBlock
 
from networkers import NetworkReader,NetworkWriter
 

	
 

	
 
class Connection:
 
	def __init__(self):
 
		sock=socket.socket(socket.AF_INET, socket.SOCK_STREAM)
 

	
 
		sslContext=ssl.create_default_context(cafile=conf.peers)
 
		sslContext.check_hostname=False
 
		sslContext.load_cert_chain(conf.certfile,conf.keyfile)
 

	
 
		self._socket=sslContext.wrap_socket(sock)
 
@@ -27,39 +27,45 @@ class Connection:
 
		self.incoming=NetworkReader(fr)
 
		self.outcoming=NetworkWriter(fw)
 

	
 
	def __enter__(self):
 
		return self.incoming,self.outcoming
 

	
 
	def __exit__(self, exc_type, exc_val, exc_tb):
 
		self._socket.shutdown(socket.SHUT_RDWR)
 
		self._socket.close()
 

	
 

	
 
class Client:
 
	def __init__(self,filename):
 
	## Prepares client state for synchronising one data file.
	#
	# filename is the path of the data file. treeFile is the path of a stored hash tree;
	# the default empty string means no stored tree is used.
	def __init__(self,filename,treeFile=""):
 
		# reader/writer pair; both stay None until setConnection() assigns them
		self._incoming=None
 
		self._outcoming=None
 
		self._filename=filename
 
		self._treeFile=treeFile
 

	
 
		print(datetime.now(), "initializing...")
 
		# NOTE(review): this assignment and the if/else below look like the removed and added
		# sides of the changeset shown on this page. As written both run, so the data file is
		# hashed here even when a stored tree is given — confirm which one is meant to stay.
		self._localTree=HashTree.fromFile(self._filename)
 
		# take the stored tree when a path was given, otherwise hash the data file
		if treeFile:
 
			self._tree=HashTree.load(treeFile)
 
		else:
 
			self._tree=HashTree.fromFile(filename)
 

	
 
		# hashes of received blocks keyed by their position in the tree store; filled only
		# when a tree file is in use and applied to the tree after the transfer
		self._newLeaves=dict()
 

	
 
	## Asks server for node hashes to determine which are to be transferred.
 
	#
 
	# Uses a binary HashTree, where item at k is hash of items at 2k+1, 2k+2.
 
	#
 
	# Requests nodes in order of a batch DFS. Needs stack of size O(treeDepth*batchSize). Nodes in each tree level are accessed in order.
 
	def negotiate(self):
 
		localTree=self._localTree
 
		localTree=self._tree
 
		blocksToTransfer=[]
 
		nodeStack=collections.deque([0]) # root
 

	
 
		# initialize session
 
		jsonData={"command":"init", "blockSize":localTree.BLOCK_SIZE, "blockCount":localTree.leafCount, "version":conf.version}
 
		self._outcoming.writeMsg(jsonData)
 
		jsonData,binData=self._incoming.readMsg()
 
		assert jsonData["command"]=="ack"
 

	
 
		# determine which blocks to send
 
		print(datetime.now(), "negotiating:")
 
		progress=Progress(localTree.leafCount)
 
@@ -126,26 +132,35 @@ class Client:
 

	
 
		print(datetime.now(), "receiving data:")
 
		progress=Progress(len(blocksToTransfer))
 
		for (k,i2) in enumerate(blocksToTransfer):
 
			self._outcoming.writeMsg({"command":"req", "index":i2, "dataType":"data"})
 
			jsonData,binData=self._incoming.readMsg()
 
			assert jsonData["command"]=="send" and jsonData["index"]==i2 and jsonData["dataType"]=="data", jsonData
 

	
 
			if i1+1!=i2:
 
				dataFile.seek(i2*HashTree.BLOCK_SIZE)
 
			dataFile.write(binData)
 

	
 
			if self._treeFile:
 
				self._newLeaves[i2+self._tree.leafStart]=hashBlock(binData)
 

	
 
			log.info("block #{0}: {1}...{2}".format(i2,binData[:5],binData[-5:]))
 

	
 
			stats.logTransferredBlock()
 
			i1=i2
 
			progress.p(k)
 
		progress.done()
 

	
 
		self._outcoming.writeMsg({"command":"end"})
 

	
 
		log.info("closing session...")
 
		dataFile.close()
 

	
 
		if self._treeFile:
 
			log.info("updating hash tree...")
 
			for (k,v) in self._newLeaves.items():
 
				self._tree.updateLeaf(k, v)
 
			self._tree.save(self._treeFile)
 

	
 
	def setConnection(self,connection):
 
		(self._incoming,self._outcoming)=connection
src/hashtree.py
Show inline comments
 
import hashlib
 
import os
 
from datetime import datetime
 

	
 
from util import Progress
 

	
 

	
 
## Hashes a block of bytes: the last HashTree.HASH_LEN bytes of its SHA-256 digest.
def hashBlock(data):
	fullDigest=hashlib.sha256(data).digest()
	return fullDigest[-HashTree.HASH_LEN:]
 

	
 

	
 
class HashTree:
 
	HASH_LEN=16 # bytes
 
	BLOCK_SIZE=4096 # bytes
 
	
 
	## Prepares a tree containing leafCount leaves.
 
	def __init__(self,leafCount):
 
		self.store=[b""]*(leafCount*2-1)
 
		self.leafStart=leafCount-1
 
		self.leafCount=leafCount
 
		self._index=self.leafStart
 
		
 
	## Builds a tree for the file at filename by hashing it block by block.
	#
	# The file is read in BLOCK_SIZE chunks, the hash of each chunk is inserted as a leaf
	# and the inner nodes are then computed by buildTree(). Returns the new tree.
	@classmethod
 
	def fromFile(cls,filename):
 
		with open(filename,"rb") as f:
 
			stat=os.fstat(f.fileno())
 
			size=stat.st_size # !! symlinks (author's warning, not elaborated in the code — TODO confirm what needs handling)
 
			leafCount=(size-1)//HashTree.BLOCK_SIZE+1 # number of leaf blocks, i.e. size/BLOCK_SIZE rounded up
 
			res=cls(leafCount)
 
			print(datetime.now(), "hashing file:")
 

	
 
			progress=Progress(leafCount)
 
			for i in range(leafCount):
 
				data=f.read(HashTree.BLOCK_SIZE)
 
				# NOTE(review): the two insertLeaf calls below compute the same hash and look like
				# the removed/added pair of this changeset; as written each block is inserted
				# twice — confirm that only the hashBlock() form is meant to stay.
				res.insertLeaf(hashlib.sha256(data).digest()[-HashTree.HASH_LEN:])
 
				res.insertLeaf(hashBlock(data))
 

	
 
				progress.p(i)
 
			progress.done()
 
		# all leaves are in place, compute the inner nodes
		res.buildTree()
 
		
 
		return res
 

	
 
	@classmethod
 
	def load(cls,filename):
 
		with open(filename,"rb") as f:
 
			stat=os.fstat(f.fileno())
 
			size=stat.st_size
 
@@ -61,31 +65,23 @@ class HashTree:
 
		self._index+=1
 
		
 
	## Updates a hash stored in the leaf.
 
	def updateLeaf(self,index,h):
 
		if index<self.leafStart: raise IndexError()
 
		
 
		self.store[index]=h
 
		self.updateNode((index-1)//2)
 
	
 
	## Updates the node at index and all its ancestors.
	#
	# Each visited node is set to the hash of its two children (2k+1 and 2k+2) concatenated.
	# The parent of node k is (k-1)//2; for the root (0) that gives -1, which ends the loop.
 
	def updateNode(self,index):
 
		while index>=0:
 
			# NOTE(review): the two assignments below compute the same value and look like the
			# removed/added pair of this changeset — confirm that only the hashBlock() form stays.
			self.store[index]=hashlib.sha256(self.store[index*2+1]+self.store[index*2+2]).digest()[-HashTree.HASH_LEN:]
 
			self.store[index]=hashBlock(self.store[index*2+1]+self.store[index*2+2])
 
			index=(index-1)//2
 
			
 
	## Fast construction of the tree over the leaves. O(n).
	#
	# Walks the inner nodes from the last one (leafStart-1) down to the root (0), so both
	# children of a node are already final when the node itself is hashed.
 
	def buildTree(self):
 
		print(datetime.now(), "building tree:")
 
		# presumably a countdown progress bar running from leafStart-1 to -1 — verify against util.Progress
		progress=Progress(-1, self.leafStart-1)
 
		for i in range(self.leafStart-1,-1,-1):
 
			# NOTE(review): the two assignments below compute the same value and look like the
			# removed/added pair of this changeset — confirm that only the hashBlock() form stays.
			self.store[i]=hashlib.sha256(self.store[i*2+1]+self.store[i*2+2]).digest()[-HashTree.HASH_LEN:]
 
			self.store[i]=hashBlock(self.store[i*2+1]+self.store[i*2+2])
 
			progress.p(i)
 
		progress.done()
 

	
 

	
 
if __name__=="__main__":
	# Manual check: hash two files and print both trees node by node, with a flag telling
	# whether the hashes at each position match.
	# fromFile() opens the given path itself, so it must receive file names. Passing
	# already-open file objects made its open() call fail and left the handles unclosed.
	f1=HashTree.fromFile("serverFile.txt")
	f2=HashTree.fromFile("clientFile.txt")

	for i,(h1,h2) in enumerate(zip(f1.store,f2.store)):
		print("{0:2}".format(i),h1.hex(),h2.hex(),h1==h2)
src/morevna.py
Show inline comments
 
@@ -16,41 +16,45 @@ def _checkFile(f):
 
		print("invalid file specified:",f,file=sys.stderr)
 
		sys.exit(1)
 

	
 

	
 
## "build" command: hashes args.datafile into a hash tree and saves the tree to args.treefile.
def buildTree(args):
	datafile=args.datafile
	_checkFile(datafile)
	HashTree.fromFile(datafile).save(args.treefile)
 

	
 
## "push" command: negotiates the differing blocks with the server and sends them.
#
# args is the parsed command line; datafile, tree, host and port are read from it.
def push(args):
 
	_checkFile(args.datafile)
 
	if args.tree:
 
		_checkFile(args.tree)
 
	# a host given on the command line is tried first, a given port replaces the configured one
	if args.host: conf.hosts.insert(0,args.host)
 
	if args.port: conf.port=args.port
 

	
 
	# NOTE(review): the two Client(...) lines below look like the removed/added pair of this
	# changeset; as written the first client is built and immediately replaced — confirm
	# that only the second line is meant to stay.
	c=Client(args.datafile)
 
	c=Client(args.datafile,args.tree)
 
	with ClientConnection() as con:
 
		c.setConnection(con)
 
		blocksToTransfer=c.negotiate()
 
		c.sendData(blocksToTransfer)
 
	print()
 
	print(stats.report())
 

	
 
## "pull" command: negotiates the differing blocks with the server and fetches them.
#
# args is the parsed command line; datafile, tree, host and port are read from it.
def pull(args):
 
	_checkFile(args.datafile)
 
	if args.tree:
 
		_checkFile(args.tree)
 
	# a host given on the command line is tried first, a given port replaces the configured one
	if args.host: conf.hosts.insert(0,args.host)
 
	if args.port: conf.port=args.port
 

	
 
	# NOTE(review): the two Client(...) lines below look like the removed/added pair of this
	# changeset; as written the first client is built and immediately replaced — confirm
	# that only the second line is meant to stay.
	c=Client(args.datafile)
 
	c=Client(args.datafile,args.tree)
 
	with ClientConnection() as con:
 
		c.setConnection(con)
 
		blocksToTransfer=c.negotiate()
 
		c.pullData(blocksToTransfer)
 
	print()
 
	print(stats.report())
 

	
 
def serve(args):
 
	_checkFile(args.datafile)
 
	if args.tree:
 
		_checkFile(args.tree)
 
	if args.host: conf.hosts.insert(0,args.host)
 
@@ -65,30 +69,32 @@ def serve(args):
 

	
 
# Command line interface: one subcommand per operation. Each subparser stores its
# handler in "func", which is called with the parsed arguments at the bottom.
parser=ArgumentParser()
# The subcommand is made mandatory. Subcommands are optional by default, and without one
# no "func" default is set, so the dispatch below would fail with an AttributeError
# instead of printing a usage message. "dest" gives the error message a name to show.
subparsers=parser.add_subparsers(dest="command")
subparsers.required=True

# build: hash a data file and store the resulting tree
pBuild=subparsers.add_parser("build")
pBuild.add_argument("treefile", help="stored hash tree location")
pBuild.add_argument("datafile")
pBuild.set_defaults(func=buildTree)

# push: send differing blocks to the server
pUpdate=subparsers.add_parser("push")
pUpdate.add_argument("-p","--port",type=int)
pUpdate.add_argument("--host",default="127.0.0.1")
pUpdate.add_argument("-t","--tree",help="stored hash tree location")
pUpdate.add_argument("datafile")
pUpdate.set_defaults(func=push)

# pull: fetch differing blocks from the server
pUpdate=subparsers.add_parser("pull")
pUpdate.add_argument("-p","--port",type=int)
pUpdate.add_argument("--host",default="127.0.0.1")
pUpdate.add_argument("-t","--tree",help="stored hash tree location")
pUpdate.add_argument("datafile")
pUpdate.set_defaults(func=pull)

# serve: run the server side
pServe=subparsers.add_parser("serve")
pServe.add_argument("-p","--port",type=int)
pServe.add_argument("--host",default="")
pServe.add_argument("-t","--tree",help="stored hash tree location")
pServe.add_argument("datafile")
pServe.set_defaults(func=serve)

args=parser.parse_args()
args.func(args)
src/server.py
Show inline comments
 
import hashlib
 
import socket
 
import socket
 
import ssl
 
import multiprocessing
 
import logging as log
 

	
 
from hashtree import HashTree
 
from hashtree import HashTree,hashBlock
 
from networkers import NetworkReader,NetworkWriter
 
import config as conf
 

	
 

	
 
class Connection:
 
	def __init__(self,serverSocket,sslContext):
 
		sock, address = serverSocket.accept()
 
		self._socket=sslContext.wrap_socket(sock,server_side=True)
 

	
 
		log.info('Connected by {0}'.format(address))
 
		fr=self._socket.makefile(mode="rb")
 
		fw=self._socket.makefile(mode="wb")
 
@@ -131,25 +130,25 @@ class Server:
 

	
 
		return (jsonResponse,binResponse)
 

	
 
	## Writes one received data block into the data file and acknowledges it.
	#
	# jsonData carries the block number under "index", binData is the block content.
	# Returns a one-element tuple holding the "ack" message for that index.
	def _receiveData(self,jsonData,binData):
 
		log.info("received data block #{0}: {1}...{2}".format(jsonData["index"],binData[:5],binData[-5:]))
 

	
 
		i=jsonData["index"]
 
		# seek only when this block does not directly follow the previously written one
		if self._lastIndex+1!=i:
 
			self._dataFile.seek(i*self.BLOCK_SIZE)
 
		self._dataFile.write(binData)
 
		self._lastIndex=i
 
		# with a tree file in use, remember the new leaf hash under its position in the tree store
		if self._treeFile:
 
			# NOTE(review): the two assignments below look like the removed/added pair of this
			# changeset. The first slices [HASH_LEN:] where hashBlock() slices [-HASH_LEN:];
			# the results coincide only while the digest is exactly twice HASH_LEN long —
			# confirm that only the hashBlock() form is meant to stay.
			self._newLeaves[i+self._tree.leafStart]=hashlib.sha256(binData).digest()[HashTree.HASH_LEN:]
 
			self._newLeaves[i+self._tree.leafStart]=hashBlock(binData)
 

	
 
		return ({"command": "ack", "index": i},)
 

	
 
	def _finalize(self):
 
		log.info("closing session...")
 
		self._dataFile.close()
 
		self._dataFileHandle=None
 
		if self._treeFile:
 
			log.info("updating hash tree...")
 
			for (k,v) in self._newLeaves.items():
 
				self._tree.updateLeaf(k, v)
 
			self._tree.save(self._treeFile)
0 comments (0 inline, 0 general)