Changeset - 8bb6a904d50b
[Not reviewed]
default
0 4 0
Laman - 7 years ago 2017-10-19 13:26:42

client too can now use saved hash tree
4 files changed with 37 insertions and 21 deletions:
0 comments (0 inline, 0 general)
src/client.py
Show inline comments
 
import collections
 
import socket
 
import ssl
 
import logging as log
 
from datetime import datetime
 

	
 
import config as conf
 
import stats
 
from util import Progress
 
from hashtree import HashTree
 
from hashtree import HashTree,hashBlock
 
from networkers import NetworkReader,NetworkWriter
 

	
 

	
 
class Connection:
 
	def __init__(self):
 
		sock=socket.socket(socket.AF_INET, socket.SOCK_STREAM)
 

	
 
		sslContext=ssl.create_default_context(cafile=conf.peers)
 
		sslContext.check_hostname=False
 
		sslContext.load_cert_chain(conf.certfile,conf.keyfile)
 

	
 
		self._socket=sslContext.wrap_socket(sock)
 
		self._socket.connect((conf.hosts[0], conf.port))
 
		fr=self._socket.makefile(mode="rb")
 
		fw=self._socket.makefile(mode="wb")
 

	
 
		self.incoming=NetworkReader(fr)
 
		self.outcoming=NetworkWriter(fw)
 

	
 
	def __enter__(self):
 
		return self.incoming,self.outcoming
 

	
 
	def __exit__(self, exc_type, exc_val, exc_tb):
 
		self._socket.shutdown(socket.SHUT_RDWR)
 
		self._socket.close()
 

	
 

	
 
class Client:
 
	def __init__(self,filename):
 
	def __init__(self,filename,treeFile=""):
 
		self._incoming=None
 
		self._outcoming=None
 
		self._filename=filename
 
		self._treeFile=treeFile
 

	
 
		print(datetime.now(), "initializing...")
 
		self._localTree=HashTree.fromFile(self._filename)
 
		if treeFile:
 
			self._tree=HashTree.load(treeFile)
 
		else:
 
			self._tree=HashTree.fromFile(filename)
 

	
 
		self._newLeaves=dict()
 

	
 
	## Asks server for node hashes to determine which are to be transferred.
 
	#
 
	# Uses a binary HashTree, where item at k is hash of items at 2k+1, 2k+2.
 
	#
 
	# Requests nodes in order of a batch DFS. Needs stack of size O(treeDepth*batchSize). Nodes in each tree level are accessed in order.
 
	def negotiate(self):
 
		localTree=self._localTree
 
		localTree=self._tree
 
		blocksToTransfer=[]
 
		nodeStack=collections.deque([0]) # root
 

	
 
		# initialize session
 
		jsonData={"command":"init", "blockSize":localTree.BLOCK_SIZE, "blockCount":localTree.leafCount, "version":conf.version}
 
		self._outcoming.writeMsg(jsonData)
 
		jsonData,binData=self._incoming.readMsg()
 
		assert jsonData["command"]=="ack"
 

	
 
		# determine which blocks to send
 
		print(datetime.now(), "negotiating:")
 
		progress=Progress(localTree.leafCount)
 
		while len(nodeStack)>0:
 
			indices=[]
 
			for i in range(conf.batchSize):
 
				indices.append(nodeStack.pop())
 
				if len(nodeStack)==0: break
 
			self._outcoming.writeMsg({"command":"req", "index":indices, "dataType":"hash"})
 

	
 
			jsonData,binData=self._incoming.readMsg()
 
			assert jsonData["index"]==indices
 
			assert jsonData["dataType"]=="hash"
 
			stats.logExchangedNode(len(indices))
 

	
 
@@ -114,38 +120,47 @@ class Client:
 
			progress.p(k)
 
		progress.done()
 

	
 
		self._outcoming.writeMsg({"command":"end"})
 

	
 
		log.info("closing session...")
 
		dataFile.close()
 

	
 
	def pullData(self,blocksToTransfer):
 
		log.info(blocksToTransfer)
 
		dataFile=open(self._filename, mode="rb+")
 
		i1=-1
 

	
 
		print(datetime.now(), "receiving data:")
 
		progress=Progress(len(blocksToTransfer))
 
		for (k,i2) in enumerate(blocksToTransfer):
 
			self._outcoming.writeMsg({"command":"req", "index":i2, "dataType":"data"})
 
			jsonData,binData=self._incoming.readMsg()
 
			assert jsonData["command"]=="send" and jsonData["index"]==i2 and jsonData["dataType"]=="data", jsonData
 

	
 
			if i1+1!=i2:
 
				dataFile.seek(i2*HashTree.BLOCK_SIZE)
 
			dataFile.write(binData)
 

	
 
			if self._treeFile:
 
				self._newLeaves[i2+self._tree.leafStart]=hashBlock(binData)
 

	
 
			log.info("block #{0}: {1}...{2}".format(i2,binData[:5],binData[-5:]))
 

	
 
			stats.logTransferredBlock()
 
			i1=i2
 
			progress.p(k)
 
		progress.done()
 

	
 
		self._outcoming.writeMsg({"command":"end"})
 

	
 
		log.info("closing session...")
 
		dataFile.close()
 

	
 
		if self._treeFile:
 
			log.info("updating hash tree...")
 
			for (k,v) in self._newLeaves.items():
 
				self._tree.updateLeaf(k, v)
 
			self._tree.save(self._treeFile)
 

	
 
	def setConnection(self,connection):
 
		(self._incoming,self._outcoming)=connection
src/hashtree.py
Show inline comments
 
import hashlib
 
import os
 
from datetime import datetime
 

	
 
from util import Progress
 

	
 

	
 
def hashBlock(data):
 
	return hashlib.sha256(data).digest()[-HashTree.HASH_LEN:]
 

	
 

	
 
class HashTree:
 
	HASH_LEN=16 # bytes
 
	BLOCK_SIZE=4096 # bytes
 
	
 
	## Prepares a tree containing leafCount leaves.
 
	def __init__(self,leafCount):
 
		self.store=[b""]*(leafCount*2-1)
 
		self.leafStart=leafCount-1
 
		self.leafCount=leafCount
 
		self._index=self.leafStart
 
		
 
	@classmethod
 
	def fromFile(cls,filename):
 
		with open(filename,"rb") as f:
 
			stat=os.fstat(f.fileno())
 
			size=stat.st_size # !! symlinks
 
			leafCount=(size-1)//HashTree.BLOCK_SIZE+1 # number of leaf blocks
 
			res=cls(leafCount)
 
			print(datetime.now(), "hashing file:")
 

	
 
			progress=Progress(leafCount)
 
			for i in range(leafCount):
 
				data=f.read(HashTree.BLOCK_SIZE)
 
				res.insertLeaf(hashlib.sha256(data).digest()[-HashTree.HASH_LEN:])
 
				res.insertLeaf(hashBlock(data))
 

	
 
				progress.p(i)
 
			progress.done()
 
		res.buildTree()
 
		
 
		return res
 

	
 
	@classmethod
 
	def load(cls,filename):
 
		with open(filename,"rb") as f:
 
			stat=os.fstat(f.fileno())
 
			size=stat.st_size
 
			nodeCount=size//HashTree.HASH_LEN
 
			res=cls((nodeCount+1)//2)
 

	
 
			for i in range(nodeCount):
 
				res.store[i]=f.read(HashTree.HASH_LEN)
 
		return res
 

	
 
	def save(self,filename):
 
		with open(filename,"wb") as f:
 
			for h in self.store:
 
				f.write(h)
 
		
 
	## Inserts a leaf at the first empty position.
 
	#
 
	#	Useful and used only during the tree construction.
 
	def insertLeaf(self,h):
 
		self.store[self._index]=h
 
		self._index+=1
 
		
 
	## Updates a hash stored in the leaf.
 
	def updateLeaf(self,index,h):
 
		if index<self.leafStart: raise IndexError()
 
		
 
		self.store[index]=h
 
		self.updateNode((index-1)//2)
 
	
 
	## Updates the node at index and all its ancestors.
 
	def updateNode(self,index):
 
		while index>=0:
 
			self.store[index]=hashlib.sha256(self.store[index*2+1]+self.store[index*2+2]).digest()[-HashTree.HASH_LEN:]
 
			self.store[index]=hashBlock(self.store[index*2+1]+self.store[index*2+2])
 
			index=(index-1)//2
 
			
 
	## Fast construction of the tree over the leaves. O(n).
 
	def buildTree(self):
 
		print(datetime.now(), "building tree:")
 
		progress=Progress(-1, self.leafStart-1)
 
		for i in range(self.leafStart-1,-1,-1):
 
			self.store[i]=hashlib.sha256(self.store[i*2+1]+self.store[i*2+2]).digest()[-HashTree.HASH_LEN:]
 
			self.store[i]=hashBlock(self.store[i*2+1]+self.store[i*2+2])
 
			progress.p(i)
 
		progress.done()
 

	
 

	
 
if __name__=="__main__":
 
	f1=HashTree.fromFile(open("serverFile.txt",mode='rb'))
 
	f2=HashTree.fromFile(open("clientFile.txt",mode='rb'))
 

	
 
	for i,(h1,h2) in enumerate(zip(f1.store,f2.store)):
 
		print("{0:2}".format(i),h1.hex(),h2.hex(),h1==h2)
src/morevna.py
Show inline comments
 
@@ -4,91 +4,97 @@ import logging as log
 
from argparse import ArgumentParser
 

	
 
from util import spawnDaemon
 
import config as conf
 
import stats
 
from hashtree import HashTree
 
from client import Client, Connection as ClientConnection
 
from server import Miniserver
 

	
 

	
 
def _checkFile(f):
 
	if not os.path.isfile(f):
 
		print("invalid file specified:",f,file=sys.stderr)
 
		sys.exit(1)
 

	
 

	
 
def buildTree(args):
 
	_checkFile(args.datafile)
 

	
 
	tree=HashTree.fromFile(args.datafile)
 
	tree.save(args.treefile)
 

	
 
def push(args):
 
	_checkFile(args.datafile)
 
	if args.tree:
 
		_checkFile(args.tree)
 
	if args.host: conf.hosts.insert(0,args.host)
 
	if args.port: conf.port=args.port
 

	
 
	c=Client(args.datafile)
 
	c=Client(args.datafile,args.tree)
 
	with ClientConnection() as con:
 
		c.setConnection(con)
 
		blocksToTransfer=c.negotiate()
 
		c.sendData(blocksToTransfer)
 
	print()
 
	print(stats.report())
 

	
 
def pull(args):
 
	_checkFile(args.datafile)
 
	if args.tree:
 
		_checkFile(args.tree)
 
	if args.host: conf.hosts.insert(0,args.host)
 
	if args.port: conf.port=args.port
 

	
 
	c=Client(args.datafile)
 
	c=Client(args.datafile,args.tree)
 
	with ClientConnection() as con:
 
		c.setConnection(con)
 
		blocksToTransfer=c.negotiate()
 
		c.pullData(blocksToTransfer)
 
	print()
 
	print(stats.report())
 

	
 
def serve(args):
 
	_checkFile(args.datafile)
 
	if args.tree:
 
		_checkFile(args.tree)
 
	if args.host: conf.hosts.insert(0,args.host)
 
	if args.port: conf.port=args.port
 

	
 
	s=Miniserver(args.datafile,args.tree)
 
	try:
 
		spawnDaemon(s.serve)
 
	except Exception as e:
 
		log.exception("exception: %s",e)
 

	
 

	
 
parser=ArgumentParser()
 
subparsers=parser.add_subparsers()
 

	
 
pBuild=subparsers.add_parser("build")
 
pBuild.add_argument("treefile", help="stored hash tree location")
 
pBuild.add_argument("datafile")
 
pBuild.set_defaults(func=buildTree)
 

	
 
pUpdate=subparsers.add_parser("push")
 
pUpdate.add_argument("-p","--port",type=int)
 
pUpdate.add_argument("--host",default="127.0.0.1")
 
pUpdate.add_argument("-t","--tree",help="stored hash tree location")
 
pUpdate.add_argument("datafile")
 
pUpdate.set_defaults(func=push)
 

	
 
pUpdate=subparsers.add_parser("pull")
 
pUpdate.add_argument("-p","--port",type=int)
 
pUpdate.add_argument("--host",default="127.0.0.1")
 
pUpdate.add_argument("-t","--tree",help="stored hash tree location")
 
pUpdate.add_argument("datafile")
 
pUpdate.set_defaults(func=pull)
 

	
 
pServe=subparsers.add_parser("serve")
 
pServe.add_argument("-p","--port",type=int)
 
pServe.add_argument("--host",default="")
 
pServe.add_argument("-t","--tree",help="stored hash tree location")
 
pServe.add_argument("datafile")
 
pServe.set_defaults(func=serve)
 

	
 
args=parser.parse_args()
 
args.func(args)
src/server.py
Show inline comments
 
import hashlib
 
import socket
 
import socket
 
import ssl
 
import multiprocessing
 
import logging as log
 

	
 
from hashtree import HashTree
 
from hashtree import HashTree,hashBlock
 
from networkers import NetworkReader,NetworkWriter
 
import config as conf
 

	
 

	
 
class Connection:
 
	def __init__(self,serverSocket,sslContext):
 
		sock, address = serverSocket.accept()
 
		self._socket=sslContext.wrap_socket(sock,server_side=True)
 

	
 
		log.info('Connected by {0}'.format(address))
 
		fr=self._socket.makefile(mode="rb")
 
		fw=self._socket.makefile(mode="wb")
 

	
 
		self.incoming=NetworkReader(fr)
 
		self.outcoming=NetworkWriter(fw)
 

	
 
	def __enter__(self):
 
		return self.incoming,self.outcoming
 

	
 
	def __exit__(self, exc_type, exc_val, exc_tb):
 
		self._socket.shutdown(socket.SHUT_RDWR)
 
		self._socket.close()
 

	
 

	
 
@@ -119,38 +118,38 @@ class Server:
 
		jsonResponse={"command":"send", "index":indices, "dataType":"hash"}
 
		binResponse=b"".join(hashes)
 

	
 
		return (jsonResponse,binResponse)
 

	
 
	def _requestData(self,index):
 
		log.info("received request for data block #{0}".format(index))
 

	
 
		jsonResponse={"command":"send", "index":index, "dataType":"data"}
 
		if self._lastIndex+1!=index:
 
			self._dataFile.seek(index*self.BLOCK_SIZE)
 
		binResponse=self._dataFile.read(self.BLOCK_SIZE)
 

	
 
		return (jsonResponse,binResponse)
 

	
 
	def _receiveData(self,jsonData,binData):
 
		log.info("received data block #{0}: {1}...{2}".format(jsonData["index"],binData[:5],binData[-5:]))
 

	
 
		i=jsonData["index"]
 
		if self._lastIndex+1!=i:
 
			self._dataFile.seek(i*self.BLOCK_SIZE)
 
		self._dataFile.write(binData)
 
		self._lastIndex=i
 
		if self._treeFile:
 
			self._newLeaves[i+self._tree.leafStart]=hashlib.sha256(binData).digest()[HashTree.HASH_LEN:]
 
			self._newLeaves[i+self._tree.leafStart]=hashBlock(binData)
 

	
 
		return ({"command": "ack", "index": i},)
 

	
 
	def _finalize(self):
 
		log.info("closing session...")
 
		self._dataFile.close()
 
		self._dataFileHandle=None
 
		if self._treeFile:
 
			log.info("updating hash tree...")
 
			for (k,v) in self._newLeaves.items():
 
				self._tree.updateLeaf(k, v)
 
			self._tree.save(self._treeFile)
 
		log.info("done")
0 comments (0 inline, 0 general)