Changeset - bb3b53ee15d6
[Not reviewed]
default
0 3 1
Laman - 7 years ago 2018-01-21 00:42:05

batch pull
4 files changed with 53 insertions and 21 deletions:
0 comments (0 inline, 0 general)
src/client.py
Show inline comments
 
import collections
 
import socket
 
import ssl
 
import logging as log
 
from datetime import datetime
 

	
 
import config as conf
 
import status
 
import stats
 
from util import Progress
 
from hashtree import HashTree,hashBlock
 
from netnode import BaseConnection,NetNode,FailedConnection,LockedException,IncompatibleException
 
from datafile import DataFile
 

	
 

	
 
class DeniedConnection(Exception): pass
 

	
 

	
 
class Connection(BaseConnection):
 
	def __init__(self,host,port):
 
		super().__init__()
 
		sock=socket.socket(socket.AF_INET, socket.SOCK_STREAM)
 

	
 
		sslContext=ssl.create_default_context(cafile=conf.peers)
 
		sslContext.check_hostname=False
 
		sslContext.load_cert_chain(conf.certfile,conf.keyfile)
 

	
 
		self._socket=sslContext.wrap_socket(sock)
 

	
 
		try:
 
			self._socket.connect((host,port))
 
		except (ConnectionRefusedError,OSError) as e:
 
			log.exception(e)
 
			print("Couldn't connect to {0}:{1}".format(host,port))
 
			raise FailedConnection()
 
		except ssl.SSLError as e:
 
			log.exception(e)
 
			print("Error creating SSL connection to {0}:{1}".format(host,port))
 
			raise FailedConnection()
 

	
 
		self.createNetworkers()
 
		print("Connected to {0}".format(host))
 

	
 

	
 
class Client(NetNode):
 
	def __init__(self,filename,treeFile=""):
 
		print(datetime.now(), "initializing...")
 
		super().__init__(filename,treeFile)
 

	
 
	def init(self,action):
 
		jsonData={"command":"init", "blockSize":self._tree.BLOCK_SIZE, "blockCount":self._tree.leafCount, "version":conf.version, "action":action}
 
		self._outcoming.writeMsg(jsonData)
 
		jsonData,binData=self._incoming.readMsg()
 
		if jsonData["command"]=="deny":
 
			if jsonData["status"]==status.incompatible.version:
 
				raise DeniedConnection("Incompatible client version. Consider upgrading it.")
 
			raise DeniedConnection()
 
		assert jsonData["command"]=="init"
 
		if jsonData["version"]<conf.lowestCompatible:
 
			raise IncompatibleException("Incompatible server version. Consider upgrading it.")
 

	
 
	## Asks server for node hashes to determine which are to be transferred.
 
	#
 
	# Uses a binary HashTree, where item at k is hash of items at 2k+1, 2k+2.
 
	#
 
	# Requests nodes in order of a batch DFS. Needs stack of size O(treeDepth*batchSize). Nodes in each tree level are accessed in order.
 
	def negotiate(self):
 
		localTree=self._tree
 
		blocksToTransfer=[]
 
		nodeStack=collections.deque([0]) # root
 

	
 
		# determine which blocks to send
 
		print(datetime.now(), "negotiating:")
 
		progress=Progress(localTree.leafCount)
 
		while len(nodeStack)>0:
 
			indices=[]
 
			for i in range(conf.batchSize):
 
				indices.append(nodeStack.pop())
 
				if len(nodeStack)==0: break
 
			self._outcoming.writeMsg({"command":"req", "index":indices, "dataType":"hash"})
 

	
 
			jsonData,binData=self._incoming.readMsg()
 
			assert jsonData["index"]==indices
 
			assert jsonData["dataType"]=="hash"
 
			stats.logExchangedNode(len(indices))
 

	
 
			frontier=[]
 
			for (j,i) in enumerate(indices):
 
				(j1,j2)=[HashTree.HASH_LEN*ji for ji in (j,j+1)]
 
				if localTree.store[i]!=binData[j1:j2]:
 
					# ie. 0-6 nodes, 7-14 leaves. 2*6+2<15
 
					if 2*i+2<len(localTree.store): # inner node
 
						frontier.append(2*i+1)
 
						frontier.append(2*i+2)
 
					else:
 
						blocksToTransfer.append(i-localTree.leafStart) # leaf
 
						progress.p(i-localTree.leafStart)
 
			nodeStack.extend(reversed(frontier))
 
		progress.done()
 

	
 
		size=stats.formatBytes(len(blocksToTransfer)*self._tree.BLOCK_SIZE)
 
		print(datetime.now(), "{0} to transfer".format(size))
 

	
 
		return blocksToTransfer
 

	
 
	def sendData(self,blocksToTransfer):
 
		log.info(blocksToTransfer)
 
		dataFile=open(self._filename, mode="rb")
 
		i1=-1
 

	
 
		print(datetime.now(), "sending data:")
 
		progress=Progress(len(blocksToTransfer))
 

	
 
		for k in range(0,len(blocksToTransfer),conf.batchSize):
 
			indices=[]
 
			blocks=[]
 
			for j in range(conf.batchSize):
 
				if k+j>=len(blocksToTransfer): break
 
				i2=blocksToTransfer[k+j]
 
				if i1+1!=i2:
 
					dataFile.seek(i2*HashTree.BLOCK_SIZE)
 
				block=dataFile.read(HashTree.BLOCK_SIZE)
 

	
 
				indices.append(i2)
 
				blocks.append(block)
 
				log.info("block #{0}: {1}...{2}".format(i2,block[:5],block[-5:]))
 

	
 
				i1=i2
 
				progress.p(k+j)
 
			if indices: self._sendData(indices,blocks)
 
		progress.done()
 

	
 
		self._outcoming.writeMsg({"command":"end","action":"push"})
 

	
 
		log.info("closing session...")
 
		dataFile.close()
 

	
 
	def pullData(self,blocksToTransfer,ignoreLock=False):
 
		if not ignoreLock:
 
			try:
 
				self._lock()
 
			except LockedException:
 
				print("The file is locked. Either (a) there's another pull going on (then wait or kill it), or (b) a previous pull ended prematurely and the file is probably corrupt (then repeat pull with -f for force).")
 
				return
 
		log.info(blocksToTransfer)
 
		dataFile=open(self._filename, mode="rb+")
 
		i1=-1
 
		dataFile=DataFile.open(self._filename, mode="rb+")
 

	
 
		print(datetime.now(), "receiving data:")
 
		progress=Progress(len(blocksToTransfer))
 
		for (k,i2) in enumerate(blocksToTransfer):
 
			self._outcoming.writeMsg({"command":"req", "index":i2, "dataType":"data"})
 
			jsonData,binData=self._incoming.readMsg()
 
			assert jsonData["command"]=="send" and jsonData["index"]==i2 and jsonData["dataType"]=="data", jsonData
 

	
 
			if i1+1!=i2:
 
				dataFile.seek(i2*HashTree.BLOCK_SIZE)
 
			dataFile.write(binData)
 

	
 
			if self._treeFile:
 
				self._newLeaves[i2+self._tree.leafStart]=hashBlock(binData)
 
		for k in range(0,len(blocksToTransfer),conf.batchSize):
 
			indices=blocksToTransfer[k:k+conf.batchSize]
 
			self._outcoming.writeMsg({"command":"req", "index":indices, "dataType":"data"})
 
			jsonData,binData=self._incoming.readMsg()
 
			assert jsonData["command"]=="send" and jsonData["index"]==indices and jsonData["dataType"]=="data", jsonData
 
			for (j,i) in enumerate(indices):
 
				block=binData[j*HashTree.BLOCK_SIZE:(j+1)*HashTree.BLOCK_SIZE]
 
				dataFile.writeAt(i,block)
 

	
 
			log.info("block #{0}: {1}...{2}".format(i2,binData[:5],binData[-5:]))
 
				if self._treeFile:
 
					self._newLeaves[i+self._tree.leafStart]=hashBlock(block)
 

	
 
			stats.logTransferredBlock()
 
			i1=i2
 
			progress.p(k)
 
				log.info("block #{0}: {1}...{2}".format(i,block[:5],block[-5:]))
 

	
 
				stats.logTransferredBlock()
 
				progress.p(k+j)
 
		progress.done()
 

	
 
		self._outcoming.writeMsg({"command":"end"})
 

	
 
		log.info("closing session...")
 
		dataFile.close()
 
		self._unlock()
 

	
 
		if self._treeFile:
 
			self._updateTree()
 

	
 
	def _sendData(self,indices,blocks):
 
		jsonData={"command":"send", "index":indices, "dataType":"data"}
 
		binData=b"".join(blocks)
 
		self._outcoming.writeMsg(jsonData,binData)
 
		stats.logTransferredBlock(len(indices))
 
		jsonData,binData=self._incoming.readMsg()
 
		assert jsonData["command"]=="ack" and jsonData["index"]==indices, jsonData
 

	
 
	def setConnection(self,connection):
 
		(self._incoming,self._outcoming)=connection
src/datafile.py
Show inline comments
 
new file 100644
 
from hashtree import HashTree
 

	
 

	
 
BLOCK_SIZE=HashTree.BLOCK_SIZE
 

	
 

	
 
class DataFile:
 
	def __init__(self,fileHandle):
 
		self._lastIndex=0
 
		self._f=fileHandle
 

	
 
	@staticmethod
 
	def open(filename,mode="rb"):
 
		return DataFile(open(filename,mode=mode))
 

	
 
	def writeAt(self,i,blockData):
 
		if i!=self._lastIndex+1:
 
			self._f.seek(i*BLOCK_SIZE)
 
		self._f.write(blockData)
 
		self._lastIndex=i
 

	
 
	def readFrom(self,i,byteCount=BLOCK_SIZE):
 
		if i!=self._lastIndex+1:
 
			self._f.seek(i*BLOCK_SIZE)
 
		self._lastIndex=i
 
		return self._f.read(byteCount)
 

	
 
	def close(self):
 
		self._f.close()
src/morevna.py
Show inline comments
 
import sys
 
import os.path
 
import logging as log
 
from argparse import ArgumentParser
 

	
 
from util import spawnDaemon, splitHost
 
import config as conf
 
import stats
 
from hashtree import HashTree
 
from client import Client, Connection as ClientConnection, FailedConnection, DeniedConnection
 
from client import Client, Connection as ClientConnection, FailedConnection, DeniedConnection, IncompatibleException
 
from server import Miniserver
 

	
 

	
 
def _checkFile(f):
 
	if not os.path.isfile(f):
 
		print("invalid file specified:",f,file=sys.stderr)
 
		sys.exit(1)
 

	
 

	
 
def buildTree(args):
 
	_checkFile(args.datafile)
 
	if os.path.isfile(args.treefile):
 
		treeMod=os.stat(args.treefile).st_mtime
 
		dataMod=os.stat(args.datafile).st_mtime
 
		if dataMod<treeMod and not args.force:
 
			print("tree file is up to date")
 
			return
 

	
 
	tree=HashTree.fromFile(args.datafile)
 
	tree.save(args.treefile)
 

	
 
def push(args):
 
	_checkFile(args.datafile)
 
	if args.tree:
 
		_checkFile(args.tree)
 
	if args.host: conf.hosts=[args.host]
 
	if args.port: conf.port=args.port
 

	
 
	c=Client(args.datafile,args.tree)
 
	for h in conf.hosts:
 
		host=splitHost(h,conf.port)
 
		stats.reset()
 
		try:
 
			with ClientConnection(*host) as con:
 
				c.setConnection(con)
 
				c.init("push")
 
				blocksToTransfer=c.negotiate()
 
				c.sendData(blocksToTransfer)
 
			print()
 
			print(stats.report())
 
			print()
 
		except FailedConnection: pass
 
		except DeniedConnection as e:
 
			print("Server {0}:{1} denied connection.".format(*host))
 
			print(e)
 
		except IncompatibleException as e: print(e)
 

	
 
def pull(args):
 
	_checkFile(args.datafile)
 
	if args.tree:
 
		_checkFile(args.tree)
 
	if args.host: conf.hosts=[args.host]
 
	if args.port: conf.port=args.port
 

	
 
	c=Client(args.datafile,args.tree)
 
	host=splitHost(conf.hosts[0],conf.port)
 
	try:
 
		with ClientConnection(*host) as con:
 
			c.setConnection(con)
 
			c.init("pull")
 
			blocksToTransfer=c.negotiate()
 
			c.pullData(blocksToTransfer,args.force)
 
		print()
 
		print(stats.report())
 
	except FailedConnection: pass
 
	except DeniedConnection as e:
 
		print("Server {0}:{1} denied connection.".format(*host))
 
		print(e)
 

	
 
def serve(args):
 
	_checkFile(args.datafile)
 
	if args.tree:
 
		_checkFile(args.tree)
 
	if args.host: conf.hosts.insert(0,args.host)
 
	if args.port: conf.port=args.port
 

	
 
	s=Miniserver(args.datafile,args.tree)
 
	try:
 
		spawnDaemon(s.serve)
 
	except Exception as e:
 
		log.exception("exception: %s",e)
 

	
 

	
 
parser=ArgumentParser()
 
subparsers=parser.add_subparsers()
 

	
 
pBuild=subparsers.add_parser("build")
 
pBuild.add_argument("-f","--force",action="store_true",help="force tree rebuild")
 
pBuild.add_argument("treefile", help="stored hash tree location")
 
pBuild.add_argument("datafile")
 
pBuild.set_defaults(func=buildTree)
 

	
 
pUpdate=subparsers.add_parser("push")
 
pUpdate.add_argument("-p","--port",type=int)
 
pUpdate.add_argument("--host")
 
pUpdate.add_argument("-t","--tree",help="stored hash tree location")
 
pUpdate.add_argument("datafile")
 
pUpdate.set_defaults(func=push)
 

	
 
pUpdate=subparsers.add_parser("pull")
 
pUpdate.add_argument("-p","--port",type=int)
 
pUpdate.add_argument("--host")
 
pUpdate.add_argument("-t","--tree",help="stored hash tree location")
 
pUpdate.add_argument("-f","--force",action="store_true",help="ignore lock file")
 
pUpdate.add_argument("datafile")
 
pUpdate.set_defaults(func=pull)
 

	
 
pServe=subparsers.add_parser("serve")
 
pServe.add_argument("-p","--port",type=int)
 
pServe.add_argument("--host")
 
pServe.add_argument("-t","--tree",help="stored hash tree location")
 
pServe.add_argument("datafile")
 
pServe.set_defaults(func=serve)
 

	
 
args=parser.parse_args()
 
args.func(args)
src/server.py
Show inline comments
 
@@ -30,124 +30,126 @@ class Miniserver:
 
		self._ssl.load_cert_chain(conf.certfile,conf.keyfile)
 

	
 
		self._ss = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
 
		self._ss.bind(("", conf.port))
 
		self._ss.listen(1)
 

	
 
	def serve(self):
 
		p=None
 
		with self._ss:
 
			while True:
 
				connection=Connection(self._ss,self._ssl)
 
				if p and p.is_alive():
 
					with connection as c:
 
						c[0].readMsg()
 
						c[1].writeMsg({"command":"deny","status":status.locked})
 
					continue
 
				p=multiprocessing.Process(target=Server.run,args=(connection,self._filename,self._treeFile))
 
				p.start()
 

	
 

	
 
class Server(NetNode):
 
	def __init__(self,connection,filename,treeFile=""):
 
		super().__init__(filename,treeFile)
 
		(self._incoming,self._outcoming)=connection
 

	
 
		self.BLOCK_SIZE=self._tree.BLOCK_SIZE
 

	
 
		self._lastIndex=-1
 
		self._dataFileHandle=None
 

	
 
	@staticmethod
 
	def run(connection,*args):
 
		with connection as c:
 
			s=Server(c,*args)
 
			s.serve()
 

	
 
	@property
 
	def _dataFile(self):
 
		if not self._dataFileHandle:
 
			self._dataFileHandle=open(self._filename, mode="rb+")
 
		return self._dataFileHandle
 

	
 
	def serve(self):
 
		try:
 
			while self._serveOne(): pass
 
		except (AssertionError,ConnectionResetError) as e:
 
			log.warning(e)
 

	
 
	def _serveOne(self):
 
		jsonData,binData=self._incoming.readMsg()
 

	
 
		if jsonData["command"]=="init":
 
			if jsonData["blockSize"]!=self.BLOCK_SIZE or jsonData["blockCount"]!=self._tree.leafCount:
 
				self._outcoming.writeMsg({"command":"deny","status":status.incompatible.parameters})
 
			if jsonData["version"]<conf.lowestCompatible:
 
				self._outcoming.writeMsg({"command":"deny","status":status.incompatible.version})
 
			if jsonData["action"]=="pull" and self.isLocked():
 
				self._outcoming.writeMsg({"command":"deny","status":status.locked})
 
			if jsonData["action"]=="push" and not self.isLocked():
 
				self._lock()
 

	
 
			self._outcoming.writeMsg({"command":"init", "version":conf.version})
 

	
 
		elif jsonData["command"]=="req":
 
			if jsonData["dataType"]=="data":
 
				self._outcoming.writeMsg(*self._requestData(jsonData["index"]))
 
			else:
 
				self._outcoming.writeMsg(*self._requestHash(jsonData["index"]))
 

	
 
		elif jsonData["command"]=="send" and jsonData["dataType"]=="data":
 
			self._outcoming.writeMsg(*self._receiveData(jsonData,binData))
 

	
 
		elif jsonData["command"]=="end":
 
			self._finalize()
 
			if jsonData.get("action")=="push": self._unlock()
 
			return False
 

	
 
		else:
 
			assert False, jsonData["command"]
 

	
 
		return True
 

	
 
	def _requestHash(self,indices):
 
		log.info("received request for nodes #{0}".format(",".join(str(i) for i in indices)))
 
		assert all(i<len(self._tree.store) for i in indices)
 
		hashes=[self._tree.store[i] for i in indices]
 

	
 
		jsonResponse={"command":"send", "index":indices, "dataType":"hash"}
 
		binResponse=b"".join(hashes)
 

	
 
		return (jsonResponse,binResponse)
 

	
 
	def _requestData(self,index):
 
		log.info("received request for data block #{0}".format(index))
 

	
 
		jsonResponse={"command":"send", "index":index, "dataType":"data"}
 
		if self._lastIndex+1!=index:
 
			self._dataFile.seek(index*self.BLOCK_SIZE)
 
		binResponse=self._dataFile.read(self.BLOCK_SIZE)
 
		blocks=[]
 
		for i in index:
 
			if self._lastIndex+1!=i:
 
				self._dataFile.seek(i*self.BLOCK_SIZE)
 
			blocks.append(self._dataFile.read(self.BLOCK_SIZE))
 

	
 
		return (jsonResponse,binResponse)
 
		return (jsonResponse,b"".join(blocks))
 

	
 
	def _receiveData(self,jsonData,binData):
 
		if not self.isLocked(): self._lock()
 
		log.info("received data block #{0}: {1}...{2}".format(jsonData["index"],binData[:5],binData[-5:]))
 

	
 
		indices=jsonData["index"]
 
		for (i,k) in enumerate(indices):
 
			if self._lastIndex+1!=k:
 
				self._dataFile.seek(k*self.BLOCK_SIZE)
 
			self._dataFile.write(binData[i*self.BLOCK_SIZE:(i+1)*self.BLOCK_SIZE])
 
			self._lastIndex=k
 
			if self._treeFile:
 
				self._newLeaves[k+self._tree.leafStart]=hashBlock(binData)
 

	
 
		return ({"command": "ack", "index": indices},)
 

	
 
	def _finalize(self):
 
		log.info("closing session...")
 
		self._dataFile.close()
 
		self._dataFileHandle=None
 
		if self._treeFile:
 
			self._updateTree()
 
		log.info("done")
0 comments (0 inline, 0 general)