Files @ a52fefe61468
Branch filter:

Location: Morevna/work.py - annotation

Laman
work in progress
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
a52fefe61468
import hashlib
import sys
import datetime
import sqlite3
import math
import pathlib

BLOCK_SIZE=4096  # bytes per sector; each sector is hashed as one unit
HASH_LEN=8  # stored hash = first 8 bytes of the SHA-256 digest
DB="morevna.db"  # sqlite database holding per-sector hashes and dirty flags
FILES_PER_DIR=100  # fan-out of the sector-file directory tree built by getPath
FILE_COUNT=2**21  # expected upper bound on sector count (sizes the tree depth)

def initDB():
	"""Create the hash-tracking schema if it does not exist yet.

	Creates table `hashes` (sector_id -> truncated sector hash plus a
	0/1 `dirty` flag) and an index on `dirty` so modified sectors can
	be found quickly by transferChanges().
	"""
	# fixed: use the DB constant instead of a duplicated literal path,
	# and close the connection via the context manager instead of leaking it
	with sqlite3.connect(DB) as conn:
		c=conn.cursor()
		c.execute("""create table if not exists `hashes` (
			`sector_id` integer primary key,
			`sector_hash` blob not null,
			`dirty` integer check(`dirty` in (0,1))
		)""")
		c.execute("""create index if not exists `dirty_index` on `hashes` (`dirty`)""")
		conn.commit()

def initHashes(fileName, hashFile):
	"""Hash every sector of fileName and store the digests in hashFile."""
	with open(hashFile, mode="bw") as sink:
		sink.writelines(chunks(fileName))

def chunks(fileName):
	"""Yield the HASH_LEN-byte truncated SHA-256 of each BLOCK_SIZE sector of fileName."""
	with open(fileName, mode="br") as src:
		while True:
			block=src.read(BLOCK_SIZE)
			if not block:
				break
			yield hashlib.sha256(block).digest()[:HASH_LEN]

def hashes(filename):
	"""Yield the stored sector hashes (bytes) from the DB, ordered by sector_id.

	NOTE(review): `filename` is kept for interface compatibility with the
	older file-based implementation but is not used — hashes now come
	from the sqlite database `DB`.
	"""
	with sqlite3.connect(DB) as db:
		handle=db.cursor()
		handle.execute("""select `sector_hash` from `hashes` order by `sector_id` asc""")
		row=handle.fetchone()
		while row is not None:
			# fixed: fetchone() returns a 1-tuple; yield the raw hash bytes
			# so compare() can match them against chunks() output — the
			# original yielded tuples, making every comparison unequal
			yield row[0]
			row=handle.fetchone()

def compare(fileName, hashFile):
	"""Find sectors of fileName whose hash differs from the stored one.

	Writes a tab-separated changelog to the module-level `logFile`
	(set by the command-line entry point) and marks the changed sectors
	dirty in the DB with their new hashes.
	"""
	changelist=[]

	# build changelist
	# can use up to (HASH_LEN+size(int))*(filesize/BLOCK_SIZE) bytes of memory plus overhead
	# that's (8+4)*(8*2**30 / 4096) = 24MB for defaults
	for (i,(dataHash,savedHash)) in enumerate(zip(chunks(fileName),hashes(hashFile))):
		if dataHash!=savedHash:
			changelist.append((i,dataHash))

	# write log
	with open(logFile, mode="w") as f:
		f.write("sector	hash\n")
		for (i,dataHash) in changelist:
			f.write("{0}\t{1}\n".format(i,dataHash))

	# update DB
	# fixed: the original quoted the hash placeholder (storing the literal
	# string ':hash') and used placeholder names (:hash, :id) that did not
	# match the parameter dict keys, so executemany always failed
	with sqlite3.connect(DB) as db:
		handle=db.cursor()
		handle.executemany(
			"""update `hashes` set `sector_hash`=:sector_hash, `dirty`=1 where `sector_id`=:sector_id""",
			({"sector_id":i,"sector_hash":dataHash} for (i,dataHash) in changelist))
		db.commit()
	
def transferChanges():
	"""Copy every dirty sector of the module-level `fileName` into its
	per-sector destination file (see getPath) and mark it clean.

	NOTE(review): relies on the global `fileName` set by the command-line
	entry point — consider passing it as a parameter.
	"""
	with sqlite3.connect(DB) as db, open(fileName,mode="rb") as sf:
		handle=db.cursor()
		# fixed: the column is `sector_id` — the table has no `hash_id`
		handle.execute("""select `sector_id` from `hashes` where `dirty`=1""")

		# transfer modified sectors and mark them as clean
		# fixed: fetchall() rows are 1-tuples; unpack the bare id
		sectorIds=[row[0] for row in handle.fetchall()]
		for sectorId in sectorIds:
			path=getPath(sectorId)
			path.parent.mkdir(parents=True,exist_ok=True)
			# fixed: seek takes a byte offset, not a sector index
			sf.seek(sectorId*BLOCK_SIZE)
			# fixed: close the destination file instead of leaking the handle
			with path.open(mode="wb") as df:
				df.write(sf.read(BLOCK_SIZE))
			handle.execute("""update `hashes` set `dirty`=0 where `sector_id`=?""",(sectorId,))
			db.commit()

def getPath(index):
	"""Map a sector index to a nested relative path so that no directory
	holds more than FILES_PER_DIR entries.

	Each path level i (from the leaf up) is index // FILES_PER_DIR**i,
	so the leaf component is the sector index itself.
	"""
	nodeIds=[]
	k=1
	# fixed: the tree depth must divide by log(FILES_PER_DIR); the original
	# divided by math.log(k) with k==1, i.e. by zero, raising on every call
	depth=math.ceil(math.log(FILE_COUNT)/math.log(FILES_PER_DIR))
	for i in range(depth):
		# fixed: Path components must be strings, not ints
		nodeIds.append(str(index//k))
		k*=FILES_PER_DIR
	nodeIds.reverse()
	return pathlib.Path(*nodeIds)

# Command-line entry point.
# usage: work.py (init|update) <file>
# fixed: guard against missing arguments instead of crashing with IndexError.
# NOTE: fileName/logFile stay module-level globals because compare() and
# transferChanges() read them.
if len(sys.argv)<3:
	print("usage: {0} (init|update) <file>".format(sys.argv[0]))
	sys.exit(1)

action=sys.argv[1]
fileName=sys.argv[2]
# strip the last extension to build companion file names
# (NOTE(review): a fileName with no '.' yields an empty baseName — confirm intent)
baseName=".".join(fileName.split(".")[:-1])
hashFile="{0}-hash.dat".format(baseName)
isoDate=datetime.datetime.now().strftime("%Y%m%dT%H%M%S")
logFile="{0}-{1}.log".format(baseName,isoDate)

if action=="init": initHashes(fileName, hashFile)
elif action=="update": compare(fileName, hashFile)
else: print("bad action")