# HG changeset patch
# User Laman
# Date 2016-02-24 20:42:39
# Node ID 72963904714e2c92b1a0e0b744800d162db1eb87
# Parent  a52fefe61468285c512773de2b4c7130a7cab54b

Rewrite hash storage to use the SQLite `hashes` table instead of a flat hash file; add the `transfer` action

diff --git a/work.py b/work.py
--- a/work.py
+++ b/work.py
@@ -4,6 +4,7 @@ import datetime
 import sqlite3
 import math
 import pathlib
+import itertools
 
 BLOCK_SIZE=4096
 HASH_LEN=8
@@ -24,10 +25,16 @@ def initDB():
 	c.execute("""create index if not exists `dirty_index` on `hashes` (`dirty`)""")
 	conn.commit()
 
-def initHashes(fileName, hashFile):
-	with open(hashFile, mode="bw") as f:
-		for chunkHash in chunks(fileName):
-			f.write(chunkHash)
+def initHashes(fileName):
+	initDB()
+			
+	with sqlite3.connect(DB) as db:
+		handle=db.cursor()
+		handle.executemany(
+			"""insert into `hashes` (`sector_id`,`sector_hash`,`dirty`) values (:id,:hash,1)""",
+			({"id":i,"hash":dataHash} for (i,dataHash) in enumerate(chunks(fileName)))
+		)
+		db.commit()
 
 def chunks(fileName):
 	with open(fileName, mode="br") as f:
@@ -36,12 +43,7 @@ def chunks(fileName):
 			yield hashlib.sha256(data).digest()[:HASH_LEN]
 			data=f.read(BLOCK_SIZE)
 
-def hashes(filename):
-#	with open(filename, mode="br") as f:
-#		hashBytes=f.read(HASH_LEN)
-#		while hashBytes:
-#			yield hashBytes
-#			hashBytes=f.read(HASH_LEN)
+def hashes():
 	with sqlite3.connect(DB) as db:
 		handle=db.cursor()
 		handle.execute("""select `sector_hash` from `hashes` order by `sector_id` asc""")
@@ -50,15 +52,16 @@ def hashes(filename):
 			yield h
 			h=handle.fetchone()
 
-def compare(fileName, hashFile):
+def findChanges(fileName):
 	changelist=[]
 
 	# build changelist
 	# can use up to (HASH_LEN+size(int))*(filesize/BLOCK_SIZE) bytes of memory plus overhead
 	# that's (8+4)*(8*2**30 / 4096) = 24MB for defaults
-	for (i,(dataHash,savedHash)) in enumerate(zip(chunks(fileName),hashes(hashFile))):
+	for (i,(dataHash,savedHash)) in enumerate(itertools.zip_longest(chunks(fileName),hashes())):
 		if dataHash!=savedHash:
 			changelist.append((i,dataHash))
+		if dataHash is None: break # DB holds more hashes than the file has blocks; shouldn't happen
 
 	# write log
 	with open(logFile, mode="w") as f:
@@ -69,41 +72,38 @@ def compare(fileName, hashFile):
 	# update DB
 	with sqlite3.connect(DB) as db:
 		handle=db.cursor()
-		handle.executemany("""update `hashes` set `sector_hash`=':hash', `dirty`=1 where `sector_id`=:id""",
-			{"sector_id":i,"sector_hash":dataHash} for (i,dataHash) in changelist)
+		handle.executemany(
+			"""update `hashes` set `sector_hash`=:hash, `dirty`=1 where `sector_id`=:id""",
+			({"id":i,"hash":dataHash} for (i,dataHash) in changelist)
+		)
 		db.commit()
-			
-	# update hashFile
-#	with open(hashFile, mode="r+b") as f:
-#		for (i,dataHash) in changelist:
-#			f.seek(i*HASH_LEN)
-#			f.write(dataHash)
 	
-def transferChanges():
+def transferChanges(targetPath):
 	# read changes
 	with sqlite3.connect(DB) as db, open(fileName,mode="rb") as sf:
 		handle=db.cursor()
-		handle.execute("""select `hash_id` from `hashes` where `dirty`=1""")
+		handle.execute("""select `sector_id` from `hashes` where `dirty`=1""")
 			
 		# transfer modified sectors and mark them as clean
 		sectorIds=handle.fetchall()
-		for sectorId in sectorIds:
-			path=getPath(sectorId)
-			path.parent.mkdir(parents=True,exist_ok=True)
+		for (sectorId,) in sectorIds:
+			path=targetPath / getPath(sectorId)
+			try: path.parent.mkdir(parents=True)
+			except FileExistsError: pass
 			df=path.open(mode="wb")
 			sf.seek(sectorId)
 			df.write(sf.read(BLOCK_SIZE))
-			handle.execute("""update `hashes` set `dirty`=0 where `hash_id`=?""",sectorId)
+			handle.execute("""update `hashes` set `dirty`=0 where `sector_id`=?""",(sectorId,))
 			db.commit()
 
 def getPath(index):
 	nodeIds=[]
 	k=1
-	for i in range(math.ceil(math.log(FILE_COUNT)/math.log(k))):
+	while k<=FILE_COUNT:
 		nodeIds.append(index//k)
 		k*=FILES_PER_DIR
 	nodeIds.reverse()
-	return pathlib.Path(*nodeIds)
+	return pathlib.Path(*[str(id) for id in nodeIds])
 
 action=sys.argv[1]
 fileName=sys.argv[2]
@@ -112,7 +112,8 @@ hashFile="{0}-hash.dat".format(baseName)
 isoDate=datetime.datetime.now().strftime("%Y%m%dT%H%M%S")
 logFile="{0}-{1}.log".format(baseName,isoDate)
 
-if action=="init": initHashes(fileName, hashFile)
-elif action=="update": compare(fileName, hashFile)
+if action=="init": initHashes(fileName)
+elif action=="update": findChanges(fileName)
+elif action=="transfer": transferChanges(sys.argv[3])
 else: print("bad action")