@@ -41,79 +41,94 @@ def chunks(fileName):
data=f.read(BLOCK_SIZE)
while data:
yield hashlib.sha256(data).digest()[:HASH_LEN]
def hashes():
    """Yield the stored sector hashes in ascending `sector_id` order.

    Each yielded item is the row returned by the query, i.e. a one-element
    tuple ``(sector_hash,)``.  The database connection is closed when the
    generator is exhausted or closed.
    """
    db=sqlite3.connect(DB)
    try:
        handle=db.cursor()
        handle.execute("""select `sector_hash` from `hashes` order by `sector_id` asc""")
        # Iterating the cursor fetches one row at a time; the previous
        # fetchone()-once loop never advanced and yielded the first row forever.
        for row in handle:
            yield row
    finally:
        db.close()
def findChanges(fileName):
    """Detect sectors whose hash differs from the stored one and record them.

    The hashes produced by chunks(fileName) are compared pairwise with the
    rows produced by hashes().  Every differing sector is written to the log
    file (module-level `logFile`) and its row in the `hashes` table is
    updated with the new hash and `dirty`=1.

    Args:
        fileName: passed through to chunks() to produce the current hashes.
    """
    changelist=[]
    # build changelist
    # can use up to (HASH_LEN+size(int))*(filesize/BLOCK_SIZE) bytes of memory plus overhead
    # that's (8+4)*(8*2**30 / 4096) = 24MB for defaults
    saved=hashes()
    try:
        for (i,(dataHash,savedRow)) in enumerate(itertools.zip_longest(chunks(fileName),saved)):
            # The file has fewer sectors than the DB; stop before recording
            # a (sector, None) entry.
            if dataHash is None: break # shouldn't happen
            # hashes() yields whole rows, so unpack the single column before
            # comparing; comparing bytes to a tuple is always unequal.
            savedHash=savedRow[0] if savedRow is not None else None
            if dataHash!=savedHash:
                changelist.append((i,dataHash))
    finally:
        # Release the reader's connection before the DB is written below.
        saved.close()
    # write log
    with open(logFile, mode="w") as f:
        f.write("sector hash\n")
        for (i,dataHash) in changelist:
            f.write("{0}\t{1}\n".format(i,dataHash))
    # update DB
    # NOTE(review): sectors without an existing row (savedRow is None) match
    # no row in this UPDATE and are silently skipped -- confirm whether new
    # sectors should be inserted instead.
    db=sqlite3.connect(DB)
    try:
        handle=db.cursor()
        handle.executemany(
            """update `hashes` set `sector_hash`=:hash, `dirty`=1 where `sector_id`=:id""",
            ({"id":i,"hash":dataHash} for (i,dataHash) in changelist)
        )
        db.commit()
    finally:
        db.close()
def transferChanges(targetPath):
    """Copy every dirty sector of the source file below targetPath and mark it clean.

    For each row of `hashes` with `dirty`=1, the sector is read from the
    module-level `fileName` at byte offset ``sector_id * BLOCK_SIZE`` and
    written to ``targetPath / getPath(sector_id)``; the row is then updated
    to `dirty`=0.  The updates are committed once, after all sectors have
    been written.

    Args:
        targetPath: directory under which the per-sector files are created.
    """
    targetPath=pathlib.Path(targetPath)
    db=sqlite3.connect(DB)
    try:
        with open(fileName,mode="rb") as sf:
            handle=db.cursor()
            # read changes
            handle.execute("""select `sector_id` from `hashes` where `dirty`=1 order by `sector_id` asc""")
            sectorIds=[sectorId for (sectorId,) in handle.fetchall()]
            # transfer modified sectors and mark them as clean
            for sectorId in sectorIds:
                # Sector ids count blocks, not bytes.
                sf.seek(sectorId*BLOCK_SIZE)
                sector=sf.read(BLOCK_SIZE)
                # Ids are ascending, so the first sector past the end of the
                # file means the remaining ones are past it too.
                if not sector: break
                path=targetPath / getPath(sectorId)
                path.parent.mkdir(parents=True,exist_ok=True)
                with path.open(mode="wb") as df:
                    df.write(sector)
                handle.execute("""update `hashes` set `dirty`=0 where `sector_id`=?""",(sectorId,))
        db.commit()
    finally:
        # Closing without a commit discards the dirty=0 updates, so a failed
        # run leaves its sectors marked dirty and they are transferred again.
        db.close()
def getPath(index):
    """Return the nested relative path for a sector index.

    One path component is produced per level, for k = 1, FILES_PER_DIR,
    FILES_PER_DIR**2, ... while k <= FILE_COUNT.  The component for a level
    is ``index // k`` zero-padded to a fixed width, and the components are
    joined from the coarsest level (largest k) down to the index itself.

    Args:
        index: sector index (an int).

    Returns:
        A relative pathlib.Path whose last component is the padded index.
    """
    nodeIds=[]
    k=1
    while k<=FILE_COUNT:
        # Width of the largest id at this level, assuming indexes run from
        # 0 to FILE_COUNT-1.  Integer digit counting stays correct when
        # FILE_COUNT is not a power of ten.
        paddedLen=len(str(max(FILE_COUNT-1,0)//k))
        nodeIds.append("{0:0{1}d}".format(index//k,paddedLen))
        k*=FILES_PER_DIR
    nodeIds.reverse()
    return pathlib.Path(*nodeIds)
# Command-line entry point: <script> ACTION FILE [TARGET_DIR]
if len(sys.argv)<3:
    sys.exit("usage: {0} init|update|transfer FILE [TARGET_DIR]".format(sys.argv[0]))
action=sys.argv[1]
fileName=sys.argv[2]
# Strip only the final extension of the last path component.  Splitting the
# whole string on "." gave an empty base name for names without a dot and
# cut the path short when only a directory name contained one.
baseName=str(pathlib.PurePath(fileName).with_suffix(""))
hashFile="{0}-hash.dat".format(baseName)
isoDate=datetime.datetime.now().strftime("%Y%m%dT%H%M%S")
logFile="{0}-{1}.log".format(baseName,isoDate)
if action=="init": initHashes(fileName)
elif action=="update": findChanges(fileName)
elif action=="transfer":
    # The target directory is a required third argument for this action.
    if len(sys.argv)<4:
        sys.exit("usage: {0} transfer FILE TARGET_DIR".format(sys.argv[0]))
    transferChanges(sys.argv[3])
else: print("bad action")
Status change: