Files
@ a52fefe61468
Branch filter:
Location: Morevna/work.py - annotation
a52fefe61468
3.2 KiB
text/x-python
work in progress
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 | a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 a52fefe61468 | import hashlib
import sys
import datetime
import sqlite3
import math
import pathlib
# Number of bytes read per block by chunks() and copied per sector by
# transferChanges().
BLOCK_SIZE = 4096
# Number of leading bytes of each SHA-256 digest that chunks() keeps.
HASH_LEN = 8
# Path of the SQLite database holding the per-sector hashes.
DB = "morevna.db"
# Base used by getPath() when splitting a sector index into directory levels.
FILES_PER_DIR = 100
# Used by getPath() to decide how many path levels are needed.
# NOTE(review): presumably the expected number of sectors
# (2**21 blocks of 4096 bytes = 8 GiB) -- confirm.
FILE_COUNT = 2 ** 21
def initDB():
    """Create the `hashes` table and its `dirty` index if they do not exist.

    The database path is taken from the module-level DB constant, the same
    one the other functions in this file open.
    """
    conn = sqlite3.connect(DB)
    try:
        c = conn.cursor()
        c.execute("""create table if not exists `hashes` (
            `sector_id` integer primary key,
            `sector_hash` blob not null,
            `dirty` integer check(`dirty` in (0,1))
        )""")
        c.execute("""create index if not exists `dirty_index` on `hashes` (`dirty`)""")
        conn.commit()
    finally:
        # sqlite3 connections are not closed by garbage-friendly defaults we
        # can rely on; close explicitly so the file handle is released.
        conn.close()
def initHashes(fileName, hashFile):
    """Write the hash of every block of `fileName` into `hashFile`.

    The hashes produced by chunks() are written back to back, in order,
    as raw bytes; `hashFile` is created or truncated.
    """
    with open(hashFile, mode="bw") as out:
        out.writelines(chunks(fileName))
def chunks(fileName):
    """Yield a truncated SHA-256 digest for each successive block of a file.

    The file is read in binary mode, BLOCK_SIZE bytes at a time (the last
    block may be shorter). For each block the first HASH_LEN bytes of its
    SHA-256 digest are yielded. Iteration ends at end of file.
    """
    with open(fileName, mode="br") as source:
        # read() returns b"" at end of file, which is the stop sentinel.
        for block in iter(lambda: source.read(BLOCK_SIZE), b""):
            yield hashlib.sha256(block).digest()[:HASH_LEN]
def hashes(filename):
    """Yield the stored sector hashes from the database, ordered by sector id.

    Each yielded item is the `sector_hash` value itself (not the one-column
    row tuple), so it can be compared directly with the bytes produced by
    chunks().

    `filename` is not used: the hashes are read from the SQLite database
    named by DB. The parameter is kept so existing callers keep working.
    """
    db = sqlite3.connect(DB)
    try:
        handle = db.cursor()
        handle.execute("""select `sector_hash` from `hashes` order by `sector_id` asc""")
        # Rows come back as 1-tuples; unpack so callers get the blob.
        for (sectorHash,) in handle:
            yield sectorHash
    finally:
        # Runs when the generator is exhausted, closed early or collected;
        # `with sqlite3.connect(...)` would only manage the transaction and
        # leave the connection open.
        db.close()
def compare(fileName, hashFile):
    """Find blocks whose hash changed, log them and mark them dirty in the DB.

    The current block hashes of `fileName` (from chunks()) are paired with
    the saved hashes (from hashes(hashFile)); pairing stops at the shorter
    of the two sequences. Every differing block is

    * written to the log file named by the module-level `logFile`, one
      "<sector>\\t<hash>" line per block, and
    * updated in the `hashes` table with its new hash and `dirty`=1.

    Only existing rows are updated; no rows are inserted here.
    """
    # build changelist
    # can use up to (HASH_LEN+size(int))*(filesize/BLOCK_SIZE) bytes of memory plus overhead
    # that's (8+4)*(8*2**30 / 4096) = 24MB for defaults
    changelist = [
        (i, dataHash)
        for (i, (dataHash, savedHash)) in enumerate(zip(chunks(fileName), hashes(hashFile)))
        if dataHash != savedHash
    ]

    # write log
    with open(logFile, mode="w") as f:
        f.write("sector hash\n")
        for (i, dataHash) in changelist:
            f.write("{0}\t{1}\n".format(i, dataHash))

    # update DB
    db = sqlite3.connect(DB)
    try:
        # `with db` commits on success and rolls back on error; it does not
        # close the connection, hence the surrounding try/finally.
        with db:
            # Named placeholders must be unquoted and must match the dict
            # keys; the generator is parenthesised because it is not the
            # only argument of the call.
            db.executemany(
                """update `hashes` set `sector_hash`=:sector_hash, `dirty`=1 where `sector_id`=:sector_id""",
                ({"sector_id": i, "sector_hash": dataHash} for (i, dataHash) in changelist),
            )
    finally:
        db.close()
def transferChanges():
    """Copy every dirty sector of the source file to its own file and mark it clean.

    Reads the ids of all rows in `hashes` with `dirty`=1, and for each one
    copies BLOCK_SIZE bytes starting at byte offset id*BLOCK_SIZE of the
    module-level `fileName` into the path returned by getPath(id), creating
    parent directories as needed. The row is then updated to `dirty`=0.
    All updates are committed together after the loop, so an interrupted
    run leaves the sectors dirty and they are transferred again next time.
    """
    db = sqlite3.connect(DB)
    try:
        with open(fileName, mode="rb") as sf:
            handle = db.cursor()
            # read changes
            handle.execute("""select `sector_id` from `hashes` where `dirty`=1""")
            # fetchall() returns 1-tuples; keep just the integer ids.
            sectorIds = [row[0] for row in handle.fetchall()]

            # transfer modified sectors and mark them as clean
            for sectorId in sectorIds:
                path = getPath(sectorId)
                path.parent.mkdir(parents=True, exist_ok=True)
                # sector ids are block indices, so convert to a byte offset
                sf.seek(sectorId * BLOCK_SIZE)
                with path.open(mode="wb") as df:
                    df.write(sf.read(BLOCK_SIZE))
                handle.execute("""update `hashes` set `dirty`=0 where `sector_id`=?""", (sectorId,))
            db.commit()
    finally:
        db.close()
def getPath(index, fileCount=None, filesPerDir=None):
    """Return the relative path under which the item with `index` is stored.

    The path has one component per level; the component at level n (counted
    from the leaf) is index // filesPerDir**n, so the leaf is named by the
    full index and every directory has at most `filesPerDir` children.
    The number of levels is the smallest n >= 1 with
    filesPerDir**n >= fileCount.

    Example with the defaults (fileCount=2**21, filesPerDir=100):
    getPath(1234567) -> 1/123/12345/1234567

    Parameters:
        index: non-negative integer index of the item.
        fileCount: total number of items; defaults to FILE_COUNT.
        filesPerDir: maximum entries per directory; defaults to FILES_PER_DIR.

    Raises:
        ValueError: if filesPerDir is smaller than 2.
    """
    if fileCount is None:
        fileCount = FILE_COUNT
    if filesPerDir is None:
        filesPerDir = FILES_PER_DIR
    if filesPerDir < 2:
        raise ValueError("filesPerDir must be at least 2")

    # Count the levels with integer arithmetic: a float log quotient can
    # land just above an exact integer and add a spurious level.
    depth = 1
    capacity = filesPerDir
    while capacity < fileCount:
        capacity *= filesPerDir
        depth += 1

    # Leaf first, then reversed so the path runs from the top directory down.
    nodeIds = [str(index // filesPerDir ** level) for level in range(depth)]
    nodeIds.reverse()
    # pathlib only accepts string components, hence the str() above.
    return pathlib.Path(*nodeIds)
# Command-line entry point: `work.py (init|update) FILE`.
# The names assigned here (fileName, hashFile, logFile, ...) stay module-level
# globals because compare() and transferChanges() read them.
if __name__ == "__main__":
    import os

    if len(sys.argv) < 3:
        print("usage: {0} (init|update) FILE".format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)

    action = sys.argv[1]
    fileName = sys.argv[2]
    # Strip only the final extension of the file name; splitext leaves names
    # without an extension (and dots in directory names) untouched.
    baseName = os.path.splitext(fileName)[0]
    hashFile = "{0}-hash.dat".format(baseName)
    isoDate = datetime.datetime.now().strftime("%Y%m%dT%H%M%S")
    logFile = "{0}-{1}.log".format(baseName, isoDate)

    if action == "init":
        initHashes(fileName, hashFile)
    elif action == "update":
        compare(fileName, hashFile)
    else:
        print("bad action")
|