Files
@ 6a0ab4fe9f5e
Branch filter:
Location: Morevna/src/hashtree.py
6a0ab4fe9f5e
2.7 KiB
text/x-python
changed naming to snake case. sadly no love for camel case in this world
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 | import collections
import hashlib
import os
from datetime import datetime
from util import Progress
def hash_block(data):
return hashlib.sha256(data).digest()[-HashTree.HASH_LEN:]
class HashTree:
HASH_LEN = 16 # bytes
BLOCK_SIZE = 4096 # bytes
## Prepares a tree containing leaf_count leaves.
def __init__(self, leaf_count):
self.store = [b""]*(leaf_count*2-1)
self.leaf_start = leaf_count-1
self.leaf_count = leaf_count
self._index = self.leaf_start
@classmethod
def from_file(cls, filename):
with open(filename, "rb") as f:
stat = os.fstat(f.fileno())
size = stat.st_size # !! symlinks
leaf_count = (size-1)//HashTree.BLOCK_SIZE+1 # number of leaf blocks
res = cls(leaf_count)
print(datetime.now(), "hashing file:")
progress = Progress(leaf_count)
for i in range(leaf_count):
data = f.read(HashTree.BLOCK_SIZE)
res.insert_leaf(hash_block(data))
progress.p(i)
progress.done()
res.build_tree()
return res
@classmethod
def load(cls, filename):
with open(filename, "rb") as f:
stat = os.fstat(f.fileno())
size = stat.st_size
node_count = size//HashTree.HASH_LEN
res = cls((node_count+1)//2)
for i in range(node_count):
res.store[i] = f.read(HashTree.HASH_LEN)
return res
def save(self, filename):
with open(filename, "wb") as f:
for h in self.store:
f.write(h)
## Inserts a leaf at the first empty position.
#
# Useful and used only during the tree construction.
def insert_leaf(self, h):
self.store[self._index] = h
self._index += 1
## Updates a hash stored in the leaf.
def update_leaf(self, index, h):
if index<self.leaf_start: raise IndexError()
self.store[index] = h
self.update_node((index-1)//2)
## Updates the node at index and all its ancestors.
def update_node(self, index):
while index>=0:
self.store[index] = hash_block(self.store[index*2+1] + self.store[index*2+2])
index = (index-1)//2
## Fast construction of the tree over the leaves. O(n).
def build_tree(self):
print(datetime.now(), "building tree:")
progress = Progress(-1, self.leaf_start-1)
for i in range(self.leaf_start-1, -1, -1):
self.store[i] = hash_block(self.store[i*2+1] + self.store[i*2+2])
progress.p(i)
progress.done()
## Update faster than repeated insertLeaf.
def batch_update(self, keys_hashes):
queue = collections.deque()
for (k, v) in sorted(keys_hashes):
self.store[k] = v
parent_k = (k-1)//2
if len(queue)==0 or queue[-1]!=parent_k:
queue.append(parent_k)
while len(queue)>0:
k = queue.pop()
self.store[k] = hash_block(self.store[k*2+1] + self.store[k*2+2])
parent_k = (k-1)//2
if (len(queue)==0 or queue[0]!=parent_k) and k!=0:
queue.appendleft(parent_k)
|