Changeset - 9483b964f560
[Not reviewed]
branch: default
Laman - 2019-05-05 13:03:27

saving and loading prepared data
3 files changed with 33 insertions and 14 deletions:
exp/kerokero/prepare_data.py
@@ -20,99 +20,104 @@ class Sample:
	SIDE=224

	def __init__(self,img,grid):
		self.img=img
		self.grid=grid

	def transform(self):
		"""Apply a random augmenting transformation.

		Returns (image,label) with the image warped to SIDE x SIDE
		and the label holding the flattened corner coordinates."""
		center=self._getCenter()
		m=getIdentity()
		t1=getTranslation(-center.x,-center.y)
		proj=getProjection()
		rot=getRotation()
		mir=getMirroring()
		for mi in [t1,mir,proj,rot]:
			m=np.matmul(mi,m)
		m=np.matmul(self._computeCrop(m),m)
		img=cv.warpPerspective(self.img,m,(self.SIDE,self.SIDE))
		grid=Corners(c.transform(m) for c in self.grid)
		return (img,list(itertools.chain.from_iterable(grid)))

	def _getCenter(self):
		# the grid center is the intersection of the two diagonals
		(a,b,c,d)=self.grid
		p=Line.fromPoints(a,c)
		q=Line.fromPoints(b,d)
		return p.intersect(q)

	def _computeCrop(self,m):
		# fit the transformed grid into the SIDE x SIDE crop, leaving a
		# random margin of 5-20% of the grid size on each edge
		grid=Corners(c.transform(m) for c in self.grid)
		(x1,y1,x2,y2)=computeBoundingBox(grid)
		(wg,hg)=(x2-x1,y2-y1)
		(left,top,right,bottom)=[random.uniform(0.05,0.2) for i in range(4)]
		t2=getTranslation(left*wg-x1, top*hg-y1)
		scale=getScale(self.SIDE/(wg*(1+left+right)), self.SIDE/(hg*(1+top+bottom)))
		return np.matmul(scale,t2)

	def show(self):
		img=np.copy(self.img)
		for c in self.grid:
			cv.circle(img,(int(c.x),int(c.y)),3,[0,255,0],-1)
		show(img)
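A side note on the composition in transform(): np.matmul(mi,m) left-multiplies, so the matrices in [t1,mir,proj,rot] act on a point in list order, centering translation first. A minimal sketch, assuming the get*() helpers return 3x3 homogeneous matrices of the usual form (translation() below is a hypothetical stand-in for getTranslation()):

import numpy as np

def translation(tx,ty):  # hypothetical stand-in for getTranslation()
	return np.array([[1,0,tx],[0,1,ty],[0,0,1]],dtype=float)

m=np.eye(3)
for mi in [translation(-5,-7),translation(1,2)]:
	m=np.matmul(mi,m)  # left-multiply: earlier matrices act on points first

print(np.matmul(m,[5,7,1]))  # [1. 2. 1.]: shifted by (-5,-7) first, then by (1,2)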
 
def traverseDirs(root):
	"""Walk the tree under root, yielding every directory that contains annotations.json.gz."""
	stack=[root]
	while len(stack)>0:
		d=stack.pop()
		# sort in reverse so that the LIFO stack pops directories in name order
		contents=sorted(os.scandir(d),key=lambda f: f.name,reverse=True)
		if any(f.name=="annotations.json.gz" for f in contents):
			print(d)
			yield d
		for f in contents:
			if f.is_dir(): stack.append(f.path)
 
def harvestDir(path):
	annotations=DataFile(os.path.join(path,"annotations.json.gz"))
	imgFilter=lambda f: f.is_file() and re.match(r".*\.(jpg|jpeg|png|gif)$", f.name.lower())
	files=sorted(filter(imgFilter,os.scandir(path)),key=lambda f: f.name)
	boards=annotations["."]
	for f in files:
		img=cv.imread(f.path)
		img=cv.cvtColor(img,cv.COLOR_BGR2GRAY)
		for b in boards:
			sample=Sample(img,b.grid)
			# keep the transformed image separate so that the original
			# grayscale img is reused for every annotated board
			(transformed,label)=sample.transform()
			yield (transformed,label)
 
def loadDataset(root):
	testRatio=0.1
	trainRatio=1-testRatio
	images=[]
	labels=[]
	for d in traverseDirs(root):
		for (img,label) in harvestDir(d):
			images.append(img)
			labels.append(label)
	# shuffle images and labels with a shared permutation, then split 90/10
	n=len(images)
	keys=list(range(n))
	random.shuffle(keys)
	images=[images[k] for k in keys]
	labels=[labels[k] for k in keys]
	m=int(n*trainRatio)
	return (
		(np.uint8(images[:m]),np.float32(labels[:m])),
		(np.uint8(images[m:]),np.float32(labels[m:]))
	)
 
def show(img,filename="x"):
	cv.imshow(filename,img)
	cv.waitKey(0)
	cv.destroyAllWindows()
 
if __name__=="__main__":
-	root=sys.argv[1]
-	for d in traverseDirs(root):
-		harvestDir(d)
+	((trainImages,trainLabels),(testImages,testLabels))=loadDataset(sys.argv[1])
+	np.savez_compressed(
+		sys.argv[2],
+		trainImages=trainImages,
+		trainLabels=trainLabels,
+		testImages=testImages,
+		testLabels=testLabels
+	)
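For reference, the arrays saved above round-trip through np.load under the same keys, which is what test.py and train.py rely on below. A minimal sketch of the expected contents (the .npz path is a placeholder):

import numpy as np

with np.load("dataset.npz") as data:
	print(data["trainImages"].shape,data["trainImages"].dtype)  # (n,224,224) uint8
	print(data["trainLabels"].shape,data["trainLabels"].dtype)  # (n,8) float32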
exp/kerokero/test.py
import argparse

import numpy as np
from keras.models import load_model

from prepare_data import loadDataset,Sample
from analyzer.epoint import EPoint
from analyzer.corners import Corners


parser=argparse.ArgumentParser()
parser.add_argument("model")
parser.add_argument("data_dir")
parser.add_argument("data")
args=parser.parse_args()

model=load_model(args.model)

print("loading data...")
-((trainImages,trainLabels),(testImages,testLabels))=loadDataset(args.data_dir)
+with np.load(args.data) as data:
+	trainImages=data["trainImages"]
+	trainLabels=data["trainLabels"]
+	testImages=data["testImages"]
+	testLabels=data["testLabels"]
print("done")

for img in testImages:
	# the model predicts a flat vector of 8 values: four (x,y) corners
	label=model.predict(np.reshape(img,(1,224,224,1)))
	print(label)
	points=[]
	for i in range(4):
		points.append(EPoint(label[0][i*2],label[0][i*2+1]))
	corners=Corners(points)
	sample=Sample(np.uint8(img),corners)
	sample.show()
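The np.reshape in the loop above just adds the batch and channel axes that Keras convolutional models expect; a minimal shape check, standalone with no model needed:

import numpy as np

img=np.zeros((224,224),dtype=np.uint8)
batch=np.reshape(img,(1,224,224,1))  # (batch,height,width,channels)
print(batch.shape)  # (1, 224, 224, 1)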
exp/kerokero/train.py
import argparse
import logging as log

import numpy as np
from keras.layers import Conv2D,Dropout,Dense,Flatten,MaxPooling2D,BatchNormalization
from keras.models import Sequential,load_model

from prepare_data import loadDataset
import ftp

log.basicConfig(level=log.INFO,format="%(asctime)s %(levelname)s: %(message)s")

parser=argparse.ArgumentParser()
parser.add_argument("data_dir")
parser.add_argument("data")
parser.add_argument("--load_model")
parser.add_argument("--save_model",default="/tmp/gogo-{0:03}.h5")
parser.add_argument("--epochs",type=int,default=100)
parser.add_argument("--initial_epoch",type=int,default=0)
args=parser.parse_args()
 
def createFullyConnected():
	model=Sequential([
		Flatten(input_shape=(224,224)),
		Dense(128, activation="relu"),
		Dropout(0.1),
		Dense(64, activation="relu"),
		Dense(8)
	])

	model.compile(
		optimizer='adam',
		loss='mse',
		# note: 'accuracy' is not meaningful for this regression; mae is the metric to watch
		metrics=['mae','accuracy']
	)
	return model
 
def createCNN():
	model=Sequential()

	model.add(Conv2D(filters=16,kernel_size=2,padding="same",activation="relu",input_shape=(224,224,1)))
	model.add(Dropout(0.1))
	model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2),padding="valid"))
	model.add(BatchNormalization())

	model.add(Conv2D(32,(5,5),activation="relu"))
	model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2),padding="valid"))
	model.add(Dropout(0.2))
	model.add(BatchNormalization())

	model.add(Conv2D(64,(5,5),activation="relu"))
	model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2),padding="valid"))
	model.add(BatchNormalization())

	model.add(Conv2D(128,(3,3),activation="relu"))
	model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2),padding="valid"))
	model.add(Dropout(0.4))
	model.add(BatchNormalization())

	model.add(Flatten())

	model.add(Dense(500,activation="relu"))
	model.add(Dense(500,activation="relu"))
	model.add(Dropout(0.1))

	model.add(Dense(128,activation="relu"))
	model.add(Dense(128,activation="relu"))
	model.add(Dropout(0.1))

	model.add(Dense(8))

	model.compile(optimizer='adam',loss='mse',metrics=['mae','accuracy'])
	return model
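As a cross-check of the Flatten() input implied by the stack above: the 'same' 2x2 convolution keeps the 224-pixel side, each 'valid' k x k convolution subtracts k-1, and each 2x2 stride-2 pooling halves the side, rounding down. Traced by hand:

side=224           # Conv2D 2x2 'same' keeps 224
side//=2           # pool -> 112
side-=4; side//=2  # 5x5 'valid' -> 108, pool -> 54
side-=4; side//=2  # 5x5 'valid' -> 50, pool -> 25
side-=2; side//=2  # 3x3 'valid' -> 23, pool -> 11
print(side,side*side*128)  # 11, 15488 units feeding Flatten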
 
model=createCNN()
if args.load_model:
	model=load_model(args.load_model)

-print("loading data...")
-((trainImages,trainLabels),(testImages,testLabels))=loadDataset(args.data_dir)
-print("done")
+log.info("loading data...")
+with np.load(args.data) as data:
+	trainImages=data["trainImages"]
+	trainLabels=data["trainLabels"]
+	testImages=data["testImages"]
+	testLabels=data["testLabels"]
+log.info("done")

for i in range(args.initial_epoch,args.epochs//10):
	model.fit(trainImages.reshape((-1,224,224,1)),trainLabels,epochs=(i+1)*10,initial_epoch=i*10,batch_size=128,validation_split=0.2)
-	model.save(args.save_model.format(i+1))
-print(model.evaluate(testImages,testLabels))
+	path=args.save_model.format(i+1)
+	log.info("saving model...")
+	model.save(path)
+	ftp.push(path)
+# testImages need the same (-1,224,224,1) reshape as the training data
+log.info(model.evaluate(testImages.reshape((-1,224,224,1)),testLabels))
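Under the defaults (--epochs 100, --initial_epoch 0) the staged loop above runs i=0..9; stage i fits epochs i*10 through (i+1)*10, then saves and uploads a checkpoint. A sketch of the resulting schedule, with paths following the --save_model default:

epochs=100
initial_epoch=0
for i in range(initial_epoch,epochs//10):
	print("epochs {}-{} -> /tmp/gogo-{:03}.h5".format(i*10,(i+1)*10,i+1))
# epochs 0-10 -> /tmp/gogo-001.h5
# ...
# epochs 90-100 -> /tmp/gogo-010.h5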