# HG changeset patch
# User Laman
# Date 2019-09-09 20:58:21
# Node ID 29f28718a69b1ee204c6cc4ef446a36aa555623f
# Parent  a00c974af8ae8bcbdbc8a2eb17a2a23f913045e8

transitional data processing: export transformed images and polygon masks as image files instead of an npz archive

diff --git a/exp/kerokero/prepare_data.py b/exp/kerokero/prepare_data.py
--- a/exp/kerokero/prepare_data.py
+++ b/exp/kerokero/prepare_data.py
@@ -7,6 +7,8 @@ import logging as log
 
 import numpy as np
 import cv2 as cv
+import PIL.Image
+import PIL.ImageDraw
 
 import config as cfg
 sys.path.append("..")
@@ -44,10 +46,10 @@ class Sample:
 			m=np.matmul(mi,m)
 		m=np.matmul(self._computeCrop(m),m)
 		img=cv.warpPerspective(self.img,m,(self.SIDE,self.SIDE))
-		img=np.uint8(img)
+		img=PIL.Image.fromarray(img[:,:,::-1])
 		grid=Corners(c.transform(m) for c in self.grid)
-		grid=list(map(lambda p: list(2*p/self.SIDE-EPoint(1,1)), grid))
-		return (img,grid)
+		mask=self._createMask(img,grid)
+		return (img,mask)
 
 	def rectify(self):
 		x1=self.SIDE*0.1
@@ -62,29 +64,6 @@ class Sample:
 		grid=list(map(lambda p: list(2*p/self.SIDE-EPoint(1,1)), grid))
 		return (img,grid)
 
-	def cut(self):
-		width=max(p.x for p in self.grid)-min(p.x for p in self.grid)
-		height=max(p.y for p in self.grid)-min(p.y for p in self.grid)
-		kx=width/4
-		ky=height/4
-		n=self.SIDE
-		for p in self.grid:
-			shift=self._createNoise(0.2)
-			abcd=[[p.x-kx,p.y-ky],[p.x-kx,p.y+ky],[p.x+kx,p.y+ky],[p.x+kx,p.y-ky]]
-			abcd_=[[shift.x,shift.y],[shift.x,n+shift.y],[n+shift.x,n+shift.y],[n+shift.x,shift.y]]
-			m=cv.getPerspectiveTransform(np.float32(abcd),np.float32(abcd_))
-			t1=getTranslation(-n/2,-n/2)
-			mir=getMirroring()
-			proj=getProjection()
-			rot=getRotation()
-			t2=getTranslation(n/2,n/2)
-			for mi in [t1,mir,proj,rot,t2]:
-				m=np.matmul(mi,m)
-			img=cv.warpPerspective(self.img,m,(self.SIDE,self.SIDE))
-			img=np.uint8(img)
-			point=p.transform(m)*2/self.SIDE-EPoint(1,1)
-			yield (img,[point.x,point.y])
-
 	def _getCenter(self):
 		(a,b,c,d)=self.grid
 		p=Line.fromPoints(a,c)
@@ -100,9 +79,15 @@ class Sample:
 		scale=getScale(self.SIDE/(wg*(1+left+right)), self.SIDE/(hg*(1+top+bottom)))
 		return np.matmul(scale,t2)
 
-	def _createNoise(self,mag=0.05):
+	def _createMask(self,image,grid):
+		"""Return a mode-"L" image sized like image with the grid polygon drawn (fill and outline) in 255."""
+		mask=PIL.Image.new("L",image.size)
+		PIL.ImageDraw.Draw(mask).polygon([tuple(p) for p in grid],fill=255,outline=255)
+		return mask
+
+	def _createNoise(self):
 		alpha=random.uniform(0,math.pi*2)
-		d=random.uniform(0,self.SIDE*mag)
+		d=random.uniform(0,self.SIDE*0.05)
 		return EPoint(math.cos(alpha)*d, math.sin(alpha)*d)
 
 	def show(self):
@@ -133,17 +118,12 @@ def harvestDir(path):
 	for f in files:
 		grade=annotations.get(f.name,[Board()])[0].grade
 		Stats.counts[grade]+=1
-		if not Board.UNSET<grade<=Board.GOOD: continue
+		if not Board.UNSET<grade<=Board.POOR: continue
 		img=cv.imread(f.path)
-		img=cv.cvtColor(img,cv.COLOR_BGR2GRAY)
 		for b in boards:
 			sample=Sample(img,b.grid)
-			# sample.show()
-			# (transformedImg,label)=sample.transform()
-			# (transformedImg,label)=sample.rectify()
-			for (transformedImg,label) in sample.cut():
-				Sample(np.uint8(transformedImg),[(EPoint(*label)+EPoint(1,1))*Sample.SIDE/2]).show()
-				yield (transformedImg,label)
+			(transformedImg,mask)=sample.transform()
+			yield (transformedImg,mask)
 
 
 def loadDataset(root):
@@ -172,6 +152,35 @@ def loadDataset(root):
 	)
 
 
+def prepareDataset(root,dest):
+	"""Save harvested samples under dest, split randomly ~9:1 into train/ (+train_masks/) and test/ (image only), and list them in two CSVs."""
+	for sub in ("train","train_masks","test"):
+		os.makedirs(os.path.join(dest,sub),exist_ok=True)  # save() does not create missing directories
+	i=0
+	train=[]
+	test=[]
+	for d in traverseDirs(root):
+		for (image,mask) in harvestDir(d):
+			i+=1
+			name=str(i)+".jpg"
+			if random.random()<0.9:
+				image.save(os.path.join(dest,"train/{0}.jpg".format(i)))
+				mask.save(os.path.join(dest,"train_masks/{0}_mask.png".format(i)))
+				train.append(name)
+			else:
+				image.save(os.path.join(dest,"test/{0}.jpg".format(i)))
+				test.append(name)
+	for (listing,names) in (("train_masks.csv",train),("test_masks.csv",test)):
+		with open(os.path.join(dest,listing),mode="w") as f:
+			f.write("img,rle_mask\n")
+			f.writelines('{0},""\n'.format(name) for name in names)
+	log.info("clear images: %s",Stats.counts[1])
+	log.info("good images: %s",Stats.counts[2])
+	log.info("poor images: %s",Stats.counts[3])
+	log.info("unset images: %s",Stats.counts[0])
+	log.info("total: %s",sum(Stats.counts))
+
+
 def show(img,filename="x"):
 	cv.imshow(filename,img)
 	cv.waitKey(0)
@@ -179,11 +188,4 @@ def show(img,filename="x"):
 
 
 if __name__=="__main__":
-	((trainImages,trainLabels),(testImages,testLabels))=loadDataset(sys.argv[1])
-	np.savez_compressed(
-		sys.argv[2],
-		trainImages=trainImages,
-		trainLabels=trainLabels,
-		testImages=testImages,
-		testLabels=testLabels
-	)
+	prepareDataset(sys.argv[1],sys.argv[2])  # argv[1]: root to traverse, argv[2]: dest output directory