# HG changeset patch # User Laman # Date 2019-09-09 20:58:21 # Node ID 29f28718a69b1ee204c6cc4ef446a36aa555623f # Parent a00c974af8ae8bcbdbc8a2eb17a2a23f913045e8 transitional data processing diff --git a/exp/kerokero/prepare_data.py b/exp/kerokero/prepare_data.py --- a/exp/kerokero/prepare_data.py +++ b/exp/kerokero/prepare_data.py @@ -7,6 +7,8 @@ import logging as log import numpy as np import cv2 as cv +import PIL.Image +import PIL.ImageDraw import config as cfg sys.path.append("..") @@ -44,10 +46,10 @@ class Sample: m=np.matmul(mi,m) m=np.matmul(self._computeCrop(m),m) img=cv.warpPerspective(self.img,m,(self.SIDE,self.SIDE)) - img=np.uint8(img) + img=PIL.Image.fromarray(img[:,:,::-1]) grid=Corners(c.transform(m) for c in self.grid) - grid=list(map(lambda p: list(2*p/self.SIDE-EPoint(1,1)), grid)) - return (img,grid) + mask=self._createMask(img,grid) + return (img,mask) def rectify(self): x1=self.SIDE*0.1 @@ -62,29 +64,6 @@ class Sample: grid=list(map(lambda p: list(2*p/self.SIDE-EPoint(1,1)), grid)) return (img,grid) - def cut(self): - width=max(p.x for p in self.grid)-min(p.x for p in self.grid) - height=max(p.y for p in self.grid)-min(p.y for p in self.grid) - kx=width/4 - ky=height/4 - n=self.SIDE - for p in self.grid: - shift=self._createNoise(0.2) - abcd=[[p.x-kx,p.y-ky],[p.x-kx,p.y+ky],[p.x+kx,p.y+ky],[p.x+kx,p.y-ky]] - abcd_=[[shift.x,shift.y],[shift.x,n+shift.y],[n+shift.x,n+shift.y],[n+shift.x,shift.y]] - m=cv.getPerspectiveTransform(np.float32(abcd),np.float32(abcd_)) - t1=getTranslation(-n/2,-n/2) - mir=getMirroring() - proj=getProjection() - rot=getRotation() - t2=getTranslation(n/2,n/2) - for mi in [t1,mir,proj,rot,t2]: - m=np.matmul(mi,m) - img=cv.warpPerspective(self.img,m,(self.SIDE,self.SIDE)) - img=np.uint8(img) - point=p.transform(m)*2/self.SIDE-EPoint(1,1) - yield (img,[point.x,point.y]) - def _getCenter(self): (a,b,c,d)=self.grid p=Line.fromPoints(a,c) @@ -100,9 +79,15 @@ class Sample: scale=getScale(self.SIDE/(wg*(1+left+right)), self.SIDE/(hg*(1+top+bottom))) return np.matmul(scale,t2) - def _createNoise(self,mag=0.05): + def _createMask(self,image,grid): + img=PIL.Image.new("L",image.size) + draw=PIL.ImageDraw.Draw(img) + draw.polygon([tuple(p) for p in grid],255,255) + return img + + def _createNoise(self): alpha=random.uniform(0,math.pi*2) - d=random.uniform(0,self.SIDE*mag) + d=random.uniform(0,self.SIDE*0.05) return EPoint(math.cos(alpha)*d, math.sin(alpha)*d) def show(self): @@ -133,17 +118,12 @@ def harvestDir(path): for f in files: grade=annotations.get(f.name,[Board()])[0].grade Stats.counts[grade]+=1 - if not Board.UNSET<grade<=Board.GOOD: continue + if not Board.UNSET<grade<=Board.POOR: continue img=cv.imread(f.path) - img=cv.cvtColor(img,cv.COLOR_BGR2GRAY) for b in boards: sample=Sample(img,b.grid) - # sample.show() - # (transformedImg,label)=sample.transform() - # (transformedImg,label)=sample.rectify() - for (transformedImg,label) in sample.cut(): - Sample(np.uint8(transformedImg),[(EPoint(*label)+EPoint(1,1))*Sample.SIDE/2]).show() - yield (transformedImg,label) + (transformedImg,mask)=sample.transform() + yield (transformedImg,mask) def loadDataset(root): @@ -172,6 +152,35 @@ def loadDataset(root): ) +def prepareDataset(root,dest): + i=0 + train=[] + test=[] + for d in traverseDirs(root): + for (image,mask) in harvestDir(d): + i+=1 + if random.random()<0.9: + image.save(os.path.join(dest,"train/{0}.jpg".format(i))) + mask.save(os.path.join(dest,"train_masks/{0}_mask.png".format(i))) + train.append(str(i)+".jpg") + else: + image.save(os.path.join(dest,"test/{0}.jpg".format(i))) + test.append(str(i)+".jpg") + with open(os.path.join(dest,"train_masks.csv"),mode="w") as f: + f.write("img,rle_mask\n") + for file in train: + f.write('{0},""\n'.format(file)) + with open(os.path.join(dest,"test_masks.csv"),mode="w") as f: + f.write("img,rle_mask\n") + for file in test: + f.write('{0},""\n'.format(file)) + log.info("clear images: %s",Stats.counts[1]) + log.info("good images: %s",Stats.counts[2]) + log.info("poor images: %s",Stats.counts[3]) + log.info("unset images: %s",Stats.counts[0]) + log.info("total: %s",sum(Stats.counts)) + + def show(img,filename="x"): cv.imshow(filename,img) cv.waitKey(0) @@ -179,11 +188,4 @@ def show(img,filename="x"): if __name__=="__main__": - ((trainImages,trainLabels),(testImages,testLabels))=loadDataset(sys.argv[1]) - np.savez_compressed( - sys.argv[2], - trainImages=trainImages, - trainLabels=trainLabels, - testImages=testImages, - testLabels=testLabels - ) + prepareDataset(sys.argv[1],sys.argv[2])