Commit 350fb407 authored by josh
Browse files

hooking up the ml pipeline

parent 534604c7
import os
import shutil
import rq
import json
import numpy as np
......@@ -8,18 +10,18 @@ from abc import ABC
from pathlib import Path
from redis import Redis
import rq
from rq import get_current_job
from ai4hdr_backend import importAndProcess
from ai4hdr_backend.sample.database import importAndProcess
from ai4hdr_backend.sample.model import HDRModel
'''
HDR App database interface
'''
class HDRDatabaseInterface( ABC ):
class HDRDatabaseInterface(ABC):
def getImageList( self ) -> list:
def getImageList(self) -> list:
'''
Should return a list of dictionaries containing:
- id : used to query database for more image information
......@@ -32,10 +34,10 @@ class HDRDatabaseInterface( ABC ):
}
]
'''
raise NotImplementedError( "getImageList: Method Not Implemented" )
raise NotImplementedError("getImageList: Method Not Implemented")
def getSliceList( self, imageId: str, size: str ) -> list:
def getSliceList(self, imageId: str, size: str) -> list:
'''
Similar to getImageList
......@@ -50,10 +52,10 @@ class HDRDatabaseInterface( ABC ):
}
]
'''
raise NotImplementedError( "getSliceList: Method Not Implemented" )
raise NotImplementedError("getSliceList: Method Not Implemented")
def getProcessedList( self, sliceId: str ) -> list:
def getProcessedList(self, sliceId: str) -> list:
'''
Should return a list of available processed images for given slice
i.e.
......@@ -64,41 +66,41 @@ class HDRDatabaseInterface( ABC ):
}
]
'''
raise NotImplementedError( "getProcessedList: Method Not Implemented" )
raise NotImplementedError("getProcessedList: Method Not Implemented")
def getSliceSizes(self, imageId: str) -> list:
    '''
    Interface method, implementations should return a list of the
    slice sizes available for imageId
    i.e.
    [ "128", "256", "512" ]
    '''
    message = "getSliceSizes: Method Not Implemented"
    raise NotImplementedError(message)
def getImageArray(self, imageId: str) -> np.ndarray:
    '''
    Returns an image as a numpy array
    Only returns Original images, not slices or processed slices

    Interface method, always raises NotImplementedError here.
    '''
    raise NotImplementedError("getImageArray: Method Not Implemented")
def getMSLabelArray(self, imageId: str) -> np.ndarray:
    '''
    Should return the label array stored for the given id as a numpy array

    NOTE(review): HDRFileDB names this parameter sliceId and loads the
    "...-SL-MSLAB-PROC.npy" file of a slice - confirm which id is expected

    Interface method, always raises NotImplementedError here.
    '''
    raise NotImplementedError("getMSLabelArray: Method Not Implemented")
class HDRFileDB( HDRDatabaseInterface ):
class HDRFileDB(HDRDatabaseInterface):
def __init__( self, dbLoc: str ):
def __init__(self, dbLoc: str):
'''
Location of info directory names
Location of info in directory names
'''
self.UID = 0
self.IMG_TYPE = 1
......@@ -112,24 +114,29 @@ class HDRFileDB( HDRDatabaseInterface ):
'''
Directory Names
'''
self.BASE_FOLDER = "Images"
self.SLICE_FOLDER = "SL"
self.BASE_FOLDER = "Images"
self.SLICE_FOLDER = "SL"
self.PROCESSED_FOLDER = "PROC"
self.MODELS_FOLDER = "Models"
self.dbBasePath = Path( dbLoc ).resolve()
self.imagesPath = self.dbBasePath.joinpath( self.BASE_FOLDER )
self.dbBasePath = Path(dbLoc).resolve()
self.imagesPath = self.dbBasePath.joinpath(self.BASE_FOLDER)
'''
PUBLIC METHODS
'''
def getImageList( self ) -> list:
'''
IMAGE METHODS
'''
def getImageList(self) -> list:
imageList = []
for imageDir in self.imagesPath.iterdir():
imageId = imageDir.name
displayName = "{}_{}".format( self._idName( imageId ), self._idUid( imageId ) )
displayName = "{}_{}".format(self._idName(imageId), self._idUid(imageId))
imageList.append({
"id" : imageId,
......@@ -139,18 +146,18 @@ class HDRFileDB( HDRDatabaseInterface ):
return imageList
def getSliceList( self, imageId: str, size: str ) -> list:
def getSliceList(self, imageId: str, size: str) -> list:
sliceList = []
slicedPath = self._idToBasePath( imageId ).joinpath( self.SLICE_FOLDER ).joinpath( size )
slicedPath = self._idToBasePath(imageId).joinpath(self.SLICE_FOLDER).joinpath(size)
if slicedPath.is_dir():
for imageSlice in slicedPath.iterdir():
sliceId = imageSlice.name
displayName = "{}_{}_{}".format(
self._idName( sliceId ),
self._idXPix( sliceId ),
self._idYPix( sliceId )
)
displayName = "{}_{}_{}".format(
self._idName(sliceId),
self._idXPix(sliceId),
self._idYPix(sliceId)
)
sliceList.append({
"id" : sliceId,
"name": displayName
......@@ -160,14 +167,14 @@ class HDRFileDB( HDRDatabaseInterface ):
return []
def getProcessedList( self, sliceId: str ) -> list:
def getProcessedList(self, sliceId: str) -> list:
processedList = []
slicedDir = [ p for p in self._idToBasePath( sliceId ).rglob( sliceId ) ][0]
processedDir = slicedDir.joinpath( self.PROCESSED_FOLDER )
slicedDir = [ p for p in self._idToBasePath(sliceId).rglob(sliceId) ][0]
processedDir = slicedDir.joinpath(self.PROCESSED_FOLDER)
for processedImage in processedDir.iterdir():
processedId = processedImage.name
displayName = self._idParts( processedImage.name )[self.PROC_TYPE]
displayName = self._idParts(processedImage.name)[self.PROC_TYPE]
processedList.append({
"id" : processedId,
"name": displayName
......@@ -175,69 +182,189 @@ class HDRFileDB( HDRDatabaseInterface ):
return processedList
def getSliceSizes(self, imageId: str) -> list:
    '''
    Returns the names of the slice size folders of imageId that hold at
    least one entry
    i.e.
    [ "128", "256" ]

    Returns an empty list when the image has no slice folder (the same
    answer getSliceList gives for a missing folder). Plain files lying
    next to the size folders are ignored.
    '''
    sliceRoot = self._idToBasePath(imageId).joinpath(self.SLICE_FOLDER)
    if not sliceRoot.is_dir():
        return []
    return [
        sizeDir.name
        for sizeDir in sliceRoot.iterdir()
        # any() stops at the first entry instead of listing the whole folder
        if sizeDir.is_dir() and any(sizeDir.iterdir())
    ]
def getImageArray(self, imageId: str) -> np.ndarray:
    '''
    Returns the image stored for imageId as a numpy array

    The image is the first file named "<imageId>.<anything>" found below
    the base folder of the id.
    Raises IndexError when no such file exists.
    '''
    baseDir = self._idToBasePath(imageId)
    imageFilePath = [p for p in baseDir.rglob("{}.*".format(imageId))][0]
    # Close the file as soon as the pixels are copied into the array
    with Image.open(imageFilePath) as image:
        return np.array(image)
def getMSLabelArray(self, sliceId: str) -> np.array:
    '''
    Loads the label array saved for sliceId

    The array is read from the first file below the base folder of the id
    whose name ends in "<slice size>-<x pixel>-<y pixel>-SL-MSLAB-PROC.npy".
    Raises IndexError when no such file exists.
    '''
    searchRoot = self._idToBasePath(sliceId)
    size = self._idSliceSize(sliceId)
    xPix = self._idXPix(sliceId)
    yPix = self._idYPix(sliceId)
    labelsIdentifier = "{}-{}-{}-SL-MSLAB-PROC.npy".format(size, xPix, yPix)
    candidates = list(searchRoot.rglob("*{}".format(labelsIdentifier)))
    return np.load(candidates[0])
def saveMask(self, sliceId: str, maskImage: Image.Image, maskName: str="Mask"):
    '''
    Saves maskImage as a new processed image of the slice sliceId

    - sliceId   : id of the slice the mask belongs to
    - maskImage : the mask to store
    - maskName  : name of the mask, "-" is replaced by "_" because "-"
                  separates the fields of an id

    The mask is written to "<sliceId>-<maskName>-PROC/<same name>.<ext>"
    inside the processed folder of the slice.
    Raises IndexError when no entry named sliceId exists below the base
    folder and FileExistsError when the mask folder already exists.
    '''
    maskName = maskName.replace("-", "_")
    baseDir = self._idToBasePath(sliceId)
    # Find slice directory with id
    sliceDir = [p for p in baseDir.rglob(sliceId)][0]
    processedDir = sliceDir.joinpath(self.PROCESSED_FOLDER)
    newMaskDir = processedDir.joinpath("{}-{}-PROC".format(sliceId, maskName))
    newMaskPath = newMaskDir.joinpath("{}.{}".format(newMaskDir.name, self._idImageExt(sliceId)))
    os.mkdir(newMaskDir)
    maskImage.save(newMaskPath)
def importAndProcessImage( self, imagePath: str ):
print( imagePath )
def getImageArray(self, imageId: str) -> np.ndarray:
    '''
    Returns the image stored for imageId as a numpy array

    The image is the first file named "<imageId>.<anything>" found below
    the base folder of the id.
    Raises IndexError when no such file exists.
    '''
    baseDir = self._idToBasePath(imageId)
    imageFilePath = [p for p in baseDir.rglob("{}.*".format(imageId))][0]
    # Close the file as soon as the pixels are copied into the array
    with Image.open(imageFilePath) as image:
        return np.array(image)
def getMSLabelArray(self, sliceId: str) -> np.array:
    '''
    Returns the label array saved for the slice sliceId

    Looks below the base folder of the id for files whose name ends in
    "<slice size>-<x pixel>-<y pixel>-SL-MSLAB-PROC.npy" and loads the
    first one. Raises IndexError when there is none.
    '''
    baseDir = self._idToBasePath(sliceId)
    idFields = (
        self._idSliceSize(sliceId),
        self._idXPix(sliceId),
        self._idYPix(sliceId),
    )
    labelsIdentifier = "{}-{}-{}-SL-MSLAB-PROC.npy".format(*idFields)
    pattern = "*{}".format(labelsIdentifier)
    found = [labelFile for labelFile in baseDir.rglob(pattern)]
    return np.load(found[0])
def saveMask(self, sliceId: str, maskImage: Image.Image, maskName: str="Mask"):
    '''
    Saves maskImage as a new processed image of the slice sliceId,
    together with a label map of the mask

    - sliceId   : id of the slice the mask belongs to
    - maskImage : the mask to store
    - maskName  : name of the mask, "-" is replaced by "_" because "-"
                  separates the fields of an id

    Written into "<sliceId>-<maskName>-PROC" inside the processed folder
    of the slice:
    - "<folder name>.<ext>"          : the mask image
    - "<folder name>-labelmap.npy"   : one label per pixel, np.save adds
                                       the ".npy" ending

    Raises IndexError when no entry named sliceId exists below the base
    folder and FileExistsError when the mask folder already exists.
    '''
    maskName = maskName.replace("-", "_")
    baseDir = self._idToBasePath(sliceId)

    # Find slice directory with id
    sliceDir = [p for p in baseDir.rglob(sliceId)][0]
    processedDir = sliceDir.joinpath(self.PROCESSED_FOLDER)

    # Create new folder for masks in processed images
    newMaskDir = processedDir.joinpath("{}-{}-PROC".format(sliceId, maskName))
    os.mkdir(newMaskDir)

    # Save mask
    newMaskPath = newMaskDir.joinpath("{}.{}".format(newMaskDir.name, self._idImageExt(sliceId)))
    maskImage.save(newMaskPath)

    # Create and save label map
    labelMapPath = newMaskDir.joinpath("{}-labelmap".format(newMaskDir.name))
    imgArr = np.array(maskImage)

    # Average across color channels, a single channel mask has none
    if imgArr.ndim == 3:
        intensity = imgArr.mean(axis=2)
    else:
        intensity = imgArr.astype(float)

    # The label of a pixel is the rank of its intensity among the distinct
    # intensities. It is taken from np.unique in one pass: overwriting the
    # intensities one label at a time merges two classes whenever a later
    # intensity equals a label that was already written (e.g. 0.5, 0.7, 1.0)
    _, ranks = np.unique(intensity, return_inverse=True)
    labelMap = ranks.reshape(intensity.shape).astype(intensity.dtype)

    np.save(str(labelMapPath), labelMap)
def importAndProcessImage(self, imagePath: str):
    '''
    Hands the image at imagePath to importAndProcess and afterwards
    deletes the folder the image was read from

    importAndProcess receives:
    - imgPath    : path to the image
    - dbPath     : path to database
    - newImgName : new name for image, the file name up to its first "."
    '''
    sourcePath = Path(imagePath)
    importAndProcess(
        imgPath=str(imagePath),
        dbPath=str(self.dbBasePath),
        newImgName=sourcePath.name.split(".")[0]
    )
    # NOTE: removes the whole parent folder of the image, not only the file
    shutil.rmtree(sourcePath.parent)
'''
MODEL METHODS
'''
#os.system( "python ai4hdr_backend/Scripts/importAndProcessImage.py {} {} {}".format( imagePath, self.dbBasePath, imageName ) )
def getModelList(self, imageId, size):
    '''
    Lists the models stored for imageId, ordered by name

    Returns a list of dictionaries containing:
    - id   : "<imageId>-<models folder name>-<model name>"
    - name : name of the model folder

    An image without a models folder has no models, an empty list is
    returned for it. size is accepted but not used.
    '''
    modelsDir = self._idToBasePath(imageId).joinpath(self.MODELS_FOLDER)
    if not modelsDir.is_dir():
        return []
    return [
        {
            "id": "{}-{}-{}".format(imageId, self.MODELS_FOLDER, model.name),
            "name": model.name
        }
        # iterdir() has no defined order, sort so the list is stable
        for model in sorted(modelsDir.iterdir(), key=lambda entry: entry.name)
    ]
def createModel(self, imageId, size, modelName):
    '''
    Creates a new HDRModel for imageId and saves it in the models folder
    of the image

    - imageId   : id of the image the model belongs to
    - size      : slice size, the model is built with shape (size, size, 3)
    - modelName : name of the model folder, "-" is replaced by "_"

    Raises FileExistsError when a model with that name already exists.
    '''
    # "-" separates the fields of an id and getModel takes the text after
    # the last "-" of a model id as folder name, so the name must not
    # contain one (saveMask treats mask names the same way)
    modelName = modelName.replace("-", "_")

    # NOTE(review): assumes HDRModel wants integer dimensions, so a size
    # that arrives as text is converted - confirm against HDRModel
    size = int(size)

    saveDir = self._idToBasePath(imageId).joinpath(self.MODELS_FOLDER).joinpath(modelName)
    # parents=True also creates the models folder on first use
    saveDir.mkdir(parents=True)

    hdrModel = HDRModel(shape=(size, size, 3))
    hdrModel.save(saveDir)
def getMaskList(self, imageId, size):
    '''
    Lists every processed image of the given slice size whose folder name
    does not contain "MSSEG"

    Returns a list of dictionaries containing:
    - id   : name of the processed image folder
    - name : "<processed type>_<x pixel>_<y pixel>"
    '''
    sizeDir = self._idToBasePath(imageId).joinpath(self.SLICE_FOLDER).joinpath(size)
    masks = []
    for sliceDir in sizeDir.iterdir():
        for candidate in sliceDir.joinpath(self.PROCESSED_FOLDER).iterdir():
            procId = candidate.name
            # Mean Shift Segmentation results are not masks
            if "MSSEG" in str(procId):
                continue
            label = "{}_{}_{}".format(
                self._idParts(procId)[self.PROC_TYPE],
                self._idXPix(procId),
                self._idYPix(procId)
            )
            masks.append({"id": procId, "name": label})
    return masks
# NOTE: DOESN'T COPY DUPLICATES
def saveSamplesTemp(self, modelId, sliceSize, sampleIds):
    '''
    Copies the selected training samples into a fresh "tempTrain" folder
    inside the folder of the model and returns the path of that folder

    - modelId   : id of the model, the text after its last "-" is the
                  name of the model folder
    - sliceSize : name of the slice size folder to take samples from
    - sampleIds : JSON text holding the ids of the processed images to use

    Every selected processed image gets a folder "<n>-sample" holding
    - "input.<image type>" : copy of the slice
    - "mask.<image type>"  : copy of the processed image
    A processed image is copied once, however often its id is listed.
    '''
    baseDir = self._idToBasePath(modelId)
    modelDir = baseDir.joinpath(self.MODELS_FOLDER).joinpath(modelId.split("-")[-1])

    # Create temp folder in model dir to hold training samples. A folder
    # left behind by an interrupted run is dropped first, os.mkdir would
    # refuse to create it again otherwise
    tempTrainDir = modelDir.joinpath("tempTrain")
    if tempTrainDir.exists():
        shutil.rmtree(tempTrainDir)
    os.mkdir(tempTrainDir)

    sliceDir = baseDir.joinpath(self.SLICE_FOLDER).joinpath(sliceSize)
    sampleIds = json.loads(sampleIds)

    sID = 0
    for sDir in sliceDir.iterdir():
        processedDir = sDir.joinpath(self.PROCESSED_FOLDER)
        for pDir in processedDir.iterdir():
            if pDir.name not in sampleIds:
                continue

            # Make sample folder
            sampleDir = tempTrainDir.joinpath("{}-sample".format(sID))
            os.mkdir(sampleDir)

            # Copy image and mask to sample folder
            imgType = sDir.name.split("-")[self.IMG_TYPE]
            slicePath = sDir.joinpath("{}.{}".format(sDir.name, imgType))
            processedPath = pDir.joinpath("{}.{}".format(pDir.name, imgType))
            shutil.copy(slicePath, sampleDir.joinpath("{}.{}".format("input", imgType)))
            shutil.copy(processedPath, sampleDir.joinpath("{}.{}".format("mask", imgType)))
            sID += 1

    return tempTrainDir
def saveSlicesForPrediction(self, modelId, sliceSize, sliceIds):
    '''
    Copies the selected slices into a fresh "SLICES_PRED" folder inside
    the folder of the model and returns the path of that folder

    - modelId   : id of the model, the text after its last "-" is the
                  name of the model folder
    - sliceSize : accepted but not used
    - sliceIds  : JSON text holding the ids of the slices to copy

    The n-th slice is copied to "<n>-image/<original file name>".
    A "SLICES_PRED" folder of an earlier call is deleted first, including
    anything that was written into it since.
    Raises IndexError when a slice has no image file below the base folder.
    '''
    baseDir = self._idToBasePath(modelId)
    modelDir = baseDir.joinpath(self.MODELS_FOLDER).joinpath(modelId.split("-")[-1])

    # Create new folder to hold slices. The old one has to go before the
    # search below: os.mkdir would fail on it and its copies carry the
    # same file names as the slices that are searched for
    predDir = modelDir.joinpath("SLICES_PRED")
    if predDir.exists():
        shutil.rmtree(predDir)
    os.mkdir(predDir)

    # Copy each slice to new folder
    for count, sId in enumerate(json.loads(sliceIds)):
        imageFilePath = [p for p in baseDir.rglob("{}.*".format(sId))][0]
        saveDir = predDir.joinpath("{}-image".format(count))
        os.mkdir(saveDir)
        shutil.copy(imageFilePath, saveDir.joinpath(imageFilePath.name))

    return predDir
def getModel(self, modelId):
    '''
    Loads the HDRModel saved for modelId

    The model folder is named after the text following the last "-" of
    modelId and lies in the models folder of the image the id belongs to.
    '''
    modelsDir = self._idToBasePath(modelId).joinpath(self.MODELS_FOLDER)
    modelName = modelId.split("-")[-1]
    return HDRModel(loadDir=modelsDir.joinpath(modelName))
def updateModel(self, modelId, model):
    '''
    Replaces the saved model of modelId with model

    The model folder is deleted with everything in it, created again
    empty and handed to model.save.
    '''
    modelName = modelId.split("-")[-1]
    target = self._idToBasePath(modelId).joinpath(self.MODELS_FOLDER).joinpath(modelName)

    # Remove old model and save new
    shutil.rmtree(target)
    os.mkdir(target)
    model.save(target)
'''
......@@ -245,49 +372,49 @@ class HDRFileDB( HDRDatabaseInterface ):
'''
def _idParts(self, imageId: str) -> list:
    '''
    Splits an id into its "-" separated fields
    '''
    separator = "-"
    return imageId.split(separator)
def _idImageType(self, imageId: str) -> str:
    '''
    Returns the last field of an id
    '''
    fields = self._idParts(imageId)
    return fields[len(fields) - 1]
def _idUid(self, imageId: str) -> str:
    '''
    Returns the field of an id found at position self.UID
    '''
    fields = self._idParts(imageId)
    return fields[self.UID]
def _idImageExt(self, imageId: str) -> str:
    '''
    Returns the field of an id found at position self.IMG_TYPE
    '''
    fields = self._idParts(imageId)
    return fields[self.IMG_TYPE]
def _idName(self, imageId: str) -> str:
    '''
    Returns the field of an id found at position self.NAME
    '''
    fields = self._idParts(imageId)
    return fields[self.NAME]
def _idSliceSize(self, imageId: str) -> str:
    '''
    Returns the field of an id found at position self.SL_SIZE,
    or "" for ids with three fields or less
    '''
    fields = self._idParts(imageId)
    return fields[self.SL_SIZE] if len(fields) > 3 else ""
def _idXPix(self, imageId: str) -> str:
    '''
    Returns the field of an id found at position self.X_PIX,
    or "" for ids with three fields or less
    '''
    fields = self._idParts(imageId)
    return fields[self.X_PIX] if len(fields) > 3 else ""
def _idYPix(self, imageId: str) -> str:
    '''
    Returns the field of an id found at position self.Y_PIX,
    or "" for ids with three fields or less
    '''
    fields = self._idParts(imageId)
    return fields[self.Y_PIX] if len(fields) > 3 else ""
def _idToBasePath(self, imageId: str) -> Path:
    '''
    Returns the folder "<uid>-<image ext>-<name>-OR" inside the images
    folder, built from the fields of imageId
    '''
    uid = self._idUid(imageId)
    ext = self._idImageExt(imageId)
    name = self._idName(imageId)
    folderName = "{}-{}-{}-{}".format(uid, ext, name, "OR")
    return self.imagesPath.joinpath(folderName)
\ No newline at end of file
Subproject commit 0a3ac6ca1e31a6d06bab1afdfa9f2b99d31f9c5b
Subproject commit 77a695319e209de28f08c8f92d09dc59cd6e13c8
......@@ -52,6 +52,7 @@ app.config["result_backend"] = 'redis://localhost:6379'
celery = make_celery(app)
'''
Celery Tasks
'''
......@@ -62,6 +63,69 @@ def importAndProcessImage(imagePath):
return "OK"
@celery.task(name="app.createModel")
def createModel(imageId, size, modelName):
    '''
    Celery task: has imageDB create a model for imageId, then reports "OK"
    '''
    status = "OK"
    imageDB.createModel(imageId, size, modelName)
    return status
def getSamples(dataDir: Path) -> tuple:
    '''
    Reads the training samples below dataDir

    Every folder in dataDir is one sample holding an "input.<ext>" and a
    "mask.<ext>" image. The mask is converted to mode "L" before it is
    turned into an array.

    Returns the tuple (inputs, masks), two numpy arrays with one entry
    per sample in matching order.
    Raises FileNotFoundError when a sample lacks one of the two images.
    '''
    def findSampleFile(sampleDir, stem):
        # saveSamplesTemp names the files "<stem>.<image type>", the
        # ending is therefore not always "jpg"
        matches = sorted(sampleDir.glob("{}.*".format(stem)))
        if not matches:
            raise FileNotFoundError("{}: no {} image".format(sampleDir, stem))
        return matches[0]

    xTrain = []
    yTrain = []
    for sampleDir in dataDir.iterdir():
        # The with blocks close each file once its pixels are copied
        with Image.open(findSampleFile(sampleDir, "input")) as inputImage:
            xTrain.append(np.array(inputImage))
        with Image.open(findSampleFile(sampleDir, "mask")) as maskImage:
            yTrain.append(np.array(maskImage.convert("L")))

    return (np.array(xTrain), np.array(yTrain))
@celery.task(name="app.trainModelTask")
def trainModelTask(modelId, sliceSize, sampleIds):
    '''
    Celery task: trains the model modelId on the selected samples and
    saves the trained model, then reports "OK"

    - modelId   : id of the model to train
    - sliceSize : slice size the samples are taken from
    - sampleIds : JSON text holding the ids of the samples
    '''
    # Local import, only the cleanup below needs it
    import shutil

    # Copy slices and masks to temp folder
    tempSamplePath = imageDB.saveSamplesTemp(modelId, sliceSize, sampleIds)
    try:
        (xTrain, yTrain) = getSamples(tempSamplePath)

        # Train model
        model = imageDB.getModel(modelId)
        model.featureModel.summary()
        model.fit(xTrain, yTrain)

        # Save trained model
        imageDB.updateModel(modelId, model)
    finally:
        # saveSamplesTemp creates the folder with os.mkdir, a folder left
        # behind by a failed run would make the next run fail as well.
        # ignore_errors covers the case that the folder is gone already
        shutil.rmtree(tempSamplePath, ignore_errors=True)

    return "OK"
@celery.task(name="app.predictModelTask")
def predictModelTask(modelId, sliceSize, sliceIds):
    '''
    Celery task: runs the model modelId on the selected slices and saves
    each result as "PREDICTION.jpg" next to the copied slice, then
    reports "OK"

    - modelId   : id of the model to use
    - sliceSize : passed on to saveSlicesForPrediction
    - sliceIds  : JSON text holding the ids of the slices
    '''
    # Copy slices to a working folder
    tempPredDir = imageDB.saveSlicesForPrediction(modelId, sliceSize, sliceIds)

    # Load model, nothing is trained here
    model = imageDB.getModel(modelId)

    for imageDir in tempPredDir.iterdir():
        predPath = [p for p in imageDir.iterdir()][0]
        # Close the slice file once its pixels are copied
        with Image.open(predPath) as sliceImage:
            imgInput = np.array(sliceImage)
        prediction = model.predict(imgInput)
        Image.fromarray(prediction).save(imageDir.joinpath("PREDICTION.jpg"))

    return "OK"
'''
App Routes
'''
......@@ -70,7 +134,9 @@ App Routes
def index():
return render_template("index.html")
'''
DATA DISPLAY
'''
@app.route("/image-select")
def imageSelect():
'''
......@@ -104,9 +170,16 @@ def imageSliceContent(imageId, sliceSize):
sliceList=sliceList)
'''
PREPROCESS MANAGEMENT
'''
@app.route("/create-mask/<string:imageId>/<string:sliceId>/<int:size>")
def createMask(imageId, sliceId, size):
processedImages = imageDB.getProcessedList(sliceId)
processedImages.append({
"id": sliceId,
"name": sliceId
})
return render_template("createMask.html",
imageId=imageId,
sliceId=sliceId,
......@@ -114,12 +187,66 @@ def createMask(imageId, sliceId, size):