23 from ConfigParser
import SafeConfigParser
24 np.set_printoptions(precision=2)
30 print 'GPyOpt not found' 37 Class to perform optimisation of SAM Models. 39 This class reads in the parameters for optimisation from the `[Optimisation]` section found in the config.ini present in the data directory passed in as dataDir. Parameters are set for variables that are present in the `[driver_name]` section of the same config.ini in the following manner `variableName = [parameterType:commaSeparatedParameterValues]`.\n 41 Parameters can be : \n 42 1) __discreteInt__ : Equally spaced integers in the form of `x = [discreteInt:start,interval,end]`. \n 43 2) __discreteFloat__ : Equally spaced floats in the form of `x = [discreteFloat:start,interval,end]`. \n 44 3) __continuous__ : Continuous range in the form of `x = [continuous:start,end]`. \n 45 4) __list__ : Use one parameter value at a time from the comma separated list of possible parameter values in the form of `x = [list:a,b,c,d]` where x is set to just one of the values in the list. \n 46 5) __bool__ : In the form of `x = [bool]` where x = 1 or 0. Similar to `x = [list:1,0]`. \n 47 6) __combination__ : Use multiple parameters at a time from the comma separated list of possible parameters values in the form of `x = [combination:a,b,c,d]` where x is set to a combination of parameters. Length of the values list in x ranges from 1 to number of comma separated parameter values. \n 53 driver = driverName # driver present in SAM_Drivers folder 54 modelNameBase = modelName # user set model name 57 # default parameters for the training section which must always be present. 58 update_mode = `new` # `new` or `false`. `New` will train a new model with the following parameters and delete the old one. `False` will check for the availability of an already trained model and if one is available load it together with its parameters ignoring the rest of this file. 
59 experiment_number = 0 # experiment number in case different models are required to be compared 60 model_type = mrd # model type can be `mrd` or `bgplvm` 61 model_mode = single # model_mode can be `single` or `multiple` 62 model_num_inducing = 170 # any integer number. Generally < 200 for performance considerations 63 model_num_iterations = 50 # any integer number. High numbers mean increased training time 64 model_init_iterations = 450 # any integer number. High numbers mean increased training time 65 verbose = True # `True` or `False` will turn logging to stdout on or off. Logging to file is always on. 66 Quser = 10 # Number of target dimensions for the output latent space. Higher numbers mean a more detailed latent space but very sparse clusters. Higher dimensionality also requires more input data for good generalisation. 67 ratioData = 80 # Train/Test split. 80% train, 20% test 68 kernelString = "GPy.kern.RBF(Q, ARD=False) + GPy.kern.Bias(Q) + GPy.kern.White(Q)" # This is the kernel used by the Gaussian Process. Keep this constant. Future releases will make this a changeable parameter 69 optimiseRecall = 0 # This parameter sets the number of optimisations that occur during recall. If 0 no optimisations are made and recall becomes similar to nearest neighbour analysis. 70 calibrateUnknown = True # This parameter triggers the learning of known/unknown classification. 71 # start of unique model parameters. These parameters are usually parameters to modify and alter the signal processing that occurs in user defined readData function. The following are examples. 75 windowOffsetPercent = 12.5 80 # default parameters for the optimisation section which must always be present. 
81 acquisitionFunction = 'EI' # Can be either 'MPI' : maximum probability of improvement, 'EI' : Expected Improvement or 'UCB' : Upper confidence bound 82 # custom optimisation parameters which must be a subset of the default and custom parameters in the previous section 83 model_num_inducing = [discreteInt:20,50,220] 84 thresholdMovement = [bool] 85 components = [combination:pos,vel,acc] 86 joints = [list:head,chest,right hand,left hand,right arm,left arm] 87 windowOffsetPercent = [discreteFloat:10.0,1.5,22.5] 88 thresholdPercent = [continuous:13.5,] 90 def __init__(self, fName, dataDir, modelDir, driverName, mode, baseName, persistence, windowed, verbose):
92 Initialisation for modelOptClass 95 fName: File name of the model to use as a starting point for the optimisation 96 dataDir: Directory containing the data to be trained 97 modelDir: Directory where to save optimised model and temporary models created during optimisation 98 driverName: Name of driver to use for training which must be present in SAM_Drivers folder 99 mode: This is a legacy parameter that changed the behaviour of saving models. This parameter is not in use anymore. 100 baseName: Base name of model to be trained. Model filenames take the form of __baseName_driverName_modelType_modelVersion.pickle__ with a corresponding __baseName_driverName_modelType_modelVersion_model.h5__. modelVersion can be `best`, `exp<experiment number>`, or `backup`. 101 persistence : `'True'` or `'False'` indicating terminals opened by samOptimiser stay open even after process termination 102 windowed : `'True'` or `'False'` indicating separate terminals will be opened by samOptimiser for each training and interaction process 103 verbose : `'True'` or `'False'` switching on or off logging to stdout 122 self.
penalty = 10000000000000000000
139 msg =
'Cannot find GPyOpt package. Make sure it is installed and added to PYTHONPATH' 145 Configure optimisation parameters from config.ini 147 Reads in config.ini parameters and sets up the optimisation landscape from these parameters. Also reads in the performance of a current model if one is available to set that as the performance to beat. 152 self.
parser = SafeConfigParser()
159 if (self.
parser.has_section(
'Optimisation')
and self.
parser.has_section(
168 'config.ini found, Optimisation and ' + self.
baseName +
169 ' sections found but Optimisation section does not contain parameters to optimise']
179 if '.pickle' in j
and '__L' not in j:
180 modelPickle = pickle.load(open(j,
'rb'))
181 testConf = modelPickle[
'overallPerformance']
182 np.fill_diagonal(testConf, 0)
190 print 'No model present' 193 print 'testConf key not present in .pickle file' 202 if i ==
'acquisitionFunction':
207 if v ==
'MPI' or v ==
'EI' or v ==
'UCB':
210 opts = v.partition(
'[')[-1].rpartition(
']')[0]
211 sects = opts.split(
':')
213 if sects[0] ==
'discreteInt':
214 lims = sects[1].split(
',')
215 arr = np.arange(int(lims[0]), int(lims[2]), int(lims[1]))
216 arr = np.hstack((arr, int(lims[2])))
219 tempDict[
'type'] =
'discrete' 220 tempDict[
'domain'] = arr
221 tempDict[
'description'] = sects[0]
222 self.
domain.append(tempDict)
223 elif sects[0] ==
'discreteFloat':
224 lims = sects[1].split(
',')
225 arr = np.arange(float(lims[0]), float(lims[2]), float(lims[1]))
226 arr = np.hstack((arr, float(lims[2])))
229 tempDict[
'type'] =
'discrete' 230 tempDict[
'domain'] = arr
231 tempDict[
'description'] = sects[0]
232 self.
domain.append(tempDict)
233 elif sects[0] ==
'continuous':
235 lims = sects[1].split(
',')
237 tempDict[
'type'] =
'continuous' 238 tempDict[
'domain'] = (float(lims[0]), float(lims[1]))
239 tempDict[
'description'] = sects[0]
240 self.
domain.append(tempDict)
241 elif sects[0] ==
'bool':
243 tempDict[
'type'] =
'discrete' 244 tempDict[
'domain'] = np.array((0, 1))
245 tempDict[
'description'] = sects[0]
247 self.
domain.append(tempDict)
248 elif sects[0] ==
'combination':
249 splitList = sects[1].split(
',')
253 tempDict[
'type'] =
'discrete' 254 tempDict[
'domain'] = np.array((0, 1))
256 tempDict[
'description'] = sects[0]
257 tempDict[
'groupName'] = i
258 self.
domain.append(tempDict)
259 elif sects[0] ==
'list':
260 splitList = sects[1].split(
',')
263 tempDict[
'type'] =
'discrete' 264 tempDict[
'domain'] = np.arange(0, len(splitList), 1)
265 tempDict[
'description'] = sects[0]
266 tempDict[
'groupName'] = i
267 tempDict[
'values'] = splitList
269 self.
domain.append(tempDict)
281 return [
True,
'Optimisation configuration ready']
284 'config.ini found at ' + self.
dataDir +
' but does not contain Optimisation and/or ' +
287 return [
False,
'config.ini not present in ' + self.
dataDir]
289 return [
False,
'Initialising parameters failed']
293 Optimisation Evaluation Function 295 This function evaluates the current values chosen by the optimiser in `x` by writing these parameter values to the respective parameters in the training parameters section of the config.ini and subsequently making a system call to trainSAMModel. 298 x : List of current evaluation parameters chosen by the optimiser. 301 Total error for the current training parameters. Error is the weighted sum total of the confusion matrix created during testing of the model as part of training. 305 for j
in range(len(x[0])):
306 print self.
domain[j][
'name'],
' : ', x[0][j]
309 combinationDicts = dict()
310 for j
in range(len(x[0])):
311 if self.
domain[j][
'description'] ==
'combination':
317 if self.
domain[j][
'groupName']
in combinationDicts:
318 combinationDicts[self.
domain[j][
'groupName']].append(self.
domain[j][
'name'])
320 combinationDicts[self.
domain[j][
'groupName']] = [self.
domain[j][
'name']]
321 elif self.
domain[j][
'description'] ==
'discreteInt':
323 elif self.
domain[j][
'description'] ==
'list':
325 elif self.
domain[j][
'description'] ==
'bool':
333 for name, val
in combinationDicts.iteritems():
345 cmd = self.
fName +
' ' + args
349 command =
"bash -c \"" + cmd +
"; exec bash\"" 351 command =
"bash -c \"" + cmd +
"\"" 359 self.
trainProcess = subprocess.Popen([
'xterm',
'-e', command], shell=
False)
372 print 'Training ...', totalTime * 0.5,
'minutes elapsed' 376 print 'poll return:', ret
381 self.
currFiles = [j
for j
in glob.glob(
'__'.join(self.
modelDir.split(
'__')[:3]) +
'*')
if 'exp' in j]
383 if '.pickle' in j
and '__L' not in j:
384 modelPickle = pickle.load(open(j,
'rb'))
385 testConf = modelPickle[
'overallPerformance']
386 print 'Confusion Matrix: ', testConf
387 np.fill_diagonal(testConf, 0)
389 factorMat = np.ones(testConf.shape)
390 factorMat[:-1, -1] = 0.5
391 print 'factorMat', factorMat
392 print 'testConf', testConf
393 print 'modified', testConf*factorMat
394 currError += np.sum(testConf*factorMat)
396 print 'Current cumulative error: ', currError
402 self.
parser.write(open(self.
dataDir +
"/configBest.ini",
'wb'))
406 print 'Error training model' 407 print 'Current cumulative error: ', currError
409 print 'Best Error so far : ', self.
bestError 411 print '-----------------------------------------------------' 417 Utility function to copy model. 420 newName: Model base name to copy. 421 direction: `normal` or `reverse`. `normal` makes a copy of the model with `exp<experiment number>` in the filename to `backup` in the filename. `reverse` makes a copy of `backup` into `exp<experiment number>`. 424 True or False indicating success. 428 self.
currFiles = [j
for j
in glob.glob(
'__'.join(self.
modelDir.split(
'__')[:3]) +
'*')
429 if 'backup' not in j
and 'best' not in j]
432 print 'Original: ', k
433 temp = k.split(
'exp')
435 temp2 = temp[1].split(
'__')
436 backupFiles += [temp[0] + newName +
'__' + temp2[1]]
438 temp2 = temp[1].split(
'.')
439 if 'model' in temp[1]:
440 backupFiles += [temp[0] + newName +
'_model.' + temp2[1]]
442 backupFiles += [temp[0] + newName +
'.' + temp2[1]]
443 print 'New: ', backupFiles[-1]
446 if direction ==
'reverse':
447 for j
in range(len(backupFiles)):
448 shutil.copyfile(backupFiles[j], self.
currFiles[j])
450 for j
in range(len(backupFiles)):
451 shutil.copyfile(self.
currFiles[j], backupFiles[j])
454 print 'No model present' 461 Utility function to delete models. 464 modelDir : Directory containing the model to delete. 465 newName : Subset of model files to delete. Either `best`, `backup` or `exp`. 471 if os.path.isfile(modelDir):
472 print modelDir,
' model file present' 473 fileList = [j
for j
in glob.glob(
'__'.join(modelDir.split(
'__')[:3]) +
'*')
if newName
in j]
480 Sets up the modelOptClass and calls GPyOpt optimisation for 200 iterations. 482 Same arguments as modelOptClass.init(). 485 0 if completed successfully. -1 if completed unsuccessfully. 488 print optNotFound,
' ', len(sys.argv)
489 if len(sys.argv) >= 9
and not optNotFound:
496 per = sys.argv[7] ==
'True' 498 wind = sys.argv[8] ==
'True' 499 verb = sys.argv[9] ==
'True' 502 if optModel.configured[0]:
503 myBopt = GPyOpt.methods.BayesianOptimization(f=optModel.f,
504 domain=optModel.domain,
505 initial_design_numdata=2,
506 acquisition_type=optModel.acquisitionFunction)
515 myBopt.run_optimization(max_iter)
516 optModel.parser.write(open(optModel.dataDir +
"/config.ini",
'wb'))
517 optModel.copyModel(
'best',
'reverse')
526 os.system(
"mv " + optModel.dataDir+
"/configBest.ini " + optModel.dataDir+
"/config.ini")
532 print 'GPyOpt package not found or incorrect number of arguments' 535 if __name__ ==
'__main__':
def deleteModel(modelDir, newName)
Utility function to delete models.
def configOptimisation(self)
Configure optimisation parameters from config.ini.
Class to perform optimisation of SAM Models.
def main()
Sets up the modelOptClass and calls GPyOpt optimisation for 200 iterations.
def f(self, x)
Optimisation Evaluation Function.
def copyModel(self, newName, direction)
Utility function to copy model.