icub-client
samOptimiser.py
#!/usr/bin/env python
# """"""""""""""""""""""""""""""""""""""""""""""
# The University of Sheffield
# WYSIWYD Project
#
# A class that implements Bayesian Optimisation of SAM model parameters
#
# Created on 20 July 2016
#
# @author: Daniel Camilleri
#
# """"""""""""""""""""""""""""""""""""""""""""""

import numpy as np
import time
import os
import subprocess
import pickle
import sys
import glob
import shutil
import copy
from ConfigParser import SafeConfigParser
np.set_printoptions(precision=2)

optNotFound = False
try:
    import GPyOpt
except ImportError:
    print 'GPyOpt not found'
    optNotFound = True


class modelOptClass(object):
    """
    Class to perform optimisation of SAM Models.

    This class reads in the parameters for optimisation from the `[Optimisation]` section of the config.ini present in the data directory passed in as dataDir. Parameters are set for variables that are present in the `[driver_name]` section of the same config.ini in the following manner: `variableName = [parameterType:commaSeparatedParameterValues]`.\n

    Parameters can be: \n
    1) __discreteInt__ : Equally spaced integers in the form of `x = [discreteInt:start,interval,end]`. \n
    2) __discreteFloat__ : Equally spaced floats in the form of `x = [discreteFloat:start,interval,end]`. \n
    3) __continuous__ : A continuous range in the form of `x = [continuous:start,end]`. \n
    4) __list__ : Use one parameter value at a time from the comma separated list of possible values, in the form of `x = [list:a,b,c,d]`, where x is set to exactly one of the values in the list. \n
    5) __bool__ : In the form of `x = [bool]`, where x = 1 or 0. Similar to `x = [list:1,0]`. \n
    6) __combination__ : Use multiple parameter values at a time from the comma separated list of possible values, in the form of `x = [combination:a,b,c,d]`, where x is set to a combination of the values. The number of values assigned to x ranges from 1 to the number of comma separated values. \n

    Examples:
        Example config.ini

        [model_options]
        driver = driverName # driver present in SAM_Drivers folder
        modelNameBase = modelName # user set model name

        [modelName]
        # default parameters for the training section which must always be present.
        update_mode = new # `new` or `false`. `new` will train a new model with the following parameters and delete the old one. `false` will check for the availability of an already trained model and, if one is available, load it together with its parameters, ignoring the rest of this file.
        experiment_number = 0 # experiment number in case different models are required to be compared
        model_type = mrd # model type can be `mrd` or `bgplvm`
        model_mode = single # model_mode can be `single` or `multiple`
        model_num_inducing = 170 # any integer number. Generally < 200 for performance considerations
        model_num_iterations = 50 # any integer number. High numbers mean increased training time
        model_init_iterations = 450 # any integer number. High numbers mean increased training time
        verbose = True # `True` or `False` will turn logging to stdout on or off. Logging to file is always on.
        Quser = 10 # Number of target dimensions for the output latent space. Higher numbers mean a more detailed latent space but very sparse clusters. Higher dimensionality also requires more input data for good generalisation.
        ratioData = 80 # Train/Test split. 80% train, 20% test
        kernelString = "GPy.kern.RBF(Q, ARD=False) + GPy.kern.Bias(Q) + GPy.kern.White(Q)" # This is the kernel used by the Gaussian Process. Keep this constant. Future releases will make this a changeable parameter
        optimiseRecall = 0 # This parameter sets the number of optimisations that occur during recall. If 0, no optimisations are made and recall becomes similar to nearest neighbour analysis.
        calibrateUnknown = True # This parameter triggers the learning of known/unknown classification.
        # start of unique model parameters. These are usually parameters that modify and alter the signal processing carried out in the user defined readData function. The following are examples.
        thresholdMovement = 1
        components = pos,vel
        joints = head
        windowOffsetPercent = 12.5
        thresholdPercent = 22
        moveThresh = 0.01

        [Optimisation]
        # default parameters for the optimisation section which must always be present.
        acquisitionFunction = 'EI' # Can be either 'MPI' : Maximum Probability of Improvement, 'EI' : Expected Improvement, or 'UCB' : Upper Confidence Bound
        # custom optimisation parameters which must be a subset of the default and custom parameters in the previous section
        model_num_inducing = [discreteInt:20,50,220]
        thresholdMovement = [bool]
        components = [combination:pos,vel,acc]
        joints = [list:head,chest,right hand,left hand,right arm,left arm]
        windowOffsetPercent = [discreteFloat:10.0,1.5,22.5]
        thresholdPercent = [continuous:13.5,]
    """
    def __init__(self, fName, dataDir, modelDir, driverName, mode, baseName, persistence, windowed, verbose):
        """
        Initialisation for modelOptClass

        Args:
            fName: File name of the training script/executable that is launched for each optimisation evaluation.
            dataDir: Directory containing the data to train on.
            modelDir: Directory in which to save the optimised model and the temporary models created during optimisation.
            driverName: Name of the driver to use for training, which must be present in the SAM_Drivers folder.
            mode: Legacy parameter that changed the behaviour of saving models. It is no longer in use.
            baseName: Base name of the model to be trained. Model filenames take the form of __baseName_driverName_modelType_modelVersion.pickle__ with a corresponding __baseName_driverName_modelType_modelVersion_model.h5__. modelVersion can be `best`, `exp<experiment number>`, or `backup`.
            persistence: `'True'` or `'False'` indicating whether terminals opened by samOptimiser stay open after process termination.
            windowed: `'True'` or `'False'` indicating whether samOptimiser opens a separate terminal for each training and interaction process.
            verbose: `'True'` or `'False'` switching logging to stdout on or off.

        Returns:
            None
        """
        try:
            import GPyOpt
            self.fName = fName
            self.dataDir = dataDir
            self.modelDir = modelDir
            self.driverName = driverName
            self.baseName = baseName
            self.persistence = persistence
            self.verbose = verbose
            self.devnull = None
            self.windowed = windowed
            if not self.windowed:
                self.devnull = open('/dev/null', 'w')
            self.numEvals = 0
            self.penalty = 10000000000000000000
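            # this large value acts as a penalty error, returned when training fails
            # or when no previously trained model is available as a baseline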
            self.mode = mode
            self.parser = None
            self.sectionBackup = None
            self.sectionOpt = None
            self.modelPresent = False
            self.bestOptions = None
            self.domain = None
            self.bestError = None
            self.currIterSettings = None
            self.trainProcess = None
            self.resultsList = []
            self.currFiles = None
            self.configured = self.configOptimisation()
            print self.configured[1]
        except:
            msg = 'Cannot find GPyOpt package. Make sure it is installed and added to PYTHONPATH'
            print msg
            self.configured = [False, msg]

    def configOptimisation(self):
        """
        Configure optimisation parameters from config.ini

        Reads in config.ini parameters and sets up the optimisation landscape from these parameters. Also reads in the performance of a current model, if one is available, to set that as the performance to beat.

        Returns:
            List of [boolean, string] indicating success together with an accompanying message.
        """
        self.parser = SafeConfigParser()
        self.parser.optionxform = str
        try:
            # check config file exists
            found = self.parser.read(self.dataDir + "/config.ini")
            if found:
                # open and check if Optimisation section is present in config.ini
                if (self.parser.has_section('Optimisation') and self.parser.has_section(
                        self.baseName)):
                    # create backup of current self.baseName section
                    self.sectionBackup = dict(self.parser.items(self.baseName))

                    # load Optimisation section
                    self.sectionOpt = dict(self.parser.items('Optimisation'))
                    if len(self.sectionOpt) == 1 and 'acquisitionFunction' in self.sectionOpt:
                        return [False,
                                'config.ini found, Optimisation and ' + self.baseName +
                                ' sections found but Optimisation section does not contain parameters to optimise']
                    else:
                        # create backup if model in modelDir exists
                        self.modelPresent = self.copyModel('backup', 'normal')

                        # load performance of current model if one is available and track performance
                        self.bestError = 0
                        try:
                            if len(self.currFiles) > 0:
                                for j in self.currFiles:
                                    if '.pickle' in j and '__L' not in j:
                                        modelPickle = pickle.load(open(j, 'rb'))
                                        testConf = modelPickle['overallPerformance']
                                        np.fill_diagonal(testConf, 0)
                                        self.bestError += np.sum(testConf)
                                # after combining all errors into one value copy and rename files to best
                                # which contains the best performing model so far
                                # best model will only be present if correct computation of bestError occurs
                                self.copyModel('best', 'normal')
                                self.bestOptions = copy.deepcopy(self.parser.items(self.baseName))
                            else:
                                print 'No model present'
                                self.bestError = self.penalty
                        except:
                            print 'testConf key not present in .pickle file'
                            self.bestError = self.penalty

                        # iterate over keys of sectionOpt to create domain of optimisation problem
                        self.acquisitionFunction = 'MPI'
                        self.domain = []  # list of dictionaries
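                        # each entry in self.domain follows GPyOpt's domain dictionary format, e.g.
                        # (illustrative): {'name': 'model_num_inducing', 'type': 'discrete',
                        #                  'domain': array([ 20,  70, 120, 170, 220]), 'description': 'discreteInt'}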
                        # armedBanditsMode = True
                        self.numPossibilities = 1
                        for i, v in self.sectionOpt.iteritems():
                            if i == 'acquisitionFunction':
                                # possible acquisition functions
                                # 'MPI' : Maximum Probability of Improvement
                                # 'EI'  : Expected Improvement
                                # 'UCB' : Upper Confidence Bound
                                if v == 'MPI' or v == 'EI' or v == 'UCB':
                                    self.acquisitionFunction = v
                            elif i in self.sectionBackup:
                                opts = v.partition('[')[-1].rpartition(']')[0]
                                sects = opts.split(':')
                                tempDict = dict()
                                if sects[0] == 'discreteInt':
                                    lims = sects[1].split(',')
                                    arr = np.arange(int(lims[0]), int(lims[2]), int(lims[1]))
                                    arr = np.hstack((arr, int(lims[2])))
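                                    # np.arange excludes the end value, so it is appended explicitly
                                    # to make the range inclusive of `end`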
                                    self.numPossibilities *= len(arr)
                                    tempDict['name'] = i
                                    tempDict['type'] = 'discrete'
                                    tempDict['domain'] = arr
                                    tempDict['description'] = sects[0]
                                    self.domain.append(tempDict)
                                elif sects[0] == 'discreteFloat':
                                    lims = sects[1].split(',')
                                    arr = np.arange(float(lims[0]), float(lims[2]), float(lims[1]))
                                    arr = np.hstack((arr, float(lims[2])))
                                    self.numPossibilities *= len(arr)
                                    tempDict['name'] = i
                                    tempDict['type'] = 'discrete'
                                    tempDict['domain'] = arr
                                    tempDict['description'] = sects[0]
                                    self.domain.append(tempDict)
                                elif sects[0] == 'continuous':
                                    # armedBanditsMode = False
                                    lims = sects[1].split(',')
                                    tempDict['name'] = i
                                    tempDict['type'] = 'continuous'
                                    tempDict['domain'] = (float(lims[0]), float(lims[1]))
                                    tempDict['description'] = sects[0]
                                    self.domain.append(tempDict)
                                elif sects[0] == 'bool':
                                    tempDict['name'] = i
                                    tempDict['type'] = 'discrete'
                                    tempDict['domain'] = np.array((0, 1))
                                    tempDict['description'] = sects[0]
                                    self.numPossibilities *= 2
                                    self.domain.append(tempDict)
                                elif sects[0] == 'combination':
                                    splitList = sects[1].split(',')
                                    for b in splitList:
                                        tempDict = dict()
                                        tempDict['name'] = b
                                        tempDict['type'] = 'discrete'
                                        tempDict['domain'] = np.array((0, 1))
                                        self.numPossibilities *= 2
                                        tempDict['description'] = sects[0]
                                        tempDict['groupName'] = i
                                        self.domain.append(tempDict)
                                elif sects[0] == 'list':
                                    splitList = sects[1].split(',')
                                    tempDict = dict()
                                    tempDict['name'] = i
                                    tempDict['type'] = 'discrete'
                                    tempDict['domain'] = np.arange(0, len(splitList), 1)
                                    tempDict['description'] = sects[0]
                                    tempDict['groupName'] = i
                                    tempDict['values'] = splitList
                                    self.numPossibilities *= len(splitList)
                                    self.domain.append(tempDict)
                                else:
                                    print 'ignoring ', i

                        # if armedBanditsMode:
                        #     for j in self.domain:
                        #         j['type'] = 'bandit'

                        for j in self.domain:
                            print j
                        print

                        return [True, 'Optimisation configuration ready']
                else:
                    return [False,
                            'config.ini found at ' + self.dataDir + ' but does not contain Optimisation and/or ' +
                            self.baseName + ' section']
            else:
                return [False, 'config.ini not present in ' + self.dataDir]
        except:
            return [False, 'Initialising parameters failed']

    def f(self, x):
        """
        Optimisation Evaluation Function

        This function evaluates the current parameter values chosen by the optimiser in `x` by writing them to the respective parameters in the training section of config.ini and subsequently making a system call to trainSAMModel.

        Args:
            x: List of current evaluation parameters chosen by the optimiser.

        Returns:
            Total error for the current training parameters. The error is the weighted sum of the confusion matrix created during testing of the model as part of training.
        """
        self.numEvals += 1
        print 'Trial ', self.numEvals, 'out of', self.numPossibilities, 'possibilities'
        for j in range(len(x[0])):
            print self.domain[j]['name'], ' : ', x[0][j]
        print
        self.currIterSettings = self.sectionBackup
        combinationDicts = dict()
        for j in range(len(x[0])):
            if self.domain[j]['description'] == 'combination':
                if x[0][j] == 1:
                    val = True
                else:
                    val = False
                if val:
                    if self.domain[j]['groupName'] in combinationDicts:
                        combinationDicts[self.domain[j]['groupName']].append(self.domain[j]['name'])
                    else:
                        combinationDicts[self.domain[j]['groupName']] = [self.domain[j]['name']]
            elif self.domain[j]['description'] == 'discreteInt':
                self.parser.set(self.baseName, self.domain[j]['name'], str(int(x[0][j])))
            elif self.domain[j]['description'] == 'list':
                self.parser.set(self.baseName, self.domain[j]['name'], self.domain[j]['values'][int(x[0][j])])
            elif self.domain[j]['description'] == 'bool':
                if x[0][j] == 1:
                    val = 'True'
                else:
                    val = 'False'
                self.parser.set(self.baseName, self.domain[j]['name'], val)
            else:
                self.parser.set(self.baseName, self.domain[j]['name'], str(x[0][j]))
        for name, val in combinationDicts.iteritems():
            self.parser.set(self.baseName, name, ','.join(val) + ',')
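            # combination groups are written back as a comma separated string with a trailing comma,
            # e.g. 'pos,vel,' when both pos and vel are selected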

        # for t in self.parser.items(self.baseName):
        #     print t

        # print
        # print
        self.parser.write(open(self.dataDir + "/config.ini", 'wb'))

        args = ' '.join([self.dataDir, self.modelDir, self.driverName, 'new', self.baseName])

        cmd = self.fName + ' ' + args
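        # the resulting command has the form:
        #   <fName> <dataDir> <modelDir> <driverName> new <baseName>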
        # NEW
        # cmd = trainPath + ' -- ' + args
        if self.persistence:
            command = "bash -c \"" + cmd + "; exec bash\""
        else:
            command = "bash -c \"" + cmd + "\""

        if self.verbose:
            print 'cmd: ', cmd

        deleteModel(self.modelDir, 'exp')
        if self.windowed:
            self.trainProcess = subprocess.Popen(['xterm', '-e', command], shell=False)
        else:
            self.trainProcess = subprocess.Popen([cmd], shell=True, stdout=self.devnull, stderr=self.devnull)

        ret = None
        cnt = 0
        totalTime = 0
        while ret is None:
            ret = self.trainProcess.poll()
            time.sleep(5)
            cnt += 1
            if cnt > 5:
                totalTime += 1
                print 'Training ...', totalTime * 0.5, 'minutes elapsed'
                cnt = 0

        currError = 0
        print 'poll return:', ret
        # if len(self.currFiles) == 0:
        #     self.modelPresent = self.copyModel('backup', 'normal')
        #     self.copyModel('best', 'normal')
        if ret == 0:
            self.currFiles = [j for j in glob.glob('__'.join(self.modelDir.split('__')[:3]) + '*') if 'exp' in j]
            for j in self.currFiles:
                if '.pickle' in j and '__L' not in j:
                    modelPickle = pickle.load(open(j, 'rb'))
                    testConf = modelPickle['overallPerformance']
                    print 'Confusion Matrix: ', testConf
                    np.fill_diagonal(testConf, 0)
                    # introduce a factor to give favour to specific classifications
                    factorMat = np.ones(testConf.shape)
                    factorMat[:-1, -1] = 0.5
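                    # off-diagonal errors falling in the last column of the confusion matrix
                    # are down-weighted so that they count half as much as other misclassifications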
                    print 'factorMat', factorMat
                    print 'testConf', testConf
                    print 'modified', testConf * factorMat
                    currError += np.sum(testConf * factorMat)
            print
            print 'Current cumulative error: ', currError
            if currError < self.bestError:
                deleteModel(self.modelDir, 'best')
                self.bestError = currError
                self.copyModel('best', 'normal')
                self.bestOptions = copy.deepcopy(self.parser.items(self.baseName))
                self.parser.write(open(self.dataDir + "/configBest.ini", 'wb'))

        else:
            currError = self.penalty
            print 'Error training model'
            print 'Current cumulative error: ', currError

        print 'Best Error so far : ', self.bestError
        print
        print '-----------------------------------------------------'
        self.resultsList.append([x, currError])
        return currError

    def copyModel(self, newName, direction):
        """
        Utility function to copy a model.

        Args:
            newName: Tag (`best` or `backup`) used in the filenames of the copied model files.
            direction: `normal` or `reverse`. `normal` copies the files containing `exp<experiment number>` in their filename to files containing `newName`. `reverse` copies the `newName` files back over the `exp<experiment number>` files.

        Returns:
            True or False indicating success.
        """
        if os.path.isfile(self.modelDir):
            print self.modelDir, ' model file present'
            self.currFiles = [j for j in glob.glob('__'.join(self.modelDir.split('__')[:3]) + '*')
                              if 'backup' not in j and 'best' not in j]
            backupFiles = []
            for k in self.currFiles:
                print 'Original: ', k
                temp = k.split('exp')
                if '__L' in k:
                    temp2 = temp[1].split('__')
                    backupFiles += [temp[0] + newName + '__' + temp2[1]]
                else:
                    temp2 = temp[1].split('.')
                    if 'model' in temp[1]:
                        backupFiles += [temp[0] + newName + '_model.' + temp2[1]]
                    else:
                        backupFiles += [temp[0] + newName + '.' + temp2[1]]
                print 'New: ', backupFiles[-1]
            print

            if direction == 'reverse':
                for j in range(len(backupFiles)):
                    shutil.copyfile(backupFiles[j], self.currFiles[j])
            else:
                for j in range(len(backupFiles)):
                    shutil.copyfile(self.currFiles[j], backupFiles[j])
            return True
        else:
            print 'No model present'
            self.currFiles = []
            return False


def deleteModel(modelDir, newName):
    """
    Utility function to delete models.

    Args:
        modelDir: Directory containing the model to delete.
        newName: Subset of model files to delete. Either `best`, `backup` or `exp`.

    Returns:
        None
    """
    if os.path.isfile(modelDir):
        print modelDir, ' model file present'
        fileList = [j for j in glob.glob('__'.join(modelDir.split('__')[:3]) + '*') if newName in j]
        for k in fileList:
            os.remove(k)


def main():
    """
    Sets up the modelOptClass and calls GPyOpt optimisation for 200 iterations.

    Takes the same arguments as modelOptClass.__init__().

    Returns:
        0 if completed successfully. -1 if completed unsuccessfully.
    """
    # Initialisation parameters:
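    # expected positional arguments, in order:
    #   fName dataDir modelDir driverName mode baseName persistence windowed verbose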
    print optNotFound, ' ', len(sys.argv)
    if len(sys.argv) >= 10 and not optNotFound:
        a = sys.argv[1]
        b = sys.argv[2]
        c = sys.argv[3]
        d = sys.argv[4]
        e = sys.argv[5]
        f = sys.argv[6]
        per = sys.argv[7] == 'True'
        # per = True
        wind = sys.argv[8] == 'True'
        verb = sys.argv[9] == 'True'

        optModel = modelOptClass(a, b, c, d, e, f, per, wind, verb)
        if optModel.configured[0]:
            myBopt = GPyOpt.methods.BayesianOptimization(f=optModel.f,  # function to optimize
                                                         domain=optModel.domain,  # box constraints of the problem
                                                         initial_design_numdata=2,  # number of initial design points
                                                         acquisition_type=optModel.acquisitionFunction)
            max_iter = 200
            # pickle myBopt to save its initialisation
            # logFilename = os.path.join(b, 'optimiserLog')
            # output = open(logFilename, 'wb')
            # pickle.dump({'optModel', optModel}, output)
            # output.close()

            try:
                myBopt.run_optimization(max_iter)
                optModel.parser.write(open(optModel.dataDir + "/config.ini", 'wb'))
                optModel.copyModel('best', 'reverse')
                return 0
            except:
                # # pickle results list together with optimiser
                # d = pickle.load(open(logFilename, 'r'))
                # d['resultList'] = myBopt.resultsList
                # output = open(logFilename, 'wb')
                # pickle.dump(d, output)
                # output.close()
                os.system("mv " + optModel.dataDir + "/configBest.ini " + optModel.dataDir + "/config.ini")
                # optModel.parser.write(open(optModel.dataDir + "/config.ini", 'wb'))
                return -1
        else:
            return -1
    else:
        print 'GPyOpt package not found or incorrect number of arguments'
        return -1

if __name__ == '__main__':
    main()
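
# Example invocation (placeholder values shown for illustration):
#   python samOptimiser.py <trainingScript> <dataDir> <modelDir> <driverName> <mode> <baseName> True True True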