icub-client
SAMDriver.py
Go to the documentation of this file.
1 # """"""""""""""""""""""""""""""""""""""""""""""
2 # The University of Sheffield
3 # WYSIWYD Project
4 #
5 # The superclass of all Drivers. Every driver written for
6 # SAM must inherit from this class and extend it with any
7 # specific functionality.
8 #
9 # See also: SAMDriver_faces.py, SAMDriver_actions.py
10 #
11 # Created on 26 May 2015
12 #
13 # @authors: Andreas Damianou, Uriel Martinez, Luke Boorman, Daniel Camilleri
14 #
15 # """"""""""""""""""""""""""""""""""""""""""""""
16 from SAM.SAM_Core import SAMCore
17 from SAM.SAM_Core import SAMTesting
18 import GPy
19 import numpy
20 import os
21 import logging
22 
23 
31 
class SAMDriver:
    """
    SAM Driver parent class that defines the methods by which models are initialised, trained and saved.

    Every driver written for SAM inherits from this class. The methods `loadParameters`, `readData`,
    `processLiveData` and `formatGeneratedData` raise `NotImplementedError` here and must be
    implemented by the inheriting child.
    """

    def __init__(self):
        """
        Initialisation for SAMDriver.

        Sets the data, configuration and state attributes used by the other methods to their defaults.
        """
        # NOTE(review): training() reads and writes self.SAMObject, which is not assigned anywhere
        # in this initialiser as listed - confirm where the SAM object is created.

        # Data, labels and their normalised / test counterparts.
        self.Y = None
        self.L = None
        self.X = None
        self.Ytest = None
        self.Ltest = None
        self.Ytestn = None
        self.Ltestn = None
        self.Ymean = None
        self.Ystd = None
        self.Yn = None
        self.Ln = None

        # Results written by testPerformance().
        self.segTrainConf = None
        self.segTrainPerc = None
        self.segTestConf = None
        self.segTestPerc = None

        # Configuration and state.
        self.data_labels = None
        self.paramsDict = dict()
        self.verbose = False
        self.model_mode = False
        self.Quser = None
        self.listOfModels = []
        self.model_type = None
        self.modelLabel = None
        self.textLabels = None
        self.participantList = None
        self.varianceThreshold = None
        self.fname = None
        self.optimiseRecall = True
        self.modelLoaded = False
        self.parallelOperation = False
        self.calibrated = False
        self.rawTextData = None
        self.rawData = None

        self.Yall = None
        self.Lall = None
        self.YtestAll = None
        self.LtestAll = None

        self.classifiers = None
        self.classif_thresh = None

        # saveParameters() iterates over this list, so make sure it always exists. The guard leaves
        # a list that a subclass already provides (e.g. as a class attribute) untouched.
        if not hasattr(self, 'additionalParametersList'):
            self.additionalParametersList = []

    def loadParameters(self, parser, trainName):
        """
        Function to load parameters from the model config.ini file.

        Method to load parameters from file loaded in parser from within section trainName and store these
        parameters in self.paramsDict.

        Args:
            parser: SafeConfigParser with pre-read config file.
            trainName: Section from which parameters are to be read.

        Returns:
            None

        Raises:
            NotImplementedError: Always, unless overridden by the inheriting child.
        """
        raise NotImplementedError("this needs to be implemented to use the model class")

    def saveParameters(self):
        """
        Method to store variables generated during training which are required during interaction in self.paramsDict.

        Every name in self.additionalParametersList is looked up on this instance and copied into
        self.paramsDict under the same name. Names that cannot be resolved are skipped.

        Args:
            None

        Returns:
            None
        """
        for name in self.additionalParametersList:
            logging.info("self.paramsDict['" + name + "'] = self." + name)
            try:
                # Resolve dotted names step by step so that entries such as 'a.b' keep working
                # without having to execute generated source code.
                value = self
                for part in name.split('.'):
                    value = getattr(value, part)
            except AttributeError:
                # Best effort: a parameter that has not been generated is simply not stored.
                logging.warning("Parameter '" + name + "' is not available and was not saved")
                continue
            self.paramsDict[name] = value

    def testPerformance(self, testModel, Yall, Lall, YtestAll, LtestAll, verbose):
        """
        Method for testing the whole dataset for overall performance.

        This implementation is a standard performance testing method that can be overridden by an inheriting child.
        The training results are stored in self.segTrainConf and self.segTrainPerc, the testing results in
        self.segTestConf and self.segTestPerc.

        Args:
            testModel : SAMObject Model to be tested.
            Yall : Numpy array with training data vectors to be tested.
            Lall : List with corresponding training data labels.
            YtestAll : Numpy array with testing data vectors to be tested.
            LtestAll : List with corresponding testing data labels.
            verbose : Boolean turning logging to stdout on or off.

        Returns:
            Tuple `(self.segTestConf, labelsSegTest, labelComparisonDict)` holding the first, third and fourth
            values returned by `SAMTesting.testSegments` for the testing data. The first is the confusion matrix.
        """
        yTesting = SAMTesting.formatDataFunc(Yall)
        trainResult = SAMTesting.testSegments(testModel, yTesting, Lall, verbose, label='Training')
        # Only the confusion matrix and percentage of the training pass are kept.
        [self.segTrainConf, self.segTrainPerc, labelsSegTrain, labelComparisonDict] = trainResult

        yTesting = SAMTesting.formatDataFunc(YtestAll)
        testResult = SAMTesting.testSegments(testModel, yTesting, LtestAll, verbose, label='Testing')
        [self.segTestConf, self.segTestPerc, labelsSegTest, labelComparisonDict] = testResult

        return self.segTestConf, labelsSegTest, labelComparisonDict

    def training(self, modelNumInducing, modelNumIterations, modelInitIterations, fname, save_model, economy_save,
                 keepIfPresent=True, kernelStr=None):
        """
        Method to train, store and load the learned model

        This method tries reloading the model in fname. If unsuccessful or loaded model has mismatching parameters,
        trains a new model from scratch.

        Args:
            modelNumInducing : Integer number of inducing parameters.
            modelNumIterations : Integer number of training iterations.
            modelInitIterations : Integer number of initialisation iterations.
            fname : Filename to save model to.
            save_model : Boolean to turn saving of the model on or off.
            economy_save : Boolean to turn economy saving on or off. Economy save saves smaller models by not
                storing data inside the model but keeping it stored in the data path.
            keepIfPresent : Boolean to enable or disable loading of a model when one is available.
            kernelStr : Kernel string with the requested kernel. If `None` the default kernel is used. The string
                is evaluated as a Python expression and is only used when the latent dimensionality exceeds 100.

        Returns:
            None
        """
        self.model_num_inducing = modelNumInducing
        self.model_num_iterations = modelNumIterations
        self.model_init_iterations = modelInitIterations

        modelExists = os.path.isfile(fname + '.pickle')

        if modelExists and not economy_save:
            # NOTE(review): keepIfPresent is not consulted on this path, so an existing full-size
            # model is always reloaded - confirm whether that is intended.
            logging.info("Loading SAMObject: " + fname)
            self.SAMObject = SAMCore.load_pruned_model(fname)
            return

        if not modelExists:
            logging.info("Training for " + str(modelInitIterations) + "|" + str(modelNumIterations) + " iterations...")

        # Drivers that never defined Quser fall back to None.
        if not hasattr(self, 'Quser'):
            self.Quser = None

        # Latent dimensionality: taken from the inputs if present, otherwise user supplied, otherwise 2.
        if self.X is not None:
            Q = self.X.shape[1]
        elif self.Quser is not None:
            Q = self.Quser
        else:
            Q = 2

        kernel = None
        if Q > 100:
            if kernelStr is not None:
                stringKernel = 'kernel = ' + kernelStr
                # SECURITY: kernelStr is evaluated as Python code with access to this scope; it must
                # only ever come from a trusted configuration.
                kernel = eval(kernelStr, globals(), locals())
            else:
                stringKernel = 'kernel = GPy.kern.RBF(Q, ARD=False) + GPy.kern.Bias(Q) + GPy.kern.White(Q)'
                kernel = GPy.kern.RBF(Q, ARD=False) + GPy.kern.Bias(Q) + GPy.kern.White(Q)
            logging.info('stringKernel: ' + str(stringKernel))
            self.SAMObject.kernelString = kernelStr
        else:
            self.SAMObject.kernelString = ''

        # Simulate the function of storing a collection of events
        if self.model_mode != 'temporal':
            self.SAMObject.store(observed=self.Y, inputs=self.X, Q=Q, kernel=kernel,
                                 num_inducing=self.model_num_inducing)
        else:
            self.SAMObject.model = GPy.models.SparseGPRegression(numpy.hstack((self.X, self.L)), self.Y,
                                                                 num_inducing=self.model_num_inducing)

        # If data are associated with labels (e.g. face identities),
        # associate them with the event collection
        if self.data_labels is not None:
            self.SAMObject.add_labels(self.data_labels)

        if economy_save and modelExists and keepIfPresent:
            logging.info("Try loading economy size SAMObject: " + fname)
            try:
                # Load the model from the economy storage
                SAMCore.load_pruned_model(fname, economy_save, self.SAMObject.model)
            except ValueError:
                logging.error("Loading " + fname + " failed. Parameters not valid. Training new model")
                self._optimiseAndSave(fname, save_model, economy_save, clearNumViews=False)
        elif not modelExists or not keepIfPresent:
            # Simulate the function of learning from stored memories, e.g. while sleeping (consolidation).
            self._optimiseAndSave(fname, save_model, economy_save, clearNumViews=True)

    def _optimiseAndSave(self, fname, save_model, economy_save, clearNumViews):
        """
        Optimise the model held by self.SAMObject and, if requested, save it to fname.

        Args:
            fname : Filename to save model to.
            save_model : Boolean to turn saving of the model on or off.
            economy_save : Boolean passed on to `SAMCore.save_pruned_model`.
            clearNumViews : Boolean, when True the `__num_views` field is also reset in temporal mode.

        Returns:
            None
        """
        if self.model_mode != 'temporal':
            self.SAMObject.learn(optimizer='bfgs', max_iters=self.model_num_iterations,
                                 init_iters=self.model_init_iterations, verbose=True)
        else:
            # NOTE(review): block structure was reconstructed from a listing without indentation;
            # the resets below are assumed to belong to the temporal branch only - confirm.
            self.SAMObject.model.optimize('bfgs', max_iters=self.model_num_iterations, messages=True)
            self.SAMObject.type = 'mrd'
            if clearNumViews:
                # NOTE(review): inside this class the name is mangled, so this assigns
                # SAMObject._SAMDriver__num_views - confirm this is the attribute that was meant.
                self.SAMObject.__num_views = None
            self.SAMObject.Q = None
            self.SAMObject.N = None
            self.SAMObject.namesList = None
            self.SAMObject.kernelString = None

        if save_model:
            logging.info("Saving SAMObject: " + fname)
            SAMCore.save_pruned_model(self.SAMObject, fname, economy_save)

    def prepareData(self, model='mrd', Ntr=50, randSeed=0, normalise=True):
        """
        Method for randomly splitting data and preparing Y dictionary.

        This method splits the data in the model randomly between training and testing. Currently does not take
        equal number of samples from different classes. After the call self.Y is a dictionary of the observed
        data for the requested model and self.Ytest / self.Ltest hold the testing split.

        Args:
            model : String with type of model to be trained. Handled values are `'mrd'`, `'gp'` and `'bgplvm'`.
                Any other value only performs the split (and normalisation).
            Ntr : Integer number of samples (rows of self.Y) to use for training. The remaining samples are
                used for testing.
            randSeed : Integer random seed.
            normalise : Boolean turning normalisation of data on or off.

        Returns:
            Tuple `(Yall, Lall, YtestAll, LtestAll)` with copies of the un-normalised training data, training
            labels, testing data and testing labels.
        """
        numSamples = self.Y.shape[0]
        Nts = numSamples - Ntr

        # Seeded permutation: the first Nts indices are for testing, the following Ntr for training.
        numpy.random.seed(randSeed)
        perm = numpy.random.permutation(numSamples)
        indTs = numpy.sort(perm[0:Nts])
        indTr = numpy.sort(perm[Nts:Nts + Ntr])

        YtestAll = self.Y[indTs].copy()
        self.Ytest = self.Y[indTs]
        LtestAll = self.L[indTs].copy()
        self.Ltest = self.L[indTs]
        Yall = self.Y[indTr].copy()
        self.Y = self.Y[indTr]
        Lall = self.L[indTr].copy()
        self.L = self.L[indTr]

        if normalise:
            logging.warning('Normalising data')
            # Center data to zero mean and 1 std
            self.Ymean = self.Y.mean()
            self.Yn = self.Y - self.Ymean
            self.Ystd = self.Yn.std()
            self.Yn /= self.Ystd
            # Normalise test data similarly to training data
            self.Ytestn = self.Ytest - self.Ymean
            self.Ytestn /= self.Ystd
            observed = self.Yn
        else:
            logging.warning('Not normalising data')
            observed = self.Y

        if model == 'mrd':
            self.X = None
            self.Y = {'Y': observed, 'L': self.L}
            self.data_labels = self.L.copy()
        elif model == 'gp':
            self.X = self.Y.copy()
            # NOTE(review): self.Ln is never computed in this class (it stays None), so this branch
            # fails with AttributeError unless a subclass sets it - confirm intended label source.
            labelNoise = 0.08 * numpy.random.randn(self.Ln.shape[0], self.Ln.shape[1])
            self.Y = {'L': self.Ln.copy() + labelNoise}
            self.data_labels = None
        elif model == 'bgplvm':
            self.X = None
            self.Y = {'Y': observed}
            self.data_labels = self.L.copy()

        return Yall, Lall, YtestAll, LtestAll

    def readData(self, root_data_dir, participant_index, *args, **kw):
        """
        Method which accepts a data directory, reads all the data in and outputs self.Y which is a numpy array
        with n instances of m length feature vectors and self.L which is a list of text Labels of length n.

        Args:
            root_data_dir: Data directory.
            participant_index: List of subfolders to consider. Can be left as an empty list.

        Returns:
            When implemented by the inheriting child, self.Y and self.L which contain the formatted data and
            labels read from the data directory.

        Raises:
            NotImplementedError: Always, unless overridden by the inheriting child.
        """
        raise NotImplementedError("this needs to be implemented to use the model class")

    def processLiveData(self, dataList, thisModel, verbose=False):
        """
        Method which receives a list of data frames and outputs a classification if available or
        'no_classification' if it is not.

        Args:
            dataList: List of dataFrames collected. Length of list is variable.
            thisModel: List of models required for testing.
            verbose : Boolean turning logging to stdout on or off.

        Returns:
            When implemented by the inheriting child, a string classification label.

        Raises:
            NotImplementedError: Always, unless overridden by the inheriting child.
        """
        raise NotImplementedError("this needs to be implemented to use the model class")

    def formatGeneratedData(self, instance):
        """
        Method which receives an instance generated from a model via a label and formats the received vector by
        reshaping it or adding crucial data with it for it to be deciphered at the receiving end.

        Args:
            instance: A vector of generated data.

        Returns:
            When implemented by the inheriting child, a Yarp Bottle or Yarp Image with the formatted
            generated data.

        Raises:
            NotImplementedError: Always, unless overridden by the inheriting child.
        """
        raise NotImplementedError("this needs to be implemented to use the model class")
376 
def saveParameters(self)
Method to store variables generated during training which are required during interaction in self...
Definition: SAMDriver.py:117
def formatGeneratedData(self, instance)
Method which receives an instance generated from a model via a label and formats the received vector ...
Definition: SAMDriver.py:373
STL namespace.
def __init__(self)
Initialisation for SAMDriver.
Definition: SAMDriver.py:40
SAM Driver parent class that defines the methods by which models are initialised, trained and saved...
Definition: SAMDriver.py:35
def training(self, modelNumInducing, modelNumIterations, modelInitIterations, fname, save_model, economy_save, keepIfPresent=True, kernelStr=None)
Method to train, store and load the learned model.
Definition: SAMDriver.py:172
def processLiveData(self, dataList, thisModel, verbose=False)
Method which receives a list of data frames and outputs a classification if available or 'no_classifi...
Definition: SAMDriver.py:361
def prepareData(self, model='mrd', Ntr=50, randSeed=0, normalise=True)
Method for randomly splitting data and preparing Y dictionary.
Definition: SAMDriver.py:271
def testPerformance(self, testModel, Yall, Lall, YtestAll, LtestAll, verbose)
Method for testing the whole dataset for overall performance.
Definition: SAMDriver.py:142
def loadParameters(self, parser, trainName)
Function to load parameters from the model config.ini file.
Definition: SAMDriver.py:105
def readData(self, root_data_dir, participant_index, *args, **kw)
Method which accepts a data directory, reads all the data in and outputs self.Y which is a numpy arra...
Definition: SAMDriver.py:347
SAM based on Latent Feature Models.
Definition: SAMCore.py:40