icub-client
SAM_utils.py
Go to the documentation of this file.
1 # """"""""""""""""""""""""""""""""""""""""""""""
2 # The University of Sheffield
3 # WYSIWYD Project
4 #
5 # SAMpy class for various methods related to processing data
6 #
7 # Created on 26 May 2015
8 #
9 # @authors: Andreas Damianou, Daniel Camilleri
10 #
11 # """"""""""""""""""""""""""""""""""""""""""""""
12 import matplotlib
13 matplotlib.use("TkAgg")
14 import numpy as np
15 from ConfigParser import SafeConfigParser
16 import pickle
17 import matplotlib.mlab as mlab
18 import matplotlib.pyplot as plt
19 from SAM.SAM_Core import samOptimiser
20 from os import listdir
21 from os.path import join, isdir
22 import threading
23 import logging
24 
25 np.set_printoptions(precision=2)
26 
27 
30 
31 
def initialiseModels(argv, update, initMode='training'):
    """Initialise SAM Model data structure, training parameters and user parameters.

    This method starts by initialising the required Driver from the driver name in argv[2] if it exists
    in SAM_Drivers folder. The standard model parameters and the specific user parameters are then initialised
    and the data is read in by the SAMDriver.readData method to complete the model data structure. This method
    then replicates the model data structure for training with multiple models if it is required in the config
    file loaded by the Driver.

    Args:
        argv_0: dataPath containing the data that is to be trained on.
        argv_1: modelPath containing the path of where the model is to be stored.
        argv_2: driverName containing the name of the driver class that is to be loaded from SAM_Drivers folder.
        update: String having either a value of 'update' or 'new'. 'new' will load the parameters as set in the
                config file of the driver being loaded present in the dataPath directory. This is used to train a
                new model from scratch. 'update' will check for an existing model in the modelPath directory and
                load the parameters from this model if it exists. This is used for retraining a model when new
                data becomes available.
        initMode: String having either a value of 'training' or 'interaction'. 'training' takes into consideration
                  the value of update in loading the parameters. (Used by trainSAMModel.py) 'interaction' loads
                  the parameters directly from the model if the model exists. (Used by interactionSAMModel.py)

    Returns:
        The output is a list of SAMDriver models. The list is of length 1 when the config file requests a single
        model or a list of length n+1 for a config file requesting multiple models where n is the number of
        requested models. The number of models either depends on the number of directories present in the dataPath
        or from the length of textLabels returned from the SAMDriver.readData method.
    """

    from SAM.SAM_Core import SAMDriver as Driver
    dataPath = argv[0]
    modelPath = argv[1]
    driverName = argv[2]

    logging.info(argv)
    # Dynamically rebind `Driver` to the requested driver class from SAM_Drivers.
    # NOTE: `exec` as a statement is Python 2 syntax; this module is Python 2 only.
    stringCommand = 'from SAM.SAM_Drivers import ' + driverName + ' as Driver'
    logging.info(stringCommand)
    exec stringCommand

    mySAMpy = Driver()
    mode = update
    # Section name in config.ini is the last path component of the data directory.
    trainName = dataPath.split('/')[-1]

    # participantList is extracted from number of subdirectories of dataPath
    participantList = [f for f in listdir(dataPath) if isdir(join(dataPath, f))]

    # Column width used to left-justify the settings labels in the log below.
    off = 17
    logging.info('-------------------')
    logging.info('Training Settings:')
    logging.info('')
    logging.info('Init mode: '.ljust(off) + str(initMode))
    logging.info('Data Path: '.ljust(off) + str(dataPath))
    logging.info('Model Path: '.ljust(off) + str(modelPath))
    logging.info('Participants: '.ljust(off) + str(participantList))
    logging.info('Model Root Name: '.ljust(off) + str(trainName))
    logging.info('Training Mode:'.ljust(off) + str(mode))
    logging.info('Driver:'.ljust(off) + str(driverName))
    logging.info('-------------------')
    logging.info('Loading Parameters...')
    logging.info('')
    modeConfig = ''
    found = ''
    # Best-effort read of the update mode from the data directory config;
    # a missing/unreadable config falls back to the defaults set above.
    try:
        parser = SafeConfigParser()
        found = parser.read(dataPath + "/config.ini")

        if parser.has_option(trainName, 'update_mode'):
            modeConfig = parser.get(trainName, 'update_mode')
        else:
            modeConfig = 'update'
        logging.info(modeConfig)
    except IOError:
        pass

    # Options that are common to every driver; any other option found in the
    # config section is treated as a driver-specific parameter.
    defaultParamsList = ['experiment_number', 'model_type', 'model_num_inducing',
                         'model_num_iterations', 'model_init_iterations', 'verbose',
                         'Quser', 'kernelString', 'ratioData', 'update_mode', 'model_mode',
                         'temporalModelWindowSize', 'optimiseRecall', 'classificationDict',
                         'useMaxDistance', 'calibrateUnknown']

    mySAMpy.experiment_number = None
    mySAMpy.model_type = None
    mySAMpy.kernelString = None
    mySAMpy.fname = None
    mySAMpy.ratioData = None

    # Train a brand-new model: take every parameter from the data directory's
    # config.ini, falling back to hard-coded defaults option by option.
    if initMode == 'training' and (mode == 'new' or modeConfig == 'new' or 'exp' not in modelPath):
        logging.info('Loading training parameters from:' + str(dataPath) + "/config.ini")
        try:
            default = False
            parser = SafeConfigParser()
            # Preserve option-name case (SafeConfigParser lowercases by default).
            parser.optionxform = str
            found = parser.read(dataPath + "/config.ini")

            mySAMpy.experiment_number = 'exp'

            if parser.has_option(trainName, 'model_type'):
                mySAMpy.model_type = parser.get(trainName, 'model_type')
            else:
                default = True
                mySAMpy.model_type = 'mrd'

            if parser.has_option(trainName, 'model_num_inducing'):
                mySAMpy.model_num_inducing = int(parser.get(trainName, 'model_num_inducing'))
            else:
                default = True
                mySAMpy.model_num_inducing = 30

            if parser.has_option(trainName, 'model_num_iterations'):
                mySAMpy.model_num_iterations = int(parser.get(trainName, 'model_num_iterations'))
            else:
                default = True
                mySAMpy.model_num_iterations = 700

            if parser.has_option(trainName, 'model_init_iterations'):
                mySAMpy.model_init_iterations = int(parser.get(trainName, 'model_init_iterations'))
            else:
                default = True
                mySAMpy.model_init_iterations = 2000

            if parser.has_option(trainName, 'verbose'):
                mySAMpy.verbose = parser.get(trainName, 'verbose') == 'True'
            else:
                default = True
                mySAMpy.verbose = False

            if parser.has_option(trainName, 'optimiseRecall'):
                mySAMpy.optimiseRecall = int(parser.get(trainName, 'optimiseRecall'))
            else:
                default = True
                mySAMpy.optimiseRecall = 200

            if parser.has_option(trainName, 'useMaxDistance'):
                mySAMpy.useMaxDistance = parser.get(trainName, 'useMaxDistance') == 'True'
            else:
                mySAMpy.useMaxDistance = False

            if parser.has_option(trainName, 'calibrateUnknown'):
                mySAMpy.calibrateUnknown = parser.get(trainName, 'calibrateUnknown') == 'True'
            else:
                mySAMpy.calibrateUnknown = False

            if parser.has_option(trainName, 'model_mode'):
                mySAMpy.model_mode = parser.get(trainName, 'model_mode')
                if mySAMpy.model_mode == 'temporal' and parser.has_option(trainName, 'temporalModelWindowSize'):
                    mySAMpy.temporalWindowSize = int(parser.get(trainName, 'temporalModelWindowSize'))
                else:
                    # NOTE(review): temporalFlag is set but never read in this
                    # function — presumably a leftover; confirm before removing.
                    temporalFlag = True
            else:
                default = True
                mySAMpy.model_mode = 'single'

            if parser.has_option(trainName, 'Quser'):
                mySAMpy.Quser = int(parser.get(trainName, 'Quser'))
            else:
                default = True
                mySAMpy.Quser = 2

            if parser.has_option(trainName, 'kernelString'):
                mySAMpy.kernelString = parser.get(trainName, 'kernelString')
            else:
                default = True
                mySAMpy.kernelString = "GPy.kern.RBF(Q, ARD=False) + GPy.kern.Bias(Q) + GPy.kern.White(Q)"

            if parser.has_option(trainName, 'ratioData'):
                mySAMpy.ratioData = int(parser.get(trainName, 'ratioData'))
            else:
                default = True
                mySAMpy.ratioData = 50

            if default:
                logging.info('Default settings applied')

            # Driver-specific parameters are parsed by the driver itself.
            mySAMpy.paramsDict = dict()
            mySAMpy.loadParameters(parser, trainName)

        except IOError:
            # NOTE(review): the message has no %s placeholder for `found`, so
            # logging will report a formatting error rather than print it.
            logging.warning('IO Exception reading ', found)
            pass
    else:
        # Retraining / interaction: recover all parameters from the pickled model.
        logging.info('Loading parameters from: \n \t' + str(modelPath))
        try:
            parser = SafeConfigParser()
            parser.optionxform = str
            found = parser.read(dataPath + "/config.ini")

            # load parameters from config file
            mySAMpy.experiment_number = modelPath.split('__')[-1]

            modelPickle = pickle.load(open(modelPath+'.pickle', 'rb'))
            mySAMpy.paramsDict = dict()
            # Driver-specific options come from the pickle, keyed by the names
            # present in the config section that are not common options.
            for j in parser.options(trainName):
                if j not in defaultParamsList:
                    logging.info(str(j))
                    mySAMpy.paramsDict[j] = modelPickle[j]

            mySAMpy.ratioData = modelPickle['ratioData']
            mySAMpy.model_type = modelPickle['model_type']
            mySAMpy.model_mode = modelPickle['model_mode']
            if mySAMpy.model_mode == 'temporal':
                mySAMpy.temporalModelWindowSize = modelPickle['temporalModelWindowSize']
                # Temporal models are always trained as MRD.
                mySAMpy.model_type = 'mrd'
            mySAMpy.model_num_inducing = modelPickle['model_num_inducing']
            mySAMpy.model_num_iterations = modelPickle['model_num_iterations']
            mySAMpy.model_init_iterations = modelPickle['model_init_iterations']
            mySAMpy.verbose = modelPickle['verbose']
            mySAMpy.Quser = modelPickle['Quser']
            mySAMpy.optimiseRecall = modelPickle['optimiseRecall']
            mySAMpy.kernelString = modelPickle['kernelString']
            mySAMpy.calibrated = modelPickle['calibrated']

            # try loading classification parameters for multiple model implementation
            try:
                mySAMpy.useMaxDistance = modelPickle['useMaxDistance']
            except:
                logging.warning('Failed to load useMaxDistace. Possible reasons: '
                                'Not saved or multiple model implementation')
            mySAMpy.calibrateUnknown = modelPickle['calibrateUnknown']
            if mySAMpy.calibrateUnknown:
                mySAMpy.classificationDict = modelPickle['classificationDict']

        except IOError:
            # NOTE(review): same missing %s placeholder as above.
            logging.warning('IO Exception reading ', found)
            pass

    # Build the canonical model file name:
    # <dataName>__<driverName>__<modelType>__<experimentNumber>
    if 'exp' in modelPath or 'best' in modelPath or 'backup' in modelPath:
        fnameProto = '/'.join(modelPath.split('/')[:-1]) + '/' + dataPath.split('/')[-1] + '__' + driverName + \
                     '__' + mySAMpy.model_type + '__' + str(mySAMpy.experiment_number)
    else:
        fnameProto = modelPath + dataPath.split('/')[-1] + '__' + driverName + '__' + mySAMpy.model_type + \
                     '__' + str(mySAMpy.experiment_number)

    logging.info('Full model name: ' + str(fnameProto))
    logging.info('-------------------')
    logging.info('')

    mySAMpy.save_model = False
    mySAMpy.economy_save = True
    mySAMpy.visualise_output = False
    # test_mode = True

    # Populate mySAMpy.Y / mySAMpy.L (and Y1/U1 for temporal mode).
    mySAMpy.readData(dataPath, participantList)

    if mySAMpy.model_mode != 'temporal':
        # get list of labels
        mySAMpy.textLabels = list(set(mySAMpy.L))

        # convert L from list of strings to array of indices
        mySAMpy.L = np.asarray([mySAMpy.textLabels.index(i) for i in mySAMpy.L])[:, None]
        mySAMpy.textLabels = mySAMpy.textLabels
    else:
        # Temporal mode: slice the time series into overlapping windows.
        mySAMpy.X, mySAMpy.Y = transformTimeSeriesToSeq(mySAMpy.Y1, mySAMpy.temporalModelWindowSize)
        mySAMpy.L, mySAMpy.tmp = transformTimeSeriesToSeq(mySAMpy.U1, mySAMpy.temporalModelWindowSize)

    mm = [mySAMpy]
    # mm.append(mySAMpy)
    # mm[0] contains root model
    # this is the only model in the case of a single model
    # or contains all info for the rest of the models in case of multiple models
    #

    if mySAMpy.model_mode == 'single' or mySAMpy.model_mode == 'temporal':
        mm[0].participantList = ['all']
    else:
        mm[0].participantList = ['root'] + mySAMpy.textLabels

    # Prepare training/testing splits for the root model and, when in multiple
    # model mode, for one sub-model per class label.
    for k in range(len(mm[0].participantList)):
        if mm[0].participantList[k] == 'all':
            normaliseData = True
            minData = len(mm[k].L)
            mm[0].fname = fnameProto
            mm[0].model_type = mySAMpy.model_type
            Ntr = int(mySAMpy.ratioData * minData / 100)
        else:
            if k > 0:
                mm.append(Driver())
                # extract subset of data corresponding to this model
                inds = [i for i in range(len(mm[0].Y['L'])) if mm[0].Y['L'][i] == k - 1]
                mm[k].Y = mm[0].Y['Y'][inds]
                mm[k].L = mm[0].Y['L'][inds]
                mm[k].Quser = mm[0].Quser
                mm[k].verbose = mm[0].verbose
                logging.info('Object class: ' + str(mm[0].participantList[k]))
                minData = len(inds)
                mm[k].fname = fnameProto + '__L' + str(k - 1)
                mm[0].listOfModels.append(mm[k].fname)
                mm[k].model_type = 'bgplvm'
                Ntr = int(mySAMpy.ratioData * minData / 100)
                normaliseData = True
            else:
                # k == 0 in multiple model mode: root model bookkeeping only.
                normaliseData = False
                mm[0].listOfModels = []
                mm[0].fname = fnameProto
                mm[0].SAMObject.kernelString = ''
                minData = len(mm[0].L)
                Ntr = int(mySAMpy.ratioData * minData / 100)
        mm[k].modelLabel = mm[0].participantList[k]

        if mm[0].model_mode != 'temporal':

            [Yall, Lall, YtestAll, LtestAll] = mm[k].prepareData(mm[k].model_type, Ntr,
                                                                 randSeed=0,
                                                                 normalise=normaliseData)
            mm[k].Yall = Yall
            mm[k].Lall = Lall
            mm[k].YtestAll = YtestAll
            mm[k].LtestAll = LtestAll
        elif mm[0].model_mode == 'temporal':
            [Xall, Yall, Lall, XtestAll, YtestAll, LtestAll] = mm[k].prepareData(mm[k].model_type, Ntr,
                                                                                 randSeed=0,
                                                                                 normalise=normaliseData)
            mm[k].Xall = Xall
            mm[k].Yall = Yall
            mm[k].Lall = Lall
            mm[k].XtestAll = XtestAll
            mm[k].YtestAll = YtestAll
            mm[k].LtestAll = LtestAll

        logging.info('minData = ' + str(minData))
        logging.info('ratioData = ' + str(mySAMpy.ratioData))
    logging.info('-------------------------------------------------------------------------------------------------')
    if initMode == 'training':
        # Remove any stale 'exp' models before training from scratch.
        samOptimiser.deleteModel(modelPath, 'exp')
        for k in range(len(mm[0].participantList)):
            # for k = 0 check if multiple model or not
            if mm[0].participantList[k] != 'root':

                logging.info("Training with " + str(mm[0].model_num_inducing) + ' inducing points for ' +
                             str(mm[0].model_init_iterations) + '|' + str(mm[0].model_num_iterations))
                logging.info("Fname:" + str(mm[k].fname))

                mm[k].training(mm[0].model_num_inducing, mm[0].model_num_iterations,
                               mm[0].model_init_iterations, mm[k].fname, mm[0].save_model,
                               mm[0].economy_save, keepIfPresent=False, kernelStr=mm[0].kernelString)

                if mm[0].visualise_output:
                    ax = mm[k].SAMObject.visualise()
                    visualiseInfo = dict()
                    visualiseInfo['ax'] = ax
                else:
                    visualiseInfo = None
    else:
        # Interaction mode: reuse an existing model when present (keepIfPresent=True).
        for k in range(len(mm[0].participantList)):
            # for k = 0 check if multiple model or not
            if mm[0].participantList[k] != 'root':
                logging.info("Training with " + str(mm[0].model_num_inducing) + ' inducing points for ' +
                             str(mm[0].model_init_iterations) + '|' + str(mm[0].model_num_iterations))

                mm[k].training(mm[0].model_num_inducing, mm[0].model_num_iterations,
                               mm[0].model_init_iterations, mm[k].fname, mm[0].save_model,
                               mm[0].economy_save, keepIfPresent=True, kernelStr=mm[0].kernelString)

    return mm
385 
386 
def varianceClass(varianceDirection, x, thresh):
    """
    Check a value against a threshold or an interval.

    Args:
        varianceDirection : List of strings naming the comparison(s) to perform.
        x : The value to be checked.
        thresh : Single threshold, or a pair of bounds when two directions are given.

    Returns:
        Boolean confirming that x satisfies the varianceDirection conditions for
        the threshold; None when the direction list is not recognised.
    """
    if varianceDirection == ['greater', 'smaller']:
        # x strictly inside the interval (thresh[0], thresh[1])
        return thresh[0] < x < thresh[1]
    if varianceDirection == ['smaller', 'greater']:
        # x strictly inside the reversed interval (thresh[1], thresh[0])
        return thresh[1] < x < thresh[0]
    if varianceDirection == ['greater']:
        return x > thresh
    if varianceDirection == ['smaller']:
        return x < thresh
407 
408 
409 class TimeoutError(Exception):
410  """Custom TimeoutError Exception.
411 
412  Description:
413  Class used to raise TimeoutError Exceptions.
414  """
415  pass
416 
417 
class InterruptableThread(threading.Thread):
    """Thread that runs a stored callable and keeps its return value.
    """
    def __init__(self, func, *args, **kwargs):
        """
        Remember the callable and its arguments; the thread is not started here.
        """
        threading.Thread.__init__(self)
        self._func = func
        self._args = args
        self._kwargs = kwargs
        # Filled in by run(); stays None until the callable returns.
        self._result = None

    def run(self):
        """
        Invoke the stored callable and capture its return value.
        """
        self._result = self._func(*self._args, **self._kwargs)

    @property
    def result(self):
        """
        Returns:
            The callable's return value, or None if it has not completed.
        """
        return self._result
444 
445 
class timeout(object):
    """
    Decorator that aborts a call running longer than a fixed number of seconds.
    """
    def __init__(self, sec):
        """
        Remember the allowed duration in seconds.
        """
        self._sec = sec

    def __call__(self, f):
        """
        Return a wrapper that runs f inside an InterruptableThread with a deadline.
        """
        def wrapped_f(*args, **kwargs):
            worker = InterruptableThread(f, *args, **kwargs)
            worker.start()
            worker.join(self._sec)
            if worker.is_alive():
                # The call is still running past the deadline; abandon it.
                raise TimeoutError('execution expired')
            return worker.result
        return wrapped_f
468 
469 
def plotKnownAndUnknown(varDict, colour, axlist, width=[0.2, 0.2], factor=[(0, 0.6), (0.4, 1)], plotRange=False):
    """
    Plot the variances of known and unknown data as gaussian distributions.

    Args:
        varDict : Dictionary containing the mean and variances of known and unknown for different sections of data.
        colour : List of strings with the colours to be used for each plot.
        axlist : Plot object to pass in and update.
        width : List of floats with the linewidth for the plots.
        factor : List of tuples with factors for the plotting of ranges.
        plotRange : Boolean to plot a range together with gaussian distributions or not.

    Returns:
        Plot object for the generated plot.
    """
    styleIdx = 0
    for key in varDict.keys():
        data = varDict[key]
        # Skip empty sections and any aggregated 'Results' entries.
        if len(data) > 0 and 'Results' not in key:
            mlist, vlist, rlist = meanVar_varianceDistribution(data)
            axlist = plotGaussFromList(mlist, vlist, rlist, colour[styleIdx], key,
                                       width[styleIdx], factor[styleIdx], axlist, plotRange)
            styleIdx += 1

    return axlist
493 
494 
def bhattacharyya_distance(mu1, mu2, var1, var2):
    """
    Calculate a separability measure between two univariate gaussians.

    The bhattacharyya distance is used to optimise for separability between
    known and unknown classes when these are modelled as univariate gaussians.

    Args:
        mu1: Float with mean of distribution 1.
        mu2: Float with mean of distribution 2.
        var1: Float with variance of distribution 1.
        var2: Float with variance of distribution 2.

    Returns:
        Float with the bhattacharyya distance between the two distributions.
    """

    # Variance-ratio term: equals 4 (log term 0) when the variances match.
    ratioSum = float(var1/var2) + float(var2/var1) + 2
    logTerm = np.log(0.25*ratioSum)
    # Squared difference of the means, scaled by the pooled variance.
    meanDiff = float(mu1-mu2)*float(mu1-mu2)
    meanTerm = meanDiff/float(var1+var2)
    return 0.25*logTerm + 0.25*meanTerm
516 
517 
def plot_confusion_matrix(cm, targetNames, title='Confusion matrix', cmap=plt.cm.inferno):
    """Display a formatted confusion matrix.

    Renders the provided square array as an image with one tick per class on
    each axis, then blocks until the window is closed.

    Args:
        cm: Square numpy array containing the values for the confusion matrix.
        targetNames: Labels for the different classes.
        title: Title of the plot.
        cmap: Matplotlib colourmap for the plot.

    Returns:
        No return. Blocking call to matplotlib.plot.
    """
    classTicks = np.arange(len(targetNames))
    plt.figure()
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    plt.xticks(classTicks, targetNames, rotation=45)
    plt.yticks(classTicks, targetNames)
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show(block=True)
543 
544 
def plotGaussFromList(mlist, vlist, rlist, colour, label, width, factor, axlist, plotRange=False):
    """
    Plot multiple Gaussians from a list on the same plot.

    Args:
        mlist: List of float means.
        vlist: List of float variances.
        rlist: List of (min, max) float data ranges, one per dimension.
        colour: Colour for the plots.
        label: Label for the plot.
        width: Width of line in the plot.
        factor: Tuple of two factors scaling the height of the range markers.
        axlist: List of axes; when empty a new figure and axes are created.
        plotRange: Boolean to plot ranges or not.

    Returns:
        List of axes.
    """
    numPlots = len(mlist)

    # First call: build one subplot per dimension plus two aggregate panels.
    # Layout assumes mlist carries per-dimension entries followed by a 'Sum'
    # and a 'Mean' entry (hence the numPlots - 2 title switch below).
    if len(axlist) == 0:
        # f, axlist = plt.subplots(1, numPlots, figsize=(24.0, 15.0))
        f, axlist = plt.subplots(1, numPlots, figsize=(12.0, 7.5))
        for k, j in enumerate(axlist):
            if k < numPlots - 2:
                j.set_title('D ' + str(k), fontsize=20)
            elif k == numPlots - 2:
                j.set_title('Sum', fontsize=20)
            elif k > numPlots - 2:
                j.set_title('Mean', fontsize=20)
            j.set_xticks([])
            j.set_yticks([])

    for j in range(numPlots):
        sigma = np.sqrt(vlist[j])
        rangeData = rlist[j][1] - rlist[j][0]
        # Extend the x axis by half the data range on either side so the
        # gaussian tails are visible.
        x = np.linspace(rlist[j][0] - (rangeData / 2), rlist[j][1] + (rangeData / 2), 100)
        # NOTE(review): mlab.normpdf was removed in matplotlib 3.1; this code
        # requires an old matplotlib — confirm the pinned version.
        y = mlab.normpdf(x, mlist[j], sigma)
        axlist[j].plot(x, y, colour, label=label)
        if plotRange:
            # Dashed vertical markers at the data range bounds, scaled by factor.
            axlist[j].plot([rlist[j][1], rlist[j][1]], [max(y)*factor[0], max(y)*factor[1]], '--'+colour, linewidth=width)
            axlist[j].plot([rlist[j][0], rlist[j][0]], [max(y)*factor[0], max(y)*factor[1]], '--'+colour, linewidth=width)

    return axlist
589 
590 
def solve_intersections(m1, m2, std1, std2):
    """
    Find the intersection point(s) of two Gaussian probability densities.

    Equating the two gaussian pdfs and taking logs yields a quadratic in x;
    its roots are the intersection points.

    Args:
        m1: Float Mean of Gaussian 1.
        m2: Float Mean of Gaussian 2.
        std1: Float Standard Deviation of Gaussian 1.
        std2: Float Standard Deviation of Gaussian 2.

    Returns:
        Points of intersection for the two Gaussian distributions.
    """
    var1 = std1**2
    var2 = std2**2
    # Quadratic coefficient vanishes when the variances are equal, leaving a
    # single (linear) intersection; np.roots handles the degenerate case.
    quadCoeff = 1/(2*var1) - 1/(2*var2)
    linCoeff = m2/var2 - m1/var1
    constCoeff = m1**2 / (2*var1) - m2**2 / (2*var2) - np.log(std2/std1)
    return np.roots([quadCoeff, linCoeff, constCoeff])
608 
609 
def PfromHist(sample, hist, binWidth):
    """
    Calculate the probability of a sample from a histogram.

    Args:
        sample : Array-like of floats, one value per histogram in hist.
        hist : Sequence of normalised histogram probability arrays, one per
               entry in sample.
        binWidth : Float indicating the width for each probability bin.

    Returns:
        List of probabilities ranging from 0 to 1, one per sample entry with
        respect to its corresponding histogram.
    """
    # Map each sample value onto the index of the bin it falls into.
    idx = np.asarray(sample)//binWidth
    # np.int was removed in NumPy 1.24; the builtin int is the intended dtype.
    idx = idx.astype(int)
    # Look up each sample's bin probability in its own histogram.
    return [hist[j][idx[j]] for j in range(len(idx))]
628 
629 
def meanVar_varianceDistribution(dataList):
    """
    Compute per-column means, variances and value ranges for the given data.

    Args:
        dataList: List of numpy arrays containing the data to check.

    Returns:
        List of means, list of variances and list of (min, max) ranges, one
        entry per column of the data.
    """
    dataArray = np.asarray(dataList)
    # Promote a flat vector to a single-column matrix so one code path suffices.
    if dataArray.ndim == 1:
        dataArray = dataArray[:, None]

    mlist = []
    vlist = []
    rlist = []

    for col in range(dataArray.shape[1]):
        column = dataArray[:, col]
        mlist.append(np.mean(column))
        vlist.append(np.var(column))
        rlist.append((min(column), max(column)))

    return mlist, vlist, rlist
665 
666 
def bhattacharyya_dict(m, v):
    """
    Compute bhattacharyya distances for matching entries of two dictionaries.

    The keys are expected to be two-word labels whose second word identifies
    the 'known' and 'unknown' sets.

    Args:
        m : Dictionary of means.
        v : Dictionary of variances.

    Returns:
        List of bhattacharyya distances, or None when either the known or the
        unknown label is absent.
    """
    knownLabel = None
    unknownLabel = None

    # Identify the known/unknown entries from the second word of each key.
    for label in m.keys():
        secondWord = label.lower().split(' ')[1]
        if secondWord == 'known':
            knownLabel = label
        elif 'unknown' in secondWord:
            unknownLabel = label

    if knownLabel is None or unknownLabel is None:
        return None

    dists = []
    for idx in range(len(m[knownLabel])):
        dists.append(bhattacharyya_distance(m[knownLabel][idx], m[unknownLabel][idx],
                                            v[knownLabel][idx], v[unknownLabel][idx]))
    return dists
696 
697 
def smooth1D(x, window_len=11, window='hanning'):
    """Smooth the data using a window with a requested size.

    This method is based on the convolution of a scaled window with the signal.
    The signal is prepared by introducing reflected copies of the signal (with
    the window size) in both ends so that transient parts are minimized in the
    beginning and end part of the output signal.

    Args:
        x: The input signal (1-D numpy array).
        window_len: The dimension of the smoothing window; should be an odd integer.
        window: The type of window from 'flat', 'hanning', 'hamming', 'bartlett',
                'blackman'. A flat window will produce a moving average smoothing.

    Returns:
        The smoothed signal, same length as x.

    Raises:
        ValueError: If x is not 1-D, is shorter than window_len, or window is
                    not a recognised window type.
    """

    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")

    if x.size < window_len:
        raise ValueError("Input vector needs to be bigger than window size.")

    if window_len < 3:
        # Window too small to smooth anything; return the signal untouched.
        return x

    if window not in ['flat', 'hanning', 'hamming', 'bartlett', 'blackman']:
        raise ValueError("Window is one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")

    # Pad both ends with reflected copies of the signal to damp edge transients.
    s = np.r_[x[window_len-1:0:-1], x, x[-1:-window_len:-1]]
    if window == 'flat':  # moving average
        w = np.ones(window_len, 'd')
    else:
        # Look the window function up on numpy directly rather than via eval().
        w = getattr(np, window)(window_len)

    # Normalise the window so the smoothing preserves the signal's scale.
    y = np.convolve(w/w.sum(), s, mode='valid')
    off = 1 if window_len % 2 > 0 else 0

    # Integer division: plain '/' made these slice indices floats on Python 3,
    # which raises TypeError. '//' is equivalent on Python 2.
    half = window_len // 2
    return y[half-1:-(half+off)]
737 
738 
def transformTimeSeriesToSeq(Y, timeWindow, offset=1, normalised=False, reduced=False, noY=False, doOffset=False):
    # TODO add parameter for number of points to skip between sampled windows
    """
    Convert a time series into multiple overlapping time windows.

    Args:
        Y : Time series data of shape (frames x dimensions).
        timeWindow : Length of the time window.
        offset : Number of non-overlapping frames between successive time windows.
        normalised : Boolean to normalise time windows with respect to the starting frame.
        reduced : Boolean to remove the starting frame if normalisation is enabled since this frame contains 0.
        noY : Boolean to skip the generation of per-window label frames.
        doOffset : In future versions this parameter will enable skipping data points between sampled windows.

    Returns:
        X : numpy array of size (numberWindows x lengthOfWindow) containing the time series split up into windows.
        Y : numpy array of size (numberWindows x dimensions) with the frame following each window, or None when noY.
    """
    Ntr, D = Y.shape

    # One extra window fits when no trailing label frame is required.
    if noY:
        blocksNumber = (Ntr - timeWindow + 1) // offset
    else:
        blocksNumber = (Ntr - timeWindow) // offset

    # Normalised + reduced windows drop the (all-zero) first frame.
    windowDim = (timeWindow - 1) * D if (normalised and reduced) else timeWindow * D
    X = np.zeros((blocksNumber, windowDim))

    Ynew = None if noY else np.zeros((blocksNumber, D))

    for block in range(blocksNumber):
        start = block * offset
        windowData = Y[start:start + timeWindow, :].T

        if normalised:
            # Express every frame relative to the window's first frame.
            windowData = np.subtract(windowData, windowData[:, 0][:, None])
            if reduced:
                windowData = np.delete(windowData, 0, 1)
        X[block, :] = windowData.flatten().T

        if not noY:
            # The label is the frame immediately following the window.
            Ynew[block, :] = Y[start + timeWindow, :]
    return X, Ynew
786 
787 
Custom TimeoutError Exception.
Definition: SAM_utils.py:414
def __init__(self, func, args, kwargs)
Initialise the interruptible thread.
Definition: SAM_utils.py:424
def transformTimeSeriesToSeq(Y, timeWindow, offset=1, normalised=False, reduced=False, noY=False, doOffset=False)
Utility function to convert a time series into multiple time windows with additional functionality...
Definition: SAM_utils.py:755
def run(self)
Run the function.
Definition: SAM_utils.py:434
def __init__(self, sec)
Initialise the timeout function.
Definition: SAM_utils.py:453
def plotGaussFromList(mlist, vlist, rlist, colour, label, width, factor, axlist, plotRange=False)
Plot multiple Gaussians from a list on the same plot.
Definition: SAM_utils.py:562
Class to launch a function inside of a separate thread.
Definition: SAM_utils.py:420
def initialiseModels(argv, update, initMode='training')
Initialise SAM Model data structure, training parameters and user parameters.
Definition: SAM_utils.py:59
def varianceClass(varianceDirection, x, thresh)
Utility function to perform threshold or range checking.
Definition: SAM_utils.py:398
def bhattacharyya_distance(mu1, mu2, var1, var2)
Calculates a measure for the separability of two univariate gaussians.
Definition: SAM_utils.py:509
def PfromHist(sample, hist, binWidth)
Calulates the probability of a sample from a histogram.
Definition: SAM_utils.py:621
def smooth1D(x, window_len=11, window='hanning')
Smooth the data using a window with a requested size.
Definition: SAM_utils.py:710
def solve_intersections(m1, m2, std1, std2)
Solve for the intersection/s of two Gaussian distributions.
Definition: SAM_utils.py:603
def meanVar_varianceDistribution(dataList)
Calculate list of means, variances and ranges for the data in the dataList.
Definition: SAM_utils.py:639
Class to terminate a function running inside of a separate thread.
Definition: SAM_utils.py:449
def bhattacharyya_dict(m, v)
Calculate bhattacharyya distances for each item in the dictionaries.
Definition: SAM_utils.py:677
def plot_confusion_matrix(cm, targetNames, title='Confusion matrix', cmap=plt.cm.inferno)
Generate and display a confusion matrix.
Definition: SAM_utils.py:531
def plotKnownAndUnknown(varDict, colour, axlist, width=[0.2, factor=[(0, 0.6), plotRange=False)
Utility function to plot variances of known and unknown as gaussian distributions.
Definition: SAM_utils.py:484
def __call__(self, f)
Initialise an interruptible thread and start the thread.
Definition: SAM_utils.py:459