icub-client
SAMTesting.py
1 # """"""""""""""""""""""""""""""""""""""""""""""
2 # The University of Sheffield
3 # WYSIWYD Project
4 #
5 # A class includes the different testing methods implemented
6 # Currently tested only with Actions
7 #
8 # Created on 20 July 2016
9 #
10 # @author: Daniel Camilleri, Andreas Damianou
11 #
12 # """"""""""""""""""""""""""""""""""""""""""""""
from IPython.display import clear_output
from sklearn.mixture import GMM
from SAM.SAM_Core import SAM_utils as utils
import ipyparallel as ipp
import time
import matplotlib
matplotlib.use("TkAgg")
import matplotlib.pyplot as plt
import numpy.matlib
import sys
import copy
import psutil
import timeit
import numpy as np
from collections import Mapping, Container
from sys import getsizeof
from operator import gt
import logging

np.set_printoptions(threshold=np.nan, precision=2)


# thisModel = None

def deep_getsizeof(o, ids):
    """
    Method to calculate the size of an object `o` in bytes.

    Args:
        o: Object to calculate the size of.
        ids: Set of object ids to skip when calculating the size of the object.

    Returns:
        Size of object in bytes.
    """
    d = deep_getsizeof
    if id(o) in ids:
        return 0

    r = getsizeof(o)
    ids.add(id(o))

    if isinstance(o, str) or isinstance(o, unicode):
        return r

    if isinstance(o, Mapping):
        return r + sum(d(k, ids) + d(v, ids) for k, v in o.iteritems())

    if isinstance(o, Container):
        return r + sum(d(x, ids) for x in o)

    if 'SAM' in o.__class__.__name__:
        total = 0
        for attr, value in o.__dict__.iteritems():
            # logging.info(attr)
            total += d(value, ids)
        return r + total

    return r

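# A minimal usage sketch (hypothetical data; the empty set seeds the id cache so
# shared sub-objects are counted only once):
#
#     payload = {'Y': np.zeros((100, 3)), 'L': [0, 1, 2]}
#     nbytes = deep_getsizeof(payload, set())
#     logging.info('payload size: %.2f MB' % (nbytes / 1024.0 / 1024.0))
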
def calibrateModelRecall(thisModel):
    """
    Logic to initialise calibration of model recall in order to recognise known from unknown instances.

    Args:
        thisModel: SAMObject model to calibrate.

    Returns:
        None
    """
    if len(thisModel) > 1:
        calibrateMultipleModelRecall(thisModel)
    elif hasattr(thisModel[0], 'allDataDict'):
        logging.info('calibrating model')
        calibrateSingleModelRecall(thisModel)
    else:
        logging.warning('no calibration')

def calibrateSingleModelRecall(thisModel):
    """
    Perform calibration for single model implementations.

    This method either uses the Bhattacharyya distance to perform calibration of known and unknown, or uses histograms of the known and unknown labels in the training data to carry out the classification. This method depends on the following parameters present in config.ini. \n

    1) __useMaxDistance__ : `False` or `True`. This enables the use of the Bhattacharyya distance method to recognise known and unknown. \n
    2) __calibrateUnknown__ : `True` or `False`. This turns on or off the calibration of the model for known and unknown inputs. \n
    3) __noBins__ : Integer number of bins to be used for the histogram method if __calibrateUnknown__ is `True` and __useMaxDistance__ is `False`. \n
    4) __method__ : String indicating the method used when histograms are used for calibration. When using histograms, the multi-dimensional probabilities of known and unknown are both calculated using the histogram. `sumProb` then performs a decision based on the largest sum after summing the probabilities of known and unknown independently. `mulProb` performs a decision based on the largest product after multiplying the probabilities of known and unknown independently. \n

    Args:
        thisModel: SAMObject model to calibrate.

    Returns:
        None
    """
    yCalib = formatDataFunc(thisModel[0].allDataDict['Y'])
    logging.info('entering segment testing')
    labelList, confMatrix, ret, variancesKnown, variancesUnknown = segmentTesting(thisModel, yCalib,
                                                                                  thisModel[0].allDataDict['L'],
                                                                                  thisModel[0].verbose, 'calib',
                                                                                  serialMode=False,
                                                                                  optimise=thisModel[0].optimiseRecall,
                                                                                  calibrate=True)
    thisModel[0].classificationDict = dict()

    if thisModel[0].useMaxDistance:
        [mk, vk, rk] = utils.meanVar_varianceDistribution(variancesKnown)
        [muk, vuk, ruk] = utils.meanVar_varianceDistribution(variancesUnknown)

        distance = []
        for j in range(len(mk)):
            distance.append(utils.bhattacharyya_distance(mk[j], muk[j], vk[j], vuk[j]))

        if distance is not None:
            maxIdx = distance.index(max(distance))
        thisModel[0].classificationDict['bestDistanceIDX'] = maxIdx
        thisModel[0].classificationDict['bestDistance_props'] = {'KnownMean': mk[maxIdx], 'UnknownMean': muk[maxIdx],
                                                                 'KnownVar': vk[maxIdx], 'UnknownVar': vuk[maxIdx]}

        # if maxIdx < len(mk) - 2:
        #     thisModel[0].bestSegOperation = maxIdx
        # elif maxIdx == len(mk) - 2:
        #     thisModel[0].bestSegOperation = 'sum'
        # elif maxIdx == len(mk) - 1:
        #     thisModel[0].bestSegOperation = 'mean'

        intersection = utils.solve_intersections(mk[maxIdx], muk[maxIdx], np.sqrt(vk[maxIdx]), np.sqrt(vuk[maxIdx]))

        maxLim = max(rk[maxIdx][1], ruk[maxIdx][1])
        minLim = min(rk[maxIdx][0], ruk[maxIdx][0])

        delList = []
        for j in range(len(intersection)):
            if intersection[j] > maxLim or intersection[j] < minLim:
                delList.append(j)

        thisModel[0].classificationDict['segIntersections'] = np.delete(intersection, delList)
        thisModel[0].classificationDict['bhattaDistances'] = distance

        logging.info('Num Intersections: ' + str(len(thisModel[0].classificationDict['segIntersections'])))

        [thisModel[0].classificationDict['varianceThreshold'],
         thisModel[0].classificationDict['varianceDirection']] = \
            calculateVarianceThreshold(thisModel[0].classificationDict['segIntersections'], mk[maxIdx], muk[maxIdx],
                                       vk[maxIdx], vuk[maxIdx])

        logging.info('varianceThreshold ' + str(thisModel[0].classificationDict['varianceThreshold']))
        logging.info('varianceDirection ' + str(thisModel[0].classificationDict['varianceDirection']))
    else:
        variancesKnownArray = np.asarray(variancesKnown)
        variancesUnknownArray = np.asarray(variancesUnknown)
        varianceAllArray = np.vstack([variancesKnownArray, variancesUnknownArray])
        histKnown = [None] * (len(variancesKnownArray[0]) - 2)
        binEdges = [None] * (len(variancesKnownArray[0]) - 2)
        histUnknown = [None] * (len(variancesKnownArray[0]) - 2)

        thisModel[0].classificationDict['binWidth'] = thisModel[0].paramsDict['binWidth']
        thisModel[0].classificationDict['method'] = thisModel[0].paramsDict['method']

        numBins = np.ceil(np.max(varianceAllArray) / thisModel[0].classificationDict['binWidth'])

        bins = range(int(numBins))
        bins = np.multiply(bins, thisModel[0].classificationDict['binWidth'])

        for j in range(len(variancesKnown[0]) - 2):
            histKnown[j], binEdges[j] = np.histogram(variancesKnownArray[:, j], bins=bins)
            histKnown[j] = 1.0 * histKnown[j] / np.sum(histKnown[j])

            histUnknown[j], _ = np.histogram(variancesUnknownArray[:, j], bins=bins)
            histUnknown[j] = 1.0 * histUnknown[j] / np.sum(histUnknown[j])

        thisModel[0].classificationDict['histKnown'] = histKnown
        thisModel[0].classificationDict['binEdgesKnown'] = binEdges
        thisModel[0].classificationDict['histUnknown'] = histUnknown

    thisModel[0].calibrated = True

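# A minimal sketch of the histogram decision rule described above (NumPy only;
# vv, histKnown, histUnknown and binWidth are hypothetical stand-ins for the
# values stored in classificationDict):
#
#     def histProb(v, hist, binWidth):
#         # probability of a variance v under one normalised histogram
#         idx = min(int(v / binWidth), len(hist) - 1)
#         return hist[idx]
#
#     pKnown = [histProb(v, h, binWidth) for v, h in zip(vv, histKnown)]
#     pUnknown = [histProb(v, h, binWidth) for v, h in zip(vv, histUnknown)]
#     known = sum(pKnown) > sum(pUnknown)  # 'sumProb'; 'mulProb' compares products
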
def calibrateMultipleModelRecall(thisModel):
    """
    Perform calibration for multiple model implementations.

    In contrast with calibrateSingleModelRecall, in this method known and unknown are calibrated according to measures of familiarity between all model classes. The familiarity of each class with each other class and with itself is then used to perform a Bayesian decision depending on the resulting familiarity when testing a new instance.

    Args:
        thisModel: SAMObject model to calibrate.

    Returns:
        None
    """
    cmSize = len(thisModel[0].textLabels)
    confMatrix = np.zeros((cmSize, cmSize))

    # Create validation set
    Y_valid = []
    Y_testing = []
    for i in range(len(thisModel)):
        if thisModel[i].SAMObject.model:
            # De-normalise from the model which stored this test data
            yy_test = thisModel[i].Ytestn.copy()
            yy_test *= thisModel[i].Ystd
            yy_test += thisModel[i].Ymean
            y_valid_tmp, y_test_tmp, _, _ = utils.random_data_split(yy_test, [0.5, 0.5])
            Y_valid.append(y_valid_tmp.copy())
            Y_testing.append(y_test_tmp.copy())

    # Compute familiarities on the VALIDATION SET
    familiarities = [None] * (len(thisModel) - 1)
    for i in range(len(thisModel)):
        if thisModel[i].SAMObject.model:
            # N_test x N_labels matrix.
            familiarities[i - 1] = np.zeros((Y_valid[i - 1].shape[0], (len(thisModel) - 1)))
            logging.info("## True label is " + thisModel[i].modelLabel)
            for k in range(Y_valid[i - 1].shape[0]):
                sstest = []
                logging.info('# k= ' + str(k))
                for j in range(len(thisModel)):
                    if thisModel[j].SAMObject.model:
                        yy_test = Y_valid[i - 1][k, :][None, :].copy()
                        # Normalise according to the model to predict
                        yy_test -= thisModel[j].Ymean
                        yy_test /= thisModel[j].Ystd
                        sstest.append(thisModel[j].SAMObject.familiarity(yy_test, optimise=thisModel[0].optimiseRecall))
                        familiarities[i - 1][k, j - 1] = sstest[-1]
                msg = ''
                for j in range(len(sstest)):
                    if j == np.argmax(sstest):
                        msg = ' *'
                    else:
                        msg = '  '
                    logging.info(msg + ' Familiarity of model ' + thisModel[j + 1].modelLabel + ' given label: ' +
                                 thisModel[i].modelLabel + ' in valid: ' + str(sstest[j]))

                confMatrix[i - 1, np.argmax(sstest)] += 1
    calculateData(thisModel[0].textLabels, confMatrix)

    # At this point we have:
    # familiarities[i][k, j] -> familiarity for true label i, instance k,
    # predicted by the model trained on label j
    # ############ Train familiarity classifier on the VALIDATION SET
    classifiers = []
    classif_thresh = []
    familiarity_predictions = []
    tmp = []
    for i in range(len(thisModel[0].textLabels)):
        X_train = familiarities[0][:, i][:, None]
        y_train = np.zeros((familiarities[0][:, i][:, None].shape[0], 1))
        for j in range(1, len(thisModel[0].textLabels)):
            X_train = np.vstack((X_train, familiarities[j][:, i][:, None]))
            y_train = np.vstack((y_train, j + np.zeros((familiarities[j][:, i][:, None].shape[0], 1))))
        tmp.append(X_train)
        n_classes = len(np.unique(y_train))

        # Try GMMs using different types of covariances.
        classifiers.append(GMM(n_components=n_classes, covariance_type='full', init_params='wc', n_iter=2000))

        # Since we have class labels for the training data, we can
        # initialise the GMM parameters in a supervised manner.
        classifiers[-1].means_ = np.array([X_train[y_train == kk].mean(axis=0)
                                           for kk in xrange(n_classes)])[:, None]
        classifiers[-1].fit(X_train)
        familiarity_predictions.append(classifiers[-1].predict(X_train))

        # Find the threshold for confident classification of model i predicting label i
        tmp_i = classifiers[i].predict_proba(X_train[y_train == i][:, None])[:, i]
        tmp_s = 0.8
        # If in the test phase we get a predict_proba which falls within threshold i, then
        # model i is confident for this prediction.
        classif_thresh.append([tmp_i.mean() - tmp_s * tmp_i.std(), tmp_i.mean() + tmp_s * tmp_i.std()])

    thisModel[0].classificationDict['classifiers'] = classifiers
    thisModel[0].classificationDict['classif_thresh'] = classif_thresh
    thisModel[0].calibrated = True

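# A worked sketch of the confidence band stored above (made-up numbers): a test
# probability is "confident" for class j when it falls within mean +/- 0.8 * std
# of that class's validation probabilities:
#
#     tmp_i = np.array([0.7, 0.8, 0.9])
#     band = [tmp_i.mean() - 0.8 * tmp_i.std(), tmp_i.mean() + 0.8 * tmp_i.std()]
#     confident = band[0] <= 0.75 <= band[1]  # True for this data
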
def formatDataFunc(Ydata):
    """
    Utility function to format data for testing.

    Args:
        Ydata: Data to format for testing.

    Returns:
        Formatted data for testing.
    """
    yDataList = []
    for j in range(Ydata.shape[0]):
        yDataList.append(Ydata[j][None, :])
    return yDataList

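# A minimal usage sketch (hypothetical array): each row of a 2-D array becomes
# its own (1, D) test instance:
#
#     Y = np.arange(6).reshape((3, 2))
#     yList = formatDataFunc(Y)
#     # len(yList) == 3 and yList[0].shape == (1, 2)
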
def singleRecall(thisModel, testInstance, verbose, visualiseInfo=None, optimise=100):
    """
    Method that performs classification for single model implementations.

    This method returns the classification label of a test instance by calculating the predictive mean and variance of the backwards mapping. It then decides whether the test instance is known or unknown and, if known, its most probable classification label.

    Args:
        thisModel: SAMObject model to recall from.
        testInstance: Novel feature vector to test.
        verbose: Enable or disable logging to stdout.
        visualiseInfo: None to disable plotting and plotObject to display plot of recall.
        optimise: Number of optimisation iterations to perform during recall.

    Returns:
        Classification label and variance if __calibrateUnknown__ is set to `False` in the config file. Otherwise returns classification label and normalised classification probability.
    """
    # mm, vv, pp = self.SAMObject.pattern_completion(testFace, visualiseInfo=visualiseInfo)
    # if verbose:
    #     logging.info('single model recall')
    textStringOut = ''
    # normalise incoming data
    testValue = testInstance - thisModel.Ymean
    testValue /= thisModel.Ystd

    try:
        ret = thisModel.SAMObject.pattern_completion(testValue, visualiseInfo=visualiseInfo, optimise=optimise)
    except IndexError:
        return ['unknown', 0]
    mm = ret[0]
    vv = list(ret[1][0])
    svv = sum(vv)
    mvv = svv / len(vv)
    vv.append(svv)
    vv.append(mvv)

    # find the nearest neighbour of mm in SAMObject.model.X
    k = np.matlib.repmat(mm[0].values, thisModel.SAMObject.model.X.mean.shape[0], 1)
    pow2 = np.power(thisModel.SAMObject.model.X.mean - k, 2)
    s = np.power(np.sum(pow2, 1), 0.5)
    nn = np.argmin(s)
    min_value = s[nn]

    if thisModel.SAMObject.type == 'mrd':
        classLabel = thisModel.textLabels[int(thisModel.SAMObject.model.bgplvms[1].Y[nn, :])]
    elif thisModel.SAMObject.type == 'bgplvm':
        classLabel = thisModel.textLabels[int(thisModel.L[nn, :])]

    known = True
    if thisModel.calibrated:
        if thisModel.useMaxDistance:
            known = utils.varianceClass(thisModel.classificationDict['varianceDirection'],
                                        vv[thisModel.classificationDict['bestDistanceIDX']],
                                        thisModel.classificationDict['varianceThreshold'])

            details = str(thisModel.classificationDict['varianceThreshold']) + ' ' + \
                str(thisModel.classificationDict['varianceDirection'])

            probClass = vv[thisModel.classificationDict['bestDistanceIDX']]
        else:
            P_Known_given_X = utils.PfromHist(vv[:-2], thisModel.classificationDict['histKnown'],
                                              thisModel.classificationDict['binWidth'])
            P_Unknown_given_X = utils.PfromHist(vv[:-2], thisModel.classificationDict['histUnknown'],
                                                thisModel.classificationDict['binWidth'])

            if thisModel.classificationDict['method'] == 'mulProb':
                s1 = reduce(lambda x, y: x * y, P_Known_given_X)
                s2 = reduce(lambda x, y: x * y, P_Unknown_given_X)
                known = s1 > s2
            else:
                s1 = np.sum(P_Known_given_X)
                s2 = np.sum(P_Unknown_given_X)
                known = s1 > s2

            if known:
                probClass = s1
                details = s1, ' > ', s2
            else:
                probClass = s2
                details = s2, ' > ', s1

    if thisModel.calibrated:
        if known:
            textStringOut = classLabel
        else:
            textStringOut = 'unknown'
            runnerUp = classLabel
    else:
        textStringOut = classLabel

    if verbose:
        if thisModel.calibrated:
            if textStringOut == 'unknown':
                logging.info("With " + str(probClass) + " prob. error the new instance is " + str(runnerUp))
                logging.info('But ' + str(details) + ' than ' + str(probClass) + ' so class as ' + str(textStringOut))
            else:
                logging.info("With " + str(probClass) + " prob. error the new instance is " + str(textStringOut))
        else:
            logging.info("With " + str(vv) + " prob. error the new instance is " + str(textStringOut))

    if thisModel.calibrated:
        return [textStringOut, probClass / len(vv)]
    else:
        return [textStringOut, vv]

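# A minimal usage sketch (hypothetical objects; `model` is a trained single-model
# SAMObject wrapper and `feature` a raw, un-normalised feature vector):
#
#     label, confidence = singleRecall(model, feature, verbose=True, optimise=50)
#     if label == 'unknown':
#         logging.info('instance rejected with confidence ' + str(confidence))
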
def multipleRecall_noCalib(thisModel, testInstance, verbose, visualiseInfo=None, optimise=True):
    """
    Method that performs classification for uncalibrated multiple model implementations.

    Args:
        thisModel: SAMObject model to recall from.
        testInstance: Novel feature vector to test.
        verbose: Enable or disable logging to stdout.
        visualiseInfo: None to disable plotting and plotObject to display plot of recall.
        optimise: Number of optimisation iterations to perform during recall.

    Returns:
        Classification label and raw familiarity values.
    """
    result = []
    if verbose:
        pass
        # logging.info('multiple model recall')

    for j in thisModel:
        if j.SAMObject.model:
            tempTest = testInstance - j.Ymean
            tempTest /= j.Ystd
            yy_test = j.SAMObject.familiarity(tempTest, optimise=optimise)
            if verbose:
                logging.info('Familiarity with ' + j.modelLabel + ' given current instance is: ' + str(yy_test))
            # yy_test -= thisModel[j].Ymean
            # yy_test /= thisModel[j].Ystd
            result.append(yy_test)
    maxIdx = np.argmax(result)

    if visualiseInfo:
        pass

    return [thisModel[0].textLabels[maxIdx - 1], result[maxIdx][0]]

def multipleRecall(thisModel, testInstance, verbose, visualiseInfo=None, optimise=100):
    """
    Method that performs classification for calibrated multiple model implementations.

    Args:
        thisModel: SAMObject model to recall from.
        testInstance: Novel feature vector to test.
        verbose: Enable or disable logging to stdout.
        visualiseInfo: None to disable plotting and plotObject to display plot of recall.
        optimise: Number of optimisation iterations to perform during recall.

    Returns:
        Classification label and calibrated familiarity values.
    """
    cmSize = len(thisModel[0].textLabels)
    familiarities_tmp = []
    classif_tmp = []

    label = 'unknown'

    if not thisModel[0].classificationDict['classifiers']:
        calibrateMultipleModelRecall(thisModel)

    for j in range(cmSize):
        tempTest = testInstance - thisModel[j + 1].Ymean
        tempTest /= thisModel[j + 1].Ystd
        yy_test = thisModel[j + 1].SAMObject.familiarity(tempTest, optimise=optimise)[:, None]
        # yy_test *= thisModel[j + 1].Ystd
        # yy_test += thisModel[j + 1].Ymean
        cc = thisModel[0].classificationDict['classifiers'][j].predict_proba(yy_test)[:, j]
        if verbose:
            logging.info('Familiarity with ' + thisModel[j + 1].modelLabel + ' given current instance is: ' +
                         str(yy_test) + ' ' + str(cc[0]))
        familiarities_tmp.append(yy_test)
        classif_tmp.append(cc)

    bestConfidence = np.argmax(classif_tmp)

    for j in range(cmSize):
        if thisModel[0].classificationDict['classif_thresh'][j][0] <= \
                classif_tmp[j] <= thisModel[0].classificationDict['classif_thresh'][j][1]:
            bestConfidence = j
            label = thisModel[0].textLabels[j]

        # logging.info('min, classifier, max = ' + str(thisModel[0].classif_thresh[j][0]) +
        #              ' ' + str(classif_tmp[j]) + ' ' +
        #              str(thisModel[0].classif_thresh[j][1]))

    # if visualiseInfo:
    #     pass

    return [label, classif_tmp[bestConfidence][0]]

def wait_watching_stdout(ar, dt=1, truncate=1000):
    """
    Monitoring function that logs to stdout the logging output of multiple threads.

    Args:
        ar: ipyparallel AsyncResult object to monitor.
        dt: Integer delta time in seconds between readings of ar.stdout.
        truncate: Integer limit on the number of characters logged from each thread.

    Returns:
        None
    """
    while not ar.ready():
        stdouts = ar.stdout
        if any(stdouts):
            clear_output()
            logging.info('-' * 30)
            logging.info("%.3fs elapsed" % ar.elapsed)
            logging.info("")
            for stdout in ar.stdout:
                if stdout:
                    logging.info("\n%s" % (stdout[-truncate:]))
        sys.stdout.flush()
        time.sleep(dt)

def testSegment(thisModel, Ysample, verbose, visualiseInfo=None, optimise=100):
    """
    Utility function to test a sample.

    This method determines the type of model being used for the testing and directs the query to the appropriate function.

    Args:
        thisModel: SAMObject model to recall from.
        Ysample: Novel feature vector to test.
        verbose: Enable or disable logging to stdout.
        visualiseInfo: `None` to disable plotting and plotObject to display plot of recall.
        optimise: Number of optimisation iterations to perform during recall.

    Returns:
        Classification result containing a list with the classification string and a measure of the familiarity or probability of the recall.
    """
    if len(thisModel) > 1:
        d = multipleRecall(thisModel, Ysample, verbose, visualiseInfo, optimise=optimise)
    else:
        d = singleRecall(thisModel[0], Ysample, verbose, visualiseInfo, optimise=optimise)
    return d

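# A minimal usage sketch (hypothetical inputs): testSegment dispatches to
# multipleRecall when several models are loaded and to singleRecall otherwise:
#
#     result = testSegment(thisModel, Ysample[0], verbose=True, optimise=100)
#     # result == [classificationLabel, familiarityOrProbability]
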
def segmentTesting(thisModel, Ysample, Lnum, verbose, label, serialMode=False, optimise=100, calibrate=False):
    """
    Method to test multiple samples at a time.

    Args:
        thisModel : SAMObject model to recall from.
        Ysample : Novel feature vector to test.
        Lnum : Ground truth labels to compare with.
        verbose : Enable or disable logging to stdout.
        label : Label for the current segments being tested.
        serialMode : Boolean to test serially or in parallel.
        optimise : Number of optimisation iterations to perform during recall.
        calibrate : Indicate calibration mode when True, which requires a different return.

    Returns:
        labelList, confMatrix, ret, variancesKnown, variancesUnknown if calibrate is `True`.
        labelList, confMatrix, labelComparisonDict if calibrate is `False`.

        labelList : List of classification labels.
        confMatrix : Numpy array with the confusion matrix.
        ret : Classification object.
        variancesKnown : Variances returned during calibration for known training instances.
        variancesUnknown : Variances returned during calibration for unknown training instances.
        labelComparisonDict : Dictionary with two items, `'original'` and `'results'`.
    """
    def testFunc(data, lab):
        d = testSegment(thisModel, data, verbose, visualiseInfo=None, optimise=optimise)
        if verbose:
            if lab == d[0]:
                res = True
            else:
                res = False
            logging.info('Actual ' + str(lab).ljust(11) + ' Classification: ' + str(d[0]).ljust(11) + ' with ' +
                         str(d[1])[:6] + ' confidence: ' + str(res) + '\n')
        return d

    logging.info('')

    if type(Lnum).__module__ == np.__name__:
        useModelLabels = True
    else:
        useModelLabels = False

    labelList = copy.deepcopy(thisModel[0].textLabels)
    labelList.append('unknown')

    confMatrix = np.zeros((len(labelList), len(labelList)))

    numItems = len(Ysample)

    off1 = 11
    off2 = 8
    off3 = len(str(numItems))
    if useModelLabels:
        Lsample = [thisModel[0].textLabels[int(Lnum[i])] for i in range(len(Lnum))]
    else:
        Lsample = Lnum

    if numItems < 1500:
        serialMode = True
    c = None
    logging.info('serialMode: ' + str(serialMode))
    if not serialMode and thisModel[0].parallelOperation:
        try:
            logging.info('Trying engines ...')
            c = ipp.Client()
            numWorkers = len(c._engines)
            logging.info('Number of engines: ' + str(numWorkers))
        except Exception:
            logging.error("Parallel workers not found")
            thisModel[0].parallelOperation = False
            numWorkers = 1
    else:
        logging.info(str(serialMode) + '= True')
        thisModel[0].parallelOperation = False
        numWorkers = 1
        logging.info('Number of engines: ' + str(numWorkers))

    # average several classifications to estimate the per-item classification time
    vTemp = copy.deepcopy(verbose)
    verbose = False
    if len(Lsample) < 400:
        numTrials = len(Lsample) * 0.1
        numTrials = int(numTrials)
    else:
        numTrials = 20
    t0 = time.time()
    for j in range(numTrials):
        testFunc(Ysample[j], Lsample[j])
    t1 = time.time()
    verbose = vTemp
    thisModel[0].avgClassTime = (t1 - t0) / numTrials
    logging.info('classification rate: ' + str(1.0 / thisModel[0].avgClassTime) + ' fps')
    logging.info('estimated time: ' + str(thisModel[0].avgClassTime * numItems / (60 * numWorkers)) + ' mins for ' +
                 str(numItems) + ' items with ' + str(numWorkers) + ' workers')
    t0 = time.time()
    logging.info(t0)
    # check size of model
    # modelSize is size in megabytes
    modelSize = deep_getsizeof(thisModel, set()) / 1024.0 / 1024.0
    logging.info("modelSize: " + str(modelSize))
    logging.warning("required testing size: " + str((modelSize * numWorkers * 2) + 400) + " MB")
    # check available system memory in megabytes
    freeSystemMem = float(psutil.virtual_memory()[4]) / 1024.0 / 1024.0
    logging.info("free memory: " + str(freeSystemMem) + " MB")

    if modelSize > 100 or not thisModel[0].parallelOperation or serialMode:
        # serial testing
        logging.warning('Testing serially')
        ret = []
        for j in range(len(Lsample)):
            logging.info(str(j) + '/' + str(len(Lsample)))
            ret.append(testFunc(Ysample[j], Lsample[j]))
    else:
        # parallel testing
        logging.info('Testing in parallel')
        dview = c[:]  # not load balanced
        lb = c.load_balanced_view()  # load balanced

        # with dview.sync_imports():
        #     from SAM.SAM_Core import utils
        # if not thisModel[0].modelLoaded:
        dview.push({'thisModel': thisModel})
        dview.push({'verbose': verbose})
        dview.push({'optimise': optimise})
        # thisModel[0].modelLoaded = True
        syn = lb.map_async(testFunc, Ysample, Lsample)
        wait_watching_stdout(syn, dt=1, truncate=1000)
        ret = syn.get()
        # maybe these are upsetting the ipcluster
        # dview.clear()
        # dview.purge_results('all')
    t1 = time.time()
    logging.info(t1)
    logging.info('Actual time taken = ' + str(t1 - t0))
    if calibrate:
        variancesKnown = []
        variancesUnknown = []
        for i in range(len(ret)):
            currLabel = Lsample[i]

            if verbose:
                if currLabel == ret[i][0]:
                    result = True
                else:
                    result = False
                logging.info(str(i).rjust(off3) + '/' + str(numItems) + ' Truth: ' + currLabel.ljust(off1) +
                             ' Model: ' + ret[i][0].ljust(off1) + ' with ' + str(ret[i][1])[:6].ljust(off2) +
                             ' confidence: ' + str(result))

            if currLabel in thisModel[0].textLabels:
                knownLabel = True
            else:
                knownLabel = False
                currLabel = 'unknown'

            if knownLabel:
                variancesKnown.append(ret[i][1])
            else:
                variancesUnknown.append(ret[i][1])

            confMatrix[labelList.index(currLabel), labelList.index(ret[i][0])] += 1

        return labelList, confMatrix, ret, variancesKnown, variancesUnknown
    else:
        labelComparisonDict = dict()
        labelComparisonDict['original'] = []
        labelComparisonDict['results'] = []
        for i in range(len(ret)):
            currLabel = Lsample[i]
            retLabel = ret[i][0]

            if currLabel not in thisModel[0].textLabels:
                currLabel = 'unknown'

            if verbose:
                if currLabel == retLabel:
                    result = True
                else:
                    result = False
                logging.info(str(i).rjust(off3) + '/' + str(numItems) + ' Truth: ' + currLabel.ljust(off1) +
                             ' Model: ' + retLabel.ljust(off1) + ' with ' + str(ret[i][1])[:6].ljust(off2) +
                             ' confidence: ' + str(result))

            labelComparisonDict['original'].append(Lsample[i])
            labelComparisonDict['results'].append(retLabel)
            confMatrix[labelList.index(currLabel), labelList.index(retLabel)] += 1
        return labelList, confMatrix, labelComparisonDict

def testSegments(thisModel, Ysample, Lnum, verbose, label, serialMode=False):
    """
    Function to test segments and return a confusion matrix.

    Args:
        thisModel : SAMObject model to recall from.
        Ysample : Novel feature vector to test.
        Lnum : Ground truth labels to compare with.
        verbose : Enable or disable logging to stdout.
        label : Label for the current segments being tested.
        serialMode : Boolean to test serially or in parallel.

    Returns:
        Normalised confusion matrix, overall percentage correct, list of possible labels and dictionary with the comparison of truth and classification.
    """
    labelList, confMatrix, labelComparisonDict = segmentTesting(thisModel, Ysample, Lnum, verbose, label,
                                                                serialMode=serialMode,
                                                                optimise=thisModel[0].optimiseRecall, calibrate=False)

    dCalc = calculateData(labelList, confMatrix)

    return [dCalc[0], dCalc[1], labelList, labelComparisonDict]

def calculateVarianceThreshold(segIntersections, mk, muk, vk, vuk):
    """
    Method to decide on the approach to be used for setting variance thresholds and the method of thresholding.

    Args:
        segIntersections : Array of Gaussian intersection points.
        mk : Means of known.
        muk : Means of unknown.
        vk : Variances of known.
        vuk : Variances of unknown.

    Returns:
        List of threshold variances and method of thresholding.
    """
    thresh = None
    direction = None
    if len(segIntersections) == 0:
        # either gaussians exactly equal to each other (worst) or no overlap (best)
        if mk == muk:
            # gaussians on top of each other .. can only happen with 0 intersections when the means are identical
            # and the variances are identical
            thresh = [mk]
            direction = ['smaller']
        else:
            # gaussians completely separated, threshold set equidistant from the means
            thresh = [(max(mk, muk) - min(mk, muk)) / 2 + min(mk, muk)]
            if thresh[0] > mk:
                direction = ['smaller']
    elif len(segIntersections) == 1:
        # set threshold at this point
        thresh = [segIntersections]
        if thresh[0] > mk:
            direction = ['smaller']
    elif len(segIntersections) == 2:
        if mk == muk:
            # set upper and lower bounds on threshold
            thresh = [min(segIntersections), max(segIntersections)]
            if vk > vuk:
                direction = ['smaller', 'greater']
            else:
                direction = ['greater', 'smaller']
        else:
            # set threshold equidistant from the intersections
            thresh = [np.ptp(segIntersections) / 2 + min(segIntersections)]
            if thresh[0] < muk:
                direction = ['smaller']

    if direction is None:
        direction = ['greater']

    return [thresh, direction]

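# A worked sketch (made-up numbers): a single intersection at 0.5 between a known
# mean of 0.2 and an unknown mean of 0.9 yields a 'smaller' threshold, since the
# known distribution lies below the intersection point:
#
#     thresh, direction = calculateVarianceThreshold(np.array([0.5]), 0.2, 0.9, 0.01, 0.01)
#     # thresh == [array([ 0.5])] and direction == ['smaller']
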
def calculateData(textLabels, confMatrix, numItems=None):
    """
    Calculate the normalised confusion matrix.

    Args:
        textLabels: List of classification labels.
        confMatrix: Confusion matrix to normalise.
        numItems: Total number of items tested.

    Returns:
        Normalised confusion matrix and overall percentage correct.
    """
    logging.info(confMatrix)
    if not numItems:
        numItems = np.sum(confMatrix)

    h = confMatrix
    total = h.astype(np.float).sum(axis=1)
    normConf = copy.deepcopy(h)
    normConf = normConf.astype(np.float)

    for l in range(h.shape[0]):
        if total[l] != 0:
            normConf[l, :] = normConf[l, :].astype(np.float) * 100 / total[l].astype(np.float)

    logging.info(normConf)

    # percCorrect = 100 * np.diag(h.astype(np.float)).sum() / numItems
    percCorrect = 100 * np.diag(normConf.astype(np.float)).sum() / np.sum(normConf)

    logging.info(str(percCorrect)[:5].ljust(7) + "% correct for training data")
    logging.info('')
    for i in range(confMatrix.shape[0]):
        for j in range(confMatrix.shape[0]):
            logging.info(str(normConf[i, j])[:5].ljust(7) + '% of ' + str(textLabels[i]) +
                         ' classified as ' + str(textLabels[j]))
        logging.info('')
    return [normConf, percCorrect]

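# A worked sketch (made-up confusion matrix): each row is normalised to
# percentages before the overall accuracy is computed:
#
#     cm = np.array([[8., 2.], [1., 9.]])
#     normConf, percCorrect = calculateData(['push', 'pull'], cm)
#     # normConf rows become [80., 20.] and [10., 90.]; percCorrect == 85.0
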
def combineClassifications(thisModel, labels, likelihoods):
    """
    Combine multiple classifications into a single classification.

    Args:
        thisModel: SAMObject model.
        labels: List of labels for classifications.
        likelihoods: List of likelihoods.

    Returns:
        Label with the highest likelihood together with the normalised likelihood.
    """
    # if len(thisModel) > 1:
    #     labelList = copy.deepcopy(thisModel[0].textLabels)
    #     labelList.append('unknown')
    # else:
    labelList = copy.deepcopy(thisModel[0].textLabels)

    sumLikelihoods = [None] * (len(labelList))
    counts = [0] * (len(labelList))

    for i in range(len(labels)):
        idx = [j for j, k in enumerate(labelList) if k == labels[i]][0]
        counts[idx] += 1
        if sumLikelihoods[idx] is None:
            sumLikelihoods[idx] = likelihoods[i][thisModel[0].SAMObject.Q]
        else:
            sumLikelihoods[idx] += likelihoods[i][thisModel[0].SAMObject.Q]

    m = max(sumLikelihoods)
    maxIdx = [j for j, k in enumerate(sumLikelihoods) if k == m][0]

    return [labelList[maxIdx], m / counts[maxIdx]]

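# A minimal usage sketch (hypothetical inputs; Q indexes the likelihood entry
# used for the combination):
#
#     Q = thisModel[0].SAMObject.Q
#     labels = ['wave', 'wave', 'push']
#     likelihoods = [np.random.rand(Q + 1) for _ in labels]
#     label, normLikelihood = combineClassifications(thisModel, labels, likelihoods)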