icub-client
SAMTesting.py
1 # """"""""""""""""""""""""""""""""""""""""""""""
2 # The University of Sheffield
3 # WYSIWYD Project
4 #
5 # A class includes the different testing methods implemented
6 # Currently tested only with Actions
7 #
8 # Created on 20 July 2016
9 #
10 # @author: Daniel Camilleri, Andreas Damianou
11 #
12 # """"""""""""""""""""""""""""""""""""""""""""""
from IPython.display import clear_output
from sklearn.mixture import GMM
from SAM.SAM_Core import SAM_utils as utils
import ipyparallel as ipp
import time
import matplotlib
matplotlib.use("TkAgg")
import matplotlib.pyplot as plt
import numpy.matlib
import sys
import copy
import psutil
import timeit
import numpy as np
from collections import Mapping, Container
from sys import getsizeof
from operator import gt
import logging

np.set_printoptions(threshold=np.nan, precision=2)


# thisModel = None

def deep_getsizeof(o, ids):
    """
    Method to calculate the size of an object `o` in bytes.

    Args:
        o: Object to calculate the size of.
        ids: Set of object ids to skip when calculating the size of the object.

    Returns:
        Size of object in bytes.
    """
    d = deep_getsizeof
    if id(o) in ids:
        return 0

    r = getsizeof(o)
    ids.add(id(o))

    if isinstance(o, str) or isinstance(o, unicode):
        return r

    if isinstance(o, Mapping):
        return r + sum(d(k, ids) + d(v, ids) for k, v in o.iteritems())

    if isinstance(o, Container):
        return r + sum(d(x, ids) for x in o)

    if 'SAM' in o.__class__.__name__:
        total = 0
        for attr, value in o.__dict__.iteritems():
            # logging.info(attr)
            total += d(value, ids)
        return r + total

    return r

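# A minimal usage sketch (hypothetical data; the empty set seeds the id cache so
# shared sub-objects are counted only once):
#
#     payload = {'Y': np.zeros((100, 3)), 'L': [0, 1, 2]}
#     nbytes = deep_getsizeof(payload, set())
#     logging.info('payload size: %.2f MB' % (nbytes / 1024.0 / 1024.0))
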
def calibrateModelRecall(thisModel):
    """
    Logic to initialise calibration of model recall in order to recognise known from unknown instances.

    Args:
        thisModel: SAMObject model to calibrate.

    Returns:
        None
    """
    if len(thisModel) > 1:
        calibrateMultipleModelRecall(thisModel)
    elif hasattr(thisModel[0], 'allDataDict'):
        logging.info('calibrating model')
        calibrateSingleModelRecall(thisModel)
    else:
        logging.warning('no calibration')

def calibrateSingleModelRecall(thisModel):
    """
    Perform calibration for single model implementations.

    This method either uses the Bhattacharyya distance to perform calibration of known and unknown, or uses histograms of the known and unknown labels in the training data to carry out the classification. This method depends on the following parameters present in config.ini. \n

    1) __useMaxDistance__ : `False` or `True`. This enables the use of the Bhattacharyya distance method to recognise known and unknown. \n
    2) __calibrateUnknown__ : `True` or `False`. This turns on or off the calibration of the model for known and unknown inputs. \n
    3) __noBins__ : Integer number of bins to be used for the histogram method if __calibrateUnknown__ is `True` and __useMaxDistance__ is `False`. \n
    4) __method__ : String indicating the method used when histograms are used for calibration. When using histograms, the multi-dimensional probabilities of known and unknown are both calculated using the histogram. `sumProb` then performs a decision based on the largest sum after summing the probabilities of known and unknown independently. `mulProb` performs a decision based on the largest product after multiplying the probabilities of known and unknown independently. \n

    Args:
        thisModel: SAMObject model to calibrate.

    Returns:
        None
    """
    yCalib = formatDataFunc(thisModel[0].allDataDict['Y'])
    logging.info('entering segment testing')
    labelList, confMatrix, ret, variancesKnown, variancesUnknown = segmentTesting(thisModel, yCalib,
                                                                                  thisModel[0].allDataDict['L'],
                                                                                  thisModel[0].verbose, 'calib',
                                                                                  serialMode=False,
                                                                                  optimise=thisModel[0].optimiseRecall,
                                                                                  calibrate=True)
    thisModel[0].classificationDict = dict()

    if thisModel[0].useMaxDistance:
        [mk, vk, rk] = utils.meanVar_varianceDistribution(variancesKnown)
        [muk, vuk, ruk] = utils.meanVar_varianceDistribution(variancesUnknown)

        distance = []
        for j in range(len(mk)):
            distance.append(utils.bhattacharyya_distance(mk[j], muk[j], vk[j], vuk[j]))

        if distance is not None:
            maxIdx = distance.index(max(distance))
        thisModel[0].classificationDict['bestDistanceIDX'] = maxIdx
        thisModel[0].classificationDict['bestDistance_props'] = {'KnownMean': mk[maxIdx], 'UnknownMean': muk[maxIdx],
                                                                 'KnownVar': vk[maxIdx], 'UnknownVar': vuk[maxIdx]}

        # if maxIdx < len(mk) - 2:
        #     thisModel[0].bestSegOperation = maxIdx
        # elif maxIdx == len(mk) - 2:
        #     thisModel[0].bestSegOperation = 'sum'
        # elif maxIdx == len(mk) - 1:
        #     thisModel[0].bestSegOperation = 'mean'

        intersection = utils.solve_intersections(mk[maxIdx], muk[maxIdx], np.sqrt(vk[maxIdx]), np.sqrt(vuk[maxIdx]))

        maxLim = max(rk[maxIdx][1], ruk[maxIdx][1])
        minLim = min(rk[maxIdx][0], ruk[maxIdx][0])

        delList = []
        for j in range(len(intersection)):
            if intersection[j] > maxLim or intersection[j] < minLim:
                delList.append(j)

        thisModel[0].classificationDict['segIntersections'] = np.delete(intersection, delList)
        thisModel[0].classificationDict['bhattaDistances'] = distance

        logging.info('Num Intersections: ' + str(len(thisModel[0].classificationDict['segIntersections'])))

        [thisModel[0].classificationDict['varianceThreshold'],
         thisModel[0].classificationDict['varianceDirection']] = \
            calculateVarianceThreshold(thisModel[0].classificationDict['segIntersections'], mk[maxIdx], muk[maxIdx],
                                       vk[maxIdx], vuk[maxIdx])

        logging.info('varianceThreshold ' + str(thisModel[0].classificationDict['varianceThreshold']))
        logging.info('varianceDirection ' + str(thisModel[0].classificationDict['varianceDirection']))
    else:
        variancesKnownArray = np.asarray(variancesKnown)
        variancesUnknownArray = np.asarray(variancesUnknown)
        varianceAllArray = np.vstack([variancesKnownArray, variancesUnknownArray])
        histKnown = [None] * (len(variancesKnownArray[0]) - 2)
        binEdges = [None] * (len(variancesKnownArray[0]) - 2)
        histUnknown = [None] * (len(variancesKnownArray[0]) - 2)

        thisModel[0].classificationDict['binWidth'] = thisModel[0].paramsDict['binWidth']
        thisModel[0].classificationDict['method'] = thisModel[0].paramsDict['method']

        numBins = np.ceil(np.max(varianceAllArray) / thisModel[0].classificationDict['binWidth'])

        bins = range(int(numBins))
        bins = np.multiply(bins, thisModel[0].classificationDict['binWidth'])

        for j in range(len(variancesKnown[0]) - 2):
            histKnown[j], binEdges[j] = np.histogram(variancesKnownArray[:, j], bins=bins)
            histKnown[j] = 1.0 * histKnown[j] / np.sum(histKnown[j])

            histUnknown[j], _ = np.histogram(variancesUnknownArray[:, j], bins=bins)
            histUnknown[j] = 1.0 * histUnknown[j] / np.sum(histUnknown[j])

        thisModel[0].classificationDict['histKnown'] = histKnown
        thisModel[0].classificationDict['binEdgesKnown'] = binEdges
        thisModel[0].classificationDict['histUnknown'] = histUnknown

    thisModel[0].calibrated = True

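# A minimal sketch of the histogram decision rule described above (NumPy only;
# vv, histKnown, histUnknown and binWidth are hypothetical stand-ins for the
# values stored in classificationDict):
#
#     def histProb(v, hist, binWidth):
#         # probability of a variance v under one normalised histogram
#         idx = min(int(v / binWidth), len(hist) - 1)
#         return hist[idx]
#
#     pKnown = [histProb(v, h, binWidth) for v, h in zip(vv, histKnown)]
#     pUnknown = [histProb(v, h, binWidth) for v, h in zip(vv, histUnknown)]
#     known = sum(pKnown) > sum(pUnknown)  # 'sumProb'; 'mulProb' compares products
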
def calibrateMultipleModelRecall(thisModel):
    """
    Perform calibration for multiple model implementations.

    In contrast with calibrateSingleModelRecall, in this method known and unknown are calibrated according to measures of familiarity between all model classes. The familiarity of each class with each other class and with itself is then used to perform a Bayesian decision depending on the resulting familiarity when testing a new instance.

    Args:
        thisModel: SAMObject model to calibrate.

    Returns:
        None
    """
    cmSize = len(thisModel[0].textLabels)
    confMatrix = np.zeros((cmSize, cmSize))

    # Create validation set
    Y_valid = []
    Y_testing = []
    for i in range(len(thisModel)):
        if thisModel[i].SAMObject.model:
            # De-normalise from the model which stored this test data
            yy_test = thisModel[i].Ytestn.copy()
            yy_test *= thisModel[i].Ystd
            yy_test += thisModel[i].Ymean
            y_valid_tmp, y_test_tmp, _, _ = utils.random_data_split(yy_test, [0.5, 0.5])
            Y_valid.append(y_valid_tmp.copy())
            Y_testing.append(y_test_tmp.copy())

    # Compute familiarities on the VALIDATION SET
    familiarities = [None] * (len(thisModel) - 1)
    for i in range(len(thisModel)):
        if thisModel[i].SAMObject.model:
            # N_test x N_labels matrix.
            familiarities[i - 1] = np.zeros((Y_valid[i - 1].shape[0], (len(thisModel) - 1)))
            logging.info("## True label is " + thisModel[i].modelLabel)
            for k in range(Y_valid[i - 1].shape[0]):
                sstest = []
                logging.info('# k= ' + str(k))
                for j in range(len(thisModel)):
                    if thisModel[j].SAMObject.model:
                        yy_test = Y_valid[i - 1][k, :][None, :].copy()
                        # Normalise according to the model to predict
                        yy_test -= thisModel[j].Ymean
                        yy_test /= thisModel[j].Ystd
                        sstest.append(thisModel[j].SAMObject.familiarity(yy_test, optimise=thisModel[0].optimiseRecall))
                        familiarities[i - 1][k, j - 1] = sstest[-1]
                msg = ''
                for j in range(len(sstest)):
                    if j == np.argmax(sstest):
                        msg = ' *'
                    else:
                        msg = '  '
                    logging.info(msg + ' Familiarity of model ' + thisModel[j + 1].modelLabel + ' given label: ' +
                                 thisModel[i].modelLabel + ' in valid: ' + str(sstest[j]))

                confMatrix[i - 1, np.argmax(sstest)] += 1
    calculateData(thisModel[0].textLabels, confMatrix)

    # At this point we have:
    # familiarities[i][k, j] -> familiarity for true label i, instance k,
    # predicted by the model trained on label j
    # ############ Train familiarity classifier on the VALIDATION SET
    classifiers = []
    classif_thresh = []
    familiarity_predictions = []
    tmp = []
    for i in range(len(thisModel[0].textLabels)):
        X_train = familiarities[0][:, i][:, None]
        y_train = np.zeros((familiarities[0][:, i][:, None].shape[0], 1))
        for j in range(1, len(thisModel[0].textLabels)):
            X_train = np.vstack((X_train, familiarities[j][:, i][:, None]))
            y_train = np.vstack((y_train, j + np.zeros((familiarities[j][:, i][:, None].shape[0], 1))))
        tmp.append(X_train)
        n_classes = len(np.unique(y_train))

        # Try GMMs using different types of covariances.
        classifiers.append(GMM(n_components=n_classes, covariance_type='full', init_params='wc', n_iter=2000))

        # Since we have class labels for the training data, we can
        # initialise the GMM parameters in a supervised manner.
        classifiers[-1].means_ = np.array([X_train[y_train == kk].mean(axis=0)
                                           for kk in xrange(n_classes)])[:, None]
        classifiers[-1].fit(X_train)
        familiarity_predictions.append(classifiers[-1].predict(X_train))

        # Find the threshold for confident classification of model i predicting label i
        tmp_i = classifiers[i].predict_proba(X_train[y_train == i][:, None])[:, i]
        tmp_s = 0.8
        # If in the test phase we get a predict_proba which falls within threshold i, then
        # model i is confident for this prediction.
        classif_thresh.append([tmp_i.mean() - tmp_s * tmp_i.std(), tmp_i.mean() + tmp_s * tmp_i.std()])

    thisModel[0].classificationDict['classifiers'] = classifiers
    thisModel[0].classificationDict['classif_thresh'] = classif_thresh
    thisModel[0].calibrated = True

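# A worked sketch of the confidence band stored above (made-up numbers): a test
# probability is "confident" for class j when it falls within mean +/- 0.8 * std
# of that class's validation probabilities:
#
#     tmp_i = np.array([0.7, 0.8, 0.9])
#     band = [tmp_i.mean() - 0.8 * tmp_i.std(), tmp_i.mean() + 0.8 * tmp_i.std()]
#     confident = band[0] <= 0.75 <= band[1]  # True for this data
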
def formatDataFunc(Ydata):
    """
    Utility function to format data for testing.

    Args:
        Ydata: Data to format for testing.

    Returns:
        Formatted data for testing.
    """
    yDataList = []
    for j in range(Ydata.shape[0]):
        yDataList.append(Ydata[j][None, :])
    return yDataList

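# A minimal usage sketch (hypothetical array): each row of a 2-D array becomes
# its own (1, D) test instance:
#
#     Y = np.arange(6).reshape((3, 2))
#     yList = formatDataFunc(Y)
#     # len(yList) == 3 and yList[0].shape == (1, 2)
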
def singleRecall(thisModel, testInstance, verbose, visualiseInfo=None, optimise=100):
    """
    Method that performs classification for single model implementations.

    This method returns the classification label of a test instance by calculating the predictive mean and variance of the backwards mapping. It then decides whether the test instance is known or unknown and, if known, its most probable classification label.

    Args:
        thisModel: SAMObject model to recall from.
        testInstance: Novel feature vector to test.
        verbose: Enable or disable logging to stdout.
        visualiseInfo: None to disable plotting and plotObject to display plot of recall.
        optimise: Number of optimisation iterations to perform during recall.

    Returns:
        Classification label and variance if __calibrateUnknown__ is set to `False` in the config file. Otherwise returns classification label and normalised classification probability.
    """
    # mm, vv, pp = self.SAMObject.pattern_completion(testFace, visualiseInfo=visualiseInfo)
    # if verbose:
    #     logging.info('single model recall')
    textStringOut = ''
    # normalise incoming data
    testValue = testInstance - thisModel.Ymean
    testValue /= thisModel.Ystd

    try:
        ret = thisModel.SAMObject.pattern_completion(testValue, visualiseInfo=visualiseInfo, optimise=optimise)
    except IndexError:
        return ['unknown', 0]
    mm = ret[0]
    vv = list(ret[1][0])
    svv = sum(vv)
    mvv = svv / len(vv)
    vv.append(svv)
    vv.append(mvv)

    # find the nearest neighbour of mm in SAMObject.model.X
    k = np.matlib.repmat(mm[0].values, thisModel.SAMObject.model.X.mean.shape[0], 1)
    pow2 = np.power(thisModel.SAMObject.model.X.mean - k, 2)
    s = np.power(np.sum(pow2, 1), 0.5)
    nn = np.argmin(s)
    min_value = s[nn]

    if thisModel.SAMObject.type == 'mrd':
        classLabel = thisModel.textLabels[int(thisModel.SAMObject.model.bgplvms[1].Y[nn, :])]
    elif thisModel.SAMObject.type == 'bgplvm':
        classLabel = thisModel.textLabels[int(thisModel.L[nn, :])]

    known = True
    if thisModel.calibrated:
        if thisModel.useMaxDistance:
            known = utils.varianceClass(thisModel.classificationDict['varianceDirection'],
                                        vv[thisModel.classificationDict['bestDistanceIDX']],
                                        thisModel.classificationDict['varianceThreshold'])

            details = str(thisModel.classificationDict['varianceThreshold']) + ' ' + \
                str(thisModel.classificationDict['varianceDirection'])

            probClass = vv[thisModel.classificationDict['bestDistanceIDX']]
        else:
            P_Known_given_X = utils.PfromHist(vv[:-2], thisModel.classificationDict['histKnown'],
                                              thisModel.classificationDict['binWidth'])
            P_Unknown_given_X = utils.PfromHist(vv[:-2], thisModel.classificationDict['histUnknown'],
                                                thisModel.classificationDict['binWidth'])

            if thisModel.classificationDict['method'] == 'mulProb':
                s1 = reduce(lambda x, y: x * y, P_Known_given_X)
                s2 = reduce(lambda x, y: x * y, P_Unknown_given_X)
                known = s1 > s2
            else:
                s1 = np.sum(P_Known_given_X)
                s2 = np.sum(P_Unknown_given_X)
                known = s1 > s2

            if known:
                probClass = s1
                details = s1, ' > ', s2
            else:
                probClass = s2
                details = s2, ' > ', s1

    if thisModel.calibrated:
        if known:
            textStringOut = classLabel
        else:
            textStringOut = 'unknown'
            runnerUp = classLabel
    else:
        textStringOut = classLabel

    if verbose:
        if thisModel.calibrated:
            if textStringOut == 'unknown':
                logging.info("With " + str(probClass) + " prob. error the new instance is " + str(runnerUp))
                logging.info('But ' + str(details) + ' than ' + str(probClass) + ' so class as ' + str(textStringOut))
            else:
                logging.info("With " + str(probClass) + " prob. error the new instance is " + str(textStringOut))
        else:
            logging.info("With " + str(vv) + " prob. error the new instance is " + str(textStringOut))

    if thisModel.calibrated:
        return [textStringOut, probClass / len(vv)]
    else:
        return [textStringOut, vv]

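# A minimal usage sketch (hypothetical objects; `model` is a trained single-model
# SAMObject wrapper and `feature` a raw, un-normalised feature vector):
#
#     label, confidence = singleRecall(model, feature, verbose=True, optimise=50)
#     if label == 'unknown':
#         logging.info('instance rejected with confidence ' + str(confidence))
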
def multipleRecall_noCalib(thisModel, testInstance, verbose, visualiseInfo=None, optimise=True):
    """
    Method that performs classification for uncalibrated multiple model implementations.

    Args:
        thisModel: SAMObject model to recall from.
        testInstance: Novel feature vector to test.
        verbose: Enable or disable logging to stdout.
        visualiseInfo: None to disable plotting and plotObject to display plot of recall.
        optimise: Number of optimisation iterations to perform during recall.

    Returns:
        Classification label and raw familiarity values.
    """
    result = []
    if verbose:
        pass
        # logging.info('multiple model recall')

    for j in thisModel:
        if j.SAMObject.model:
            tempTest = testInstance - j.Ymean
            tempTest /= j.Ystd
            yy_test = j.SAMObject.familiarity(tempTest, optimise=optimise)
            if verbose:
                logging.info('Familiarity with ' + j.modelLabel + ' given current instance is: ' + str(yy_test))
            # yy_test -= thisModel[j].Ymean
            # yy_test /= thisModel[j].Ystd
            result.append(yy_test)
    maxIdx = np.argmax(result)

    if visualiseInfo:
        pass

    return [thisModel[0].textLabels[maxIdx - 1], result[maxIdx][0]]

def multipleRecall(thisModel, testInstance, verbose, visualiseInfo=None, optimise=100):
    """
    Method that performs classification for calibrated multiple model implementations.

    Args:
        thisModel: SAMObject model to recall from.
        testInstance: Novel feature vector to test.
        verbose: Enable or disable logging to stdout.
        visualiseInfo: None to disable plotting and plotObject to display plot of recall.
        optimise: Number of optimisation iterations to perform during recall.

    Returns:
        Classification label and calibrated familiarity values.
    """
    cmSize = len(thisModel[0].textLabels)
    familiarities_tmp = []
    classif_tmp = []

    label = 'unknown'

    if not thisModel[0].classificationDict['classifiers']:
        calibrateMultipleModelRecall(thisModel)

    for j in range(cmSize):
        tempTest = testInstance - thisModel[j + 1].Ymean
        tempTest /= thisModel[j + 1].Ystd
        yy_test = thisModel[j + 1].SAMObject.familiarity(tempTest, optimise=optimise)[:, None]
        # yy_test *= thisModel[j + 1].Ystd
        # yy_test += thisModel[j + 1].Ymean
        cc = thisModel[0].classificationDict['classifiers'][j].predict_proba(yy_test)[:, j]
        if verbose:
            logging.info('Familiarity with ' + thisModel[j + 1].modelLabel + ' given current instance is: ' +
                         str(yy_test) + ' ' + str(cc[0]))
        familiarities_tmp.append(yy_test)
        classif_tmp.append(cc)

    bestConfidence = np.argmax(classif_tmp)

    for j in range(cmSize):
        if thisModel[0].classificationDict['classif_thresh'][j][0] <= \
                classif_tmp[j] <= thisModel[0].classificationDict['classif_thresh'][j][1]:
            bestConfidence = j
            label = thisModel[0].textLabels[j]

        # logging.info('min, classifier, max = ' + str(thisModel[0].classif_thresh[j][0]) +
        #              ' ' + str(classif_tmp[j]) + ' ' +
        #              str(thisModel[0].classif_thresh[j][1]))

    # if visualiseInfo:
    #     pass

    return [label, classif_tmp[bestConfidence][0]]

def wait_watching_stdout(ar, dt=1, truncate=1000):
    """
    Monitoring function that logs to stdout the logging output of multiple threads.

    Args:
        ar: ipyparallel AsyncResult object to monitor.
        dt: Integer delta time in seconds between readings of ar.stdout.
        truncate: Integer limit on the number of characters logged from each thread.

    Returns:
        None
    """
    while not ar.ready():
        stdouts = ar.stdout
        if any(stdouts):
            clear_output()
            logging.info('-' * 30)
            logging.info("%.3fs elapsed" % ar.elapsed)
            logging.info("")
            for stdout in ar.stdout:
                if stdout:
                    logging.info("\n%s" % (stdout[-truncate:]))
        sys.stdout.flush()
        time.sleep(dt)

def testSegment(thisModel, Ysample, verbose, visualiseInfo=None, optimise=100):
    """
    Utility function to test a sample.

    This method determines the type of model being used for the testing and directs the query to the appropriate function.

    Args:
        thisModel: SAMObject model to recall from.
        Ysample: Novel feature vector to test.
        verbose: Enable or disable logging to stdout.
        visualiseInfo: `None` to disable plotting and plotObject to display plot of recall.
        optimise: Number of optimisation iterations to perform during recall.

    Returns:
        Classification result containing a list with the classification string and a measure of the familiarity or probability of the recall.
    """
    if len(thisModel) > 1:
        d = multipleRecall(thisModel, Ysample, verbose, visualiseInfo, optimise=optimise)
    else:
        d = singleRecall(thisModel[0], Ysample, verbose, visualiseInfo, optimise=optimise)
    return d

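# A minimal usage sketch (hypothetical inputs): testSegment dispatches to
# multipleRecall when several models are loaded and to singleRecall otherwise:
#
#     result = testSegment(thisModel, Ysample[0], verbose=True, optimise=100)
#     # result == [classificationLabel, familiarityOrProbability]
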
def segmentTesting(thisModel, Ysample, Lnum, verbose, label, serialMode=False, optimise=100, calibrate=False):
    """
    Method to test multiple samples at a time.

    Args:
        thisModel : SAMObject model to recall from.
        Ysample : Novel feature vector to test.
        Lnum : Ground truth labels to compare with.
        verbose : Enable or disable logging to stdout.
        label : Label for the current segments being tested.
        serialMode : Boolean to test serially or in parallel.
        optimise : Number of optimisation iterations to perform during recall.
        calibrate : Indicate calibration mode when True, which requires a different return.

    Returns:
        labelList, confMatrix, ret, variancesKnown, variancesUnknown if calibrate is `True`.
        labelList, confMatrix, labelComparisonDict if calibrate is `False`.

        labelList : List of classification labels.
        confMatrix : Numpy array with the confusion matrix.
        ret : Classification object.
        variancesKnown : Variances returned during calibration for known training instances.
        variancesUnknown : Variances returned during calibration for unknown training instances.
        labelComparisonDict : Dictionary with two items, `'original'` and `'results'`.
    """
    def testFunc(data, lab):
        d = testSegment(thisModel, data, verbose, visualiseInfo=None, optimise=optimise)
        if verbose:
            if lab == d[0]:
                res = True
            else:
                res = False
            logging.info('Actual ' + str(lab).ljust(11) + ' Classification: ' + str(d[0]).ljust(11) + ' with ' +
                         str(d[1])[:6] + ' confidence: ' + str(res) + '\n')
        return d

    logging.info('')

    if type(Lnum).__module__ == np.__name__:
        useModelLabels = True
    else:
        useModelLabels = False

    labelList = copy.deepcopy(thisModel[0].textLabels)
    labelList.append('unknown')

    confMatrix = np.zeros((len(labelList), len(labelList)))

    numItems = len(Ysample)

    off1 = 11
    off2 = 8
    off3 = len(str(numItems))
    if useModelLabels:
        Lsample = [thisModel[0].textLabels[int(Lnum[i])] for i in range(len(Lnum))]
    else:
        Lsample = Lnum

    if numItems < 1500:
        serialMode = True
    c = None
    logging.info('serialMode: ' + str(serialMode))
    if not serialMode and thisModel[0].parallelOperation:
        try:
            logging.info('Trying engines ...')
            c = ipp.Client()
            numWorkers = len(c._engines)
            logging.info('Number of engines: ' + str(numWorkers))
        except Exception:
            logging.error("Parallel workers not found")
            thisModel[0].parallelOperation = False
            numWorkers = 1
    else:
        logging.info(str(serialMode) + '= True')
        thisModel[0].parallelOperation = False
        numWorkers = 1
        logging.info('Number of engines: ' + str(numWorkers))

    # average several classifications to estimate the per-item classification time
    vTemp = copy.deepcopy(verbose)
    verbose = False
    if len(Lsample) < 400:
        numTrials = len(Lsample) * 0.1
        numTrials = int(numTrials)
    else:
        numTrials = 20
    t0 = time.time()
    for j in range(numTrials):
        testFunc(Ysample[j], Lsample[j])
    t1 = time.time()
    verbose = vTemp
    thisModel[0].avgClassTime = (t1 - t0) / numTrials
    logging.info('classification rate: ' + str(1.0 / thisModel[0].avgClassTime) + ' fps')
    logging.info('estimated time: ' + str(thisModel[0].avgClassTime * numItems / (60 * numWorkers)) + ' mins for ' +
                 str(numItems) + ' items with ' + str(numWorkers) + ' workers')
    t0 = time.time()
    logging.info(t0)
    # check size of model
    # modelSize is size in megabytes
    modelSize = deep_getsizeof(thisModel, set()) / 1024.0 / 1024.0
    logging.info("modelSize: " + str(modelSize))
    logging.warning("required testing size: " + str((modelSize * numWorkers * 2) + 400) + " MB")
    # check available system memory in megabytes
    freeSystemMem = float(psutil.virtual_memory()[4]) / 1024.0 / 1024.0
    logging.info("free memory: " + str(freeSystemMem) + " MB")

    if modelSize > 100 or not thisModel[0].parallelOperation or serialMode:
        # serial testing
        logging.warning('Testing serially')
        ret = []
        for j in range(len(Lsample)):
            logging.info(str(j) + '/' + str(len(Lsample)))
            ret.append(testFunc(Ysample[j], Lsample[j]))
    else:
        # parallel testing
        logging.info('Testing in parallel')
        dview = c[:]  # not load balanced
        lb = c.load_balanced_view()  # load balanced

        # with dview.sync_imports():
        #     from SAM.SAM_Core import utils
        # if not thisModel[0].modelLoaded:
        dview.push({'thisModel': thisModel})
        dview.push({'verbose': verbose})
        dview.push({'optimise': optimise})
        # thisModel[0].modelLoaded = True
        syn = lb.map_async(testFunc, Ysample, Lsample)
        wait_watching_stdout(syn, dt=1, truncate=1000)
        ret = syn.get()
        # maybe these are upsetting the ipcluster
        # dview.clear()
        # dview.purge_results('all')
    t1 = time.time()
    logging.info(t1)
    logging.info('Actual time taken = ' + str(t1 - t0))
    if calibrate:
        variancesKnown = []
        variancesUnknown = []
        for i in range(len(ret)):
            currLabel = Lsample[i]

            if verbose:
                if currLabel == ret[i][0]:
                    result = True
                else:
                    result = False
                logging.info(str(i).rjust(off3) + '/' + str(numItems) + ' Truth: ' + currLabel.ljust(off1) +
                             ' Model: ' + ret[i][0].ljust(off1) + ' with ' + str(ret[i][1])[:6].ljust(off2) +
                             ' confidence: ' + str(result))

            if currLabel in thisModel[0].textLabels:
                knownLabel = True
            else:
                knownLabel = False
                currLabel = 'unknown'

            if knownLabel:
                variancesKnown.append(ret[i][1])
            else:
                variancesUnknown.append(ret[i][1])

            confMatrix[labelList.index(currLabel), labelList.index(ret[i][0])] += 1

        return labelList, confMatrix, ret, variancesKnown, variancesUnknown
    else:
        labelComparisonDict = dict()
        labelComparisonDict['original'] = []
        labelComparisonDict['results'] = []
        for i in range(len(ret)):
            currLabel = Lsample[i]
            retLabel = ret[i][0]

            if currLabel not in thisModel[0].textLabels:
                currLabel = 'unknown'

            if verbose:
                if currLabel == retLabel:
                    result = True
                else:
                    result = False
                logging.info(str(i).rjust(off3) + '/' + str(numItems) + ' Truth: ' + currLabel.ljust(off1) +
                             ' Model: ' + retLabel.ljust(off1) + ' with ' + str(ret[i][1])[:6].ljust(off2) +
                             ' confidence: ' + str(result))

            labelComparisonDict['original'].append(Lsample[i])
            labelComparisonDict['results'].append(retLabel)
            confMatrix[labelList.index(currLabel), labelList.index(retLabel)] += 1
        return labelList, confMatrix, labelComparisonDict

def testSegments(thisModel, Ysample, Lnum, verbose, label, serialMode=False):
    """
    Function to test segments and return a confusion matrix.

    Args:
        thisModel : SAMObject model to recall from.
        Ysample : Novel feature vector to test.
        Lnum : Ground truth labels to compare with.
        verbose : Enable or disable logging to stdout.
        label : Label for the current segments being tested.
        serialMode : Boolean to test serially or in parallel.

    Returns:
        Normalised confusion matrix, overall percentage correct, list of possible labels and dictionary with the comparison of truth and classification.
    """
    labelList, confMatrix, labelComparisonDict = segmentTesting(thisModel, Ysample, Lnum, verbose, label,
                                                                serialMode=serialMode,
                                                                optimise=thisModel[0].optimiseRecall, calibrate=False)

    dCalc = calculateData(labelList, confMatrix)

    return [dCalc[0], dCalc[1], labelList, labelComparisonDict]

def calculateVarianceThreshold(segIntersections, mk, muk, vk, vuk):
    """
    Method to decide on the approach to be used for setting variance thresholds and the method of thresholding.

    Args:
        segIntersections : Array of Gaussian intersection points.
        mk : Means of known.
        muk : Means of unknown.
        vk : Variances of known.
        vuk : Variances of unknown.

    Returns:
        List of threshold variances and method of thresholding.
    """
    thresh = None
    direction = None
    if len(segIntersections) == 0:
        # either gaussians exactly equal to each other (worst) or no overlap (best)
        if mk == muk:
            # gaussians on top of each other .. can only happen with 0 intersections when the means are identical
            # and the variances are identical
            thresh = [mk]
            direction = ['smaller']
        else:
            # gaussians completely separated, threshold set equidistant from the means
            thresh = [(max(mk, muk) - min(mk, muk)) / 2 + min(mk, muk)]
            if thresh[0] > mk:
                direction = ['smaller']
    elif len(segIntersections) == 1:
        # set threshold at this point
        thresh = [segIntersections]
        if thresh[0] > mk:
            direction = ['smaller']
    elif len(segIntersections) == 2:
        if mk == muk:
            # set upper and lower bounds on threshold
            thresh = [min(segIntersections), max(segIntersections)]
            if vk > vuk:
                direction = ['smaller', 'greater']
            else:
                direction = ['greater', 'smaller']
        else:
            # set threshold equidistant from the intersections
            thresh = [np.ptp(segIntersections) / 2 + min(segIntersections)]
            if thresh[0] < muk:
                direction = ['smaller']

    if direction is None:
        direction = ['greater']

    return [thresh, direction]

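# A worked sketch (made-up numbers): a single intersection at 0.5 between a known
# mean of 0.2 and an unknown mean of 0.9 yields a 'smaller' threshold, since the
# known distribution lies below the intersection point:
#
#     thresh, direction = calculateVarianceThreshold(np.array([0.5]), 0.2, 0.9, 0.01, 0.01)
#     # thresh == [array([ 0.5])] and direction == ['smaller']
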
def calculateData(textLabels, confMatrix, numItems=None):
    """
    Calculate the normalised confusion matrix.

    Args:
        textLabels: List of classification labels.
        confMatrix: Confusion matrix to normalise.
        numItems: Total number of items tested.

    Returns:
        Normalised confusion matrix and overall percentage correct.
    """
    logging.info(confMatrix)
    if not numItems:
        numItems = np.sum(confMatrix)

    h = confMatrix
    total = h.astype(np.float).sum(axis=1)
    normConf = copy.deepcopy(h)
    normConf = normConf.astype(np.float)

    for l in range(h.shape[0]):
        if total[l] != 0:
            normConf[l, :] = normConf[l, :].astype(np.float) * 100 / total[l].astype(np.float)

    logging.info(normConf)

    # percCorrect = 100 * np.diag(h.astype(np.float)).sum() / numItems
    percCorrect = 100 * np.diag(normConf.astype(np.float)).sum() / np.sum(normConf)

    logging.info(str(percCorrect)[:5].ljust(7) + "% correct for training data")
    logging.info('')
    for i in range(confMatrix.shape[0]):
        for j in range(confMatrix.shape[0]):
            logging.info(str(normConf[i, j])[:5].ljust(7) + '% of ' + str(textLabels[i]) +
                         ' classified as ' + str(textLabels[j]))
        logging.info('')
    return [normConf, percCorrect]

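# A worked sketch (made-up confusion matrix): each row is normalised to
# percentages before the overall accuracy is computed:
#
#     cm = np.array([[8., 2.], [1., 9.]])
#     normConf, percCorrect = calculateData(['push', 'pull'], cm)
#     # normConf rows become [80., 20.] and [10., 90.]; percCorrect == 85.0
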
def combineClassifications(thisModel, labels, likelihoods):
    """
    Combine multiple classifications into a single classification.

    Args:
        thisModel: SAMObject model.
        labels: List of labels for classifications.
        likelihoods: List of likelihoods.

    Returns:
        Label with the highest likelihood together with the normalised likelihood.
    """
    # if len(thisModel) > 1:
    #     labelList = copy.deepcopy(thisModel[0].textLabels)
    #     labelList.append('unknown')
    # else:
    labelList = copy.deepcopy(thisModel[0].textLabels)

    sumLikelihoods = [None] * (len(labelList))
    counts = [0] * (len(labelList))

    for i in range(len(labels)):
        idx = [j for j, k in enumerate(labelList) if k == labels[i]][0]
        counts[idx] += 1
        if sumLikelihoods[idx] is None:
            sumLikelihoods[idx] = likelihoods[i][thisModel[0].SAMObject.Q]
        else:
            sumLikelihoods[idx] += likelihoods[i][thisModel[0].SAMObject.Q]

    m = max(sumLikelihoods)
    maxIdx = [j for j, k in enumerate(sumLikelihoods) if k == m][0]

    return [labelList[maxIdx], m / counts[maxIdx]]

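# A minimal usage sketch (hypothetical inputs; Q indexes the likelihood entry
# used for the combination):
#
#     Q = thisModel[0].SAMObject.Q
#     labels = ['wave', 'wave', 'push']
#     likelihoods = [np.random.rand(Q + 1) for _ in labels]
#     label, normLikelihood = combineClassifications(thisModel, labels, likelihoods)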