import matplotlib
matplotlib.use("TkAgg")  # backend must be selected before importing pyplot
import matplotlib.pyplot as plt
import numpy as np
import numpy.matlib  # provides np.matlib.repmat used in singleRecall
import time
import copy
import logging
import psutil
from IPython.display import clear_output
from sklearn.mixture import GMM
import ipyparallel as ipp
from collections import Mapping, Container
from sys import getsizeof
from operator import gt
from SAM.SAM_Core import SAM_utils as utils  # assumed import path for the SAM utility helpers used below

np.set_printoptions(threshold=np.nan, precision=2)
def deep_getsizeof(o, ids):
    """
    Method to calculate the size of an object `o` in bytes.

    Args:
        o: Object to calculate the size of.
        ids: Set of ids to not consider when calculating the size of the object.

    Returns:
        Size of object in bytes.
    """
    d = deep_getsizeof
    if id(o) in ids:
        return 0

    r = getsizeof(o)
    ids.add(id(o))

    if isinstance(o, str) or isinstance(o, unicode):
        return r

    if isinstance(o, Mapping):
        return r + sum(d(k, ids) + d(v, ids) for k, v in o.iteritems())

    if isinstance(o, Container):
        return r + sum(d(x, ids) for x in o)

    if 'SAM' in o.__class__.__name__:
        total = 0
        for attr, value in o.__dict__.iteritems():
            total += d(value, ids)
        return r + total

    return r
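As a usage sketch, deep_getsizeof can size an arbitrarily nested container; the ids set prevents shared objects from being counted twice. The MB conversion mirrors how segmentTesting uses the function further below:

payload = {'features': range(1000), 'labels': ['known', 'unknown']}
size_bytes = deep_getsizeof(payload, set())
print('payload size: %d bytes (%.4f MB)' % (size_bytes, size_bytes / 1024.0 / 1024.0))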
def calibrateModelRecall(thisModel):
    """
    Logic to initialise calibration of model recall in order to recognise known from unknown instances.

    Args:
        thisModel: SAMObject model to calibrate.

    Returns:
        None
    """
    if len(thisModel) > 1:
        calibrateMultipleModelRecall(thisModel)
    elif hasattr(thisModel[0], 'allDataDict'):
        logging.info('calibrating model')
        calibrateSingleModelRecall(thisModel)
    else:
        logging.warning('no calibration')
def calibrateSingleModelRecall(thisModel):
    """
    Perform calibration for single model implementations.

    This method either uses the Bhattacharyya distance between the variance distributions of known and unknown training instances, or uses histograms of those variance distributions, to calibrate the known/unknown classification. It depends on the following parameters present in config.ini (see the example fragment after this function):

    1) __useMaxDistance__ : `False` or `True`. Enables the Bhattacharyya distance method for recognising known and unknown instances.
    2) __calibrateUnknown__ : `True` or `False`. Turns the calibration of the model for known and unknown inputs on or off.
    3) __noBins__ : Integer number of bins for the histogram method, used when __calibrateUnknown__ is `True` and __useMaxDistance__ is `False`.
    4) __method__ : String indicating the decision rule when histograms are used. The multi-dimensional probabilities of known and unknown are both read from the histograms; `sumProb` decides on the larger total after summing the known and unknown probabilities independently, while `mulProb` decides on the larger product after multiplying them independently.

    Args:
        thisModel: SAMObject model to calibrate.

    Returns:
        None
    """
    yCalib = formatDataFunc(thisModel[0].allDataDict['Y'])
    logging.info('entering segment testing')
    labelList, confMatrix, ret, variancesKnown, variancesUnknown = segmentTesting(thisModel, yCalib,
                                                                                  thisModel[0].allDataDict['L'],
                                                                                  thisModel[0].verbose, 'calib',
                                                                                  serialMode=False,
                                                                                  optimise=thisModel[0].optimiseRecall,
                                                                                  calibrate=True)
    thisModel[0].classificationDict = dict()

    if thisModel[0].useMaxDistance:
        # Bhattacharyya distance method: find the variance dimension that best
        # separates the known and unknown variance distributions.
        [mk, vk, rk] = utils.meanVar_varianceDistribution(variancesKnown)
        [muk, vuk, ruk] = utils.meanVar_varianceDistribution(variancesUnknown)

        distance = []
        for j in range(len(mk)):
            distance.append(utils.bhattacharyya_distance(mk[j], muk[j], vk[j], vuk[j]))

        if distance is not None:
            maxIdx = distance.index(max(distance))
        thisModel[0].classificationDict['bestDistanceIDX'] = maxIdx
        thisModel[0].classificationDict['bestDistance_props'] = {'KnownMean': mk[maxIdx], 'UnknownMean': muk[maxIdx],
                                                                 'KnownVar': vk[maxIdx], 'UnknownVar': vuk[maxIdx]}

        intersection = utils.solve_intersections(mk[maxIdx], muk[maxIdx], np.sqrt(vk[maxIdx]), np.sqrt(vuk[maxIdx]))

        maxLim = max(rk[maxIdx][1], ruk[maxIdx][1])
        minLim = min(rk[maxIdx][0], ruk[maxIdx][0])

        # Discard intersections that fall outside the range of the data.
        delList = []
        for j in range(len(intersection)):
            if intersection[j] > maxLim or intersection[j] < minLim:
                delList.append(j)

        thisModel[0].classificationDict['segIntersections'] = np.delete(intersection, delList)
        thisModel[0].classificationDict['bhattaDistances'] = distance

        logging.info('Num Intersections: ' + str(len(thisModel[0].classificationDict['segIntersections'])))

        [thisModel[0].classificationDict['varianceThreshold'],
         thisModel[0].classificationDict['varianceDirection']] = \
            calculateVarianceThreshold(thisModel[0].classificationDict['segIntersections'], mk[maxIdx], muk[maxIdx],
                                       vk[maxIdx], vuk[maxIdx])

        logging.info('varianceThreshold ' + str(thisModel[0].classificationDict['varianceThreshold']))
        logging.info('varianceDirection ' + str(thisModel[0].classificationDict['varianceDirection']))
    else:
        # Histogram method: build normalised histograms of the known and unknown
        # variances for each dimension (the last two entries are the appended
        # sum and mean, hence the - 2).
        variancesKnownArray = np.asarray(variancesKnown)
        variancesUnknownArray = np.asarray(variancesUnknown)
        varianceAllArray = np.vstack([variancesKnownArray, variancesUnknownArray])
        histKnown = [None] * (len(variancesKnownArray[0]) - 2)
        binEdges = [None] * (len(variancesKnownArray[0]) - 2)
        histUnknown = [None] * (len(variancesKnownArray[0]) - 2)

        thisModel[0].classificationDict['binWidth'] = thisModel[0].paramsDict['binWidth']
        thisModel[0].classificationDict['method'] = thisModel[0].paramsDict['method']

        numBins = np.ceil(np.max(varianceAllArray) / thisModel[0].classificationDict['binWidth'])

        bins = range(int(numBins))
        bins = np.multiply(bins, thisModel[0].classificationDict['binWidth'])

        for j in range(len(variancesKnown[0]) - 2):
            histKnown[j], binEdges[j] = np.histogram(variancesKnownArray[:, j], bins=bins)
            histKnown[j] = 1.0 * histKnown[j] / np.sum(histKnown[j])

            histUnknown[j], _ = np.histogram(variancesUnknownArray[:, j], bins=bins)
            histUnknown[j] = 1.0 * histUnknown[j] / np.sum(histUnknown[j])

        thisModel[0].classificationDict['histKnown'] = histKnown
        thisModel[0].classificationDict['binEdgesKnown'] = binEdges
        thisModel[0].classificationDict['histUnknown'] = histUnknown

    thisModel[0].calibrated = True
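For reference, the four parameters above live in the model's config.ini. A hypothetical fragment for the histogram method (section name and values are illustrative; note that the code reads the bin width through paramsDict['binWidth'] alongside the documented noBins):

[model_options]
calibrateUnknown = True
useMaxDistance = False
noBins = 20
method = sumProb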
def calibrateMultipleModelRecall(thisModel):
    """
    Perform calibration for multiple model implementations.

    In contrast with calibrateSingleModelRecall, this method calibrates known and unknown according to measures of familiarity between all model classes. The familiarity of each class with every other class and with itself is then used to perform a Bayesian decision depending on the resulting familiarity when testing a new instance.

    Args:
        thisModel: SAMObject model to calibrate.

    Returns:
        None
    """
    cmSize = len(thisModel[0].textLabels)
    confMatrix = np.zeros((cmSize, cmSize))

    # Split each class model's data into validation and testing halves.
    Y_valid = []
    Y_testing = []
    for i in range(len(thisModel)):
        if thisModel[i].SAMObject.model:
            # De-normalise the stored test data before splitting.
            yy_test = thisModel[i].Ytestn.copy()
            yy_test *= thisModel[i].Ystd
            yy_test += thisModel[i].Ymean
            y_valid_tmp, y_test_tmp, _, _ = utils.random_data_split(yy_test, [0.5, 0.5])
            Y_valid.append(y_valid_tmp.copy())
            Y_testing.append(y_test_tmp.copy())

    # Measure the familiarity of each model with validation data from every class.
    familiarities = [None] * (len(thisModel) - 1)
    for i in range(len(thisModel)):
        if thisModel[i].SAMObject.model:
            familiarities[i - 1] = np.zeros((Y_valid[i - 1].shape[0], (len(thisModel) - 1)))
            logging.info("## True label is " + thisModel[i].modelLabel)
            for k in range(Y_valid[i - 1].shape[0]):
                sstest = []
                logging.info('# k= ' + str(k))
                for j in range(len(thisModel)):
                    if thisModel[j].SAMObject.model:
                        yy_test = Y_valid[i - 1][k, :][None, :].copy()
                        # Normalise with the candidate model's statistics.
                        yy_test -= thisModel[j].Ymean
                        yy_test /= thisModel[j].Ystd
                        sstest.append(thisModel[j].SAMObject.familiarity(yy_test,
                                                                         optimise=thisModel[0].optimiseRecall))
                        familiarities[i - 1][k, j - 1] = sstest[-1]
                for j in range(len(sstest)):
                    if j == np.argmax(sstest):
                        msg = '*'
                    else:
                        msg = ' '
                    logging.info(msg + ' Familiarity of model ' + thisModel[j + 1].modelLabel + ' given label: ' +
                                 thisModel[i].modelLabel + ' in valid: ' + str(sstest[j]))
                confMatrix[i - 1, np.argmax(sstest)] += 1

    # Fit one GMM classifier per class over the familiarity scores and derive
    # per-class acceptance thresholds from the in-class probabilities.
    classifiers = []
    classif_thresh = []
    familiarity_predictions = []
    tmp_s = 0.5  # spread multiplier for the acceptance thresholds (value assumed; elided in the source)
    for i in range(len(thisModel[0].textLabels)):
        X_train = familiarities[0][:, i][:, None]
        y_train = np.zeros((familiarities[0][:, i][:, None].shape[0], 1))
        for j in range(1, len(thisModel[0].textLabels)):
            X_train = np.vstack((X_train, familiarities[j][:, i][:, None]))
            y_train = np.vstack((y_train, j + np.zeros((familiarities[j][:, i][:, None].shape[0], 1))))

        n_classes = len(np.unique(y_train))
        classifiers.append(GMM(n_components=n_classes, covariance_type='full', init_params='wc', n_iter=2000))
        # Seed the component means with the per-class familiarity means.
        classifiers[-1].means_ = np.array([X_train[y_train == kk].mean(axis=0)
                                           for kk in xrange(n_classes)])[:, None]
        classifiers[-1].fit(X_train)
        familiarity_predictions.append(classifiers[-1].predict(X_train))

        # Acceptance interval: mean +/- tmp_s standard deviations of the in-class probability.
        tmp_i = classifiers[i].predict_proba(X_train[y_train == i][:, None])[:, i]
        classif_thresh.append([tmp_i.mean() - tmp_s * tmp_i.std(), tmp_i.mean() + tmp_s * tmp_i.std()])

    thisModel[0].classificationDict = dict()
    thisModel[0].classificationDict['classifiers'] = classifiers
    thisModel[0].classificationDict['classif_thresh'] = classif_thresh
    thisModel[0].calibrated = True
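The calibration above fits a 1-D GMM to the familiarity scores each model assigns to validation data from every class, then thresholds the in-class probability at mean +/- tmp_s standard deviations. A self-contained sketch of the same idea on synthetic scores (class 0 scores drawn high, class 1 low; all values are illustrative, and GMM is the pre-0.20 sklearn API used by this module):

import numpy as np
from sklearn.mixture import GMM  # replaced by GaussianMixture in later sklearn versions

np.random.seed(0)
# Synthetic familiarity scores: rows from class 0 score high, class 1 low.
X_train = np.vstack([np.random.normal(0.9, 0.05, (50, 1)),
                     np.random.normal(0.2, 0.05, (50, 1))])
y_train = np.vstack([np.zeros((50, 1)), np.ones((50, 1))])

n_classes = 2
clf = GMM(n_components=n_classes, covariance_type='full', init_params='wc', n_iter=2000)
# Seed the component means with the per-class score means, as the calibration above does.
clf.means_ = np.array([X_train[(y_train == kk).ravel()].mean(axis=0) for kk in range(n_classes)])
clf.fit(X_train)

# Probability that a new familiarity score belongs to the class-0 component.
print(clf.predict_proba(np.array([[0.85]]))[:, 0])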
def formatDataFunc(Ydata):
    """
    Utility function to format data for testing.

    Args:
        Ydata: Data to format for testing.

    Returns:
        Formatted data for testing.
    """
    yDataList = []
    for j in range(Ydata.shape[0]):
        yDataList.append(Ydata[j][None, :])
    return yDataList
def singleRecall(thisModel, testInstance, verbose, visualiseInfo=None, optimise=100):
    """
    Method that performs classification for single model implementations.

    This method returns the classification label of a test instance by calculating the predictive mean and variance of the backwards mapping and subsequently decides first whether the test instance is known or unknown and, if known, its most probable classification label.

    Args:
        thisModel: SAMObject model to recall from.
        testInstance: Novel feature vector to test.
        verbose: Enable or disable logging to stdout.
        visualiseInfo: None to disable plotting and plotObject to display plot of recall.
        optimise: Number of optimisation iterations to perform during recall.

    Returns:
        Classification label and variance if __calibrateUnknown__ is set to `False` in the config file. Otherwise returns classification label and normalised classification probability.
    """
    # Normalise the test instance with the model's training statistics.
    testValue = testInstance - thisModel.Ymean
    testValue /= thisModel.Ystd

    ret = thisModel.SAMObject.pattern_completion(testValue, visualiseInfo=visualiseInfo, optimise=optimise)
    if ret is None:  # guard reconstructed; the original test is elided
        return ['unknown', 0]

    # Predictive mean (mm) and per-dimension variances (vv); the variance list is
    # augmented with its sum and mean, which is why vv[:-2] is used below.
    mm = ret[0]
    vv = list(ret[1][0])
    svv = sum(vv)
    mvv = svv / len(vv)
    vv.append(svv)
    vv.append(mvv)

    # Find the nearest latent point to the predicted mean.
    k = np.matlib.repmat(mm[0].values, thisModel.SAMObject.model.X.mean.shape[0], 1)
    pow2 = np.power(thisModel.SAMObject.model.X.mean - k, 2)
    s = np.power(np.sum(pow2, 1), 0.5)
    nn = np.argmin(s)

    if thisModel.SAMObject.type == 'mrd':
        classLabel = thisModel.textLabels[int(thisModel.SAMObject.model.bgplvms[1].Y[nn, :])]
    elif thisModel.SAMObject.type == 'bgplvm':
        classLabel = thisModel.textLabels[int(thisModel.L[nn, :])]

    known = True
    if thisModel.calibrated:
        if thisModel.useMaxDistance:
            known = utils.varianceClass(thisModel.classificationDict['varianceDirection'],
                                        vv[thisModel.classificationDict['bestDistanceIDX']],
                                        thisModel.classificationDict['varianceThreshold'])

            details = str(thisModel.classificationDict['varianceThreshold']) + ' ' + \
                      str(thisModel.classificationDict['varianceDirection'])

            probClass = vv[thisModel.classificationDict['bestDistanceIDX']]
        else:
            P_Known_given_X = utils.PfromHist(vv[:-2], thisModel.classificationDict['histKnown'],
                                              thisModel.classificationDict['binWidth'])
            P_Unknown_given_X = utils.PfromHist(vv[:-2], thisModel.classificationDict['histUnknown'],
                                                thisModel.classificationDict['binWidth'])

            if thisModel.classificationDict['method'] == 'mulProb':
                s1 = reduce(lambda x, y: x * y, P_Known_given_X)
                s2 = reduce(lambda x, y: x * y, P_Unknown_given_X)
            else:
                s1 = np.sum(P_Known_given_X)
                s2 = np.sum(P_Unknown_given_X)

            if s1 > s2:
                known = True
                probClass = s1
                details = s1, ' > ', s2
            else:
                known = False
                probClass = s2
                details = s2, ' > ', s1

    if thisModel.calibrated:
        if known:
            textStringOut = classLabel
        else:
            textStringOut = 'unknown'
            runnerUp = classLabel
    else:
        textStringOut = classLabel

    if thisModel.calibrated:
        if textStringOut == 'unknown':
            logging.info("With " + str(probClass) + " prob. error the new instance is " + str(runnerUp))
            logging.info('But ' + str(details) + ' than ' + str(probClass) + ' so class as ' + str(textStringOut))
        else:
            logging.info("With " + str(probClass) + " prob. error the new instance is " + str(textStringOut))
    else:
        logging.info("With " + str(vv) + " prob. error the new instance is " + str(textStringOut))

    if thisModel.calibrated:
        return [textStringOut, probClass / len(vv)]
    else:
        return [textStringOut, vv]
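When the model is calibrated with histograms, the decision above reduces to reading a per-dimension probability for each observed variance out of the known and unknown histograms and comparing either their sums (sumProb) or products (mulProb). A toy sketch of that rule, with a simple stand-in for utils.PfromHist (the real helper's behaviour is assumed to match):

import numpy as np
from functools import reduce  # a builtin in Python 2; imported here for Python 3 compatibility

def p_from_hist(values, hists, binWidth):
    # Stand-in for utils.PfromHist: probability of each value under its histogram.
    probs = []
    for j, v in enumerate(values):
        idx = int(v / binWidth)
        probs.append(hists[j][idx] if idx < len(hists[j]) else 0.0)
    return np.asarray(probs)

binWidth = 0.1
histKnown = [np.array([0.7, 0.2, 0.1]), np.array([0.6, 0.3, 0.1])]
histUnknown = [np.array([0.1, 0.3, 0.6]), np.array([0.2, 0.2, 0.6])]
vv = [0.05, 0.15]  # observed predictive variances, one per dimension

P_known = p_from_hist(vv, histKnown, binWidth)
P_unknown = p_from_hist(vv, histUnknown, binWidth)

# 'sumProb' compares sums; 'mulProb' compares products.
s1, s2 = np.sum(P_known), np.sum(P_unknown)
m1 = reduce(lambda x, y: x * y, P_known)
m2 = reduce(lambda x, y: x * y, P_unknown)
print('sumProb -> %s' % ('known' if s1 > s2 else 'unknown'))
print('mulProb -> %s' % ('known' if m1 > m2 else 'unknown'))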
def multipleRecall_noCalib(thisModel, testInstance, verbose, visualiseInfo=None, optimise=True):
    """
    Method that performs classification for uncalibrated multiple model implementations.

    Args:
        thisModel: SAMObject model to recall from.
        testInstance: Novel feature vector to test.
        verbose: Enable or disable logging to stdout.
        visualiseInfo: None to disable plotting and plotObject to display plot of recall.
        optimise: Number of optimisation iterations to perform during recall.

    Returns:
        Classification label and raw familiarity values.
    """
    result = []
    for j in thisModel:
        if j.SAMObject.model:
            tempTest = testInstance - j.Ymean
            tempTest /= j.Ystd
            yy_test = j.SAMObject.familiarity(tempTest, optimise=optimise)
            logging.info('Familiarity with ' + j.modelLabel + ' given current instance is: ' + str(yy_test))
            result.append(yy_test)
    maxIdx = np.argmax(result)

    return [thisModel[0].textLabels[maxIdx - 1], result[maxIdx][0]]
def multipleRecall(thisModel, testInstance, verbose, visualiseInfo=None, optimise=100):
    """
    Method that performs classification for calibrated multiple model implementations.

    Args:
        thisModel: SAMObject model to recall from.
        testInstance: Novel feature vector to test.
        verbose: Enable or disable logging to stdout.
        visualiseInfo: None to disable plotting and plotObject to display plot of recall.
        optimise: Number of optimisation iterations to perform during recall.

    Returns:
        Classification label and calibrated familiarity values.
    """
    cmSize = len(thisModel[0].textLabels)
    familiarities_tmp = []
    classif_tmp = []
    label = 'unknown'

    if not thisModel[0].classificationDict['classifiers']:
        calibrateMultipleModelRecall(thisModel)

    for j in range(cmSize):
        tempTest = testInstance - thisModel[j + 1].Ymean
        tempTest /= thisModel[j + 1].Ystd
        yy_test = thisModel[j + 1].SAMObject.familiarity(tempTest, optimise=optimise)[:, None]

        cc = thisModel[0].classificationDict['classifiers'][j].predict_proba(yy_test)[:, j]

        logging.info('Familiarity with ' + thisModel[j + 1].modelLabel + ' given current instance is: ' +
                     str(yy_test) + ' ' + str(cc[0]))
        familiarities_tmp.append(yy_test)
        classif_tmp.append(cc)

    bestConfidence = np.argmax(classif_tmp)

    for j in range(cmSize):
        if thisModel[0].classificationDict['classif_thresh'][j][0] <= \
                classif_tmp[j] <= thisModel[0].classificationDict['classif_thresh'][j][1]:
            label = thisModel[0].textLabels[j]

    return [label, classif_tmp[bestConfidence][0]]
def wait_watching_stdout(ar, dt=1, truncate=1000):
    """
    Monitoring function that logs to stdout the logging output of multiple threads.

    Args:
        ar: Async result object to monitor (assumed to be an ipyparallel AsyncResult).
        dt: Integer delta time between readings of ar.stdout.
        truncate: Integer limit on the number of characters returned by the threads.

    Returns:
        None
    """
    while not ar.ready():
        stdouts = ar.stdout
        if any(stdouts):
            clear_output()
            logging.info('-' * 30)
            logging.info("%.3fs elapsed" % ar.elapsed)
            for stdout in ar.stdout:
                if stdout:
                    logging.info("\n%s" % (stdout[-truncate:]))
        time.sleep(dt)
def testSegment(thisModel, Ysample, verbose, visualiseInfo=None, optimise=100):
    """
    Utility function to test a sample.

    This function determines the type of model being used for the testing and directs the query to the appropriate recall function.

    Args:
        thisModel: SAMObject model to recall from.
        Ysample: Novel feature vector to test.
        verbose: Enable or disable logging to stdout.
        visualiseInfo: `None` to disable plotting and plotObject to display plot of recall.
        optimise: Number of optimisation iterations to perform during recall.

    Returns:
        Classification result containing a list with the classification string and a measure of the familiarity or probability of the recall.
    """
    if len(thisModel) > 1:
        d = multipleRecall(thisModel, Ysample, verbose, visualiseInfo, optimise=optimise)
    else:
        d = singleRecall(thisModel[0], Ysample, verbose, visualiseInfo, optimise=optimise)
    return d
def segmentTesting(thisModel, Ysample, Lnum, verbose, label, serialMode=False, optimise=100, calibrate=False):
    """
    Method to test multiple samples at a time.

    Args:
        thisModel : SAMObject model to recall from.
        Ysample : Novel feature vector to test.
        Lnum : Ground truth labels to compare with.
        verbose : Enable or disable logging to stdout.
        label : Label for the current segments being tested.
        serialMode : Boolean to test serially or in parallel.
        optimise : Number of optimisation iterations to perform during recall.
        calibrate : Indicates calibration mode when True, which requires a different return.

    Returns:
        labelList, confMatrix, ret, variancesKnown, variancesUnknown if calibrate is `True`.
        labelList, confMatrix, labelComparisonDict if calibrate is `False`.

        labelList : List of classification labels.
        confMatrix : Numpy array with the confusion matrix.
        ret : Classification object.
        variancesKnown : Variances returned during calibration for known training instances.
        variancesUnknown : Variances returned during calibration for unknown training instances.
        labelComparisonDict : Dictionary with two items, `'original'` and `'results'`.
    """
    def testFunc(data, lab):
        d = testSegment(thisModel, data, verbose, visualiseInfo=None, optimise=optimise)
        if verbose:
            res = (lab == d[0])
            logging.info('Actual ' + str(lab).ljust(11) + ' Classification: ' + str(d[0]).ljust(11) +
                         ' with ' + str(d[1])[:6] + ' confidence: ' + str(res) + '\n')
        return d

    # Translate numeric ground-truth labels into strings when necessary.
    if type(Lnum).__module__ == np.__name__:
        useModelLabels = True
    else:
        useModelLabels = False

    labelList = copy.deepcopy(thisModel[0].textLabels)
    labelList.append('unknown')

    confMatrix = np.zeros((len(labelList), len(labelList)))

    numItems = len(Ysample)

    off1 = 11  # column widths for the aligned log output
    off2 = 8
    off3 = len(str(numItems))

    if useModelLabels:
        Lsample = [thisModel[0].textLabels[int(Lnum[i])] for i in range(len(Lnum))]
    else:
        Lsample = Lnum

    # Attempt to set up parallel workers unless serial mode was requested.
    logging.info('serialMode: ' + str(serialMode))
    if not serialMode and thisModel[0].parallelOperation:
        try:
            logging.info('Trying engines ...')
            c = ipp.Client()
            numWorkers = len(c._engines)
            logging.info('Number of engines: ' + str(numWorkers))
        except Exception:
            logging.error("Parallel workers not found")
            thisModel[0].parallelOperation = False
            numWorkers = 1
    else:
        logging.info(str(serialMode) + '= True')
        thisModel[0].parallelOperation = False
        numWorkers = 1
        logging.info('Number of engines: ' + str(numWorkers))

    # Time a few trial classifications to estimate the total testing time.
    vTemp = copy.deepcopy(verbose)
    verbose = False
    if len(Lsample) < 400:
        numTrials = len(Lsample) * 0.1
        numTrials = int(numTrials)
    else:
        numTrials = 40  # cap on the number of timing trials (value assumed; elided in the source)
    numTrials = max(numTrials, 1)
    t0 = time.time()
    for j in range(numTrials):
        testFunc(Ysample[j], Lsample[j])
    t1 = time.time()
    verbose = vTemp
    thisModel[0].avgClassTime = (t1 - t0) / numTrials
    logging.info('classification rate: ' + str(1.0 / thisModel[0].avgClassTime) + ' fps')
    logging.info('estimated time: ' + str(thisModel[0].avgClassTime * numItems / (60 * numWorkers)) + ' mins for ' +
                 str(numItems) + ' items with ' + str(numWorkers) + ' workers')

    # Check that enough memory is available to replicate the model across workers.
    modelSize = deep_getsizeof(thisModel, set()) / 1024.0 / 1024.0
    logging.info("modelSize: " + str(modelSize))
    logging.warning("required testing size: " + str((modelSize * numWorkers * 2) + 400) + " MB")
    freeSystemMem = float(psutil.virtual_memory()[4]) / 1024.0 / 1024.0
    logging.info("free memory: " + str(freeSystemMem) + " MB")

    t0 = time.time()
    if modelSize > 100 or not thisModel[0].parallelOperation or serialMode:
        logging.warning('Testing serially')
        ret = []
        for j in range(len(Lsample)):
            logging.info(str(j) + '/' + str(len(Lsample)))
            ret.append(testFunc(Ysample[j], Lsample[j]))
    else:
        logging.info('Testing in parallel')
        dview = c[:]
        lb = c.load_balanced_view()
        # Push the shared state to every engine before dispatching.
        dview.push({'thisModel': thisModel})
        dview.push({'verbose': verbose})
        dview.push({'optimise': optimise})
        syn = lb.map_async(testFunc, Ysample, Lsample)
        wait_watching_stdout(syn, dt=1, truncate=1000)
        ret = syn.get()
    t1 = time.time()
    logging.info('Actual time taken = ' + str(t1 - t0))

    if calibrate:
        variancesKnown = []
        variancesUnknown = []
        for i in range(len(ret)):
            currLabel = Lsample[i]

            result = (currLabel == ret[i][0])

            logging.info(str(i).rjust(off3) + '/' + str(numItems) + ' Truth: ' + currLabel.ljust(off1) +
                         ' Model: ' + ret[i][0].ljust(off1) + ' with ' + str(ret[i][1])[:6].ljust(off2) +
                         ' confidence: ' + str(result))

            if currLabel in thisModel[0].textLabels:
                knownLabel = True
            else:
                knownLabel = False
                currLabel = 'unknown'

            if knownLabel:
                variancesKnown.append(ret[i][1])
            else:
                variancesUnknown.append(ret[i][1])

            confMatrix[labelList.index(currLabel), labelList.index(ret[i][0])] += 1

        return labelList, confMatrix, ret, variancesKnown, variancesUnknown
    else:
        labelComparisonDict = dict()
        labelComparisonDict['original'] = []
        labelComparisonDict['results'] = []
        for i in range(len(ret)):
            currLabel = Lsample[i]
            retLabel = ret[i][0]

            if currLabel not in thisModel[0].textLabels:
                currLabel = 'unknown'

            result = (currLabel == retLabel)

            logging.info(str(i).rjust(off3) + '/' + str(numItems) + ' Truth: ' + currLabel.ljust(off1) +
                         ' Model: ' + retLabel.ljust(off1) + ' with ' + str(ret[i][1])[:6].ljust(off2) +
                         ' confidence: ' + str(result))

            labelComparisonDict['original'].append(Lsample[i])
            labelComparisonDict['results'].append(retLabel)
            confMatrix[labelList.index(currLabel), labelList.index(retLabel)] += 1
        return labelList, confMatrix, labelComparisonDict
def testSegments(thisModel, Ysample, Lnum, verbose, label, serialMode=False):
    """
    Function to test segments and return a confusion matrix.

    Args:
        thisModel : SAMObject model to recall from.
        Ysample : Novel feature vector to test.
        Lnum : Ground truth labels to compare with.
        verbose : Enable or disable logging to stdout.
        label : Label for the current segments being tested.
        serialMode : Boolean to test serially or in parallel.

    Returns:
        Confusion matrix, overall percentage correct, list of possible labels and a dictionary comparing ground truth with classification results.
    """
    labelList, confMatrix, labelComparisonDict = segmentTesting(thisModel, Ysample, Lnum, verbose, label,
                                                                serialMode=serialMode,
                                                                optimise=thisModel[0].optimiseRecall,
                                                                calibrate=False)

    dCalc = calculateData(labelList, confMatrix)

    return [dCalc[0], dCalc[1], labelList, labelComparisonDict]
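Assuming a trained model list thisModel and held-out arrays Ytest (features) and Ltest (numeric ground-truth labels), a typical call would look like this sketch (variable names are illustrative):

testData = formatDataFunc(Ytest)
normConf, percCorrect, labelList, labelComparisonDict = testSegments(thisModel, testData, Ltest,
                                                                     verbose=True, label='actions',
                                                                     serialMode=True)
print(str(percCorrect)[:5] + '% correct on held-out segments')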
def calculateVarianceThreshold(segIntersections, mk, muk, vk, vuk):
    """
    Method to decide on the approach to be used for setting variance thresholds and the method of thresholding.

    Args:
        segIntersections : Array of gaussian intersection points that fall within the range of the data.
        mk : Means of known.
        muk : Means of unknown.
        vk : Variances of known.
        vuk : Variances of unknown.

    Returns:
        List of threshold variances and method of thresholding.
    """
    thresh = None
    direction = None
    if len(segIntersections) == 0:
        # No in-range intersections: threshold halfway between the two means.
        # (The original distinguishes a further sub-case here; it is elided in the source.)
        thresh = [(max(mk, muk) - min(mk, muk)) / 2 + min(mk, muk)]
        direction = ['smaller']
    elif len(segIntersections) == 1:
        thresh = [segIntersections]
        direction = ['smaller']
    elif len(segIntersections) == 2:
        thresh = [min(segIntersections), max(segIntersections)]
        if mk < muk:  # branch condition reconstructed; the original test is elided
            direction = ['smaller', 'greater']
        else:
            direction = ['greater', 'smaller']
    else:
        thresh = [np.ptp(segIntersections) / 2 + min(segIntersections)]
        direction = ['smaller']

    if direction is None:
        direction = ['greater']

    return [thresh, direction]
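For intuition, consider known/unknown variance gaussians with means mk = 0.2 and muk = 0.6. The sketch below exercises each branch arity; with a single crossing at 0.4 the threshold is the crossing itself and variances 'smaller' than it are treated as known (all numbers are illustrative):

import numpy as np

cases = [np.array([]), np.array([0.4]), np.array([0.3, 0.5]), np.array([0.2, 0.4, 0.6])]
for segIntersections in cases:
    thresh, direction = calculateVarianceThreshold(segIntersections, mk=0.2, muk=0.6, vk=0.01, vuk=0.04)
    print(len(segIntersections), thresh, direction)
# 0 intersections -> midpoint of the means; 2 -> a [lower, upper] band; 3+ -> midpoint of the spread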
def calculateData(textLabels, confMatrix, numItems=None):
    """
    Calculate the normalised confusion matrix.

    Args:
        textLabels: List of classifications.
        confMatrix: Confusion matrix to normalise.
        numItems: Total number of items tested.

    Returns:
        Normalised confusion matrix and overall percentage correct.
    """
    logging.info(confMatrix)

    if not numItems:
        numItems = np.sum(confMatrix)

    # Normalise each row of the confusion matrix to percentages.
    h = confMatrix
    total = h.astype(np.float).sum(axis=1)
    normConf = copy.deepcopy(h)
    normConf = normConf.astype(np.float)

    for l in range(h.shape[0]):
        if total[l] > 0:
            normConf[l, :] = normConf[l, :].astype(np.float) * 100 / total[l].astype(np.float)

    logging.info(normConf)

    # Overall percentage correct is the diagonal's share of the normalised mass.
    percCorect = 100 * np.diag(normConf.astype(np.float)).sum() / np.sum(normConf)

    logging.info(str(percCorect)[:5].ljust(7) + "% correct for training data")

    for i in range(confMatrix.shape[0]):
        for j in range(confMatrix.shape[0]):
            logging.info(str(normConf[i, j])[:5].ljust(7) + '% of ' + str(textLabels[i]) +
                         ' classified as ' + str(textLabels[j]))

    return [normConf, percCorect]
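A worked example of the normalisation, with hypothetical labels: each row of the raw confusion matrix is scaled to percentages by its row total, and the overall percentage correct is the diagonal's share of the normalised matrix.

import numpy as np

confMatrix = np.array([[8, 2],
                       [1, 9]])
normConf, percCorrect = calculateData(['push', 'pull'], confMatrix)
# Row 0 becomes [80., 20.] and row 1 becomes [10., 90.];
# percCorrect = 100 * (80 + 90) / 200 = 85.0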
def combineClassifications(thisModel, labels, likelihoods):
    """
    Combine multiple classifications into a single classification.

    Args:
        thisModel: SAMObject model.
        labels: List of labels for classifications.
        likelihoods: List of likelihoods.

    Returns:
        Label with the highest likelihood together with the normalised likelihood.
    """
    labelList = copy.deepcopy(thisModel[0].textLabels)

    sumLikelihoods = [None] * (len(labelList))
    counts = [0] * (len(labelList))

    for i in range(len(labels)):
        idx = [j for j, k in enumerate(labelList) if k == labels[i]][0]
        counts[idx] += 1
        if sumLikelihoods[idx] is None:
            sumLikelihoods[idx] = likelihoods[i][thisModel[0].SAMObject.Q]
        else:
            sumLikelihoods[idx] += likelihoods[i][thisModel[0].SAMObject.Q]

    m = max(sumLikelihoods)
    maxIdx = [j for j, k in enumerate(sumLikelihoods) if k == m][0]

    return [labelList[maxIdx], m / counts[maxIdx]]
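For example, combining three windowed classifications where 'wave' wins twice: sumLikelihoods accumulates the likelihood entry at index thisModel[0].SAMObject.Q for each label, and the winner's total is normalised by its count. A sketch with a stand-in model object (Q = 0 is assumed here so each likelihood list's first entry is used):

class _Stub(object):
    pass

model = _Stub()
model.textLabels = ['wave', 'point']
model.SAMObject = _Stub()
model.SAMObject.Q = 0  # index of the likelihood entry to accumulate (assumed)

labels = ['wave', 'point', 'wave']
likelihoods = [[0.8], [0.6], [0.7]]
print(combineClassifications([model], labels, likelihoods))
# -> ['wave', 0.75]  (0.8 + 0.7 summed, divided by a count of 2)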
def wait_watching_stdout(ar, dt=1, truncate=1000)
Monitoring function that logs to stdout the logging output of multiple threads.
def segmentTesting(thisModel, Ysample, Lnum, verbose, label, serialMode=False, optimise=100, calibrate=False)
Method to test multiple samples at a time.
def calibrateSingleModelRecall(thisModel)
Perform calibration for single model implementations.
def testSegments(thisModel, Ysample, Lnum, verbose, label, serialMode=False)
Function to test segments and return a confusion matrix.
def combineClassifications(thisModel, labels, likelihoods)
Combine multiple classifications into a single classification.
def testSegment(thisModel, Ysample, verbose, visualiseInfo=None, optimise=100)
Utility function to test a sample.
def calculateVarianceThreshold(segIntersections, mk, muk, vk, vuk)
Method to decide on the approach to be used for setting variance thresholds and the method of thresholding.
def multipleRecall_noCalib(thisModel, testInstance, verbose, visualiseInfo=None, optimise=True)
Method that performs classification for uncalibrated multiple model implementations.
def singleRecall(thisModel, testInstance, verbose, visualiseInfo=None, optimise=100)
Method that performs classification for single model implementations.
def calibrateModelRecall(thisModel)
Logic to initialise calibration of model recall in order to recognize known from unknown instances.
def calibrateMultipleModelRecall(thisModel)
Perform calibration for multiple model implementations.
def formatDataFunc(Ydata)
Utility function to format data for testing.
def calculateData(textLabels, confMatrix, numItems=None)
Calculate the normalised confusion matrix.
def multipleRecall(thisModel, testInstance, verbose, visualiseInfo=None, optimise=100)
Method that performs classification for calibrated multiple model implementations.
def deep_getsizeof(o, ids)
Method to calculate the size of an object o in bytes.