import matplotlib.cm as cm
import cPickle as pickle
from scipy.spatial import distance
"""SAM based on Latent Feature Models."""

def __init__(self):
    """Initialise the Latent Feature Models."""

def store(self, observed, inputs=None, Q=None, kernel=None, num_inducing=None, init_X='PCA'):
    """Store events.

    Read in the args observed and inputs and configure the LFM model for training or recollection.

    Args:
        observed: An `(N x D)` matrix, where `N` is the number of points and `D` the number of features needed to describe each point.
        inputs: An `(N x Q)` matrix, where `Q` is the number of features per input. Leave as `None` for unsupervised learning.
        Q: Leave as `None` for supervised learning (`Q` will then be the dimensionality of the inputs). Otherwise, specify with an integer `Q` the dimensionality (number of features) of the compressed space that acts as "latent" inputs.
        kernel: Kernel for the GP. Can be left as `None` for the default kernel.
        num_inducing: Integer number of inducing points to use. Inducing points are a fixed number of variables through which all memory is filtered, to achieve full compression. E.g. they can correspond to the number of neurons. This number is not absolutely fixed, but it also doesn't necessarily grow proportionally to the data, since synapses can make more complicated combinations of the existing neurons. The GP here plays the role of "synapses", by learning non-linear and rich combinations of the inducing points.
        init_X: Initialisation method for the model output. String, either `PCA` or `PPCA`. Default = `PCA`. Initialisation falls back to `PPCA` when `PCA` fails.
    """
    assert(isinstance(observed, dict))
    self.N = observed[observed.keys()[0]].shape[0]

    if num_inducing is None:
    assert(self.type == [] or self.type == 'bgplvm')

    assert(self.type == [] or self.type == 'mrd')

    assert(self.type == [] or self.type == 'gp')
    self.Q = inputs.shape[1]

    kernel = GPy.kern.RBF(self.Q, ARD=True) + GPy.kern.Bias(self.Q) + GPy.kern.White(self.Q)
    if self.type == 'bgplvm':
        if init_X == 'PCA':
            try:
                self.model = GPy.models.BayesianGPLVM(Ytmp, self.Q, kernel=kernel, num_inducing=self.num_inducing)
            except:  # fall back to PPCA below
                pcaFailed = True
                print("Initialisation with PCA failed. Initialising with PPCA...")
        elif init_X == 'PPCA' or pcaFailed:
            print("Initialising with PPCA...")
            Xr = GPy.util.linalg.ppca(Ytmp, self.Q, 2000)[0]

            self.model = GPy.models.BayesianGPLVM(Ytmp, self.Q, kernel=kernel, num_inducing=self.num_inducing, X=Xr)
        self.model['.*noise'] = Ytmp.var() / 100.
    elif self.type == 'mrd':
        if init_X == 'PCA':
            try:
                self.model = GPy.models.MRD(self.Ylist, input_dim=self.Q, num_inducing=self.num_inducing,
                                            kernel=kernel, initx="PCA_single", initz='permute')
            except:  # fall back to PPCA below
                pcaFailed = True
                print("Initialisation with PCA failed. Initialising with PPCA...")
        elif init_X == 'PPCA' or pcaFailed:
            print("Initialising with PPCA...")
            from GPy.util.initialization import initialize_latent
            Xr = np.zeros((self.Ylist[0].shape[0], self.Q))
            for qs, Y in zip(np.array_split(np.arange(self.Q), len(self.Ylist)), self.Ylist):
                try:
                    x, frcs = initialize_latent('PCA', len(qs), Y)
                except:
                    x = GPy.util.linalg.ppca(Y, len(qs), 2000)[0]
            self.model = GPy.models.MRD(self.Ylist, input_dim=self.Q, num_inducing=self.num_inducing,
                                        kernel=kernel, initx="PCA_single", initz='permute', X=Xr)
        self.model['.*noise'] = [yy.var() / 100. for yy in self.model.Ylist]
    elif self.type == 'gp':
        # ...

    self.model.data_labels = None
    self.model.textLabelPts = dict()
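A minimal usage sketch for store (illustrative: `lfm` stands for an instance of this class, whose definition is not shown in this fragment, and the data are synthetic):

    import numpy as np
    Y = np.random.randn(100, 20)                    # N=100 points, D=20 features each
    lfm.store(observed={'Y': Y}, inputs=None, Q=2,  # unsupervised: compress to Q=2 latent dims
              kernel=None, num_inducing=15)         # 15 inducing points play the role of "neurons"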
159 """Add labels to observations. 162 If observables are associated with labels, they can be added here. Labels has to be a matrix of size N x K, where K is the total number of different labels. If e.g. the i-th row of L is [1 0 0] (or [1 -1 -1]) then this means that there are K=3 different classes and the i-th row of the observables belongs to the first class. 165 labels: list of strings containing the labels for the observations 170 if len(labels.shape) == 1
or labels.shape[1] == 1:
171 self.
model.data_labels = labels
173 print "Warning: labels assumed to be in 1-of-K encoding!" 174 self.
model.data_labels = np.argmax(labels, 1)[:,
None]
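For instance, labels for K=3 classes in 1-of-K encoding take the second branch above (again, `lfm` is an assumed instance name):

    L = np.array([[1, 0, 0],     # point 0 belongs to class 0
                  [0, 0, 1],     # point 1 belongs to class 2
                  [0, 1, 0]])    # point 2 belongs to class 1
    lfm.add_labels(L)            # stored internally as np.argmax(L, 1)[:, None]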
def learn(self, optimizer='bfgs', max_iters=1000, init_iters=300, verbose=True):
    """Learn the model (analogous to "forming synapses" after perceiving data).

    Args:
        optimizer: String with the requested optimiser, taken from the list of available scipy optimisers.
        max_iters: Integer with the maximum number of training iterations for the second phase of training the model.
        init_iters: Integer with the maximum number of training iterations for the first phase of training.
        verbose: Boolean to turn logging to stdout on or off.
    """
    if self.type == 'bgplvm' or self.type == 'mrd':
        # First phase: optimise with the noise variance fixed.
        self.model['.*noise'].fix()
        self.model.optimize(optimizer, messages=verbose, max_iters=init_iters)
        # Second phase: release the noise and constrain it to stay positive.
        self.model['.*noise'].unfix()
        self.model['.*noise'].constrain_positive()

    self.model.optimize(optimizer, messages=verbose, max_iters=max_iters)

    # Remember which training points carry which label.
    for j in list(np.unique(self.model.data_labels)):
        self.model.textLabelPts[j] = [l for l, k in enumerate(self.model.data_labels) if k[0] == j]
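Typical call, using the defaults (sketch; `lfm` as above). The first `init_iters` iterations run with the noise variance fixed, the remaining `max_iters` with it released:

    lfm.learn(optimizer='bfgs', max_iters=1000, init_iters=300, verbose=True)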
def check_snr(self, warningEnable=True, messages=True):
    """Check the signal-to-noise ratio (SNR) of the trained model.

    Provides an indicator of successful learning by looking at the variance distribution of the model.

    Args:
        warningEnable: Boolean to switch warnings on or off in the case of a low SNR.
        messages: Boolean to turn output to stdout on or off.
    """
    if self.type == 'bgplvm':
        # SNR = signal variance / learned noise variance.
        snr = self.model.Y.var() / self.model.Gaussian_noise.variance.values[0]

        if messages:
            print('# SNR: ' + str(snr))
        if warningEnable and snr < 8:
            print(' WARNING! SNR is small!')
    elif self.type == 'mrd':
        snr = []
        for i in range(len(self.model.bgplvms)):
            snr.append(self.model.bgplvms[i].Y.var() / self.model.bgplvms[i].Gaussian_noise.variance.values[0])
            if messages:
                print('# SNR view ' + str(i) + ': ' + str(snr[-1]))
            if warningEnable and snr[-1] < 8:
                print(' WARNING! SNR for view ' + str(i) + ' is small!!')
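After learning, a quick sanity check (sketch): an SNR below 8 means the learned noise variance is large relative to the signal variance, and the warnings above fire:

    lfm.check_snr(warningEnable=True, messages=True)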
def visualise(self, which_indices=None, plot_scales=True):
    """Show the internal representation of the memory.

    Creates a 2D plot showing the mean and variance distribution of the model.

    Args:
        which_indices: Tuple of two integers that specify which of the `Q` indices that make up the model are to be plotted.
        plot_scales: Boolean to switch scale labelling on or off in the plots.
    """
    if self.type == 'bgplvm' or self.type == 'mrd':
        if self.model.data_labels is not None:
            ret = self.model.plot_latent(labels=self.model.data_labels, which_indices=which_indices)
        else:
            ret = self.model.plot_latent(which_indices=which_indices)
    elif self.type == 'gp':
        ret = self.model.plot()
    if self.type == 'mrd' and plot_scales:
        ret2 = self.model.plot_scales()
def visualise_interactive(self, dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False,
                          colorgray=True, view=0, which_indices=(0, 1)):
    """Interactive plot of the model.

    Show the internal representation of the memory and allow the user to interact with it to map samples/points from the compressed space to the original output space.

    Args:
        dimensions: Tuple of integers describing the dimensions that the image needs to be transposed to for display.
        transpose: Boolean whether to transpose the image before display.
        order: String, 'F' for Fortran (column-major) ordering or 'C' for C (row-major) ordering.
        invert: Boolean whether to invert the pixels or not.
        scale: Boolean whether to scale the image or not.
        colorgray: Boolean whether to plot in grayscale or not.
        view: Integer which, in the case of MRD models, describes the view to be plotted.
        which_indices: Tuple of two integers that specify which of the `Q` indices that make up the model are to be plotted.
    """
    if self.type == 'bgplvm':
        ax = self.model.plot_latent(which_indices)
        y = self.model.Y[0, :]
        if colorgray:
            data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=dimensions, transpose=transpose, order=order, invert=invert, scale=scale, cmap=cm.Greys_r)
        else:
            data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=dimensions, transpose=transpose, order=order, invert=invert, scale=scale)
        lvm = GPy.plotting.matplot_dep.visualize.lvm(self.model.X.mean[0, :].copy(), self.model, data_show, ax)
        raw_input('Press enter to finish')
    elif self.type == 'mrd':
        ax = self.model.bgplvms[view].plot_latent(which_indices)
        y = self.model.bgplvms[view].Y[0, :]
        if colorgray:
            data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=dimensions, transpose=transpose, order=order, invert=invert, scale=scale, cmap=cm.Greys_r)
        else:
            data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=dimensions, transpose=transpose, order=order, invert=invert, scale=scale)
        lvm = GPy.plotting.matplot_dep.visualize.lvm(self.model.bgplvms[view].X.mean[0, :].copy(), self.model.bgplvms[view], data_show, ax)
        raw_input('Press enter to finish')
def recall(self, locations):
    """Recall stored events.

    This is closely related to performing pattern completion, but given "training" data.

    Args:
        locations: Integer which is the index of the stored event.

    Returns:
        A `(D x 1)` numpy array containing the data of a training point as reconstructed by the model.
    """
    if locations == -1:
        locations = range(self.N)
    if self.type == 'bgplvm' or self.type == 'gp':
        return self.model.Y[locations, :].values
    elif self.type == 'mrd':
        return self.model.bgplvms[0].Y[locations, :].values
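Usage sketch (assuming the `-1` sentinel reconstructed above): recall a single stored event, or all of them:

    y5 = lfm.recall(5)      # reconstruction of the stored point with index 5
    Y_all = lfm.recall(-1)  # reconstructions of all N stored points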
def pattern_completion(self, test_data, view=0, verbose=False, visualiseInfo=None, optimise=100):
    """Recall novel events.

    In the case of supervised learning, pattern completion means that we give new inputs and infer their corresponding outputs. In the case of unsupervised learning, pattern completion means that we give new outputs and infer their corresponding "latent" inputs, i.e. the internal compressed representation of the new outputs in terms of the already formed "synapses".

    Args:
        test_data: A `(D x 1)` numpy array containing the feature vector for which you would like to obtain the closest neighbour.
        view: Integer which is the index for the view of the MRD model that will be used for pattern completion.
        verbose: Boolean switching logging to stdout on and off.
        visualiseInfo: Plot object returned by `visualise`. If present, the location of the pattern-completed point is plotted. If `None`, no plotting.
        optimise: Integer number of optimisation iterations when performing pattern completion.

    Returns:
        A `(Q x 1)` numpy array with the predicted mean, a `(Q x 1)` numpy array with the predicted variance, a plot object with the plotted point, and an inference object returned by the optimiser.
    """
    if self.type == 'bgplvm':
        tmp = self.model.infer_newX(test_data, optimize=False)[1]

        tmp.optimize(max_iters=optimise, messages=verbose)
        pred_mean = tmp.X.mean
        pred_variance = tmp.X.variance
    elif self.type == 'mrd':
        tmp = self.model.bgplvms[view].infer_newX(test_data, optimize=False)[1]

        tmp.optimize(max_iters=optimise, messages=verbose)
        pred_mean = tmp.X.mean
        pred_variance = tmp.X.variance
    elif self.type == 'gp':
        pred_mean, pred_variance = self.model.predict(test_data)

    if (self.type == 'mrd' or self.type == 'bgplvm') and visualiseInfo is not None:
        ax = visualiseInfo['ax']

        # inds0 and inds1 select the two latent dimensions used for plotting.
        pp = ax.plot(pred_mean[:, inds0], pred_mean[:, inds1], 'om', markersize=11, mew=11)

    return pred_mean, pred_variance, pp, tmp
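Unsupervised usage sketch (`lfm` trained as above): give a novel output and recover its latent representation:

    y_star = np.random.randn(1, 20)   # one novel point with the same D=20 features
    mean, var, pp, inf = lfm.pattern_completion(y_star, verbose=False, optimise=100)
    # mean and var describe the matching point in the Q-dimensional memory space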
378 """Pattern completion wrapper. 381 1) First, we do normal pattern completion, where given an output y, out map to the memory space to get a test memory x*. 382 2) Now the test memory x* is compared with stored memories. This allows us to infer the label of x*. If the labels are given in another modality (by default in the last one), then we return the label from that modality (careful, The encoding might be 1-of-K, e.g. -1 1 -1 -> 2 and also noise might exist). Instead, if the labels are not given in another modality (completely unsupervised learning), then we just return the index to the most similar training memory. 385 y : A `(Dx1)` numpy array containing the feature vector for which you would like to obtain the closest neighbour. 386 target_modality : Integer which is the index for the view of the MRD model that will be used for pattern completion. 389 Inference object returned by optimiser containing multi-dimensional mean and variance of nearest neighbour. 396 dists = np.zeros((self.
model.X.shape[0], 1))
398 for j
in range(dists.shape[0]):
399 dists[j, :] = distance.euclidean(self.
model.X.mean[j, :], mm[0].values)
400 nn, min_value = min(enumerate(dists), key=operator.itemgetter(1))
401 if self.
type ==
'mrd':
402 ret = self.
model.bgplvms[target_modality].Y[nn, :]
403 elif self.
type ==
'bgplvm':
408 """Generating novel outputs. 411 The opposite of pattern completion. Instead of finding a memory from an output, here we find an output from a (possibly fantasy) memory. Here, fantasy memory is a memory not existing in the training set, found by interpolating or sampling in the memory space. 414 X: A `(Qx1)` numpy array with the location of the `Q` dimensional model that is to be generated. 415 view: Integer which is the index for the view of the MRD model that will be used to generate the fantasy_memory. 417 A `(Qx1)` numpy array with the predicted mean and a `(Qx1)` numpy array with the predicted variance. 419 if self.
type ==
'mrd':
420 pred_mean, pred_variance = self.
model.bgplvms[view].predict(X)
421 elif self.
type ==
'bgplvm':
422 pred_mean, pred_variance = self.
model.predict(X)
423 elif self.
type ==
'gp':
424 pred_mean, pred_variance = self.
model.predict(X)
425 return pred_mean, pred_variance
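A fantasy memory can be created, for instance, by interpolating between two stored latent points (sketch; assumes a trained `bgplvm`-type `lfm`):

    X_latent = lfm.model.X.mean                              # (N x Q) matrix of stored memories
    x_fantasy = 0.5 * (X_latent[0:1, :] + X_latent[1:2, :])  # midpoint between memories 0 and 1
    mean, var = lfm.fantasy_memory(np.asarray(x_fantasy))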
def familiarity(self, Ytest, ytrmean=None, ytrstd=None, optimise=100):
    """Familiarity testing.

    This function tests the familiarity/similarity of an input with the inputs used to train the model.

    Args:
        Ytest: A `(D x 1)` numpy array whose familiarity/similarity is tested against the trained outputs of the model.
        ytrmean: A `(D x 1)` numpy array with the mean of the training inputs.
        ytrstd: A `(D x 1)` numpy array with the standard deviation of the training inputs.
        optimise: Integer number of optimisation iterations when performing pattern completion.

    Returns:
        A float with a measure of familiarity for Ytest with the current model.
    """
    assert(self.type == 'bgplvm')

    if ytrmean is not None:
        # Normalise the test data with the training statistics.
        Ytest = Ytest - ytrmean
        Ytest /= ytrstd

        ll += s.inference(self.model.kern, qX[i, :][None, :], self.model.Z, self.model.likelihood,
                          Ytest[i, :][None, :], self.model.posterior)[0]
462 """ Return number of latent dimensions. 465 Convenience function to return the number of latent dimensions. 471 Integer with the number of latent dimensions of the model. 473 if self.
type ==
'bgplvm':
474 numLatentDimensions = self.
model.X.mean
475 elif self.
type ==
'mrd':
476 numLatentDimensions = self.
model.bgplvms[0].X.mean
478 print(
'No latent space for this type of model.')
479 numLatentDimensions =
None 480 return numLatentDimensions
def save_model(mm, fileName='m_serialized.txt'):
    """Save serialised model.

    Args:
        mm: Model object to save.
        fileName: String with the filename of the saved model.
    """
    output = open(fileName, 'wb')

    pickle.dump(mm, output)
501 """ Load serialised model. 504 fileName : String with the filename of model to load. 508 mm = pickle.load(open(fileName,
'r')) 513 """Save a pruned model 516 Save a trained model after pruning things that are not needed to be stored. Economy set to `True` will trigger a storing which creates much smaller files. See the load_pruned_model discussion on what this means in terms of restrictions. 519 mm : Model object to save. 520 fileName : String with the filename of saved model. 521 economy : Boolean to enable or disable economy saving. 522 extraDict : Dictionary with parameters that are requested to be saved which are not in the default saved parameters but are required when loading the model for interaction. 528 SAMObjPruned[
'type'] = mm.type
530 SAMObjPruned[
'textLabelPts'] = mm.model.textLabelPts
534 SAMObjPruned[
'__num_views'] =
None 535 SAMObjPruned[
'Q'] = mm.Q
536 SAMObjPruned[
'N'] = mm.N
537 SAMObjPruned[
'num_inducing'] = mm.num_inducing
538 SAMObjPruned[
'namesList'] = mm.namesList
539 SAMObjPruned[
'kernelString'] = mm.kernelString
540 SAMObjPruned.update(extraDict)
    folderPath = os.path.join('/', *fileName.split('/')[:-1])
    fileName = fileName.split('/')[-1]

    if economy:
        SAMObjPruned['modelPath'] = fileName + '_model.h5'
        # Overwrite any existing HDF5 file before saving the parameter array.
        if os.path.isfile(os.path.join(folderPath, SAMObjPruned['modelPath'])):
            os.remove(os.path.join(folderPath, SAMObjPruned['modelPath']))
        mm.model.save(os.path.join(folderPath, SAMObjPruned['modelPath']))
    else:
        SAMObjPruned['modelPath'] = fileName + '_model.pickle'
        mm.model.pickle(os.path.join(folderPath, SAMObjPruned['modelPath']))

    output = open(os.path.join(folderPath, fileName) + '.pickle', 'wb')
    pickle.dump(SAMObjPruned, output)
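Usage sketch for economy saving: the GPy parameter array goes to an HDF5 file and the metadata dictionary to a small pickle (the path and the extraDict contents are illustrative; the /tmp/models directory must already exist):

    save_pruned_model(lfm, fileName='/tmp/models/m_demo', economy=True,
                      extraDict={'imgDims': (20, 28)})
    # writes /tmp/models/m_demo_model.h5 and /tmp/models/m_demo.pickle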
571 """Load a pruned model 574 Load a trained model. If economy is set to `True`, then a not-None initial model m is needed. This model needs to be created exactly as the one that was saved (so, it is demo specific!) and in this case calling the present function will set its parameters (meaning that you still need to create a model but don't need to optimize it). 577 fileName : String with the filename of the model to load. 578 economy : Boolean to indicate whether an economy object is being loaded or not. 579 m : Model object into which the data to be loaded is to be stored in. If left at `None` model will be loaded into a default model initialisation. 584 folderPath = os.path.join(
'/', *fileName.split(
'/')[:-1])
585 SAMObjPruned = pickle.load(open(fileName +
'.pickle',
'rb'))
590 f = tables.open_file(os.path.join(fileName+
'_model.h5'),
'r') 591 m.param_array[:] = f.root.param_array[:] 593 m._trigger_params_changed() 596 with open(SAMObjPruned[
'modelPath'],
'rb')
as f:
597 print "Loading file: " + str(f)
598 SAMObject.model = pickle.load(f)
604 SAMObject.type = SAMObjPruned[
'type']
607 SAMObject.model.textLabelPts = SAMObjPruned[
'textLabelPts']
608 SAMObject.__num_views = SAMObjPruned[
'__num_views']
609 SAMObject.Q = SAMObjPruned[
'Q']
610 SAMObject.N = SAMObjPruned[
'N']
611 SAMObject.num_inducing = SAMObjPruned[
'num_inducing']
612 SAMObject.namesList = SAMObjPruned[
'namesList']
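And the matching load in the economy case (sketch): `m` must be constructed exactly as at training time before its parameters are overwritten from the HDF5 file:

    # m = ... build the same GPy model as during training (demo specific)
    SAMObject = load_pruned_model(fileName='/tmp/models/m_demo', economy=True, m=m)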
618 """ Determine the most descriptive output dimensions. 621 Helper function to determine which dimensions should be plotted based on the relevance weights. 624 model: Model object to be assessed. 627 Integer indicating the most descriptive dimension and an integer indicating the second most descriptive dimension. 629 if model.input_dim == 1:
632 if model.input_dim == 2:
633 input_1, input_2 = 0, 1
636 input_1, input_2 = np.argsort(model.input_sensitivity())[::-1][:2]
638 raise ValueError(
"cannot automatically determine which dimensions to plot, please pass 'which_indices'")
640 return input_1, input_2
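The selection rule is just an argsort over the relevance weights; a self-contained toy run:

    import numpy as np
    sensitivity = np.array([0.1, 2.5, 0.7, 1.9])         # toy stand-in for model.input_sensitivity()
    input_1, input_2 = np.argsort(sensitivity)[::-1][:2]
    # input_1 == 1 and input_2 == 3: the two largest relevance weights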
def check_snr(self, warningEnable=True, messages=True)
Checks the signal-to-noise ratio (SNR) of the trained model.
def add_labels(self, labels)
Add labels to observations.
Inference of the marginal likelihood through the ratio p(y,y*) / p(y).
def most_significant_input_dimensions(model)
Determine the most descriptive output dimensions.
def save_pruned_model(mm, fileName='m_pruned', economy=False, extraDict=dict())
Save a pruned model.
def pattern_completion(self, test_data, view=0, verbose=False, visualiseInfo=None, optimise=100)
Recall novel events.
def __get_latent__(self)
Return number of latent dimensions.
def recall(self, locations)
Recall stored events.
def load_model(fileName='m_serialized.txt')
Load serialised model.
def load_pruned_model(fileName='m_pruned', economy=False, m=None)
Load a pruned model.
def visualise(self, which_indices=None, plot_scales=True)
Show the internal representation of the memory.
def visualise_interactive(self, dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False, colorgray=True, view=0, which_indices=(0, 1))
Interactive plot of the model.
def save_model(mm, fileName='m_serialized.txt')
Save serialised model.
def pattern_completion_inference(self, y, target_modality=-1)
Pattern completion wrapper.
def fantasy_memory(self, X, view=0)
Generating novel outputs.
def __init__(self)
Initialise the Latent Feature Models.
def familiarity(self, Ytest, ytrmean=None, ytrstd=None, optimise=100)
Familiarity testing.
SAM based on Latent Feature Models.
def store(self, observed, inputs=None, Q=None, kernel=None, num_inducing=None, init_X='PCA')
Store events.
def learn(self, optimizer='bfgs', max_iters=1000, init_iters=300, verbose=True)
Learn the model (analogous to "forming synapses" after perceiving data).