#include "DictionaryLearning.h"

using namespace std;
using namespace yarp::sig;
using namespace yarp::os;
using namespace yarp::math;
DictionaryLearning::DictionaryLearning(string dictionariesFilePath, string group, string codingM, int K)
{
    this->dictionariesFilePath=dictionariesFilePath;
    this->group=group;
    this->codingType=codingM;
    Knn=K;

    read();

    dictionaryT=dictionary.transposed();
    convert(dictionaryT,dict);

    feat.resize(featuresize);
    rankIndices.resize(Knn);

    fprintf(stdout,"featureSize %d, dictionarySize %d, usePCA %s, dimPCA %d, KNN %d, powerNormalization %s, alpha %f, Mapping %s \n",
            featuresize, dictionarySize, (usePCA)?"true":"false", dimPCA, Knn,
            (powerNormalization)?"true":"false", alpha, mappingType.c_str());
}
// read(): loads the coder configuration produced by saveDictionary(): the [GENERAL]
// parameters, the dictionary atoms stored one row per "lineN" entry of the requested
// group, and, when PCA is enabled, the [EIGENVECTORS]/[EIGENVALUES]/[MEAN] groups.
bool DictionaryLearning::read()
{
    Property config; config.fromConfigFile(dictionariesFilePath.c_str());

    Bottle& bgeneral=config.findGroup("GENERAL");
    lambda=bgeneral.find("lambda").asFloat64();
    usePCA=bgeneral.check("usePCA",Value(0)).asInt32();
    dimPCA=bgeneral.check("dimPCA",Value(80)).asInt32();
    powerNormalization=bgeneral.check("powerNormalization",Value(0)).asInt32();
    alpha=bgeneral.check("alpha",Value(3)).asInt32();
    mappingType=bgeneral.check("mappingType",Value("linear")).asString().c_str();
    dictionarySize=bgeneral.find("dictionarySize").asInt32();

    Bottle& dictionaryGroup=config.findGroup(group.c_str());
    featuresize=dictionaryGroup.find("featureSize").asInt32();

    if (!usePCA)
        dictionary.resize(featuresize,dictionarySize);
    else
        dictionary.resize(dimPCA,dictionarySize);

    int fdim=usePCA? dimPCA:featuresize;

    // one dictionary row per "lineN" list
    for (int i=0; i<fdim; i++)
    {
        ostringstream oss;
        oss << i+1;
        string num = "line"+oss.str();
        Bottle* line=dictionaryGroup.find(num.c_str()).asList();
        for (int j=0; j<dictionarySize; j++)
            dictionary(i,j)=line->get(j).asFloat64();
    }

    // precompute A = D'D + 2*beta*I used by the sparse coder
    Matrix trans=dictionary.transposed();
    Matrix identity=eye(dictionarySize,dictionarySize);
    Matrix diagonal=2*beta*identity;
    A=trans*dictionary+diagonal;

    if (usePCA)
    {
        Bottle& eigenvectorsGroup=config.findGroup("EIGENVECTORS");
        yarp::sig::Matrix eig(dimPCA,featuresize);
        for (int i=0; i<dimPCA; i++)
        {
            ostringstream oss;
            oss << i+1;
            string num = "line"+oss.str();
            Bottle* line=eigenvectorsGroup.find(num.c_str()).asList();
            for (int j=0; j<featuresize; j++)
                eig(i,j)=line->get(j).asFloat64();
        }
        cv::Mat eigens;
        convert(eig,eigens);
        PCA.eigenvectors=eigens;

        Bottle& eigenvaluesGroup=config.findGroup("EIGENVALUES");
        yarp::sig::Matrix eigv(dimPCA,1);
        for (int i=0; i<dimPCA; i++)
        {
            ostringstream oss;
            oss << i+1;
            string num = "line"+oss.str();
            Bottle* line=eigenvaluesGroup.find(num.c_str()).asList();
            eigv(i,0)=line->get(0).asFloat64();
        }
        cv::Mat eigenvs;
        convert(eigv,eigenvs);
        PCA.eigenvalues=eigenvs;

        Bottle& meanGroup=config.findGroup("MEAN");
        yarp::sig::Matrix mean(1,featuresize);
        string num = "line1";
        Bottle* line=meanGroup.find(num.c_str()).asList();
        for (int j=0; j<featuresize; j++)
            mean(0,j)=line->get(j).asFloat64();
        cv::Mat meanMat;
        convert(mean,meanMat);
        PCA.mean=meanMat;
    }

    return true;
}
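// isBestElement(): maintains rankScores/rankIndices as the numberOfBestPoints
// largest scores seen so far, kept sorted in decreasing order.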
void DictionaryLearning::isBestElement(double score, int index, int numberOfBestPoints)
{
    if (rankScores.size()==0)
    {
        rankScores.push_back(score);
        rankIndices.push_back(index);
    }
    else if ((int)rankScores.size()<numberOfBestPoints)
    {
        // list not full yet: insert keeping the scores in decreasing order
        bool inserted=false;
        std::vector<int>::iterator itind=rankIndices.begin();
        for (std::vector<double>::iterator itsc = rankScores.begin(); itsc!=rankScores.end(); itsc++, itind++)
        {
            if (score>*itsc)
            {
                rankScores.insert(itsc,score);
                rankIndices.insert(itind,index);
                inserted=true;
                break;
            }
        }
        if (!inserted)
        {
            rankScores.push_back(score);
            rankIndices.push_back(index);
        }
    }
    else
    {
        // list is full: keep only the numberOfBestPoints largest scores
        if (rankScores[rankScores.size()-1]>score)
            return;
        else if (rankScores[0]<score)
        {
            std::vector<double>::iterator itsc=rankScores.begin();
            std::vector<int>::iterator itind=rankIndices.begin();
            rankScores.insert(itsc,score);
            rankIndices.insert(itind,index);
            rankScores.pop_back();
            rankIndices.pop_back();
        }
        else
        {
            std::vector<int>::iterator itind=rankIndices.begin();
            for (std::vector<double>::iterator itsc = rankScores.begin(); itsc!=rankScores.end(); itsc++, itind++)
            {
                if (score>*itsc)
                {
                    rankScores.insert(itsc,score);
                    rankIndices.insert(itind,index);
                    rankScores.pop_back();
                    rankIndices.pop_back();
                    break;
                }
            }
        }
    }
}
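// bestCodingEver_ANN(): approximate coding. The feature is (optionally) PCA-projected,
// its Knn nearest dictionary atoms are retrieved with the kd-tree, and each selected
// atom receives a response given by the chosen mapping (linear dot product or one of
// the chi-square kernels defined at the end of this file).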
void DictionaryLearning::bestCodingEver_ANN(yarp::sig::Vector& feature, yarp::sig::Vector& descriptor)
{
    if (usePCA)
    {
        cv::Mat mat(1,feature.size(),CV_64FC1,(double*) feature.data());
        mat=PCA.project(mat);
        feature.resize(dimPCA,0.0);
        for (int i=0; i<feature.size(); i++)
            feature[i]=mat.at<float>(0,i);
    }

    descriptor.resize(dictionarySize,0.0);
    for (int i=0; i<feature.size(); i++)
        feat[i]=(float)feature[i];

    kdtree.findNearest(feat,Knn,maxComparisons,rankIndices);

    for (int i=0; i<Knn; i++)
    {
        int id=rankIndices[i];
        yarp::sig::Vector v=dictionaryT.getRow(id);

        double val=0.0;
        if(mappingType=="linear")
            val=yarp::math::dot(v,feature);
        if(mappingType=="additive_chisquare")
            val=additive_chisquare(v, feature);
        if(mappingType=="gauss_chisquare")
            val=gauss_chisquare(v, feature);

        // keep the response of the selected atom
        descriptor[id]=val;
    }
}
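// bestCodingEver(): exhaustive variant. Computes all atom responses D'f and keeps
// only the Knn largest ones in the output code.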
void DictionaryLearning::bestCodingEver(const yarp::sig::Vector& feature, yarp::sig::Vector& descriptor)
{
    rankScores.clear();
    rankIndices.clear();

    Vector code=dictionaryT*feature;
    descriptor.resize(code.size(),0.0);

    for (int i=0; i<code.size(); i++)
        isBestElement(code[i], i, Knn);

    for (int i=0; i<Knn; i++)
    {
        int id=rankIndices[i];
        descriptor[id]=rankScores[i];
    }
}
double DictionaryLearning::customNorm(const yarp::sig::Vector &A, const yarp::sig::Vector &B)
{
    // the accumulation body is not shown in the original; a plain L2 distance
    // over the 128 SIFT dimensions is assumed here
    double value=0.0;
    for (int i=0; i<128; i++)
    {
        double diff=A[i]-B[i];
        value+=diff*diff;
    }
    return sqrt(value);
}
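// bow(): bag-of-words coding of a single feature, i.e. hard assignment of the
// feature to its closest dictionary atom.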
void DictionaryLearning::bow(yarp::sig::Vector & feature, yarp::sig::Vector & code)
{
    if (usePCA)
    {
        cv::Mat mat(1,feature.size(),CV_64FC1,(double*) feature.data());
        mat=PCA.project(mat);
        feature.resize(dimPCA,0.0);
        for (int i=0; i<feature.size(); i++)
            feature[i]=mat.at<float>(0,i);
    }

    fprintf(stdout,"Starting bow coding \n");
    double time=Time::now();
    code.resize(dictionarySize,0.0);

    // find the dictionary atom closest to the feature; the distance accumulation and
    // the final hard assignment are assumed here, the original bookkeeping is not shown
    double minDist=-1.0;
    int closest=-1;
    for(int atom_idx=0; atom_idx<dictionarySize; atom_idx++)
    {
        yarp::sig::Vector v=dictionary.getCol(atom_idx);
        double dist=0.0;
        for (int i=0; i<featuresize; i++)
        {
            float d=feature[i]-v[i];
            dist+=d*d;
        }
        if (minDist<0.0 || dist<minDist)
        {
            minDist=dist;
            closest=atom_idx;
        }
    }
    if (closest>=0)
        code[closest]=1.0;

    time=Time::now()-time;
    fprintf(stdout,"%f \n",time);
}
// avgPooling(): spatial-pyramid pooling. Every local feature is coded (SC, BCE or BOW),
// the image is split into pLevels grids of bins, and the codes of the keypoints falling
// in each bin are averaged; each bin is then power- or L2-normalized.
void DictionaryLearning::avgPooling(std::vector<yarp::sig::Vector> & features, yarp::sig::Vector & code, vector<SiftGPU::SiftKeypoint> & keypoints, int pLevels, int imgW, int imgH)
{
    double time=Time::now();

    // code every local feature with the selected coder
    vector<Vector> allCodes;
    allCodes.resize(features.size());
    for (int i=0; i<features.size(); i++)
    {
        Vector &current=features[i];
        Vector currentCode;
        if(codingType=="SC")
            computeSCCode(current,currentCode);
        if(codingType=="BCE")
            bestCodingEver_ANN(current,currentCode);
        if(codingType=="BOW")
            bow(current,currentCode);
        allCodes[i]=currentCode;
    }

    int sizeF=allCodes[0].size();
    int sizeS=allCodes.size();

    // spatial pyramid: pyramid[i] bins per side, pBins[i] bins per level
    vector<int> pBins(pLevels);
    vector<int> pyramid(pLevels);
    pyramid[0]=1;
    pBins[0]=1;
    for (int i=1; i<pLevels; i++)
    {
        pyramid[i]=i*2;
        pBins[i]=(i*2)*(i*2);
    }

    int tBins=0;
    for (int i=0; i<pLevels; i++)
        tBins+=pBins[i];

    code.resize(sizeF*tBins,0.0);

    int binId=0;
    for (int iter=0; iter<pLevels; iter++)
    {
        int nBins=pyramid[iter];
        int wUnit= imgW/pyramid[iter];
        int hUnit= imgH/pyramid[iter];

        for (int iterBinW=0; iterBinW<nBins; iterBinW++)
        {
            for (int iterBinH=0; iterBinH<nBins; iterBinH++)
            {
                int currBinW=wUnit*(iterBinW+1);
                int currBinH=hUnit*(iterBinH+1);
                int prevBinW=wUnit*iterBinW;
                int prevBinH=hUnit*iterBinH;

                double normalizer=0.0;
                int start=binId;

                // average, over the keypoints falling in this bin, each code component
                for (int i=0; i<sizeF; i++)
                {
                    double value=0.0;
                    int total=0;
                    for (int k=0; k<sizeS; k++)
                    {
                        if(keypoints[k].x< prevBinW || keypoints[k].x>= currBinW || keypoints[k].y< prevBinH || keypoints[k].y>= currBinH)
                            continue;
                        value=value+allCodes[k][i];
                        total++;
                    }
                    if (total>0)
                    {
                        code[binId]=value/total;
                        normalizer=normalizer+((value/total)*(value/total));
                    }
                    binId++;
                }

                if(powerNormalization && normalizer>0.0)
                {
                    // L2-normalize the bin, then apply signed power normalization
                    normalizer=sqrt(normalizer);
                    for (int i=start; i<binId; i++)
                    {
                        code[i]=code[i]/normalizer;
                        int sign = code[i]>0? 1:-1;
                        code[i]=sign*pow(abs(code[i]),alpha);
                    }
                }
            }
        }
    }

    if(!powerNormalization)
    {
        double norm= yarp::math::norm(code);
        if (norm>0.0)
            code=code/norm;
    }

    time=Time::now()-time;
}
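// maxPooling(): same spatial pyramid as avgPooling(), but each bin keeps the
// component-wise maximum of the codes instead of their average.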
void DictionaryLearning::maxPooling(std::vector<yarp::sig::Vector> & features, yarp::sig::Vector & code, vector<SiftGPU::SiftKeypoint> & keypoints, int pLevels, int imgW, int imgH)
{
    double time=Time::now();

    // code every local feature with the selected coder
    vector<Vector> allCodes;
    allCodes.resize(features.size());
    for (int i=0; i<features.size(); i++)
    {
        Vector &current=features[i];
        Vector currentCode;
        if(codingType=="SC")
            computeSCCode(current,currentCode);
        if(codingType=="BCE")
            bestCodingEver_ANN(current,currentCode);
        if(codingType=="BOW")
            bow(current,currentCode);
        allCodes[i]=currentCode;
    }

    int sizeF=allCodes[0].size();
    int sizeS=allCodes.size();

    // spatial pyramid: pyramid[i] bins per side, pBins[i] bins per level
    vector<int> pBins(pLevels);
    vector<int> pyramid(pLevels);
    pyramid[0]=1;
    pBins[0]=1;
    for (int i=1; i<pLevels; i++)
    {
        pyramid[i]=i*2;
        pBins[i]=(i*2)*(i*2);
    }

    int tBins=0;
    for (int i=0; i<pLevels; i++)
        tBins+=pBins[i];

    code.resize(sizeF*tBins,0.0);

    int binId=0;
    for (int iter=0; iter<pLevels; iter++)
    {
        int nBins=pyramid[iter];
        int wUnit= imgW/pyramid[iter];
        int hUnit= imgH/pyramid[iter];

        for (int iterBinW=0; iterBinW<nBins; iterBinW++)
        {
            for (int iterBinH=0; iterBinH<nBins; iterBinH++)
            {
                int currBinW=wUnit*(iterBinW+1);
                int currBinH=hUnit*(iterBinH+1);
                int prevBinW=wUnit*iterBinW;
                int prevBinH=hUnit*iterBinH;

                double normalizer=0.0;
                int start=binId;

                // keep, for each code component, the maximum over the keypoints in this bin
                for (int i=0; i<sizeF; i++)
                {
                    double maxVal=-1000.0;
                    for (int k=0; k<sizeS; k++)
                    {
                        if(keypoints[k].x< prevBinW || keypoints[k].x>= currBinW || keypoints[k].y< prevBinH || keypoints[k].y>= currBinH)
                            continue;
                        double val=allCodes[k][i];
                        if (val>maxVal)
                            maxVal=val;
                    }
                    if (maxVal>-1000.0)
                    {
                        code[binId]=maxVal;
                        normalizer=normalizer+maxVal*maxVal;
                    }
                    binId++;
                }

                if(powerNormalization && normalizer>0.0)
                {
                    // L2-normalize the bin, then apply signed power normalization
                    normalizer=sqrt(normalizer);
                    for (int i=start; i<binId; i++)
                    {
                        code[i]=code[i]/normalizer;
                        int sign = code[i]>0? 1:-1;
                        code[i]=sign*pow(abs(code[i]),alpha);
                    }
                }
            }
        }
    }

    if(!powerNormalization)
    {
        double norm= yarp::math::norm(code);
        if (norm>0.0)
            code=code/norm;
    }

    time=Time::now()-time;
}
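// computeSCCode(): sparse coding of a single feature by feature-sign search
// (Lee et al., "Efficient sparse coding algorithms"), i.e. minimization of
// ||f - D x||^2 + lambda*||x||_1 using the precomputed A = D'D (plus a small ridge)
// and b = -D'f.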
void DictionaryLearning::computeSCCode(yarp::sig::Vector& feature, yarp::sig::Vector& descriptor)
{
    if (usePCA)
    {
        cv::Mat mat(1,feature.size(),CV_64FC1,(double*) feature.data());
        mat=PCA.project(mat);
        feature.resize(dimPCA,0.0);
        for (int i=0; i<feature.size(); i++)
            feature[i]=mat.at<float>(0,i);
    }

    Vector b=(-1)*feature*dictionary;
    Vector grad=b;                              // gradient of the quadratic term at x=0

    descriptor.resize(dictionarySize,0.0);
    double maxValue; int index=-1;
    max(grad,maxValue,index);
    if (maxValue<=lambda+eps)
        return;                                 // the all-zero code is already optimal

    // NOTE: the iteration scaffolding below is an assumed reconstruction of the standard
    // feature-sign search; only the individual steps come from the original source.
    bool solved=false;
    while (!solved)
    {
        // activation step: give the most violating coefficient the sign of its gradient
        if (grad[index]>lambda+eps)
            descriptor[index]=(lambda-grad[index])/A(index,index);
        else if (grad[index]<-lambda-eps)
            descriptor[index]=(-lambda-grad[index])/A(index,index);

        // collect the active set: indices, values, signs and the slices of A and b
        vector<int> a;
        Vector aIdx, xa, ba, signes;
        for (int j=0; j<descriptor.size(); j++)
        {
            if (descriptor[j]!=0.0)
            {
                a.push_back(j);
                aIdx.push_back(j);
                xa.push_back(descriptor[j]);
                ba.push_back(b[j]);
                signes.push_back(descriptor[j]>0? 1:-1);
            }
        }

        Matrix Aa;
        subMatrix(A,aIdx,Aa);

        // analytic minimizer over the active set with the signs held fixed
        Vector vect=(-lambda*signes)-ba;
        Vector xnew=luinv(Aa)*vect;

        // objective at xnew, restricted to its nonzero entries
        Vector vectidx, bidx, xnewidx;
        double sum=0.0;
        for (int i=0; i<xnew.size(); i++)
        {
            if (xnew[i]!=0.0)
            {
                vectidx.push_back(vect[i]);
                bidx.push_back(ba[i]);
                xnewidx.push_back(xnew[i]);
                sum+=abs(xnew[i]);
            }
        }
        double onew=dot((vectidx/2+bidx),xnewidx)+lambda*sum;

        // does any active coefficient change sign between xa and xnew?
        bool changeSign=false;
        for (int i=0; i<xa.size(); i++)
        {
            if (xa[i]*xnew[i]<=0)
            {
                changeSign=true;
                break;
            }
        }

        if (!changeSign)
        {
            for (int i=0; i<a.size(); i++)
                descriptor[a[i]]=xnew[i];
        }
        else
        {
            // discrete line search over the zero crossings between xa and xnew
            // (d, t and s are assumed names: step, crossing ratios, crossing indices)
            Vector d=xnew-xa;
            Vector t(d.size());
            vector<int> s;
            for (int i=0; i<d.size(); i++)
            {
                t[i]=d[i]/xa[i];
                if (xa[i]*xnew[i]<=0)
                    s.push_back(i);
            }

            double omin=onew;
            Vector xmin=xnew;
            for (int i=0; i<s.size(); i++)
            {
                Vector x_s=xa-d/t[s[i]];
                x_s[s[i]]=0.0;

                Vector idx, x_sidx, baidx;
                double sumAbs=0.0;
                for (int j=0; j<x_s.size(); j++)
                {
                    if (x_s[j]!=0.0)
                    {
                        idx.push_back(j);
                        x_sidx.push_back(x_s[j]);
                        baidx.push_back(ba[j]);
                        sumAbs+=abs(x_s[j]);
                    }
                }
                Matrix Atmp2;
                subMatrix(Aa,idx,Atmp2);

                Vector otmp=Atmp2*(x_sidx/2)+baidx;
                double o_s=dot(otmp,x_sidx)+lambda*sumAbs;
                if (o_s<omin)
                {
                    omin=o_s;
                    xmin=x_s;
                }
            }

            for (int i=0; i<a.size(); i++)
                descriptor[a[i]]=xmin[i];
        }

        // optimality check on the zero coefficients: done when no gradient exceeds lambda
        // (the gradient update below is assumed; the original recomputation is not shown)
        grad=A*descriptor+b;
        Vector tmp;
        vector<int> zeroIdx;
        for (int i=0; i<descriptor.size(); i++)
        {
            if (descriptor[i]==0.0)
            {
                tmp.push_back(grad[i]);
                zeroIdx.push_back(i);
            }
        }
        max(tmp,maxValue,index);
        if (maxValue<=lambda+eps)
            solved=true;
        else
            index=zeroIdx[index];               // map back to the dictionary index for the next activation
    }
}
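// subMatrix(): copies into Atmp the square submatrix of A obtained by selecting
// the rows and columns listed in indexes.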
void DictionaryLearning::subMatrix(const Matrix& A, const Vector& indexes, Matrix& Atmp)
{
    int indSize=indexes.size();
    Atmp.resize(indSize,indSize);

    int m=0;
    for (int i=0; i<indSize; i++)
    {
        int n=0;
        for (int j=0; j<indSize; j++)
        {
            Atmp(m,n)=A((int)indexes[i],(int)indexes[j]);
            n++;
        }
        m++;
    }
}
// max(): returns the largest absolute entry of x and its index.
void DictionaryLearning::max(const Vector& x, double& maxValue, int& index)
{
    maxValue=0.0;
    index=-1;
    for (int i=0; i<x.size(); i++)
    {
        if(abs((double)x[i])>maxValue)
        {
            maxValue=abs((double)x[i]);
            index=i;
        }
    }
}
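// readFeatures(): parses a plain-text file with one whitespace-separated
// feature vector per line.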
vector<vector<double> > DictionaryLearning::readFeatures(std::string filePath)
{
    vector<vector<double> > featuresMat;

    ifstream infile;
    infile.open (filePath.c_str());
    while(!infile.eof() && infile.is_open())
    {
        string line;
        getline(infile,line);
        if (line.empty())
            continue;

        vector<double> f;
        char * val= strtok((char*) line.c_str()," ");
        while (val!=NULL)
        {
            double value=atof(val);
            f.push_back(value);
            val=strtok(NULL," ");
        }
        featuresMat.push_back(f);
    }

    infile.close();
    return featuresMat;
}
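// learnDictionary(): builds the dictionary from a feature file by (optionally)
// fitting a PCA, running k-means with dictionarySize centers, and storing the
// L2-normalized centroids as the columns of the dictionary matrix.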
void DictionaryLearning::learnDictionary(string featureFile, int dictionarySize, bool usePCA, int dim)
{
    vector<vector<double> > Features=readFeatures(featureFile);
    if(Features.size()==0)
        return;

    this->featuresize=Features[0].size();
    this->usePCA=usePCA;

    cv::Mat samples(Features.size(), featuresize,CV_32F);
    for (int i=0; i<Features.size(); i++)
    {
        for (int j=0; j<Features[i].size(); j++)
            samples.at<float>(i,j)=(float)Features[i][j];
    }

    if (usePCA)
    {
        dimPCA=dim;
        fprintf(stdout,"Using PCA with dim: %d \n", dimPCA);
        PCA(samples,cv::Mat(), CV_PCA_DATA_AS_ROW,dimPCA);
        samples=PCA.project(samples);
    }

    int feaSize=usePCA? dimPCA:featuresize;

    cv::Mat centroids(dictionarySize,feaSize,CV_32F);
    fprintf(stdout,"Learning Dictionary with %d atoms. \n", dictionarySize);

    cv::Mat labels;
    cv::kmeans(samples,dictionarySize,labels, cv::TermCriteria(),3,cv::KMEANS_PP_CENTERS , centroids);

    dictionary.resize(feaSize,dictionarySize);

    // copy each centroid into a column of the dictionary and normalize it to unit L2 norm
    for(int j=0; j<dictionary.cols(); j++)
    {
        for (int i=0; i<dictionary.rows(); i++)
            dictionary(i,j)=centroids.at<float>(j,i);

        Vector col =dictionary.getCol(j);
        double normVal=norm(col);

        for (int i=0; i<dictionary.rows(); i++)
            dictionary(i,j)=dictionary(i,j)/normVal;

        Vector col2 =dictionary.getCol(j);
    }

    dictionaryT=dictionary.transposed();

    Matrix trans=dictionary.transposed();
    Matrix identity=eye(dictionarySize,dictionarySize);
    Matrix diagonal=2*beta*identity;
    A=trans*dictionary+diagonal;

    convert(dictionaryT,dict);

    this->dictionarySize=dictionarySize;

    feat.resize(featuresize);
}
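// saveDictionary(): writes the configuration file consumed by read(), with one
// "lineN ( ... )" list per dictionary row and per PCA matrix row.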
bool DictionaryLearning::saveDictionary(string filePath)
{
    ofstream dictfile;
    dictfile.open(filePath.c_str(),ios::trunc);

    dictfile << "[GENERAL] \n";
    dictfile << "lambda " << lambda << "\n";
    int use= usePCA? 1:0;
    dictfile << "usePCA " << use << "\n";
    dictfile << "dimPCA " << dimPCA << "\n";
    dictfile << "dictionarySize " << dictionaryT.rows() << "\n";

    dictfile << "[DICTIONARY] \n";
    dictfile << "featureSize " << featuresize << "\n";

    for (int i=0; i<dictionary.rows(); i++)
    {
        dictfile << "line" << i+1 << " ( ";
        for (int j=0; j<dictionary.cols(); j++)
            dictfile << dictionary(i,j) << " ";
        dictfile << ") \n";
    }

    dictfile << "[EIGENVECTORS] \n";
    for (int i=0; i<PCA.eigenvectors.rows; i++)
    {
        dictfile << "line" << i+1 << " ( ";
        for (int j=0; j<PCA.eigenvectors.cols; j++)
            dictfile << PCA.eigenvectors.at<float>(i,j) << " ";
        dictfile << ") \n";
    }

    dictfile << "[EIGENVALUES] \n";
    for (int i=0; i<PCA.eigenvalues.rows; i++)
    {
        dictfile << "line" << i+1 << " ( ";
        for (int j=0; j<PCA.eigenvalues.cols; j++)
            dictfile << PCA.eigenvalues.at<float>(i,j) << " ";
        dictfile << ") \n";
    }

    dictfile << "[MEAN] \n";
    for (int i=0; i<PCA.mean.rows; i++)
    {
        dictfile << "line" << i+1 << " ( ";
        for (int j=0; j<PCA.mean.cols; j++)
            dictfile << PCA.mean.at<float>(i,j) << " ";
        dictfile << ") \n";
    }

    dictfile.close();
    return true;
}
void DictionaryLearning::printMatrixYarp(const yarp::sig::Matrix &A)
{
    for (int i=0; i<A.rows(); i++)
    {
        for (int j=0; j<A.cols(); j++)
            fprintf(stdout,"%f ",A(i,j));
        fprintf(stdout,"\n");
    }
    fprintf(stdout,"\n");
}
DictionaryLearning::~DictionaryLearning()
{
}
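// convert(): helpers translating between yarp::sig::Matrix and cv::Mat.
// Note the asymmetry: the yarp-to-cv direction writes CV_32FC1, while the
// cv-to-yarp direction reads the source matrix as double (CV_64F).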
void DictionaryLearning::convert(yarp::sig::Matrix& matrix, cv::Mat& mat)
{
    mat=cv::Mat(matrix.rows(),matrix.cols(),CV_32FC1);
    for(int i=0; i<matrix.rows(); i++)
        for(int j=0; j<matrix.cols(); j++)
            mat.at<float>(i,j)=(float)matrix(i,j);
}
void DictionaryLearning::convert(cv::Mat& mat, yarp::sig::Matrix& matrix)
{
    matrix.resize(mat.rows,mat.cols);
    for(int i=0; i<mat.rows; i++)
        for(int j=0; j<mat.cols; j++)
            matrix(i,j)=mat.at<double>(i,j);
}
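// Kernel mappings used by bestCodingEver_ANN() as alternatives to the plain dot
// product: an additive chi-square kernel and a Gaussian (RBF) chi-square kernel.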
double DictionaryLearning::additive_chisquare(const yarp::sig::Vector &x, const yarp::sig::Vector &y)
{
    double val=0.0;
    for (int i=0; i<x.size(); i++)
    {
        double res=x[i]*y[i];
        double sumAbs=(abs(x[i])+abs(y[i]));
        // assumed: standard additive chi-square accumulation, skipping empty components
        if (sumAbs>0.0)
            val=val+2.0*(res/sumAbs);
    }
    return val;
}
double DictionaryLearning::gauss_chisquare(const yarp::sig::Vector &x, const yarp::sig::Vector &y)
{
    double val=0.0;
    for (int i=0; i<x.size(); i++)
    {
        double res=x[i]-y[i];
        int sign=(x[i]*y[i])>0? 1:-1;
        double sumAbs=(abs(x[i])+abs(y[i]));
        // assumed: the difference is scaled by the chi-square denominator before the exponential
        if (sumAbs==0.0)
            continue;
        res=res/sqrt(sumAbs);
        val=val+(sign*exp(-(res*res)));
    }
    return val;
}