speech
All Data Structures Functions Modules Pages
SpeechRecognizerModule.cpp
1 /*
2  * Copyright (C) 2011 EFAA Consortium, European Commission FP7 Project IST-270490
3  * Authors: Stephane Lallee
4  * email: stephane.lallee@gmail.com
5  * website: http://efaa.upf.edu/
6  * Permission is granted to copy, distribute, and/or modify this program
7  * under the terms of the GNU General Public License, version 2 or any
8  * later version published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13  * Public License for more details
14  */
15 
16 #include "SpeechRecognizerModule.h"
17 
19 //Helpers for dealing with the weird strings of windows...
20 std::wstring s2ws(const std::string& s)
21 {
22  int len;
23  int slength = (int)s.length() + 1;
24  len = MultiByteToWideChar(CP_ACP, 0, s.c_str(), slength, 0, 0);
25  wchar_t* buf = new wchar_t[len];
26  MultiByteToWideChar(CP_ACP, 0, s.c_str(), slength, buf, len);
27  std::wstring r(buf);
28  delete[] buf;
29  return r;
30 }
31 std::string ws2s(LPCWSTR s)
32 {
33  char *pmbbuf = (char *)malloc( 100 );
34  wcstombs( pmbbuf, s, 100 );
35  return pmbbuf;
36 }
/**
 * Split s on every occurrence of delim and append the pieces to elems.
 *
 * Existing content of elems is kept. Pieces are extracted with std::getline,
 * so an empty piece between two consecutive delimiters is kept while an empty
 * piece after a trailing delimiter is not produced.
 *
 * @param s     string to split.
 * @param delim separator character.
 * @param elems output vector the pieces are appended to.
 * @return a reference to elems.
 */
std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) {
    std::istringstream stream(s);
    for (std::string token; std::getline(stream, token, delim); ) {
        elems.push_back(token);
    }
    return elems;
}
45 std::vector<std::string> split(const std::string &s, char delim) {
46  std::vector<std::string> elems;
47  split(s, delim, elems);
48  return elems;
49 }
50 
52 //Module implementation
53 bool SpeechRecognizerModule::configure(ResourceFinder &rf )
54 {
55  setName( rf.check("name",Value("speechRecognizer")).asString().c_str() );
56  m_timeout = rf.check("timeout",Value(10000)).asInt32();
57  USE_LEGACY = !rf.check("noLegacy");
58  m_forwardSound = rf.check("forwardSound");
59  m_tmpFileFolder = rf.getHomeContextPath();
60  interruptRecognition = false;
61 
62  //Deal with speech recognition
63  string grammarFile = rf.check("grammarFile",Value("defaultGrammar.grxml")).asString();
64  grammarFile = rf.findFile(grammarFile);
65 
66  std::wstring tmp = s2ws(grammarFile);
67  LPCWSTR cwgrammarfile = tmp.c_str();
68 
69  m_useTalkBack = rf.check("talkback");
70 
71  //Initialise the speech crap
72  bool everythingIsFine = true;
73  HRESULT hr;
74  everythingIsFine = SUCCEEDED( m_cpRecoEngine.CoCreateInstance(CLSID_SpInprocRecognizer));
75  everythingIsFine &= SUCCEEDED( SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOIN, &m_cpAudio));
76  everythingIsFine &= SUCCEEDED( m_cpRecoEngine->CreateRecoContext( &m_cpRecoCtxt ));
77 
78  // Here, all we are interested in is the beginning and ends of sounds, as well as
79  // when the engine has recognized something
80  const ULONGLONG ullInterest = SPFEI(SPEI_RECOGNITION);
81  everythingIsFine &= SUCCEEDED(m_cpRecoCtxt->SetInterest(ullInterest, ullInterest));
82 
83  // set the input for the engine
84  everythingIsFine &= SUCCEEDED( m_cpRecoEngine->SetInput(m_cpAudio, TRUE));
85  everythingIsFine &= SUCCEEDED( m_cpRecoEngine->SetRecoState( SPRST_ACTIVE ));
86 
87  //Load grammar from file
88  everythingIsFine &= SUCCEEDED( m_cpRecoCtxt->CreateGrammar( 1, &m_cpGrammarFromFile ));
89  everythingIsFine &= SUCCEEDED( m_cpGrammarFromFile->SetGrammarState(SPGS_DISABLED));
90  everythingIsFine &= SUCCEEDED( m_cpGrammarFromFile->LoadCmdFromFile(cwgrammarfile, SPLO_DYNAMIC));
91 // everythingIsFine &= loadGrammarFromRf(rf);
92 
93  //Create a runtime grammar
94  everythingIsFine &= SUCCEEDED( m_cpRecoCtxt->CreateGrammar( 2, &m_cpGrammarRuntime ));
95  everythingIsFine &= SUCCEEDED( m_cpGrammarRuntime->SetGrammarState(SPGS_DISABLED));
96 
97  //Create a dictation grammar
98  everythingIsFine &= SUCCEEDED(m_cpRecoCtxt->CreateGrammar( GID_DICTATION, &m_cpGrammarDictation ));
99  everythingIsFine &= SUCCEEDED(m_cpGrammarDictation->LoadDictation(NULL, SPLO_STATIC));
100  everythingIsFine &= SUCCEEDED(m_cpGrammarDictation->SetDictationState(SPRS_INACTIVE));
101 
102  //Setup thing for the raw audio processing
103  everythingIsFine &= SUCCEEDED(m_cAudioFmt.AssignFormat(SPSF_22kHz16BitMono));
104  hr = m_cpRecoCtxt->SetAudioOptions(SPAO_RETAIN_AUDIO, &m_cAudioFmt.FormatId(), m_cAudioFmt.WaveFormatExPtr());
105  //everythingIsFine &= SUCCEEDED(hr = SPBindToFile((const WCHAR *)"C:\\temp.wav", SPFM_CREATE_ALWAYS, &m_streamFormat, &m_cAudioFmt.FormatId(), m_cAudioFmt.WaveFormatExPtr()));
106 
107  //CComPtr <ISpStream> cpStream = NULL;
108  //CSpStreamFormat cAudioFmt;
109  //hr = cAudioFmt.AssignFormat(SPSF_22kHz16BitMono);
110  //hr = SPBindToFile((const WCHAR *)"c:\\ttstemp.wav", SPFM_CREATE_ALWAYS, &cpStream, &cAudioFmt.FormatId(), cAudioFmt.WaveFormatExPtr());
111 
112  if( everythingIsFine )
113  {
114  string pName = "/";
115  pName += getName();
116  pName += "/recog/continuous:o";
117  m_portContinuousRecognition.open( pName );
118 
119  pName = "/";
120  pName += getName();
121  pName += "/recog/continuousGrammar:o";
122  m_portContinuousRecognitionGrammar.open( pName );
123 
124  pName = "/";
125  pName += getName();
126  pName += "/recog/sound:o";
127  m_portSound.open( pName );
128 
129  //iSpeak
130  pName = "/";
131  pName += getName();
132  pName += "/tts/iSpeak:o";
133  m_port2iSpeak.open( pName );
134 
135  pName = "/";
136  pName += getName();
137  pName += "/tts/iSpeak/rpc";
138  m_port2iSpeakRpc.open( pName );
139  if (Network::connect(m_port2iSpeak.getName(),"/iSpeak")&&Network::connect(m_port2iSpeakRpc.getName(),"/iSpeak/rpc"))
140  yInfo() <<"Connection to iSpeak succesfull" ;
141  else
142  yWarning() <<"Unable to connect to iSpeak. Connect manually." ;
143 
144  pName = "/";
145  pName += getName();
146  pName += "/rpc";
147  m_portRPC.open( pName );
148  attach(m_portRPC);
149 
150  //Start recognition
151  //everythingIsFine &= SUCCEEDED(m_cpRecoEngine->SetRecoState(SPRST_ACTIVE_ALWAYS));
152  everythingIsFine &= SUCCEEDED(m_cpGrammarFromFile->SetRuleState(NULL, NULL, SPRS_ACTIVE));
153  everythingIsFine &= SUCCEEDED( m_cpGrammarFromFile->SetGrammarState(SPGS_ENABLED));
154  }
155 
156  return (everythingIsFine);
157 }
158 yarp::sig::Sound SpeechRecognizerModule::toSound(CComPtr<ISpRecoResult> cpRecoResult)
159 {
160  HRESULT hr = S_OK;
161  CComPtr<ISpStreamFormat> cpStreamFormat = NULL;
162  SPPHRASE* pPhrase;
163  bool successGetPhrase = SUCCEEDED(cpRecoResult->GetPhrase(&pPhrase));
164  hr = cpRecoResult->GetAudio(0, pPhrase->Rule.ulCountOfElements, &cpStreamFormat);
165 
166  CComPtr<ISpStream> cpStream;
167  ULONG cbWritten = 0;
168 
169  string sPath = m_tmpFileFolder + "//tmp.wav";
170  //static const WCHAR path[] = L"C://tmpSnd.wav";
171 
172  // create file on hard-disk for storing recognized audio, and specify audio format as the retained audio format
173  hr = SPBindToFile(s2ws(sPath).c_str(), SPFM_CREATE_ALWAYS, &cpStream, &m_cAudioFmt.FormatId(), m_cAudioFmt.WaveFormatExPtr(), SPFEI_ALL_EVENTS);
174 
175  //Continuously transfer data between the two streams until no more data is found (i.e. end of stream)
176  //Note only transfer 1000 bytes at a time to creating large chunks of data at one time
177  while (TRUE)
178  {
179  // for logging purposes, the app can retrieve the recognized audio stream length in bytes
180  STATSTG stats;
181  hr = cpStreamFormat->Stat(&stats, NULL);
182  // Check hr
183 
184  // create a 1000-byte buffer for transferring
185  BYTE bBuffer[1000];
186  ULONG cbRead;
187 
188  // request 1000 bytes of data from the input stream
189  hr = cpStreamFormat->Read(bBuffer, 1000, &cbRead);
190  // if data was returned�
191  if (SUCCEEDED(hr) && cbRead > 0)
192  {
193  //then transfer/write the audio to the file-based stream
194  hr = cpStream->Write(bBuffer, cbRead, &cbWritten);
195  // Check hr
196  }
197 
198  // since there is no more data being added to the input stream, if the read request returned less than expected, the end of stream was reached, so break data transfer loop
199  if (cbRead < 1000)
200  {
201  break;
202  }
203  }
204  cpStream->Close();
205  cpStream.Release();
206 
207  yarp::sig::Sound s;
208  yarp::sig::file::read(s, sPath.c_str());
209  return s;
210  return true;
211 }
212 
213 
214 /************************************************************************/
215 bool SpeechRecognizerModule::updateModule()
216 {
217  cout<<".";
218  USES_CONVERSION;
219  CSpEvent event;
220 
221  // Process all of the recognition events
222  while (event.GetFrom(m_cpRecoCtxt) == S_OK)
223  {
224  switch (event.eEventId)
225  {
226  case SPEI_SOUND_START:
227  {
228  m_bInSound = TRUE;
229  yInfo() << "Sound in...";
230  break;
231  }
232 
233  case SPEI_SOUND_END:
234  if (m_bInSound)
235  {
236  m_bInSound = FALSE;
237  if (!m_bGotReco)
238  {
239  // The sound has started and ended,
240  // but the engine has not succeeded in recognizing anything
241  yWarning() << "Chunk of sound detected: Recognition is null";
242  }
243  m_bGotReco = FALSE;
244  }
245  break;
246 
247  case SPEI_RECOGNITION:
248  // There may be multiple recognition results, so get all of them
249  {
250  m_bGotReco = TRUE;
251  static const WCHAR wszUnrecognized[] = L"<Unrecognized>";
252 
253  CSpDynamicString dstrText;
254  if (SUCCEEDED(event.RecoResult()->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE,
255  &dstrText, NULL)))
256  {
257  SPPHRASE* pPhrase = NULL;
258  bool successGetPhrase = SUCCEEDED(event.RecoResult()->GetPhrase(&pPhrase));
259  int confidence=pPhrase->Rule.Confidence;
260 
261  string fullSentence = ws2s(dstrText);
262  yInfo() <<"Recognized "<<fullSentence<<" with confidence "<<confidence ;
263 
264 
265  //Send over yarp
266  Bottle bOut;
267  bOut.addString(fullSentence);
268  bOut.addInt32(confidence);
269  m_portContinuousRecognition.write(bOut);
270 
271  //Treat the semantic
272  if (successGetPhrase)
273  {
274  //Send sound
275  if (m_forwardSound)
276  {
277  yarp::sig::Sound& rawSnd = m_portSound.prepare();
278  rawSnd = toSound(event.RecoResult());
279  m_portSound.write();
280  }
281 
282  //--------------------------------------------------- 1. 1st subBottle : raw Sentence -----------------------------------------------//
283  int wordCount = pPhrase->Rule.ulCountOfElements;
284  string rawPhrase = "";
285  for(int i=0; i< wordCount; i++){
286  rawPhrase += ws2s(pPhrase->pElements[i].pszDisplayText) + " ";
287  yDebug() << "word : " << ws2s(pPhrase->pElements[i].pszDisplayText) ;
288  }
289  yInfo() <<"Raw sentence: "<<rawPhrase ;
290  if (&pPhrase->Rule == NULL)
291  {
292  yError() <<"Cannot parse the sentence!";
293  return true;
294  }
295  //--------------------------------------------------- 2. 2nd subottle : Word/Role ---------------------------------------------------//
296  Bottle bOutGrammar;
297  bOutGrammar.addString(rawPhrase);
298  bOutGrammar.addList()=toBottle(pPhrase,&pPhrase->Rule);
299  yInfo() << "Sending semantic bottle : " << bOutGrammar.toString();
300  m_portContinuousRecognitionGrammar.write(bOutGrammar);
301  ::CoTaskMemFree(pPhrase);
302  }
303 
304  if (m_useTalkBack)
305  say(fullSentence);
306  }
307  }
308  break;
309  }
310  }
311  return true;
312 }
313 
314 /************************************************************************/
315 Bottle SpeechRecognizerModule::toBottle(SPPHRASE* pPhrase, const SPPHRASERULE* pRule)
316 {
317  Bottle bCurrentLevelGlobal;
318 
319  const SPPHRASERULE* siblingRule = pRule;
320  while (siblingRule != NULL)
321  {
322  Bottle bCurrentSubLevel;
323  bCurrentSubLevel.addString(ws2s(siblingRule->pszName));
324 
325  //we backtrack
326  if(siblingRule->pFirstChild != NULL )
327  {
328  bCurrentSubLevel.addList()=toBottle(pPhrase, siblingRule->pFirstChild);
329  }
330  else
331  {
332  string nodeString = "";
333  for(unsigned int i=0; i<siblingRule->ulCountOfElements; i++)
334  {
335  nodeString += ws2s(pPhrase->pElements[siblingRule->ulFirstElement + i].pszDisplayText);
336  if (i<siblingRule->ulCountOfElements-1)
337  nodeString += " ";
338  }
339  bCurrentSubLevel.addString(nodeString);
340  }
341  siblingRule = siblingRule->pNextSibling;
342  if (pRule->pNextSibling !=NULL)
343  bCurrentLevelGlobal.addList() = bCurrentSubLevel;
344  else
345  bCurrentLevelGlobal = bCurrentSubLevel;
346 
347  }
348  return bCurrentLevelGlobal;
349 }
350 
351 /************************************************************************/
352 bool SpeechRecognizerModule::respond(const Bottle& cmd, Bottle& reply)
353 {
354  reply.addString("ACK");
355  string firstVocab = cmd.get(0).asString();
356 
357  if (firstVocab == "tts")
358  {
359  string sentence = cmd.get(1).asString();
360  say(sentence);
361  reply.addString("OK");
362  }
363  else if (firstVocab == "RGM" || firstVocab == "rgm" )
364  {
365  string secondVocab = cmd.get(1).asString();
366  if (secondVocab=="vocabulory")
367  handleRGMCmd(cmd.tail().tail(), reply);
368  }
369  else if (firstVocab == "recog")
370  {
371  handleRecognitionCmd(cmd.tail(), reply);
372  }
373  else if (firstVocab == "asyncrecog")
374  {
375  handleAsyncRecognitionCmd(cmd.tail(), reply);
376  }
377  else if (firstVocab == "interrupt")
378  {
379  handleInterrupt(cmd.tail(), reply);
380  }
381  else
382  reply.addString("UNKNOWN");
383 
384  return true;
385 }
386 
387 /************************************************************************/
388 bool SpeechRecognizerModule::handleInterrupt(const Bottle& cmd, Bottle& reply)
389 {
390  yInfo() << "Grammar interrupted";
391  interruptRecognition = true;
392  yarp::os::Time::delay(0.5);
393  interruptRecognition = true; // just in case of a previous race condition
394  reply.addString("OK");
395  return true;
396 }
397 
398 /************************************************************************/
399 bool SpeechRecognizerModule::handleRGMCmd(const Bottle& cmd, Bottle& reply)
400 {
401  string firstVocab = cmd.get(0).asString();
402  if (firstVocab == "add")
403  {
404  string vocabulory = cmd.get(1).asString();
405  if (vocabulory[0] != '#')
406  {
407  //reply.addString("Vocabulories have to start with a #. #Dictation and #WildCard are reserved. Aborting.");
408  reply.addString("ERROR");
409  return true;
410  }
411  string word = cmd.get(2).asString();
412  m_vocabulories[vocabulory].push_back(word);
413  refreshFromVocabulories(m_cpGrammarFromFile);
414  reply.addString("OK");
415  return true;
416  }
417 
418  if (firstVocab == "addAuto")
419  {
420  string vocabuloryType = cmd.get(1).asString();
421  yInfo() <<"Trying to enrich the "<<vocabuloryType<<" vocabulary.";
422 
423  say("Let's improve my dictionary.");
424 
425  //Try first with open dictation
426  int TRIALS_BEFORE_SPELLING = 2;
427  bool isFine = false;
428  int trial=0;
429  string newWord = "";
430  while(!isFine && trial<TRIALS_BEFORE_SPELLING)
431  {
432  say("Please, say the word.");
433  newWord = "";
434  while(newWord=="")
435  newWord=getFromDictaction(m_timeout);
436  say("I understood "+ newWord + ". Did you say that?");
437 
438  Bottle cmdTmp, replyTmp;
439  cmdTmp.addString("grammarSimple");
440  cmdTmp.addString("Yes I did.|No I did not.|Skip");
441  bool gotAConfirmation = false;
442  while(!gotAConfirmation)
443  {
444  replyTmp.clear();
445  handleRecognitionCmd(cmdTmp,replyTmp);
446  //cout<<"DEBUG="<<replyTmp.toString()<<endl;
447  //cout<<"DEBUG FIRST ELEMENT =|"<<replyTmp.get(0).asString()<<"|"<<endl;
448  if (replyTmp.get(0).asString() == "Skip")
449  {
450  say("Fine, we give up.");
451  reply.addString("ERROR");
452  return true;
453  }
454  gotAConfirmation =
455  replyTmp.size()>0 &&
456  (replyTmp.get(0).asString() == "Yes" ||
457  replyTmp.get(0).asString() == "No");
458  }
459  //cout<<"Reply is "<<replyTmp.toString()<<endl;
460  if ( replyTmp.get(0).asString() == "Yes")
461  isFine = true;
462  else
463  trial++;
464  }
465 
466  //Try then with spelling
467  int TRIALS_BEFORE_GIVING_UP = 2;
468  trial =0;
469  while(!isFine && trial<TRIALS_BEFORE_GIVING_UP)
470  {
471  say("Sorry, I cannot get it. Please, spell this word for me?");
472  string spelledWord = "";
473  while(spelledWord == "")
474  spelledWord=getFromDictaction(m_timeout,SPTOPIC_SPELLING);
475  newWord = spelledWord;
476 
477  say("I understood "+ newWord + ". Is that right?");
478 
479  Bottle cmdTmp, replyTmp;
480  cmdTmp.addString("grammarSimple");
481  cmdTmp.addString("Yes I did.|No I did not.|Skip");
482  bool gotAConfirmation = false;
483  while(!gotAConfirmation)
484  {
485  replyTmp.clear();
486  handleRecognitionCmd(cmdTmp,replyTmp);
487  //cout<<"DEBUG="<<replyTmp.toString()<<endl;
488  //cout<<"DEBUG FIRST ELEMENT =|"<<replyTmp.get(0).asString()<<"|"<<endl;
489  if (replyTmp.get(0).asString() == "Skip")
490  {
491  say("Fine, we give up.");
492  reply.addString("ERROR");
493  return true;
494  }
495 
496  gotAConfirmation =
497  replyTmp.size()>0 &&
498  (replyTmp.get(0).asString() == "Yes" ||
499  replyTmp.get(0).asString() == "No");
500  }
501  //cout<<"Reply is "<<replyTmp.toString()<<endl;
502  if ( replyTmp.get(0).asString() == "Yes")
503  isFine = true;
504  else
505  trial++;
506  }
507  //Give up
508  if (!isFine)
509  {
510  say("Sorry, I think we should give up with this word.");
511  reply.addString("ERROR");
512  }
513  else
514  {
515  say("Perfect! I know the word " + newWord);
516  m_vocabulories[vocabuloryType].push_back(newWord);
517  refreshFromVocabulories(m_cpGrammarFromFile);
518  reply.addString(newWord);
519  }
520 
521  return true;
522  }
523  reply.addString("UNKNOWN");
524  return false;
525 }
526 
527 /************************************************************************/
528 bool SpeechRecognizerModule::handleAsyncRecognitionCmd(const Bottle& cmd, Bottle& reply)
529 {
530  HRESULT hr;
531  string firstVocab = cmd.get(0).asString();
532  if (firstVocab == "getGrammar")
533  {
534  reply.addString("NOT_IMPLEMENTED");
535  return true;
536  }
537 
538  if (firstVocab == "clear")
539  {
540  bool everythingIsFine=true;
541  SPSTATEHANDLE rootRule;
542  everythingIsFine &= SUCCEEDED(m_cpGrammarFromFile->SetGrammarState(SPGS_DISABLED));
543  everythingIsFine &= SUCCEEDED(m_cpGrammarFromFile->GetRule(L"rootRule", NULL, SPRAF_TopLevel | SPRAF_Active, TRUE, &rootRule));
544  everythingIsFine &= SUCCEEDED(m_cpGrammarFromFile->ClearRule(rootRule));
545  everythingIsFine &= SUCCEEDED(hr = m_cpGrammarFromFile->Commit(NULL));
546  everythingIsFine &= SUCCEEDED(m_cpGrammarFromFile->SetGrammarState(SPGS_ENABLED));
547  everythingIsFine &= SUCCEEDED(m_cpGrammarFromFile->SetRuleState(NULL, NULL, SPRS_ACTIVE));
548  everythingIsFine &= SUCCEEDED(m_cpRecoCtxt->Resume(0));
549  reply.addString("Cleared");
550  return true;
551  }
552 
553  if (firstVocab == "addGrammar")
554  {
555  string grammar = cmd.get(1).asString();
556  bool everythingIsFine = setGrammarCustom(m_cpGrammarFromFile,grammar,true);
557  reply.addString("Added");
558  return true;
559  }
560 
561  if (firstVocab == "loadXML")
562  {
563  string xml = cmd.get(1).asString();
564  ofstream fileTmp("grammarTmp.grxml");
565  fileTmp<<xml;
566  fileTmp.close();
567 
568  std::wstring tmp = s2ws("grammarTmp.grxml");
569  LPCWSTR cwgrammarfile = tmp.c_str();
570 
571  bool everythingIsFine =true;
572  //everythingIsFine &= SUCCEEDED( m_cpRecoCtxt->CreateGrammar( 1, &m_cpGrammarFromFile ));
573  everythingIsFine &= SUCCEEDED( m_cpGrammarFromFile->SetGrammarState(SPGS_DISABLED));
574  everythingIsFine &= SUCCEEDED( m_cpGrammarFromFile->LoadCmdFromFile(cwgrammarfile, SPLO_DYNAMIC));
575 
576  everythingIsFine &= SUCCEEDED( m_cpGrammarFromFile->SetGrammarState(SPGS_ENABLED));
577  everythingIsFine &= SUCCEEDED(m_cpGrammarFromFile->SetRuleState(NULL, NULL, SPRS_ACTIVE));
578  everythingIsFine &= SUCCEEDED(m_cpRecoCtxt->Resume(0));
579 
580  refreshFromVocabulories(m_cpGrammarFromFile);
581  reply.addString("Loaded");
582  return true;
583  }
584 
585  return false;
586 }
587 
588 /************************************************************************/
589 bool SpeechRecognizerModule::refreshFromVocabulories(CComPtr<ISpRecoGrammar> grammarToModify)
590 {
591  //return true;
592  bool everythingIsFine = true;
593 
594  everythingIsFine &= SUCCEEDED(grammarToModify->SetGrammarState(SPGS_DISABLED));
595 
596  //Build a rule for each vocabulory
597  for(map<string, list<string> >::iterator vIt = m_vocabulories.begin(); vIt != m_vocabulories.end(); vIt++)
598  {
599  //Get the rule name from the key in the dictionary (i.e Agent, Action, etc...)
600  string removedSharp =vIt->first;
601  removedSharp.erase(0,1);
602  std::wstring tmp = s2ws(removedSharp);
603  LPCWSTR cwRuleName = tmp.c_str();
604 
605  SPSTATEHANDLE hinit,hstate;
606  HRESULT hr;
607  //Get the rule or create it
608  everythingIsFine &= SUCCEEDED(hr = grammarToModify->GetRule(cwRuleName, NULL, SPRAF_Dynamic, false, &hinit));
609  everythingIsFine &= SUCCEEDED(hr = grammarToModify->ClearRule(hinit));
610  for(list<string>::iterator wordIt = vIt->second.begin() ; wordIt != vIt->second.end(); wordIt++)
611  {
612  std::wstring wordTmp = s2ws(*wordIt);
613  LPCWSTR cwWord = wordTmp.c_str();
614  everythingIsFine &= SUCCEEDED( grammarToModify->AddWordTransition(hinit, NULL, cwWord, NULL, SPWT_LEXICAL, 1, NULL) );
615  }
616  }
617 
618  everythingIsFine &= SUCCEEDED(grammarToModify->Commit(NULL));
619  everythingIsFine &= SUCCEEDED(grammarToModify->SetGrammarState(SPGS_ENABLED));
620  everythingIsFine &= SUCCEEDED(grammarToModify->SetRuleState(NULL, NULL, SPRS_ACTIVE));
621  yInfo() << "Grammar is paused, DO NOT SPEAK! (if next message is coming after too long, check your microphone level and lower it!" ;
622  everythingIsFine &= SUCCEEDED(m_cpRecoCtxt->Resume(NULL));
623  yInfo() << "Grammar is resumed : everything is fine = " << everythingIsFine;
624 
625  return everythingIsFine;
626 }
627 
628 /************************************************************************/
629 string SpeechRecognizerModule::getFromDictaction(int timeout, LPCWSTR options )
630 {
631  bool everythingIsFine = TRUE;
632  everythingIsFine &= SUCCEEDED(m_cpGrammarDictation->UnloadDictation());
633  everythingIsFine &= SUCCEEDED(m_cpGrammarDictation->LoadDictation(options, SPLO_STATIC));
634  everythingIsFine &= SUCCEEDED(m_cpGrammarDictation->SetDictationState( SPRS_ACTIVE ));
635  yInfo() <<"Dictation is on..." ;
636  Bottle botTmp;
637  if (!USE_LEGACY)
638  {
639  botTmp = waitNextRecognition(m_timeout);
640  }
641  else
642  {
643  list< pair<string, double> > results = waitNextRecognitionLEGACY(m_timeout);
644  for(list< pair<string, double> >::iterator it = results.begin(); it != results.end(); it++)
645  {
646  botTmp.addString(it->first);
647  //botTmp.addFloat64(it->second);
648  }
649  }
650  yInfo() <<"Dictation is off...";
651  yInfo() <<"Got : "<<botTmp.toString();
652  //Turn off dictation and go back to the file grammar
653  everythingIsFine &= SUCCEEDED(m_cpGrammarDictation->SetDictationState( SPRS_INACTIVE ));
654  everythingIsFine &=SUCCEEDED(m_cpGrammarFromFile->SetGrammarState(SPGS_ENABLED));
655  return botTmp.toString();
656 }
657 
658 /************************************************************************/
659 bool SpeechRecognizerModule::handleRecognitionCmd(const Bottle& cmd, Bottle& reply)
660 {
661  string firstVocab = cmd.get(0).asString();
662 
663  if (firstVocab == "timeout")
664  {
665  m_timeout = cmd.get(1).asInt32();
666  //reply.addInt32(true);
667  return false;
668  }
669 
670  else if (firstVocab == "dictation")
671  {
672  bool everythingIsFine = TRUE;
673  everythingIsFine &= SUCCEEDED(m_cpGrammarDictation->SetDictationState( SPRS_ACTIVE ));
674  yInfo() <<"Dictation is on..." ;
675 
676  if (!USE_LEGACY)
677  {
678  reply.addList() = waitNextRecognition(m_timeout);
679  }
680  else
681  {
682  list< pair<string, double> > results = waitNextRecognitionLEGACY(m_timeout);
683  if (results.size()>0)
684  for(list< pair<string, double> >::iterator it = results.begin(); it != results.end(); it++)
685  {
686  reply.addString(it->first);
687  reply.addFloat64(it->second);
688  }
689  else
690  reply.addString("-1");
691  }
692  yInfo() <<"Dictation is off...";
693 
694  //Turn off dictation and go back to the file grammar
695  everythingIsFine &= SUCCEEDED(m_cpGrammarDictation->SetDictationState( SPRS_INACTIVE ));
696  everythingIsFine &=SUCCEEDED(m_cpGrammarFromFile->SetGrammarState(SPGS_ENABLED));
697  //reply.addInt32(true);
698  return true;
699  }
700  // If we are not in dictation then we set and switch to the runtimeGrammar
701  else if (firstVocab == "grammarXML")
702  {
703  string xml = cmd.get(1).asString();
704  ofstream fileTmp("grammarTmp.grxml");
705  fileTmp<<xml;
706  fileTmp.close();
707 
708  std::wstring tmp = s2ws("grammarTmp.grxml");
709  LPCWSTR cwgrammarfile = tmp.c_str();
710 
711  bool everythingIsFine =true;
712  everythingIsFine &= SUCCEEDED( m_cpGrammarRuntime->SetGrammarState(SPGS_DISABLED));
713  everythingIsFine &= SUCCEEDED( m_cpGrammarRuntime->LoadCmdFromFile(cwgrammarfile, SPLO_DYNAMIC));
714  everythingIsFine &= SUCCEEDED(m_cpGrammarRuntime->SetRuleState(NULL, NULL, SPRS_ACTIVE));
715  everythingIsFine &= SUCCEEDED(m_cpRecoCtxt->Resume(0));
716 
717  refreshFromVocabulories(m_cpGrammarRuntime);
718 
719  //reply.addInt32(everythingIsFine);
720  }
721 
722  else if (firstVocab == "choices")
723  {
724  string choices ="";
725  for (int wI = 1; wI < cmd.size(); wI++)
726  {
727  choices+=cmd.get(wI).asString();
728  if (wI<cmd.size()-1)
729  choices+="|";
730  }
731  setGrammarCustom(m_cpGrammarRuntime,choices,false);
732  }
733  else if (firstVocab == "grammarSimple")
734  {
735  string RADStyle = cmd.get(1).asString();
736  yInfo() <<"Setting runtime grammar to : "<<RADStyle ;
737  setGrammarCustom(m_cpGrammarRuntime,RADStyle,false);
738  }
739  else
740  {
741  reply.addString("UNKNOWN");
742  return false;
743  }
744 
745  //Disable the from file grammar
746  SUCCEEDED(m_cpGrammarFromFile->SetGrammarState(SPGS_DISABLED));
747  SUCCEEDED(m_cpGrammarRuntime->SetGrammarState(SPGS_ENABLED));
748 
749  //Force blocking recognition
750  if (!USE_LEGACY)
751  {
752  reply.addList() = waitNextRecognition(m_timeout);
753  }
754  else
755  {
756  list< pair<string, double> > results = waitNextRecognitionLEGACY(m_timeout);
757  if (results.size()>0)
758  for(list< pair<string, double> >::iterator it = results.begin(); it != results.end(); it++)
759  {
760  reply.addString(it->first);
761  reply.addFloat64(it->second);
762  }
763  else
764  reply.addString("-1");
765  }
766  //Disable the runtime grammar
767  SUCCEEDED(m_cpGrammarRuntime->SetGrammarState(SPGS_DISABLED));
768  SUCCEEDED(m_cpGrammarFromFile->SetGrammarState(SPGS_ENABLED));
769  return true;
770 }
771 /************************************************************************/
772 Bottle SpeechRecognizerModule::waitNextRecognition(int timeout)
773 {
774  yInfo() <<"Recognition: blocking mode on" ;
775  Bottle bOutGrammar;
776 
777  bool gotSomething = false;
778  double endTime = Time::now() + timeout/1000.0;
779  interruptRecognition = false;
780 
781  cout << endl ;
782  yInfo() << "=========== GO Waiting for recog! ===========" ;
783 
784  while(Time::now()<endTime && !gotSomething && !interruptRecognition)
785  {
786  //std::cout<<".";
787  const float ConfidenceThreshold = 0.3f;
788  SPEVENT curEvent;
789  ULONG fetched = 0;
790  HRESULT hr = S_OK;
791 
792  m_cpRecoCtxt->GetEvents(1, &curEvent, &fetched);
793 
794  while (fetched > 0)
795  {
796  yInfo() << " received something in waitNextRecognition" ;
797  gotSomething = true;
798  ISpRecoResult* result = reinterpret_cast<ISpRecoResult*>(curEvent.lParam);
799  CSpDynamicString dstrText;
800  result->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL);
801  string fullSentence = ws2s(dstrText);
802  yInfo() <<fullSentence ;
803  if (m_useTalkBack)
804  say(fullSentence);
805  bOutGrammar.addString(fullSentence);
806 
807  SPPHRASE* pPhrase = NULL;
808  result->GetPhrase(&pPhrase);
809  bOutGrammar.addList() = toBottle(pPhrase,&pPhrase->Rule);
810  yInfo() <<"Sending semantic bottle : "<<bOutGrammar.toString() ;
811  m_cpRecoCtxt->GetEvents(1, &curEvent, &fetched);
812 
813  if (m_forwardSound)
814  {
815  yarp::sig::Sound& rawSnd = m_portSound.prepare();
816  rawSnd = toSound(result);
817  m_portSound.write();
818  }
819 
820  }
821  }
822 
823  if(interruptRecognition) {
824  yDebug() << "interrupted speech recognizer!";
825  }
826  yInfo() <<"Recognition: blocking mode off";
827  return bOutGrammar;
828 }
829 
830 /************************************************************************/
831 list< pair<string, double> > SpeechRecognizerModule::waitNextRecognitionLEGACY(int timeout)
832 {
833  yInfo() <<"Recognition LEGACY: blocking mode on" ;
834  list< pair<string, double> > recognitionResults;
835 
836  bool gotSomething = false;
837  double endTime = Time::now() + timeout/1000.0;
838  while(Time::now()<endTime && !gotSomething && !interruptRecognition)
839  {
840  //std::cout<<".";
841  const float ConfidenceThreshold = 0.3f;
842  SPEVENT curEvent;
843  ULONG fetched = 0;
844  HRESULT hr = S_OK;
845 
846  m_cpRecoCtxt->GetEvents(1, &curEvent, &fetched);
847 
848  while (fetched > 0)
849  {
850  gotSomething = true;
851  ISpRecoResult* result = reinterpret_cast<ISpRecoResult*>(curEvent.lParam);
852 
853  //Convert the catched sentence to strings.
854  CSpDynamicString dstrText;
855  result->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL);
856  string fullSentence = ws2s(dstrText);
857  yInfo() <<fullSentence ;
858 
859  if (m_useTalkBack)
860  say(fullSentence);
861  vector<string> words = split(fullSentence,' ');
862  for(unsigned int w=0;w<words.size();w++)
863  {
864  //Todo extract the confidence value somehow...
865  recognitionResults.push_back(make_pair(words[w], -1.0));
866  }
867  m_cpRecoCtxt->GetEvents(1, &curEvent, &fetched);
868  }
869  }
870  interruptRecognition = false;
871  yInfo() <<"Recognition: blocking mode off" ;
872  return recognitionResults;
873 }
874 
875 /************************************************************************/
876 void SpeechRecognizerModule::say(string s, bool wait)
877 {
878  yInfo() <<"TTS: "<<s ;
879  Bottle b;
880  b.addString(s);
881  m_port2iSpeak.write(b);
882  if(wait)
883  {
884  yarp::os::Bottle cmd,reply;
885  cmd.addVocab32(VOCAB('s','t','a','t'));
886  std::string status = "speaking";
887  bool speechStarted = false;
888  while(wait&&(!speechStarted ||status=="speaking"))
889  {
890  m_port2iSpeakRpc.write(cmd,reply);
891  status = reply.get(0).asString();
892  if (!speechStarted && status != "quiet")
893  {
894  speechStarted = true;
895  }
896  yarp::os::Time::delay(0.2);
897  }
898  }
899 }
900 
901 /************************************************************************/
902 bool SpeechRecognizerModule::setGrammarCustom(CComPtr<ISpRecoGrammar> grammarToModify, string grammar, bool append)
903 {
904  //Clear the existing runtime grammar
905  SPSTATEHANDLE runtimeRootRule;
906  bool everythingIsFine = true;
907  everythingIsFine &= SUCCEEDED(grammarToModify->SetGrammarState(SPGS_DISABLED));
908  everythingIsFine &= SUCCEEDED(grammarToModify->GetRule(L"rootRule", NULL, SPRAF_TopLevel | SPRAF_Active, TRUE, &runtimeRootRule));
909  if(!append)
910  everythingIsFine &= SUCCEEDED(grammarToModify->ClearRule(runtimeRootRule));
911 
912  //Build a rule for each vocabulory
913  map<string, SPSTATEHANDLE> vocabRules;
914  for(map<string, list<string> >::iterator vIt = m_vocabulories.begin(); vIt != m_vocabulories.end(); vIt++)
915  {
916  //Get the rule name from the key in the dictionary (i.e Agent, Action, etc...)
917  std::wstring tmp = s2ws(vIt->first);
918  LPCWSTR cwRuleName = tmp.c_str();
919 
920  //Get the rule or create it
921  everythingIsFine &= SUCCEEDED(grammarToModify->GetRule(cwRuleName, NULL, SPRAF_Dynamic, TRUE, &vocabRules[vIt->first]));
922  everythingIsFine &= SUCCEEDED(grammarToModify->ClearRule(vocabRules[vIt->first]));
923  for(list<string>::iterator wordIt = vIt->second.begin() ; wordIt != vIt->second.end(); wordIt++)
924  {
925  std::wstring wordTmp = s2ws(*wordIt);
926  LPCWSTR cwWord = wordTmp.c_str();
927  everythingIsFine &= SUCCEEDED( grammarToModify->AddWordTransition(vocabRules[vIt->first], NULL, cwWord, NULL, SPWT_LEXICAL, 1, NULL) );
928  }
929  }
930 
931  //Go through the given string and build the according grammar
932  //Split the choices
933  vector<string> sentences = split(grammar,'|');
934  for(vector<string>::iterator it = sentences.begin() ; it != sentences.end() ; it++)
935  {
936  //Split the words
937  vector<string> words = split(*it,' ');
938  SPSTATEHANDLE beforeWordHandle = runtimeRootRule;
939  SPSTATEHANDLE afterWordHandle;
940  for(vector<string>::iterator itWord = words.begin() ; itWord != words.end() ; itWord++)
941  {
942  if((*itWord)=="")
943  continue;
944 
945  everythingIsFine &= SUCCEEDED(grammarToModify->CreateNewState(beforeWordHandle, &afterWordHandle));
946 
947  //Check if the current word is the name of a vocabulory
948  if ( (*itWord)[0] == '#' && m_vocabulories.find(*itWord) != m_vocabulories.end())
949  {
950  everythingIsFine &= SUCCEEDED(grammarToModify->AddRuleTransition(beforeWordHandle, afterWordHandle, vocabRules[*itWord], 1, NULL));
951  }
952  else
953  {
954  std::wstring wordTmp = s2ws(*itWord);
955  LPCWSTR cwWord = wordTmp.c_str();
956  everythingIsFine &= SUCCEEDED( grammarToModify->AddWordTransition(beforeWordHandle, afterWordHandle, cwWord, NULL, SPWT_LEXICAL, 1, NULL) );
957  }
958  beforeWordHandle = afterWordHandle;
959  }
960  everythingIsFine &= SUCCEEDED( grammarToModify->AddWordTransition(beforeWordHandle, NULL, NULL, NULL, SPWT_LEXICAL, 1, NULL) );
961  }
962  everythingIsFine &= SUCCEEDED(grammarToModify->Commit(NULL));
963  everythingIsFine &= SUCCEEDED(grammarToModify->SetGrammarState(SPGS_ENABLED));
964  everythingIsFine &= SUCCEEDED(grammarToModify->SetRuleState(NULL, NULL, SPRS_ACTIVE));
965  everythingIsFine &= SUCCEEDED(m_cpRecoCtxt->Resume(0));
966 
967  return everythingIsFine;
968 }
969 
970 
971 
972 
973 /************************************************************************/
974 bool SpeechRecognizerModule::loadGrammarFromRf(ResourceFinder &RF)
975 {
976  Bottle &bAgent = RF.findGroup("agent");
977  Bottle &bAction = RF.findGroup("action");
978  Bottle &bObject = RF.findGroup("object");
979 
980  Bottle bMessenger, bReply;
981 
982  yInfo() << "Agents are: " ;
983  for (int iBottle = 1 ; iBottle < bAgent.size() ; iBottle++)
984  {
985  yInfo() << "\t" << bAgent.get(iBottle).toString();
986  bMessenger.clear();
987  bMessenger.addString("add");
988  bMessenger.addString("#agent");
989  bMessenger.addString(bAgent.get(iBottle).toString());
990 
991  handleRGMCmd(bMessenger, bReply);
992 
993  yInfo() << "\t\t" << bReply.toString() ;
994  }
995 
996  yInfo() << "\n" << "Actions are: " ;
997  for (int iBottle = 1 ; iBottle < bAction.size() ; iBottle++)
998  {
999  yInfo() << "\t" << bAction.get(iBottle).toString();
1000  bMessenger.clear();
1001  bMessenger.addString("add");
1002  bMessenger.addString("#action");
1003  bMessenger.addString(bAction.get(iBottle).toString());
1004 
1005  handleRGMCmd(bMessenger, bReply);
1006 
1007  yInfo() << "\t\t" << bReply.toString() ;
1008  }
1009 
1010  yInfo() << "\n" << "Objects are: " ;
1011  for (int iBottle = 1 ; iBottle < bObject.size() ; iBottle++)
1012  {
1013  yInfo() << "\t" << bObject.get(iBottle).toString();
1014  bMessenger.clear();
1015  bMessenger.addString("add");
1016  bMessenger.addString("#object");
1017  bMessenger.addString(bObject.get(iBottle).toString());
1018 
1019  handleRGMCmd(bMessenger, bReply);
1020 
1021  yInfo() << "\t\t" << bReply.toString() ;
1022  }
1023 
1024  Bottle bGrammarMain, bGrammarDef;
1025 
1026 
1027  bGrammarMain.addString("addGrammar");
1028  bGrammarMain.addString("#agent #action #object");
1029 
1030  yInfo() << "\n" << bGrammarMain.toString() ;
1031 
1032 // handleAsyncRecognitionCmd(bGrammarMain, bReply);
1033 
1034  yInfo() << "\n" << bReply.toString() ;
1035 
1036  return true;
1037 }
1038