speech
Loading...
Searching...
No Matches
SpeechRecognizerModule.cpp
1/*
2 * Copyright (C) 2011 EFAA Consortium, European Commission FP7 Project IST-270490
3 * Authors: Stephane Lallee
4 * email: stephane.lallee@gmail.com
5 * website: http://efaa.upf.edu/
6 * Permission is granted to copy, distribute, and/or modify this program
7 * under the terms of the GNU General Public License, version 2 or any
8 * later version published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 * Public License for more details
14 */
15
16#include "SpeechRecognizerModule.h"
17
// Helpers for converting between narrow (std::string) and wide (Windows) strings, and for splitting strings
20std::wstring s2ws(const std::string& s)
21{
22 int len;
23 int slength = (int)s.length() + 1;
24 len = MultiByteToWideChar(CP_ACP, 0, s.c_str(), slength, 0, 0);
25 wchar_t* buf = new wchar_t[len];
26 MultiByteToWideChar(CP_ACP, 0, s.c_str(), slength, buf, len);
27 std::wstring r(buf);
28 delete[] buf;
29 return r;
30}
31std::string ws2s(LPCWSTR s)
32{
33 char *pmbbuf = (char *)malloc( 100 );
34 wcstombs( pmbbuf, s, 100 );
35 return pmbbuf;
36}
/**
 * Split s on delim and append the pieces to elems.
 *
 * Empty pieces between two delimiters (or before a leading delimiter) are
 * kept; a single trailing delimiter does not produce a trailing empty piece,
 * and an empty input appends nothing.
 *
 * @param s     string to split.
 * @param delim separator character.
 * @param elems container receiving the pieces (existing content is kept).
 * @return reference to elems.
 */
std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) {
    std::string::size_type tokenStart = 0;
    while (tokenStart < s.size()) {
        const std::string::size_type delimPos = s.find(delim, tokenStart);
        if (delimPos == std::string::npos) {
            // Last piece: everything up to the end of the string.
            elems.push_back(s.substr(tokenStart));
            break;
        }
        elems.push_back(s.substr(tokenStart, delimPos - tokenStart));
        tokenStart = delimPos + 1;
    }
    return elems;
}

/**
 * Split s on delim and return the pieces as a new vector.
 *
 * @param s     string to split.
 * @param delim separator character.
 * @return the pieces, in order of appearance.
 */
std::vector<std::string> split(const std::string &s, char delim) {
    std::vector<std::string> pieces;
    return split(s, delim, pieces);
}
50
52//Module implementation
53bool SpeechRecognizerModule::configure(ResourceFinder &rf )
54{
55 setName( rf.check("name",Value("speechRecognizer")).asString().c_str() );
56 m_timeout = rf.check("timeout",Value(10000)).asInt32();
57 USE_LEGACY = !rf.check("noLegacy");
58 m_forwardSound = rf.check("forwardSound");
59 m_tmpFileFolder = rf.getHomeContextPath();
60 interruptRecognition = false;
61
62 //Deal with speech recognition
63 string grammarFile = rf.check("grammarFile",Value("defaultGrammar.grxml")).asString();
64 grammarFile = rf.findFile(grammarFile);
65
66 std::wstring tmp = s2ws(grammarFile);
67 LPCWSTR cwgrammarfile = tmp.c_str();
68
69 m_useTalkBack = rf.check("talkback");
70
71 //Initialise the speech crap
72 bool everythingIsFine = true;
73 HRESULT hr;
74 everythingIsFine = SUCCEEDED( m_cpRecoEngine.CoCreateInstance(CLSID_SpInprocRecognizer));
75 everythingIsFine &= SUCCEEDED( SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOIN, &m_cpAudio));
76 everythingIsFine &= SUCCEEDED( m_cpRecoEngine->CreateRecoContext( &m_cpRecoCtxt ));
77
78 // Here, all we are interested in is the beginning and ends of sounds, as well as
79 // when the engine has recognized something
80 const ULONGLONG ullInterest = SPFEI(SPEI_RECOGNITION);
81 everythingIsFine &= SUCCEEDED(m_cpRecoCtxt->SetInterest(ullInterest, ullInterest));
82
83 // set the input for the engine
84 everythingIsFine &= SUCCEEDED( m_cpRecoEngine->SetInput(m_cpAudio, TRUE));
85 everythingIsFine &= SUCCEEDED( m_cpRecoEngine->SetRecoState( SPRST_ACTIVE ));
86
87 //Load grammar from file
88 everythingIsFine &= SUCCEEDED( m_cpRecoCtxt->CreateGrammar( 1, &m_cpGrammarFromFile ));
89 everythingIsFine &= SUCCEEDED( m_cpGrammarFromFile->SetGrammarState(SPGS_DISABLED));
90 everythingIsFine &= SUCCEEDED( m_cpGrammarFromFile->LoadCmdFromFile(cwgrammarfile, SPLO_DYNAMIC));
91// everythingIsFine &= loadGrammarFromRf(rf);
92
93 //Create a runtime grammar
94 everythingIsFine &= SUCCEEDED( m_cpRecoCtxt->CreateGrammar( 2, &m_cpGrammarRuntime ));
95 everythingIsFine &= SUCCEEDED( m_cpGrammarRuntime->SetGrammarState(SPGS_DISABLED));
96
97 //Create a dictation grammar
98 everythingIsFine &= SUCCEEDED(m_cpRecoCtxt->CreateGrammar( GID_DICTATION, &m_cpGrammarDictation ));
99 everythingIsFine &= SUCCEEDED(m_cpGrammarDictation->LoadDictation(NULL, SPLO_STATIC));
100 everythingIsFine &= SUCCEEDED(m_cpGrammarDictation->SetDictationState(SPRS_INACTIVE));
101
102 //Setup thing for the raw audio processing
103 everythingIsFine &= SUCCEEDED(m_cAudioFmt.AssignFormat(SPSF_22kHz16BitMono));
104 hr = m_cpRecoCtxt->SetAudioOptions(SPAO_RETAIN_AUDIO, &m_cAudioFmt.FormatId(), m_cAudioFmt.WaveFormatExPtr());
105 //everythingIsFine &= SUCCEEDED(hr = SPBindToFile((const WCHAR *)"C:\\temp.wav", SPFM_CREATE_ALWAYS, &m_streamFormat, &m_cAudioFmt.FormatId(), m_cAudioFmt.WaveFormatExPtr()));
106
107 //CComPtr <ISpStream> cpStream = NULL;
108 //CSpStreamFormat cAudioFmt;
109 //hr = cAudioFmt.AssignFormat(SPSF_22kHz16BitMono);
110 //hr = SPBindToFile((const WCHAR *)"c:\\ttstemp.wav", SPFM_CREATE_ALWAYS, &cpStream, &cAudioFmt.FormatId(), cAudioFmt.WaveFormatExPtr());
111
112 if( everythingIsFine )
113 {
114 string pName = "/";
115 pName += getName();
116 pName += "/recog/continuous:o";
117 m_portContinuousRecognition.open( pName );
118
119 pName = "/";
120 pName += getName();
121 pName += "/recog/continuousGrammar:o";
122 m_portContinuousRecognitionGrammar.open( pName );
123
124 pName = "/";
125 pName += getName();
126 pName += "/recog/sound:o";
127 m_portSound.open( pName );
128
129 //iSpeak
130 pName = "/";
131 pName += getName();
132 pName += "/tts/iSpeak:o";
133 m_port2iSpeak.open( pName );
134
135 pName = "/";
136 pName += getName();
137 pName += "/tts/iSpeak/rpc";
138 m_port2iSpeakRpc.open( pName );
139 if (Network::connect(m_port2iSpeak.getName(),"/iSpeak")&&Network::connect(m_port2iSpeakRpc.getName(),"/iSpeak/rpc"))
140 yInfo() <<"Connection to iSpeak succesfull" ;
141 else
142 yWarning() <<"Unable to connect to iSpeak. Connect manually." ;
143
144 pName = "/";
145 pName += getName();
146 pName += "/rpc";
147 m_portRPC.open( pName );
148 attach(m_portRPC);
149
150 //Start recognition
151 //everythingIsFine &= SUCCEEDED(m_cpRecoEngine->SetRecoState(SPRST_ACTIVE_ALWAYS));
152 everythingIsFine &= SUCCEEDED(m_cpGrammarFromFile->SetRuleState(NULL, NULL, SPRS_ACTIVE));
153 everythingIsFine &= SUCCEEDED( m_cpGrammarFromFile->SetGrammarState(SPGS_ENABLED));
154 }
155
156 return (everythingIsFine);
157}
158yarp::sig::Sound SpeechRecognizerModule::toSound(CComPtr<ISpRecoResult> cpRecoResult)
159{
160 HRESULT hr = S_OK;
161 CComPtr<ISpStreamFormat> cpStreamFormat = NULL;
162 SPPHRASE* pPhrase;
163 bool successGetPhrase = SUCCEEDED(cpRecoResult->GetPhrase(&pPhrase));
164 hr = cpRecoResult->GetAudio(0, pPhrase->Rule.ulCountOfElements, &cpStreamFormat);
165
166 CComPtr<ISpStream> cpStream;
167 ULONG cbWritten = 0;
168
169 string sPath = m_tmpFileFolder + "//tmp.wav";
170 //static const WCHAR path[] = L"C://tmpSnd.wav";
171
172 // create file on hard-disk for storing recognized audio, and specify audio format as the retained audio format
173 hr = SPBindToFile(s2ws(sPath).c_str(), SPFM_CREATE_ALWAYS, &cpStream, &m_cAudioFmt.FormatId(), m_cAudioFmt.WaveFormatExPtr(), SPFEI_ALL_EVENTS);
174
175 //Continuously transfer data between the two streams until no more data is found (i.e. end of stream)
176 //Note only transfer 1000 bytes at a time to creating large chunks of data at one time
177 while (TRUE)
178 {
179 // for logging purposes, the app can retrieve the recognized audio stream length in bytes
180 STATSTG stats;
181 hr = cpStreamFormat->Stat(&stats, NULL);
182 // Check hr
183
184 // create a 1000-byte buffer for transferring
185 BYTE bBuffer[1000];
186 ULONG cbRead;
187
188 // request 1000 bytes of data from the input stream
189 hr = cpStreamFormat->Read(bBuffer, 1000, &cbRead);
190 // if data was returned�
191 if (SUCCEEDED(hr) && cbRead > 0)
192 {
193 //then transfer/write the audio to the file-based stream
194 hr = cpStream->Write(bBuffer, cbRead, &cbWritten);
195 // Check hr
196 }
197
198 // since there is no more data being added to the input stream, if the read request returned less than expected, the end of stream was reached, so break data transfer loop
199 if (cbRead < 1000)
200 {
201 break;
202 }
203 }
204 cpStream->Close();
205 cpStream.Release();
206
207 yarp::sig::Sound s;
208 yarp::sig::file::read(s, sPath.c_str());
209 return s;
210 return true;
211}
212
213
214/************************************************************************/
215bool SpeechRecognizerModule::updateModule()
216{
217 cout<<".";
218 USES_CONVERSION;
219 CSpEvent event;
220
221 // Process all of the recognition events
222 while (event.GetFrom(m_cpRecoCtxt) == S_OK)
223 {
224 switch (event.eEventId)
225 {
226 case SPEI_SOUND_START:
227 {
228 m_bInSound = TRUE;
229 yInfo() << "Sound in...";
230 break;
231 }
232
233 case SPEI_SOUND_END:
234 if (m_bInSound)
235 {
236 m_bInSound = FALSE;
237 if (!m_bGotReco)
238 {
239 // The sound has started and ended,
240 // but the engine has not succeeded in recognizing anything
241 yWarning() << "Chunk of sound detected: Recognition is null";
242 }
243 m_bGotReco = FALSE;
244 }
245 break;
246
247 case SPEI_RECOGNITION:
248 // There may be multiple recognition results, so get all of them
249 {
250 m_bGotReco = TRUE;
251 static const WCHAR wszUnrecognized[] = L"<Unrecognized>";
252
253 CSpDynamicString dstrText;
254 if (SUCCEEDED(event.RecoResult()->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE,
255 &dstrText, NULL)))
256 {
257 SPPHRASE* pPhrase = NULL;
258 bool successGetPhrase = SUCCEEDED(event.RecoResult()->GetPhrase(&pPhrase));
259 int confidence=pPhrase->Rule.Confidence;
260
261 string fullSentence = ws2s(dstrText);
262 yInfo() <<"Recognized "<<fullSentence<<" with confidence "<<confidence ;
263
264
265 //Send over yarp
266 Bottle bOut;
267 bOut.addString(fullSentence);
268 bOut.addInt32(confidence);
269 m_portContinuousRecognition.write(bOut);
270
271 //Treat the semantic
272 if (successGetPhrase)
273 {
274 //Send sound
275 if (m_forwardSound)
276 {
277 yarp::sig::Sound& rawSnd = m_portSound.prepare();
278 rawSnd = toSound(event.RecoResult());
279 m_portSound.write();
280 }
281
282 //--------------------------------------------------- 1. 1st subBottle : raw Sentence -----------------------------------------------//
283 int wordCount = pPhrase->Rule.ulCountOfElements;
284 string rawPhrase = "";
285 for(int i=0; i< wordCount; i++){
286 rawPhrase += ws2s(pPhrase->pElements[i].pszDisplayText) + " ";
287 yDebug() << "word : " << ws2s(pPhrase->pElements[i].pszDisplayText) ;
288 }
289 yInfo() <<"Raw sentence: "<<rawPhrase ;
290 if (&pPhrase->Rule == NULL)
291 {
292 yError() <<"Cannot parse the sentence!";
293 return true;
294 }
295 //--------------------------------------------------- 2. 2nd subottle : Word/Role ---------------------------------------------------//
296 Bottle bOutGrammar;
297 bOutGrammar.addString(rawPhrase);
298 bOutGrammar.addList()=toBottle(pPhrase,&pPhrase->Rule);
299 yInfo() << "Sending semantic bottle : " << bOutGrammar.toString();
300 m_portContinuousRecognitionGrammar.write(bOutGrammar);
301 ::CoTaskMemFree(pPhrase);
302 }
303
304 if (m_useTalkBack)
305 say(fullSentence);
306 }
307 }
308 break;
309 }
310 }
311 return true;
312}
313
314/************************************************************************/
315Bottle SpeechRecognizerModule::toBottle(SPPHRASE* pPhrase, const SPPHRASERULE* pRule)
316{
317 Bottle bCurrentLevelGlobal;
318
319 const SPPHRASERULE* siblingRule = pRule;
320 while (siblingRule != NULL)
321 {
322 Bottle bCurrentSubLevel;
323 bCurrentSubLevel.addString(ws2s(siblingRule->pszName));
324
325 //we backtrack
326 if(siblingRule->pFirstChild != NULL )
327 {
328 bCurrentSubLevel.addList()=toBottle(pPhrase, siblingRule->pFirstChild);
329 }
330 else
331 {
332 string nodeString = "";
333 for(unsigned int i=0; i<siblingRule->ulCountOfElements; i++)
334 {
335 nodeString += ws2s(pPhrase->pElements[siblingRule->ulFirstElement + i].pszDisplayText);
336 if (i<siblingRule->ulCountOfElements-1)
337 nodeString += " ";
338 }
339 bCurrentSubLevel.addString(nodeString);
340 }
341 siblingRule = siblingRule->pNextSibling;
342 if (pRule->pNextSibling !=NULL)
343 bCurrentLevelGlobal.addList() = bCurrentSubLevel;
344 else
345 bCurrentLevelGlobal = bCurrentSubLevel;
346
347 }
348 return bCurrentLevelGlobal;
349}
350
351/************************************************************************/
352bool SpeechRecognizerModule::respond(const Bottle& cmd, Bottle& reply)
353{
354 reply.addString("ACK");
355 string firstVocab = cmd.get(0).asString();
356
357 if (firstVocab == "tts")
358 {
359 string sentence = cmd.get(1).asString();
360 say(sentence);
361 reply.addString("OK");
362 }
363 else if (firstVocab == "RGM" || firstVocab == "rgm" )
364 {
365 string secondVocab = cmd.get(1).asString();
366 if (secondVocab=="vocabulory")
367 handleRGMCmd(cmd.tail().tail(), reply);
368 }
369 else if (firstVocab == "recog")
370 {
371 handleRecognitionCmd(cmd.tail(), reply);
372 }
373 else if (firstVocab == "asyncrecog")
374 {
375 handleAsyncRecognitionCmd(cmd.tail(), reply);
376 }
377 else if (firstVocab == "interrupt")
378 {
379 handleInterrupt(cmd.tail(), reply);
380 }
381 else
382 reply.addString("UNKNOWN");
383
384 return true;
385}
386
387/************************************************************************/
388bool SpeechRecognizerModule::handleInterrupt(const Bottle& cmd, Bottle& reply)
389{
390 yInfo() << "Grammar interrupted";
391 interruptRecognition = true;
392 yarp::os::Time::delay(0.5);
393 interruptRecognition = true; // just in case of a previous race condition
394 reply.addString("OK");
395 return true;
396}
397
398/************************************************************************/
399bool SpeechRecognizerModule::handleRGMCmd(const Bottle& cmd, Bottle& reply)
400{
401 string firstVocab = cmd.get(0).asString();
402 if (firstVocab == "add")
403 {
404 string vocabulory = cmd.get(1).asString();
405 if (vocabulory[0] != '#')
406 {
407 //reply.addString("Vocabulories have to start with a #. #Dictation and #WildCard are reserved. Aborting.");
408 reply.addString("ERROR");
409 return true;
410 }
411 string word = cmd.get(2).asString();
412 m_vocabulories[vocabulory].push_back(word);
413 refreshFromVocabulories(m_cpGrammarFromFile);
414 reply.addString("OK");
415 return true;
416 }
417
418 if (firstVocab == "addAuto")
419 {
420 string vocabuloryType = cmd.get(1).asString();
421 yInfo() <<"Trying to enrich the "<<vocabuloryType<<" vocabulary.";
422
423 say("Let's improve my dictionary.");
424
425 //Try first with open dictation
426 int TRIALS_BEFORE_SPELLING = 2;
427 bool isFine = false;
428 int trial=0;
429 string newWord = "";
430 while(!isFine && trial<TRIALS_BEFORE_SPELLING)
431 {
432 say("Please, say the word.");
433 newWord = "";
434 while(newWord=="")
435 newWord=getFromDictaction(m_timeout);
436 say("I understood "+ newWord + ". Did you say that?");
437
438 Bottle cmdTmp, replyTmp;
439 cmdTmp.addString("grammarSimple");
440 cmdTmp.addString("Yes I did.|No I did not.|Skip");
441 bool gotAConfirmation = false;
442 while(!gotAConfirmation)
443 {
444 replyTmp.clear();
445 handleRecognitionCmd(cmdTmp,replyTmp);
446 //cout<<"DEBUG="<<replyTmp.toString()<<endl;
447 //cout<<"DEBUG FIRST ELEMENT =|"<<replyTmp.get(0).asString()<<"|"<<endl;
448 if (replyTmp.get(0).asString() == "Skip")
449 {
450 say("Fine, we give up.");
451 reply.addString("ERROR");
452 return true;
453 }
454 gotAConfirmation =
455 replyTmp.size()>0 &&
456 (replyTmp.get(0).asString() == "Yes" ||
457 replyTmp.get(0).asString() == "No");
458 }
459 //cout<<"Reply is "<<replyTmp.toString()<<endl;
460 if ( replyTmp.get(0).asString() == "Yes")
461 isFine = true;
462 else
463 trial++;
464 }
465
466 //Try then with spelling
467 int TRIALS_BEFORE_GIVING_UP = 2;
468 trial =0;
469 while(!isFine && trial<TRIALS_BEFORE_GIVING_UP)
470 {
471 say("Sorry, I cannot get it. Please, spell this word for me?");
472 string spelledWord = "";
473 while(spelledWord == "")
474 spelledWord=getFromDictaction(m_timeout,SPTOPIC_SPELLING);
475 newWord = spelledWord;
476
477 say("I understood "+ newWord + ". Is that right?");
478
479 Bottle cmdTmp, replyTmp;
480 cmdTmp.addString("grammarSimple");
481 cmdTmp.addString("Yes I did.|No I did not.|Skip");
482 bool gotAConfirmation = false;
483 while(!gotAConfirmation)
484 {
485 replyTmp.clear();
486 handleRecognitionCmd(cmdTmp,replyTmp);
487 //cout<<"DEBUG="<<replyTmp.toString()<<endl;
488 //cout<<"DEBUG FIRST ELEMENT =|"<<replyTmp.get(0).asString()<<"|"<<endl;
489 if (replyTmp.get(0).asString() == "Skip")
490 {
491 say("Fine, we give up.");
492 reply.addString("ERROR");
493 return true;
494 }
495
496 gotAConfirmation =
497 replyTmp.size()>0 &&
498 (replyTmp.get(0).asString() == "Yes" ||
499 replyTmp.get(0).asString() == "No");
500 }
501 //cout<<"Reply is "<<replyTmp.toString()<<endl;
502 if ( replyTmp.get(0).asString() == "Yes")
503 isFine = true;
504 else
505 trial++;
506 }
507 //Give up
508 if (!isFine)
509 {
510 say("Sorry, I think we should give up with this word.");
511 reply.addString("ERROR");
512 }
513 else
514 {
515 say("Perfect! I know the word " + newWord);
516 m_vocabulories[vocabuloryType].push_back(newWord);
517 refreshFromVocabulories(m_cpGrammarFromFile);
518 reply.addString(newWord);
519 }
520
521 return true;
522 }
523 reply.addString("UNKNOWN");
524 return false;
525}
526
527/************************************************************************/
528bool SpeechRecognizerModule::handleAsyncRecognitionCmd(const Bottle& cmd, Bottle& reply)
529{
530 HRESULT hr;
531 string firstVocab = cmd.get(0).asString();
532 if (firstVocab == "getGrammar")
533 {
534 reply.addString("NOT_IMPLEMENTED");
535 return true;
536 }
537
538 if (firstVocab == "clear")
539 {
540 bool everythingIsFine=true;
541 SPSTATEHANDLE rootRule;
542 everythingIsFine &= SUCCEEDED(m_cpGrammarFromFile->SetGrammarState(SPGS_DISABLED));
543 everythingIsFine &= SUCCEEDED(m_cpGrammarFromFile->GetRule(L"rootRule", NULL, SPRAF_TopLevel | SPRAF_Active, TRUE, &rootRule));
544 everythingIsFine &= SUCCEEDED(m_cpGrammarFromFile->ClearRule(rootRule));
545 everythingIsFine &= SUCCEEDED(hr = m_cpGrammarFromFile->Commit(NULL));
546 everythingIsFine &= SUCCEEDED(m_cpGrammarFromFile->SetGrammarState(SPGS_ENABLED));
547 everythingIsFine &= SUCCEEDED(m_cpGrammarFromFile->SetRuleState(NULL, NULL, SPRS_ACTIVE));
548 everythingIsFine &= SUCCEEDED(m_cpRecoCtxt->Resume(0));
549 reply.addString("Cleared");
550 return true;
551 }
552
553 if (firstVocab == "addGrammar")
554 {
555 string grammar = cmd.get(1).asString();
556 bool everythingIsFine = setGrammarCustom(m_cpGrammarFromFile,grammar,true);
557 reply.addString("Added");
558 return true;
559 }
560
561 if (firstVocab == "loadXML")
562 {
563 string xml = cmd.get(1).asString();
564 ofstream fileTmp("grammarTmp.grxml");
565 fileTmp<<xml;
566 fileTmp.close();
567
568 std::wstring tmp = s2ws("grammarTmp.grxml");
569 LPCWSTR cwgrammarfile = tmp.c_str();
570
571 bool everythingIsFine =true;
572 //everythingIsFine &= SUCCEEDED( m_cpRecoCtxt->CreateGrammar( 1, &m_cpGrammarFromFile ));
573 everythingIsFine &= SUCCEEDED( m_cpGrammarFromFile->SetGrammarState(SPGS_DISABLED));
574 everythingIsFine &= SUCCEEDED( m_cpGrammarFromFile->LoadCmdFromFile(cwgrammarfile, SPLO_DYNAMIC));
575
576 everythingIsFine &= SUCCEEDED( m_cpGrammarFromFile->SetGrammarState(SPGS_ENABLED));
577 everythingIsFine &= SUCCEEDED(m_cpGrammarFromFile->SetRuleState(NULL, NULL, SPRS_ACTIVE));
578 everythingIsFine &= SUCCEEDED(m_cpRecoCtxt->Resume(0));
579
580 refreshFromVocabulories(m_cpGrammarFromFile);
581 reply.addString("Loaded");
582 return true;
583 }
584
585 return false;
586}
587
588/************************************************************************/
589bool SpeechRecognizerModule::refreshFromVocabulories(CComPtr<ISpRecoGrammar> grammarToModify)
590{
591 //return true;
592 bool everythingIsFine = true;
593
594 everythingIsFine &= SUCCEEDED(grammarToModify->SetGrammarState(SPGS_DISABLED));
595
596 //Build a rule for each vocabulory
597 for(map<string, list<string> >::iterator vIt = m_vocabulories.begin(); vIt != m_vocabulories.end(); vIt++)
598 {
599 //Get the rule name from the key in the dictionary (i.e Agent, Action, etc...)
600 string removedSharp =vIt->first;
601 removedSharp.erase(0,1);
602 std::wstring tmp = s2ws(removedSharp);
603 LPCWSTR cwRuleName = tmp.c_str();
604
605 SPSTATEHANDLE hinit,hstate;
606 HRESULT hr;
607 //Get the rule or create it
608 everythingIsFine &= SUCCEEDED(hr = grammarToModify->GetRule(cwRuleName, NULL, SPRAF_Dynamic, false, &hinit));
609 everythingIsFine &= SUCCEEDED(hr = grammarToModify->ClearRule(hinit));
610 for(list<string>::iterator wordIt = vIt->second.begin() ; wordIt != vIt->second.end(); wordIt++)
611 {
612 std::wstring wordTmp = s2ws(*wordIt);
613 LPCWSTR cwWord = wordTmp.c_str();
614 everythingIsFine &= SUCCEEDED( grammarToModify->AddWordTransition(hinit, NULL, cwWord, NULL, SPWT_LEXICAL, 1, NULL) );
615 }
616 }
617
618 everythingIsFine &= SUCCEEDED(grammarToModify->Commit(NULL));
619 everythingIsFine &= SUCCEEDED(grammarToModify->SetGrammarState(SPGS_ENABLED));
620 everythingIsFine &= SUCCEEDED(grammarToModify->SetRuleState(NULL, NULL, SPRS_ACTIVE));
621 yInfo() << "Grammar is paused, DO NOT SPEAK! (if next message is coming after too long, check your microphone level and lower it!" ;
622 everythingIsFine &= SUCCEEDED(m_cpRecoCtxt->Resume(NULL));
623 yInfo() << "Grammar is resumed : everything is fine = " << everythingIsFine;
624
625 return everythingIsFine;
626}
627
628/************************************************************************/
629string SpeechRecognizerModule::getFromDictaction(int timeout, LPCWSTR options )
630{
631 bool everythingIsFine = TRUE;
632 everythingIsFine &= SUCCEEDED(m_cpGrammarDictation->UnloadDictation());
633 everythingIsFine &= SUCCEEDED(m_cpGrammarDictation->LoadDictation(options, SPLO_STATIC));
634 everythingIsFine &= SUCCEEDED(m_cpGrammarDictation->SetDictationState( SPRS_ACTIVE ));
635 yInfo() <<"Dictation is on..." ;
636 Bottle botTmp;
637 if (!USE_LEGACY)
638 {
639 botTmp = waitNextRecognition(m_timeout);
640 }
641 else
642 {
643 list< pair<string, double> > results = waitNextRecognitionLEGACY(m_timeout);
644 for(list< pair<string, double> >::iterator it = results.begin(); it != results.end(); it++)
645 {
646 botTmp.addString(it->first);
647 //botTmp.addFloat64(it->second);
648 }
649 }
650 yInfo() <<"Dictation is off...";
651 yInfo() <<"Got : "<<botTmp.toString();
652 //Turn off dictation and go back to the file grammar
653 everythingIsFine &= SUCCEEDED(m_cpGrammarDictation->SetDictationState( SPRS_INACTIVE ));
654 everythingIsFine &=SUCCEEDED(m_cpGrammarFromFile->SetGrammarState(SPGS_ENABLED));
655 return botTmp.toString();
656}
657
658/************************************************************************/
659bool SpeechRecognizerModule::handleRecognitionCmd(const Bottle& cmd, Bottle& reply)
660{
661 string firstVocab = cmd.get(0).asString();
662
663 if (firstVocab == "timeout")
664 {
665 m_timeout = cmd.get(1).asInt32();
666 //reply.addInt32(true);
667 return false;
668 }
669
670 else if (firstVocab == "dictation")
671 {
672 bool everythingIsFine = TRUE;
673 everythingIsFine &= SUCCEEDED(m_cpGrammarDictation->SetDictationState( SPRS_ACTIVE ));
674 yInfo() <<"Dictation is on..." ;
675
676 if (!USE_LEGACY)
677 {
678 reply.addList() = waitNextRecognition(m_timeout);
679 }
680 else
681 {
682 list< pair<string, double> > results = waitNextRecognitionLEGACY(m_timeout);
683 if (results.size()>0)
684 for(list< pair<string, double> >::iterator it = results.begin(); it != results.end(); it++)
685 {
686 reply.addString(it->first);
687 reply.addFloat64(it->second);
688 }
689 else
690 reply.addString("-1");
691 }
692 yInfo() <<"Dictation is off...";
693
694 //Turn off dictation and go back to the file grammar
695 everythingIsFine &= SUCCEEDED(m_cpGrammarDictation->SetDictationState( SPRS_INACTIVE ));
696 everythingIsFine &=SUCCEEDED(m_cpGrammarFromFile->SetGrammarState(SPGS_ENABLED));
697 //reply.addInt32(true);
698 return true;
699 }
700 // If we are not in dictation then we set and switch to the runtimeGrammar
701 else if (firstVocab == "grammarXML")
702 {
703 string xml = cmd.get(1).asString();
704 ofstream fileTmp("grammarTmp.grxml");
705 fileTmp<<xml;
706 fileTmp.close();
707
708 std::wstring tmp = s2ws("grammarTmp.grxml");
709 LPCWSTR cwgrammarfile = tmp.c_str();
710
711 bool everythingIsFine =true;
712 everythingIsFine &= SUCCEEDED( m_cpGrammarRuntime->SetGrammarState(SPGS_DISABLED));
713 everythingIsFine &= SUCCEEDED( m_cpGrammarRuntime->LoadCmdFromFile(cwgrammarfile, SPLO_DYNAMIC));
714 everythingIsFine &= SUCCEEDED(m_cpGrammarRuntime->SetRuleState(NULL, NULL, SPRS_ACTIVE));
715 everythingIsFine &= SUCCEEDED(m_cpRecoCtxt->Resume(0));
716
717 refreshFromVocabulories(m_cpGrammarRuntime);
718
719 //reply.addInt32(everythingIsFine);
720 }
721
722 else if (firstVocab == "choices")
723 {
724 string choices ="";
725 for (int wI = 1; wI < cmd.size(); wI++)
726 {
727 choices+=cmd.get(wI).asString();
728 if (wI<cmd.size()-1)
729 choices+="|";
730 }
731 setGrammarCustom(m_cpGrammarRuntime,choices,false);
732 }
733 else if (firstVocab == "grammarSimple")
734 {
735 string RADStyle = cmd.get(1).asString();
736 yInfo() <<"Setting runtime grammar to : "<<RADStyle ;
737 setGrammarCustom(m_cpGrammarRuntime,RADStyle,false);
738 }
739 else
740 {
741 reply.addString("UNKNOWN");
742 return false;
743 }
744
745 //Disable the from file grammar
746 SUCCEEDED(m_cpGrammarFromFile->SetGrammarState(SPGS_DISABLED));
747 SUCCEEDED(m_cpGrammarRuntime->SetGrammarState(SPGS_ENABLED));
748
749 //Force blocking recognition
750 if (!USE_LEGACY)
751 {
752 reply.addList() = waitNextRecognition(m_timeout);
753 }
754 else
755 {
756 list< pair<string, double> > results = waitNextRecognitionLEGACY(m_timeout);
757 if (results.size()>0)
758 for(list< pair<string, double> >::iterator it = results.begin(); it != results.end(); it++)
759 {
760 reply.addString(it->first);
761 reply.addFloat64(it->second);
762 }
763 else
764 reply.addString("-1");
765 }
766 //Disable the runtime grammar
767 SUCCEEDED(m_cpGrammarRuntime->SetGrammarState(SPGS_DISABLED));
768 SUCCEEDED(m_cpGrammarFromFile->SetGrammarState(SPGS_ENABLED));
769 return true;
770}
771/************************************************************************/
772Bottle SpeechRecognizerModule::waitNextRecognition(int timeout)
773{
774 yInfo() <<"Recognition: blocking mode on" ;
775 Bottle bOutGrammar;
776
777 bool gotSomething = false;
778 double endTime = Time::now() + timeout/1000.0;
779 interruptRecognition = false;
780
781 cout << endl ;
782 yInfo() << "=========== GO Waiting for recog! ===========" ;
783
784 while(Time::now()<endTime && !gotSomething && !interruptRecognition)
785 {
786 //std::cout<<".";
787 const float ConfidenceThreshold = 0.3f;
788 SPEVENT curEvent;
789 ULONG fetched = 0;
790 HRESULT hr = S_OK;
791
792 m_cpRecoCtxt->GetEvents(1, &curEvent, &fetched);
793
794 while (fetched > 0)
795 {
796 yInfo() << " received something in waitNextRecognition" ;
797 gotSomething = true;
798 ISpRecoResult* result = reinterpret_cast<ISpRecoResult*>(curEvent.lParam);
799 CSpDynamicString dstrText;
800 result->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL);
801 string fullSentence = ws2s(dstrText);
802 yInfo() <<fullSentence ;
803 if (m_useTalkBack)
804 say(fullSentence);
805 bOutGrammar.addString(fullSentence);
806
807 SPPHRASE* pPhrase = NULL;
808 result->GetPhrase(&pPhrase);
809 bOutGrammar.addList() = toBottle(pPhrase,&pPhrase->Rule);
810 yInfo() <<"Sending semantic bottle : "<<bOutGrammar.toString() ;
811 m_cpRecoCtxt->GetEvents(1, &curEvent, &fetched);
812
813 if (m_forwardSound)
814 {
815 yarp::sig::Sound& rawSnd = m_portSound.prepare();
816 rawSnd = toSound(result);
817 m_portSound.write();
818 }
819
820 }
821 }
822
823 if(interruptRecognition) {
824 yDebug() << "interrupted speech recognizer!";
825 }
826 yInfo() <<"Recognition: blocking mode off";
827 return bOutGrammar;
828}
829
830/************************************************************************/
831list< pair<string, double> > SpeechRecognizerModule::waitNextRecognitionLEGACY(int timeout)
832{
833 yInfo() <<"Recognition LEGACY: blocking mode on" ;
834 list< pair<string, double> > recognitionResults;
835
836 bool gotSomething = false;
837 double endTime = Time::now() + timeout/1000.0;
838 while(Time::now()<endTime && !gotSomething && !interruptRecognition)
839 {
840 //std::cout<<".";
841 const float ConfidenceThreshold = 0.3f;
842 SPEVENT curEvent;
843 ULONG fetched = 0;
844 HRESULT hr = S_OK;
845
846 m_cpRecoCtxt->GetEvents(1, &curEvent, &fetched);
847
848 while (fetched > 0)
849 {
850 gotSomething = true;
851 ISpRecoResult* result = reinterpret_cast<ISpRecoResult*>(curEvent.lParam);
852
853 //Convert the catched sentence to strings.
854 CSpDynamicString dstrText;
855 result->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL);
856 string fullSentence = ws2s(dstrText);
857 yInfo() <<fullSentence ;
858
859 if (m_useTalkBack)
860 say(fullSentence);
861 vector<string> words = split(fullSentence,' ');
862 for(unsigned int w=0;w<words.size();w++)
863 {
864 //Todo extract the confidence value somehow...
865 recognitionResults.push_back(make_pair(words[w], -1.0));
866 }
867 m_cpRecoCtxt->GetEvents(1, &curEvent, &fetched);
868 }
869 }
870 interruptRecognition = false;
871 yInfo() <<"Recognition: blocking mode off" ;
872 return recognitionResults;
873}
874
875/************************************************************************/
876void SpeechRecognizerModule::say(string s, bool wait)
877{
878 yInfo() <<"TTS: "<<s ;
879 Bottle b;
880 b.addString(s);
881 m_port2iSpeak.write(b);
882 if(wait)
883 {
884 yarp::os::Bottle cmd,reply;
885 cmd.addVocab32(VOCAB('s','t','a','t'));
886 std::string status = "speaking";
887 bool speechStarted = false;
888 while(wait&&(!speechStarted ||status=="speaking"))
889 {
890 m_port2iSpeakRpc.write(cmd,reply);
891 status = reply.get(0).asString();
892 if (!speechStarted && status != "quiet")
893 {
894 speechStarted = true;
895 }
896 yarp::os::Time::delay(0.2);
897 }
898 }
899}
900
901/************************************************************************/
902bool SpeechRecognizerModule::setGrammarCustom(CComPtr<ISpRecoGrammar> grammarToModify, string grammar, bool append)
903{
904 //Clear the existing runtime grammar
905 SPSTATEHANDLE runtimeRootRule;
906 bool everythingIsFine = true;
907 everythingIsFine &= SUCCEEDED(grammarToModify->SetGrammarState(SPGS_DISABLED));
908 everythingIsFine &= SUCCEEDED(grammarToModify->GetRule(L"rootRule", NULL, SPRAF_TopLevel | SPRAF_Active, TRUE, &runtimeRootRule));
909 if(!append)
910 everythingIsFine &= SUCCEEDED(grammarToModify->ClearRule(runtimeRootRule));
911
912 //Build a rule for each vocabulory
913 map<string, SPSTATEHANDLE> vocabRules;
914 for(map<string, list<string> >::iterator vIt = m_vocabulories.begin(); vIt != m_vocabulories.end(); vIt++)
915 {
916 //Get the rule name from the key in the dictionary (i.e Agent, Action, etc...)
917 std::wstring tmp = s2ws(vIt->first);
918 LPCWSTR cwRuleName = tmp.c_str();
919
920 //Get the rule or create it
921 everythingIsFine &= SUCCEEDED(grammarToModify->GetRule(cwRuleName, NULL, SPRAF_Dynamic, TRUE, &vocabRules[vIt->first]));
922 everythingIsFine &= SUCCEEDED(grammarToModify->ClearRule(vocabRules[vIt->first]));
923 for(list<string>::iterator wordIt = vIt->second.begin() ; wordIt != vIt->second.end(); wordIt++)
924 {
925 std::wstring wordTmp = s2ws(*wordIt);
926 LPCWSTR cwWord = wordTmp.c_str();
927 everythingIsFine &= SUCCEEDED( grammarToModify->AddWordTransition(vocabRules[vIt->first], NULL, cwWord, NULL, SPWT_LEXICAL, 1, NULL) );
928 }
929 }
930
931 //Go through the given string and build the according grammar
932 //Split the choices
933 vector<string> sentences = split(grammar,'|');
934 for(vector<string>::iterator it = sentences.begin() ; it != sentences.end() ; it++)
935 {
936 //Split the words
937 vector<string> words = split(*it,' ');
938 SPSTATEHANDLE beforeWordHandle = runtimeRootRule;
939 SPSTATEHANDLE afterWordHandle;
940 for(vector<string>::iterator itWord = words.begin() ; itWord != words.end() ; itWord++)
941 {
942 if((*itWord)=="")
943 continue;
944
945 everythingIsFine &= SUCCEEDED(grammarToModify->CreateNewState(beforeWordHandle, &afterWordHandle));
946
947 //Check if the current word is the name of a vocabulory
948 if ( (*itWord)[0] == '#' && m_vocabulories.find(*itWord) != m_vocabulories.end())
949 {
950 everythingIsFine &= SUCCEEDED(grammarToModify->AddRuleTransition(beforeWordHandle, afterWordHandle, vocabRules[*itWord], 1, NULL));
951 }
952 else
953 {
954 std::wstring wordTmp = s2ws(*itWord);
955 LPCWSTR cwWord = wordTmp.c_str();
956 everythingIsFine &= SUCCEEDED( grammarToModify->AddWordTransition(beforeWordHandle, afterWordHandle, cwWord, NULL, SPWT_LEXICAL, 1, NULL) );
957 }
958 beforeWordHandle = afterWordHandle;
959 }
960 everythingIsFine &= SUCCEEDED( grammarToModify->AddWordTransition(beforeWordHandle, NULL, NULL, NULL, SPWT_LEXICAL, 1, NULL) );
961 }
962 everythingIsFine &= SUCCEEDED(grammarToModify->Commit(NULL));
963 everythingIsFine &= SUCCEEDED(grammarToModify->SetGrammarState(SPGS_ENABLED));
964 everythingIsFine &= SUCCEEDED(grammarToModify->SetRuleState(NULL, NULL, SPRS_ACTIVE));
965 everythingIsFine &= SUCCEEDED(m_cpRecoCtxt->Resume(0));
966
967 return everythingIsFine;
968}
969
970
971
972
973/************************************************************************/
974bool SpeechRecognizerModule::loadGrammarFromRf(ResourceFinder &RF)
975{
976 Bottle &bAgent = RF.findGroup("agent");
977 Bottle &bAction = RF.findGroup("action");
978 Bottle &bObject = RF.findGroup("object");
979
980 Bottle bMessenger, bReply;
981
982 yInfo() << "Agents are: " ;
983 for (int iBottle = 1 ; iBottle < bAgent.size() ; iBottle++)
984 {
985 yInfo() << "\t" << bAgent.get(iBottle).toString();
986 bMessenger.clear();
987 bMessenger.addString("add");
988 bMessenger.addString("#agent");
989 bMessenger.addString(bAgent.get(iBottle).toString());
990
991 handleRGMCmd(bMessenger, bReply);
992
993 yInfo() << "\t\t" << bReply.toString() ;
994 }
995
996 yInfo() << "\n" << "Actions are: " ;
997 for (int iBottle = 1 ; iBottle < bAction.size() ; iBottle++)
998 {
999 yInfo() << "\t" << bAction.get(iBottle).toString();
1000 bMessenger.clear();
1001 bMessenger.addString("add");
1002 bMessenger.addString("#action");
1003 bMessenger.addString(bAction.get(iBottle).toString());
1004
1005 handleRGMCmd(bMessenger, bReply);
1006
1007 yInfo() << "\t\t" << bReply.toString() ;
1008 }
1009
1010 yInfo() << "\n" << "Objects are: " ;
1011 for (int iBottle = 1 ; iBottle < bObject.size() ; iBottle++)
1012 {
1013 yInfo() << "\t" << bObject.get(iBottle).toString();
1014 bMessenger.clear();
1015 bMessenger.addString("add");
1016 bMessenger.addString("#object");
1017 bMessenger.addString(bObject.get(iBottle).toString());
1018
1019 handleRGMCmd(bMessenger, bReply);
1020
1021 yInfo() << "\t\t" << bReply.toString() ;
1022 }
1023
1024 Bottle bGrammarMain, bGrammarDef;
1025
1026
1027 bGrammarMain.addString("addGrammar");
1028 bGrammarMain.addString("#agent #action #object");
1029
1030 yInfo() << "\n" << bGrammarMain.toString() ;
1031
1032// handleAsyncRecognitionCmd(bGrammarMain, bReply);
1033
1034 yInfo() << "\n" << bReply.toString() ;
1035
1036 return true;
1037}
1038