speech
All Data Structures Functions Modules Pages
speech.cpp
1 /*
2  * Copyright (C) 2014 iCub Facility
3  * Authors: Ali Paikan
4  * CopyPolicy: Released under the terms of the LGPLv2.1 or later, see LGPL.TXT
5  */
6 
7 
8 #include <cstdio>
9 #include <cstdlib>
10 #include <cstring>
11 #include <algorithm>
12 
13 #include <yarp/os/Thread.h>
14 #include <yarp/os/Time.h>
15 #include <yarp/os/Stamp.h>
16 #include <yarp/os/LogStream.h>
17 
18 #include <speech.h>
19 
20 using namespace yarp::os;
21 using namespace yarp::dev;
22 
/* Adaptation layer defines. */
#define PICO_MEM_SIZE 2500000
#define DummyLen 100000000

/* Size in bytes of the intermediate sample buffer handed to pico_getData(). */
#define MAX_OUTBUF_SIZE 128

/* String constants. */
const char * PICO_VOICE_NAME = "PicoVoice";

// Supported voices. Pico does not separately specify the voice and the locale,
// so every table below is indexed by the same voice index.
const char * picoSupportedLangIso3[] = {
    "eng", "eng", "deu", "spa", "fra", "ita"
};
const char * picoSupportedCountryIso3[] = {
    "USA", "GBR", "DEU", "ESP", "FRA", "ITA"
};
const char * picoSupportedLang[] = {
    "en-US", "en-GB", "de-DE", "es-ES", "fr-FR", "it-IT"
};
const char * picoInternalLang[] = {
    "en-US", "en-GB", "de-DE", "es-ES", "fr-FR", "it-IT"
};
// Text-analysis lingware file for each voice.
const char * picoInternalTaLingware[] = {
    "en-US_ta.bin",
    "en-GB_ta.bin",
    "de-DE_ta.bin",
    "es-ES_ta.bin",
    "fr-FR_ta.bin",
    "it-IT_ta.bin"
};
// Signal-generation lingware file for each voice.
const char * picoInternalSgLingware[] = {
    "en-US_lh0_sg.bin",
    "en-GB_kh0_sg.bin",
    "de-DE_gl0_sg.bin",
    "es-ES_zl0_sg.bin",
    "fr-FR_nk0_sg.bin",
    "it-IT_cm0_sg.bin"
};
// Optional utpp lingware file for each voice.
const char * picoInternalUtppLingware[] = {
    "en-US_utpp.bin",
    "en-GB_utpp.bin",
    "de-DE_utpp.bin",
    "es-ES_utpp.bin",
    "fr-FR_utpp.bin",
    "it-IT_utpp.bin"
};
// Number of entries in each of the tables above.
const int picoNumSupportedVocs = 6;
38 
39 
40 /****************************************************************
41  * @brief The yarp::dev::Speech class
42  */
43 Speech::Speech() {
44  pcmDevice.clear();
45  language = "en-US";
46  supportedLangs.push_back("en-US");
47  supportedLangs.push_back("en-GB");
48  supportedLangs.push_back("es-ES");
49  supportedLangs.push_back("fr-FR");
50  supportedLangs.push_back("it-IT");
51  supportedLangs.push_back("de-DE");
52  // picotts
53  picoMemArea = NULL;
54  picoSystem = NULL;
55  picoTaResource = NULL;
56  picoSgResource = NULL;
57  picoUtppResource = NULL;
58  picoEngine = NULL;
59  picoTaFileName = NULL;
60  picoSgFileName = NULL;
61  picoUtppFileName = NULL;
62  picoTaResourceName = NULL;
63  picoSgResourceName = NULL;
64  picoUtppResourceName = NULL;
65  picoSynthAbort = 0;
66 }
67 
68 Speech::~Speech() {
69  close();
70 }
71 
72 
73 bool Speech::open(yarp::os::Searchable &config)
74 {
75  Speech::config.fromString(config.toString());
76 
77  if(config.check("pcm-device"))
78  pcmDevice = config.find("pcm-device").asString();
79  if(config.check("default-language"))
80  if(!setLanguage(config.find("default-language").asString())) {
81  yError()<<"Cannot set the default language to"<<config.find("default-language").asString();
82  return false;
83  }
84 
85  setPitch(config.check("pitch",Value(90)).asInt32());
86  setSpeed(config.check("speed",Value(105)).asInt32());
87 
88  lingwareRF.setDefaultContext(config.check("lingware-context",Value("speech")).asString());
89  lingwareRF.configure(0,NULL);
90 
91  this->yarp().attachAsServer(rpcPort);
92 
93  std::string robot=config.check("robot",Value("icub")).asString();
94  std::string portName=std::string("/"+robot+"/speech:rpc");
95  if(!rpcPort.open(portName)) {
96  yError()<<"Cannot open port "<<portName;
97  return false;
98  }
99 
100  //return PeriodicThread::start();
101  return true;
102 }
103 
104 bool Speech::close()
105 {
106  yInfo()<<"closing Speech!";
107  Thread::stop();
108  return true;
109 }
110 
111 bool Speech::threadInit() {
112  return true;
113 }
114 
115 void Speech::threadRelease() {
116  rpcPort.close();
117 }
118 
119 void Speech::run() {
120 }
121 
122 
123 bool Speech::playWav(const std::string& filename) {
124  std::string cmd;
125 #if WIN32
126  cmd = "powershell -c (New-Object Media.SoundPlayer ";
127  cmd += filename;
128  cmd += ").PlaySync()";
129 #else
130  // aplay --device="plughw:1,0" speech.wav
131  cmd = "aplay ";
132  if(pcmDevice.size())
133  cmd += "--device=\""+pcmDevice+"\" ";
134  cmd += filename;
135 #endif
136  yInfo()<<cmd;
137  int ret = system(cmd.c_str());
138  if(ret != 0) {
139  yWarning()<<"Cannot play wave file"<<filename;
140  return false;
141  }
142  return true;
143 }
144 
145 bool Speech::setLanguage(const std::string& language) {
146  if(std::find(supportedLangs.begin(),
147  supportedLangs.end(),
148  language) == supportedLangs.end()) {
149  return false;
150  }
151 
152  Speech::language = language;
153  return true;
154 }
155 
156 bool Speech::setSpeed(const int16_t speed) {
157  Speech::speed = speed;
158  return true;
159 }
160 
161 bool Speech::setPitch(const int16_t pitch){
162  Speech::pitch = pitch;
163  return true;
164 }
165 
166 std::vector<std::string> Speech::getSupportedLang() {
167  return supportedLangs;
168 }
169 
170 int16_t Speech::getSpeed() {
171  return speed;
172 }
173 
174 int16_t Speech::getPitch(){
175  return pitch;
176 }
177 
178 
179 bool Speech::say(const std::string& text) {
180  std::string waveFile = renderSpeech(text);
181  if(waveFile.size() == 0)
182  return false;
183  return playWav(waveFile);
184 }
185 
186 bool Speech::play() {
187  return false;
188 }
189 
190 bool Speech::pause() {
191  return false;
192 }
193 
194 bool Speech::stop() {
195  return false;
196 }
197 
198 const std::string Speech::renderSpeech(const std::string &text) {
199  //<pitch level='70'><speed level='100'></speed></pitch>"
200  char* cmdText = (char*) std::malloc(text.size()+256);
201  std::string filename;
202 #if WIN32
203  if (const char* env_tmp = std::getenv("TMP"))
204  {
205  filename = env_tmp;
206  filename += "\\speech.wav";
207  }
208  else
209  filename = "speech.wav";
210  _snprintf
211 #else
212  filename = "/tmp/speech.wav";
213  snprintf
214 #endif
215  (cmdText,text.size()+255,
216  "<pitch level='%d'><speed level='%d'> %s </speed></pitch>",
217  pitch, speed, text.c_str());
218  /*
219  //pico2wave -len-US -w out.wav "hello!"
220  std::string cmd = "pico2wave -l" + language + " -w " + filename;
221  cmd = cmd + " \"" + text + "\"";
222  int ret = system(cmd.c_str());
223  if(ret != 0) {
224  yWarning()<<"Cannot render the speech!";
225  filename.clear();
226  }
227 */
228  const char * lang = language.c_str();
229  int langIndex = -1, langIndexTmp = -1;
230  size_t bufferSize = 256;
231 
232 
233  /* option: --lang */
234  for(langIndexTmp =0; langIndexTmp<picoNumSupportedVocs; langIndexTmp++) {
235  if(!std::strcmp(picoSupportedLang[langIndexTmp], lang)) {
236  langIndex = langIndexTmp;
237  break;
238  }
239  }
240  yAssert(langIndex != -1);
241 
242  int ret, getstatus;
243  pico_Char * inp = NULL;
244  pico_Char * local_text = NULL;
245  short outbuf[MAX_OUTBUF_SIZE/2];
246  pico_Int16 bytes_sent, bytes_recv, text_remaining, out_data_type;
247  pico_Retstring outMessage;
248 
249  picoSynthAbort = 0;
250 
251  picoMemArea = std::malloc( PICO_MEM_SIZE );
252  if((ret = pico_initialize( picoMemArea, PICO_MEM_SIZE, &picoSystem ))) {
253  pico_getSystemStatusMessage(picoSystem, ret, outMessage);
254  std::fprintf(stderr, "Cannot initialize pico (%i): %s\n", ret, outMessage);
255  releasePico();
256  return ("");
257  }
258 
259  /* Load the text analysis Lingware resource file. */
260  picoTaFileName = (pico_Char *) std::malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE );
261  std::strcpy((char *) picoTaFileName, lingwareRF.findFileByName(picoInternalTaLingware[langIndex]).c_str());
262  if((ret = pico_loadResource( picoSystem, picoTaFileName, &picoTaResource ))) {
263  pico_getSystemStatusMessage(picoSystem, ret, outMessage);
264  std::fprintf(stderr, "Cannot load text analysis resource file (%i): %s\n", ret, outMessage);
265  releasePico();
266  return ("");
267  }
268 
269  /* Load the signal generation Lingware resource file. */
270  picoSgFileName = (pico_Char *) std::malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE );
271  std::strcpy((char *) picoSgFileName, lingwareRF.findFileByName(picoInternalSgLingware[langIndex]).c_str());
272  if((ret = pico_loadResource( picoSystem, picoSgFileName, &picoSgResource ))) {
273  pico_getSystemStatusMessage(picoSystem, ret, outMessage);
274  std::fprintf(stderr, "Cannot load signal generation Lingware resource file (%i): %s\n", ret, outMessage);
275  releasePico();
276  return ("");
277  }
278 
279  /* Load the utpp Lingware resource file if exists - NOTE: this file is optional
280  and is currently not used. Loading is only attempted for future compatibility.
281  If this file is not present the loading will still succeed. //
282  picoUtppFileName = (pico_Char *) std::malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE );
283  std::strcpy((char *) picoUtppFileName, PICO_LINGWARE_PATH);
284  std::strcat((char *) picoUtppFileName, (const char *) picoInternalUtppLingware[langIndex]);
285  ret = pico_loadResource( picoSystem, picoUtppFileName, &picoUtppResource );
286  pico_getSystemStatusMessage(picoSystem, ret, outMessage);
287  printf("pico_loadResource: %i: %s\n", ret, outMessage);
288  */
289 
290  /* Get the text analysis resource name. */
291  picoTaResourceName = (pico_Char *) std::malloc( PICO_MAX_RESOURCE_NAME_SIZE );
292  if((ret = pico_getResourceName( picoSystem, picoTaResource, (char *) picoTaResourceName ))) {
293  pico_getSystemStatusMessage(picoSystem, ret, outMessage);
294  std::fprintf(stderr, "Cannot get the text analysis resource name (%i): %s\n", ret, outMessage);
295  releasePico();
296  return ("");
297  }
298 
299  /* Get the signal generation resource name. */
300  picoSgResourceName = (pico_Char *) std::malloc( PICO_MAX_RESOURCE_NAME_SIZE );
301  if((ret = pico_getResourceName( picoSystem, picoSgResource, (char *) picoSgResourceName ))) {
302  pico_getSystemStatusMessage(picoSystem, ret, outMessage);
303  std::fprintf(stderr, "Cannot get the signal generation resource name (%i): %s\n", ret, outMessage);
304  releasePico();
305  return ("");
306  }
307 
308 
309  /* Create a voice definition. */
310  if((ret = pico_createVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME ))) {
311  pico_getSystemStatusMessage(picoSystem, ret, outMessage);
312  std::fprintf(stderr, "Cannot create voice definition (%i): %s\n", ret, outMessage);
313  releasePico();
314  return ("");
315  }
316 
317  /* Add the text analysis resource to the voice. */
318  if((ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoTaResourceName ))) {
319  pico_getSystemStatusMessage(picoSystem, ret, outMessage);
320  std::fprintf(stderr, "Cannot add the text analysis resource to the voice (%i): %s\n", ret, outMessage);
321  releasePico();
322  return ("");
323  }
324 
325  /* Add the signal generation resource to the voice. */
326  if((ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoSgResourceName ))) {
327  pico_getSystemStatusMessage(picoSystem, ret, outMessage);
328  std::fprintf(stderr, "Cannot add the signal generation resource to the voice (%i): %s\n", ret, outMessage);
329  releasePico();
330  return ("");
331  }
332 
333  /* Create a new Pico engine. */
334  if((ret = pico_newEngine( picoSystem, (const pico_Char *) PICO_VOICE_NAME, &picoEngine ))) {
335  pico_getSystemStatusMessage(picoSystem, ret, outMessage);
336  std::fprintf(stderr, "Cannot create a new pico engine (%i): %s\n", ret, outMessage);
337  releasePico();
338  return ("");
339  }
340 
341  local_text = (pico_Char *) cmdText;
342  text_remaining = std::strlen((const char *) local_text) + 1;
343 
344  inp = (pico_Char *) local_text;
345 
346  size_t bufused = 0;
347 
348  picoos_Common common = (picoos_Common) pico_sysGetCommon(picoSystem);
349 
350  picoos_SDFile sdOutFile = NULL;
351 
352  picoos_bool done = TRUE;
353  if(TRUE != (done = picoos_sdfOpenOut(common, &sdOutFile,
354  (picoos_char *) filename.c_str(), SAMPLE_FREQ_16KHZ, PICOOS_ENC_LIN)))
355  {
356  std::fprintf(stderr, "Cannot open output wave file\n");
357  ret = 1;
358  releasePico();
359  return ("");
360  }
361 
362  int8_t* buffer = (int8_t*) std::malloc( bufferSize );
363  /* synthesis loop */
364  while (text_remaining) {
365  /* Feed the text into the engine. */
366  if((ret = pico_putTextUtf8( picoEngine, inp, text_remaining, &bytes_sent ))) {
367  pico_getSystemStatusMessage(picoSystem, ret, outMessage);
368  std::fprintf(stderr, "Cannot put Text (%i): %s\n", ret, outMessage);
369  releasePico();
370  return ("");
371  }
372 
373  text_remaining -= bytes_sent;
374  inp += bytes_sent;
375  do {
376  if (picoSynthAbort) {
377  releasePico();
378  return ("");
379  }
380  /* Retrieve the samples and add them to the buffer. */
381  getstatus = pico_getData( picoEngine, (void *) outbuf,
382  MAX_OUTBUF_SIZE, &bytes_recv, &out_data_type );
383  if((getstatus !=PICO_STEP_BUSY) && (getstatus !=PICO_STEP_IDLE)){
384  pico_getSystemStatusMessage(picoSystem, getstatus, outMessage);
385  std::fprintf(stderr, "Cannot get Data (%i): %s\n", getstatus, outMessage);
386  releasePico();
387  return ("");
388  }
389  if (bytes_recv) {
390  if ((bufused + bytes_recv) <= bufferSize) {
391  std::memcpy(buffer+bufused, (int8_t *) outbuf, bytes_recv);
392  bufused += bytes_recv;
393  } else {
394  done = picoos_sdfPutSamples(
395  sdOutFile,
396  bufused / 2,
397  (picoos_int16*) (buffer));
398  bufused = 0;
399  std::memcpy(buffer, (int8_t *) outbuf, bytes_recv);
400  bufused += bytes_recv;
401  }
402  }
403  } while (PICO_STEP_BUSY == getstatus);
404  /* This chunk of synthesis is finished; pass the remaining samples. */
405  if (!picoSynthAbort) {
406  done = picoos_sdfPutSamples(
407  sdOutFile,
408  bufused / 2,
409  (picoos_int16*) (buffer));
410  }
411  picoSynthAbort = 0;
412  }
413 
414  if(TRUE != (done = picoos_sdfCloseOut(common, &sdOutFile))) {
415  std::fprintf(stderr, "Cannot close output wave file\n");
416  ret = 1;
417  std::free(buffer);
418  releasePico();
419  return ("");
420  }
421 
422  std::free(buffer);
423  releasePico();
424  return filename;
425 }
426 
427 void Speech::releasePico() {
428 
429  if (picoEngine) {
430  pico_disposeEngine( picoSystem, &picoEngine );
431  pico_releaseVoiceDefinition( picoSystem, (pico_Char *) PICO_VOICE_NAME );
432  picoEngine = NULL;
433  }
434 
435  if (picoUtppResource) {
436  pico_unloadResource( picoSystem, &picoUtppResource );
437  picoUtppResource = NULL;
438  }
439 
440  if (picoSgResource) {
441  pico_unloadResource( picoSystem, &picoSgResource );
442  picoSgResource = NULL;
443  }
444 
445  if (picoTaResource) {
446  pico_unloadResource( picoSystem, &picoTaResource );
447  picoTaResource = NULL;
448  }
449 
450  if (picoSystem) {
451  pico_terminate(&picoSystem);
452  picoSystem = NULL;
453  }
454  if(picoMemArea) {
455  std::free(picoMemArea);
456  picoMemArea = NULL;
457  }
458 }