speech
Loading...
Searching...
No Matches
speech.cpp
1/*
2 * Copyright (C) 2014 iCub Facility
3 * Authors: Ali Paikan
4 * CopyPolicy: Released under the terms of the LGPLv2.1 or later, see LGPL.TXT
5 */
6
7
8#include <cstdio>
9#include <cstdlib>
10#include <cstring>
11#include <algorithm>
12
13#include <yarp/os/Thread.h>
14#include <yarp/os/Time.h>
15#include <yarp/os/Stamp.h>
16#include <yarp/os/LogStream.h>
17
18#include <speech.h>
19
20using namespace yarp::os;
21using namespace yarp::dev;
22
/* adaptation layer defines */
#define PICO_MEM_SIZE 2500000   // size in bytes of the memory area allocated for pico_initialize()
#define DummyLen 100000000      // NOTE(review): not referenced anywhere in the visible part of this file
/* string constants */
#define MAX_OUTBUF_SIZE 128     // size in bytes of the sample buffer handed to pico_getData()

// Name under which the voice definition is created and the engine is instantiated.
const char * PICO_VOICE_NAME = "PicoVoice";

// Supported voices. Pico does not separately specify the voice and the locale.
// The tables below are parallel arrays: the same index selects the same voice in
// each of them. renderSpeech() looks the current language up in picoSupportedLang
// and uses the resulting index to pick the text-analysis (Ta) and
// signal-generation (Sg) lingware file names.
const char * picoSupportedLangIso3[] = { "eng", "eng", "deu", "spa", "fra", "ita" };
const char * picoSupportedCountryIso3[] = { "USA", "GBR", "DEU", "ESP", "FRA", "ITA" };
const char * picoSupportedLang[] = { "en-US", "en-GB", "de-DE", "es-ES", "fr-FR", "it-IT" };
const char * picoInternalLang[] = { "en-US", "en-GB", "de-DE", "es-ES", "fr-FR", "it-IT" };
const char * picoInternalTaLingware[] = { "en-US_ta.bin", "en-GB_ta.bin", "de-DE_ta.bin", "es-ES_ta.bin", "fr-FR_ta.bin", "it-IT_ta.bin" };
const char * picoInternalSgLingware[] = { "en-US_lh0_sg.bin", "en-GB_kh0_sg.bin", "de-DE_gl0_sg.bin", "es-ES_zl0_sg.bin", "fr-FR_nk0_sg.bin", "it-IT_cm0_sg.bin" };
// Only referenced from commented-out code in this file (the utpp resource is not loaded).
const char * picoInternalUtppLingware[] = { "en-US_utpp.bin", "en-GB_utpp.bin", "de-DE_utpp.bin", "es-ES_utpp.bin", "fr-FR_utpp.bin", "it-IT_utpp.bin" };
// Number of entries in each of the tables above.
const int picoNumSupportedVocs = 6;
38
39
40/****************************************************************
41 * @brief The yarp::dev::Speech class
42 */
43Speech::Speech() {
44 pcmDevice.clear();
45 language = "en-US";
46 supportedLangs.push_back("en-US");
47 supportedLangs.push_back("en-GB");
48 supportedLangs.push_back("es-ES");
49 supportedLangs.push_back("fr-FR");
50 supportedLangs.push_back("it-IT");
51 supportedLangs.push_back("de-DE");
52 // picotts
53 picoMemArea = NULL;
54 picoSystem = NULL;
55 picoTaResource = NULL;
56 picoSgResource = NULL;
57 picoUtppResource = NULL;
58 picoEngine = NULL;
59 picoTaFileName = NULL;
60 picoSgFileName = NULL;
61 picoUtppFileName = NULL;
62 picoTaResourceName = NULL;
63 picoSgResourceName = NULL;
64 picoUtppResourceName = NULL;
65 picoSynthAbort = 0;
66}
67
68Speech::~Speech() {
69 close();
70}
71
72
73bool Speech::open(yarp::os::Searchable &config)
74{
75 Speech::config.fromString(config.toString());
76
77 if(config.check("pcm-device"))
78 pcmDevice = config.find("pcm-device").asString();
79 if(config.check("default-language"))
80 if(!setLanguage(config.find("default-language").asString())) {
81 yError()<<"Cannot set the default language to"<<config.find("default-language").asString();
82 return false;
83 }
84
85 setPitch(config.check("pitch",Value(90)).asInt32());
86 setSpeed(config.check("speed",Value(105)).asInt32());
87
88 lingwareRF.setDefaultContext(config.check("lingware-context",Value("speech")).asString());
89 lingwareRF.configure(0,NULL);
90
91 this->yarp().attachAsServer(rpcPort);
92
93 std::string robot=config.check("robot",Value("icub")).asString();
94 std::string portName=std::string("/"+robot+"/speech:rpc");
95 if(!rpcPort.open(portName)) {
96 yError()<<"Cannot open port "<<portName;
97 return false;
98 }
99
100 //return PeriodicThread::start();
101 return true;
102}
103
104bool Speech::close()
105{
106 yInfo()<<"closing Speech!";
107 Thread::stop();
108 return true;
109}
110
111bool Speech::threadInit() {
112 return true;
113}
114
115void Speech::threadRelease() {
116 rpcPort.close();
117}
118
119void Speech::run() {
120}
121
122
123bool Speech::playWav(const std::string& filename) {
124 std::string cmd;
125#if WIN32
126 cmd = "powershell -c (New-Object Media.SoundPlayer ";
127 cmd += filename;
128 cmd += ").PlaySync()";
129#else
130 // aplay --device="plughw:1,0" speech.wav
131 cmd = "aplay ";
132 if(pcmDevice.size())
133 cmd += "--device=\""+pcmDevice+"\" ";
134 cmd += filename;
135#endif
136 yInfo()<<cmd;
137 int ret = system(cmd.c_str());
138 if(ret != 0) {
139 yWarning()<<"Cannot play wave file"<<filename;
140 return false;
141 }
142 return true;
143}
144
145bool Speech::setLanguage(const std::string& language) {
146 if(std::find(supportedLangs.begin(),
147 supportedLangs.end(),
148 language) == supportedLangs.end()) {
149 return false;
150 }
151
152 Speech::language = language;
153 return true;
154}
155
156bool Speech::setSpeed(const int16_t speed) {
157 Speech::speed = speed;
158 return true;
159}
160
161bool Speech::setPitch(const int16_t pitch){
162 Speech::pitch = pitch;
163 return true;
164}
165
166std::vector<std::string> Speech::getSupportedLang() {
167 return supportedLangs;
168}
169
170int16_t Speech::getSpeed() {
171 return speed;
172}
173
174int16_t Speech::getPitch(){
175 return pitch;
176}
177
178
179bool Speech::say(const std::string& text) {
180 std::string waveFile = renderSpeech(text);
181 if(waveFile.size() == 0)
182 return false;
183 return playWav(waveFile);
184}
185
186bool Speech::play() {
187 return false;
188}
189
190bool Speech::pause() {
191 return false;
192}
193
194bool Speech::stop() {
195 return false;
196}
197
198const std::string Speech::renderSpeech(const std::string &text) {
199 //<pitch level='70'><speed level='100'></speed></pitch>"
200 char* cmdText = (char*) std::malloc(text.size()+256);
201 std::string filename;
202#if WIN32
203 if (const char* env_tmp = std::getenv("TMP"))
204 {
205 filename = env_tmp;
206 filename += "\\speech.wav";
207 }
208 else
209 filename = "speech.wav";
210 _snprintf
211#else
212 filename = "/tmp/speech.wav";
213 snprintf
214#endif
215 (cmdText,text.size()+255,
216 "<pitch level='%d'><speed level='%d'> %s </speed></pitch>",
217 pitch, speed, text.c_str());
218 /*
219 //pico2wave -len-US -w out.wav "hello!"
220 std::string cmd = "pico2wave -l" + language + " -w " + filename;
221 cmd = cmd + " \"" + text + "\"";
222 int ret = system(cmd.c_str());
223 if(ret != 0) {
224 yWarning()<<"Cannot render the speech!";
225 filename.clear();
226 }
227*/
228 const char * lang = language.c_str();
229 int langIndex = -1, langIndexTmp = -1;
230 size_t bufferSize = 256;
231
232
233 /* option: --lang */
234 for(langIndexTmp =0; langIndexTmp<picoNumSupportedVocs; langIndexTmp++) {
235 if(!std::strcmp(picoSupportedLang[langIndexTmp], lang)) {
236 langIndex = langIndexTmp;
237 break;
238 }
239 }
240 yAssert(langIndex != -1);
241
242 int ret, getstatus;
243 pico_Char * inp = NULL;
244 pico_Char * local_text = NULL;
245 short outbuf[MAX_OUTBUF_SIZE/2];
246 pico_Int16 bytes_sent, bytes_recv, text_remaining, out_data_type;
247 pico_Retstring outMessage;
248
249 picoSynthAbort = 0;
250
251 picoMemArea = std::malloc( PICO_MEM_SIZE );
252 if((ret = pico_initialize( picoMemArea, PICO_MEM_SIZE, &picoSystem ))) {
253 pico_getSystemStatusMessage(picoSystem, ret, outMessage);
254 std::fprintf(stderr, "Cannot initialize pico (%i): %s\n", ret, outMessage);
255 releasePico();
256 return ("");
257 }
258
259 /* Load the text analysis Lingware resource file. */
260 picoTaFileName = (pico_Char *) std::malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE );
261 std::strcpy((char *) picoTaFileName, lingwareRF.findFileByName(picoInternalTaLingware[langIndex]).c_str());
262 if((ret = pico_loadResource( picoSystem, picoTaFileName, &picoTaResource ))) {
263 pico_getSystemStatusMessage(picoSystem, ret, outMessage);
264 std::fprintf(stderr, "Cannot load text analysis resource file (%i): %s\n", ret, outMessage);
265 releasePico();
266 return ("");
267 }
268
269 /* Load the signal generation Lingware resource file. */
270 picoSgFileName = (pico_Char *) std::malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE );
271 std::strcpy((char *) picoSgFileName, lingwareRF.findFileByName(picoInternalSgLingware[langIndex]).c_str());
272 if((ret = pico_loadResource( picoSystem, picoSgFileName, &picoSgResource ))) {
273 pico_getSystemStatusMessage(picoSystem, ret, outMessage);
274 std::fprintf(stderr, "Cannot load signal generation Lingware resource file (%i): %s\n", ret, outMessage);
275 releasePico();
276 return ("");
277 }
278
279 /* Load the utpp Lingware resource file if exists - NOTE: this file is optional
280 and is currently not used. Loading is only attempted for future compatibility.
281 If this file is not present the loading will still succeed. //
282 picoUtppFileName = (pico_Char *) std::malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE );
283 std::strcpy((char *) picoUtppFileName, PICO_LINGWARE_PATH);
284 std::strcat((char *) picoUtppFileName, (const char *) picoInternalUtppLingware[langIndex]);
285 ret = pico_loadResource( picoSystem, picoUtppFileName, &picoUtppResource );
286 pico_getSystemStatusMessage(picoSystem, ret, outMessage);
287 printf("pico_loadResource: %i: %s\n", ret, outMessage);
288 */
289
290 /* Get the text analysis resource name. */
291 picoTaResourceName = (pico_Char *) std::malloc( PICO_MAX_RESOURCE_NAME_SIZE );
292 if((ret = pico_getResourceName( picoSystem, picoTaResource, (char *) picoTaResourceName ))) {
293 pico_getSystemStatusMessage(picoSystem, ret, outMessage);
294 std::fprintf(stderr, "Cannot get the text analysis resource name (%i): %s\n", ret, outMessage);
295 releasePico();
296 return ("");
297 }
298
299 /* Get the signal generation resource name. */
300 picoSgResourceName = (pico_Char *) std::malloc( PICO_MAX_RESOURCE_NAME_SIZE );
301 if((ret = pico_getResourceName( picoSystem, picoSgResource, (char *) picoSgResourceName ))) {
302 pico_getSystemStatusMessage(picoSystem, ret, outMessage);
303 std::fprintf(stderr, "Cannot get the signal generation resource name (%i): %s\n", ret, outMessage);
304 releasePico();
305 return ("");
306 }
307
308
309 /* Create a voice definition. */
310 if((ret = pico_createVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME ))) {
311 pico_getSystemStatusMessage(picoSystem, ret, outMessage);
312 std::fprintf(stderr, "Cannot create voice definition (%i): %s\n", ret, outMessage);
313 releasePico();
314 return ("");
315 }
316
317 /* Add the text analysis resource to the voice. */
318 if((ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoTaResourceName ))) {
319 pico_getSystemStatusMessage(picoSystem, ret, outMessage);
320 std::fprintf(stderr, "Cannot add the text analysis resource to the voice (%i): %s\n", ret, outMessage);
321 releasePico();
322 return ("");
323 }
324
325 /* Add the signal generation resource to the voice. */
326 if((ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoSgResourceName ))) {
327 pico_getSystemStatusMessage(picoSystem, ret, outMessage);
328 std::fprintf(stderr, "Cannot add the signal generation resource to the voice (%i): %s\n", ret, outMessage);
329 releasePico();
330 return ("");
331 }
332
333 /* Create a new Pico engine. */
334 if((ret = pico_newEngine( picoSystem, (const pico_Char *) PICO_VOICE_NAME, &picoEngine ))) {
335 pico_getSystemStatusMessage(picoSystem, ret, outMessage);
336 std::fprintf(stderr, "Cannot create a new pico engine (%i): %s\n", ret, outMessage);
337 releasePico();
338 return ("");
339 }
340
341 local_text = (pico_Char *) cmdText;
342 text_remaining = std::strlen((const char *) local_text) + 1;
343
344 inp = (pico_Char *) local_text;
345
346 size_t bufused = 0;
347
348 picoos_Common common = (picoos_Common) pico_sysGetCommon(picoSystem);
349
350 picoos_SDFile sdOutFile = NULL;
351
352 picoos_bool done = TRUE;
353 if(TRUE != (done = picoos_sdfOpenOut(common, &sdOutFile,
354 (picoos_char *) filename.c_str(), SAMPLE_FREQ_16KHZ, PICOOS_ENC_LIN)))
355 {
356 std::fprintf(stderr, "Cannot open output wave file\n");
357 ret = 1;
358 releasePico();
359 return ("");
360 }
361
362 int8_t* buffer = (int8_t*) std::malloc( bufferSize );
363 /* synthesis loop */
364 while (text_remaining) {
365 /* Feed the text into the engine. */
366 if((ret = pico_putTextUtf8( picoEngine, inp, text_remaining, &bytes_sent ))) {
367 pico_getSystemStatusMessage(picoSystem, ret, outMessage);
368 std::fprintf(stderr, "Cannot put Text (%i): %s\n", ret, outMessage);
369 releasePico();
370 return ("");
371 }
372
373 text_remaining -= bytes_sent;
374 inp += bytes_sent;
375 do {
376 if (picoSynthAbort) {
377 releasePico();
378 return ("");
379 }
380 /* Retrieve the samples and add them to the buffer. */
381 getstatus = pico_getData( picoEngine, (void *) outbuf,
382 MAX_OUTBUF_SIZE, &bytes_recv, &out_data_type );
383 if((getstatus !=PICO_STEP_BUSY) && (getstatus !=PICO_STEP_IDLE)){
384 pico_getSystemStatusMessage(picoSystem, getstatus, outMessage);
385 std::fprintf(stderr, "Cannot get Data (%i): %s\n", getstatus, outMessage);
386 releasePico();
387 return ("");
388 }
389 if (bytes_recv) {
390 if ((bufused + bytes_recv) <= bufferSize) {
391 std::memcpy(buffer+bufused, (int8_t *) outbuf, bytes_recv);
392 bufused += bytes_recv;
393 } else {
394 done = picoos_sdfPutSamples(
395 sdOutFile,
396 bufused / 2,
397 (picoos_int16*) (buffer));
398 bufused = 0;
399 std::memcpy(buffer, (int8_t *) outbuf, bytes_recv);
400 bufused += bytes_recv;
401 }
402 }
403 } while (PICO_STEP_BUSY == getstatus);
404 /* This chunk of synthesis is finished; pass the remaining samples. */
405 if (!picoSynthAbort) {
406 done = picoos_sdfPutSamples(
407 sdOutFile,
408 bufused / 2,
409 (picoos_int16*) (buffer));
410 }
411 picoSynthAbort = 0;
412 }
413
414 if(TRUE != (done = picoos_sdfCloseOut(common, &sdOutFile))) {
415 std::fprintf(stderr, "Cannot close output wave file\n");
416 ret = 1;
417 std::free(buffer);
418 releasePico();
419 return ("");
420 }
421
422 std::free(buffer);
423 releasePico();
424 return filename;
425}
426
427void Speech::releasePico() {
428
429 if (picoEngine) {
430 pico_disposeEngine( picoSystem, &picoEngine );
431 pico_releaseVoiceDefinition( picoSystem, (pico_Char *) PICO_VOICE_NAME );
432 picoEngine = NULL;
433 }
434
435 if (picoUtppResource) {
436 pico_unloadResource( picoSystem, &picoUtppResource );
437 picoUtppResource = NULL;
438 }
439
440 if (picoSgResource) {
441 pico_unloadResource( picoSystem, &picoSgResource );
442 picoSgResource = NULL;
443 }
444
445 if (picoTaResource) {
446 pico_unloadResource( picoSystem, &picoTaResource );
447 picoTaResource = NULL;
448 }
449
450 if (picoSystem) {
451 pico_terminate(&picoSystem);
452 picoSystem = NULL;
453 }
454 if(picoMemArea) {
455 std::free(picoMemArea);
456 picoMemArea = NULL;
457 }
458}