speech
Loading...
Searching...
No Matches
main.cpp
1/*
2 * Copyright (C) 2018 iCub Facility - Istituto Italiano di Tecnologia
3 * Author: Vadim Tikhanoff Laura Cavaliere Ilaria Carlini
4 * email: vadim.tikhanoff@iit.it laura.cavaliere@iit.it ilaria.carlini@iit.it
5 * Permission is granted to copy, distribute, and/or modify this program
6 * under the terms of the GNU General Public License, version 2 or any
7 * later version published by the Free Software Foundation.
8 *
9 * A copy of the license can be found at
10 * http://www.robotcub.org/icub/license/gpl.txt
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
15 * Public License for more details
16 */
17
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <deque>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <string>
#include <vector>

#include <yarp/os/BufferedPort.h>
#include <yarp/os/ResourceFinder.h>
#include <yarp/os/RFModule.h>
#include <yarp/os/Network.h>
#include <yarp/os/Time.h>
#include <yarp/os/Log.h>
#include <yarp/os/LogStream.h>
#include <yarp/os/Semaphore.h>
#include <yarp/sig/SoundFile.h>
#include <yarp/dev/PolyDriver.h>

#include <grpc++/grpc++.h>
#include "google/cloud/language/v1/language_service.grpc.pb.h"
#include "google/cloud/texttospeech/v1/cloud_tts.grpc.pb.h"

#include "googleSynthesis_IDL.h"
45
46using namespace google::cloud::language::v1;
47using namespace google::cloud::texttospeech::v1;
48bool is_changed;
49using namespace std;
50
51
52static const std::map<grpc::StatusCode, std::string> status_code_to_string {
53 {grpc::OK, "ok"},
54 {grpc::CANCELLED, "cancelled"},
55 {grpc::UNKNOWN, "unknown"},
56 {grpc::INVALID_ARGUMENT, "invalid_argument"},
57 {grpc::DEADLINE_EXCEEDED, "deadline_exceeded"},
58 {grpc::NOT_FOUND, "not_found"},
59 {grpc::ALREADY_EXISTS, "already_exists"},
60 {grpc::PERMISSION_DENIED, "permission_denied"},
61 {grpc::UNAUTHENTICATED, "unauthenticated"},
62 {grpc::RESOURCE_EXHAUSTED , "resource_exhausted"},
63 {grpc::FAILED_PRECONDITION, "failed_precondition"},
64 {grpc::ABORTED, "aborted"},
65 {grpc::OUT_OF_RANGE, "out_of_range"},
66 {grpc::UNIMPLEMENTED, "unimplemented"},
67 {grpc::INTERNAL, "internal"},
68 {grpc::UNAVAILABLE, "unavailable"},
69 {grpc::DATA_LOSS, "data_loss"},
70 {grpc::DO_NOT_USE, "do_not_use"}
71};
72/********************************************************/
73class Processing : public yarp::os::BufferedPort<yarp::os::Bottle>
74{
75 std::string moduleName;
76 std::string language;
77 std::string voice;
78 std::string &state;
79 double speed;
80 double pitch;
81 yarp::os::RpcServer handlerPort;
82 yarp::os::Port syncPort;
83 yarp::os::Port soundOutputPort;
84 enum playbackmode_t
85 {
86 playFromDisk=0,
87 sendToPort_compressed=1,
88 sendToPort_uncompressed=2
89 } playbackmode;
90
91public:
92 /********************************************************/
93
94 Processing( const std::string &moduleName, const std::string &language, const std::string &voice, const double &speed, const double &pitch, std::string &state, string playmode ): state(state)
95 {
96 this->moduleName = moduleName;
97 this->language = language;
98 this->voice = voice;
99 this->speed = speed;
100 this->pitch = pitch;
101 if (playmode=="playFromDisk") {playbackmode=playbackmode_t::playFromDisk;}
102 else if (playmode=="sendToPort_compressed") {playbackmode=playbackmode_t::sendToPort_compressed;}
103 else if (playmode=="sendToPort_uncompressed") {playbackmode=playbackmode_t::sendToPort_uncompressed;}
104 }
105
106 /********************************************************/
107 ~Processing()
108 {
109
110 };
111
112 /********************************************************/
113 bool open()
114 {
115 this->useCallback();
116 yarp::os::BufferedPort<yarp::os::Bottle >::open( "/" + moduleName + "/text:i" );
117 syncPort.open( "/" + moduleName + "/sync:o" );
118
119 if (playbackmode==playbackmode_t::sendToPort_compressed ||
120 playbackmode==playbackmode_t::sendToPort_uncompressed)
121 {
122 soundOutputPort.open("/"+moduleName+"/sound:o");
123 }
124
125 return true;
126 }
127
128 /********************************************************/
129 void close()
130 {
131 yarp::os::BufferedPort<yarp::os::Bottle >::close();
132 syncPort.close();
133 if (playbackmode==playbackmode_t::sendToPort_compressed ||
134 playbackmode==playbackmode_t::sendToPort_uncompressed)
135 {
136 soundOutputPort.close();
137 }
138 }
139
140 /********************************************************/
141 void sendDone()
142 {
143 yarp::os::Bottle syncBot;
144 syncBot.addString("done");
145 syncPort.write(syncBot);
146 yDebug() << "done querying google";
147 }
148 /********************************************************/
149 void onRead( yarp::os::Bottle &bot )
150 {
151 queryGoogleSynthesis(bot);
152 sendDone();
153 }
154
155 /********************************************************/
156 void queryGoogleSynthesis(yarp::os::Bottle& text)
157 {
158 yDebug() << "in queryGoogleSynthesis";
159
160 yDebug() << "Phrase is " << text.toString().c_str();
161
162 std::string tmp = text.toString();
163
164 tmp.erase(std::remove(tmp.begin(),tmp.end(),'\"'),tmp.end());
165
166 yDebug() << "Phrase is now " << tmp.c_str();
167
168 std::string content = tmp;
169
170 if (content.size()>0)
171 {
172 SynthesizeSpeechRequest request;
173 SynthesizeSpeechResponse response;
174
175 grpc::Status status;
176 grpc::ClientContext context;
177
178 auto creds = grpc::GoogleDefaultCredentials();
179 auto channel = grpc::CreateChannel("texttospeech.googleapis.com", creds);
180 std::unique_ptr<TextToSpeech::Stub> tts(TextToSpeech::NewStub(channel));
181
182 AudioConfig audio_config;
183 VoiceSelectionParams params;
184
185 SynthesisInput input;
186 input.set_text(content);
187
188 audio_config.set_audio_encoding(MP3);
189 params.set_language_code(language);
190 params.set_ssml_gender(NEUTRAL);
191 params.set_name(voice);
192 audio_config.set_speaking_rate(speed);
193 audio_config.set_pitch(pitch);
194
195 request.set_allocated_input(&input);
196 request.set_allocated_voice(&params);
197 request.set_allocated_audio_config(&audio_config);
198
199 checkState("Busy");
200 yarp::os::Time::delay(0.2);
201 grpc::Status tts_status = tts->SynthesizeSpeech(&context, request, &response);
202 std::string status_string = status_code_to_string.at(tts_status.error_code());
203 yInfo() << "Status string:" << status_string;
204 checkState("Done");
205
206 if ( tts_status.ok() )
207 {
208 yInfo() << "Status returned OK";
209 yInfo() << "\n------Response------\n";
210
211 if (playbackmode==playbackmode_t::playFromDisk)
212 {
213 std::string file = "test.mp3";
214 std::ofstream mp3File(file, std::ios::out | std::ios::binary);
215
216 mp3File.write( response.audio_content().data(), response.audio_content().size());
217
218 std::string command = "play test.mp3";// + file;
219
220 system(command.c_str());
221 }
222 else if (playbackmode==playbackmode_t::sendToPort_compressed)
223 {
224 yarp::os::Value v (response.audio_content().data(), response.audio_content().size());
225 yarp::os::Bottle bot; bot.add(v);
226 soundOutputPort.write(bot);
227 }
228 else if (playbackmode==playbackmode_t::sendToPort_uncompressed)
229 {
230 yarp::sig::Sound snd;
231 yarp::sig::file::read_bytestream(snd, response.audio_content().data(), response.audio_content().size(), ".mp3");
232 soundOutputPort.write(snd);
233 }
234 else
235 {
236 yError() << "Invalid playbackmode";
237 }
238 }
239 else
240 {
241 yError() << "Status Returned Cancelled";
242 checkState("Failure_" + status_string);
243 yInfo() << tts_status.error_message();
244 }
245
246 request.release_input();
247 request.release_voice();
248 request.release_audio_config();
249
250 yInfo() << "\n------finished google query------\n";
251 }
252 else if (content.size()==0)
253 {
254 checkState("Empty_input");
255 }
256 }
257 /********************************************************/
258 bool start_acquisition()
259 {
260 return true;
261 }
262
263 /********************************************************/
264 bool stop_acquisition()
265 {
266 return true;
267 }
268
269
270 /********************************************************/
271 bool setLanguageCode(const std::string &languageCode)
272 {
273 language = languageCode;
274 return true;
275 }
276
277 /********************************************************/
278 bool setVoiceCode(const std::string &voiceCode)
279 {
280 voice = voiceCode;
281 return true;
282 }
283
284 /********************************************************/
285 bool setPitch(const double pitchVal)
286 {
287 pitch = pitchVal;
288 return true;
289 }
290
291 /********************************************************/
292 bool setSpeed(const double speedVal)
293 {
294 speed = speedVal;
295 return true;
296 }
297 /********************************************************/
298 std::string getLanguageCode()
299 {
300 return language;
301 }
302
303 /********************************************************/
304 std::string getVoiceCode()
305 {
306 return voice;
307 }
308
309 /********************************************************/
310 double getPitch()
311 {
312 return pitch;
313 }
314
315 /********************************************************/
316 double getSpeed()
317 {
318 return speed;
319 }
320
321 /********************************************************/
322 bool checkState(std::string new_state)
323 {
324 if(new_state!=state){
325 is_changed=true;
326 state=new_state;
327 }
328 else{
329 is_changed=false;
330 }
331 return is_changed;
332 }
333};
334
335/********************************************************/
336class Module : public yarp::os::RFModule, public googleSynthesis_IDL
337{
338 yarp::os::ResourceFinder *rf;
339 yarp::os::RpcServer rpcPort;
340 yarp::os::BufferedPort<yarp::os::Bottle> statePort;
341 std::string state;
342
343 Processing *processing;
344 friend class processing;
345
346 bool closing;
347
348 std::vector<std::string> allLanguageCodes;
349 std::vector<std::string> allVoiceCodes;
350
351 /********************************************************/
352
353public:
354
355 /********************************************************/
356 bool configure(yarp::os::ResourceFinder &rf)
357 {
358 this->rf=&rf;
359 this->state="Ready";
360 std::string moduleName = rf.check("name", yarp::os::Value("googleSynthesis"), "module name (string)").asString();
361
362 std::string language = rf.check("language", yarp::os::Value("en-US"), "language to use (string)").asString();
363 std::string voice = rf.check("voice", yarp::os::Value("en-US-Wavenet-D"), "voice to use (string)").asString();
364
365 double speed = rf.check("speed", yarp::os::Value(1.0), "speed to use (double)").asFloat64();
366 double pitch = rf.check("pitch", yarp::os::Value(0.0), "pitch to use (double)").asFloat64();
367
368 string playmode_string = rf.check("playbackmode", yarp::os::Value("playFromDisk"), "can be one of the following: `playFromDisk`(default), `sendToPort_compressed`, `sendToPort_uncompressed`").asString();
369
370 if (rf.check("languageCodes", "Getting language codes"))
371 {
372 yarp::os::Bottle &grp=rf.findGroup("languageCodes");
373 int sz=grp.size()-1;
374
375 for (int i=0; i<sz; i++)
376 allLanguageCodes.push_back(grp.get(1+i).asString());
377 }
378
379 if (rf.check("voiceCodes", "Getting voice codes"))
380 {
381 yarp::os::Bottle &grp=rf.findGroup("voiceCodes");
382 int sz=grp.size()-1;
383
384 for (int i=0; i<sz; i++)
385 allVoiceCodes.push_back(grp.get(1+i).asString());
386 }
387
388 setName(moduleName.c_str());
389
390 rpcPort.open(("/"+getName("/rpc")).c_str());
391 statePort.open("/"+ moduleName + "/state:o");
392
393 closing = false;
394
395 processing = new Processing( moduleName, language, voice, speed, pitch, state, playmode_string);
396
397 /* now start the thread to do the work */
398 processing->open();
399
400 if(!attach(rpcPort)) {
401 yError()<<"Cannot attach to rpc port";
402 return false;
403 }
404
405 return true;
406 }
407
408 /************************************************************************/
409 bool attach(yarp::os::RpcServer &source)
410 {
411 return this->yarp().attachAsServer(source);
412 }
413
414 /**********************************************************/
415 bool close()
416 {
417 statePort.close();
418 processing->close();
419 delete processing;
420 return true;
421 }
422
423 /********************************************************/
424 double getPeriod()
425 {
426 return 0.1;
427 }
428
429 /********************************************************/
430 bool say(const std::string& phrase)
431 {
432 yarp::os::Bottle bot;
433 bot.addString(phrase);
434 processing->queryGoogleSynthesis(bot);
435 processing->sendDone();
436 return true;
437 }
438
439 /********************************************************/
440 std::string setLanguage(const std::string& languageCode, const std::string& voiceCode)
441 {
442 std::string returnVal = "Error, wrong language or voice code (eg: en-US en-US-Wavenet-A)";
443
444 std::string language, voice;
445
446 for (int i = 0; i < allLanguageCodes.size(); i++)
447 {
448 if (languageCode == allLanguageCodes[i])
449 {
450 for (int v = 0; v < allVoiceCodes.size(); v++)
451 {
452 if (voiceCode == allVoiceCodes[v])
453 {
454 language = languageCode;
455 voice = voiceCode;
456 returnVal = "[ok]";
457 break;
458 }
459 }
460 break;
461 }
462 }
463
464 if(returnVal =="[ok]")
465 {
466 processing->setLanguageCode(languageCode);
467 processing->setVoiceCode(voiceCode);
468 }
469 return returnVal;
470 }
471
472 /********************************************************/
473 bool setPitch(const double pitchVal)
474 {
475 processing->setPitch(pitchVal);
476 return true;
477 }
478
479 /********************************************************/
480 bool setSpeed(const double speedVal)
481 {
482 processing->setSpeed(speedVal);
483 return true;
484 }
485
486 /********************************************************/
487 std::string getLanguageCode()
488 {
489 return processing->getLanguageCode();
490 }
491
492 /********************************************************/
493 std::string getVoiceCode()
494 {
495 return processing->getVoiceCode();
496 }
497
498 /********************************************************/
499 double getPitch()
500 {
501 return processing->getPitch();
502 }
503
504 /********************************************************/
505 double getSpeed()
506 {
507 return processing->getSpeed();
508 }
509
510 /********************************************************/
511 bool quit()
512 {
513 closing=true;
514 return true;
515 }
516
517 /********************************************************/
518 bool updateModule()
519 {
520 if(is_changed){
521 is_changed=false;
522 yarp::os::Bottle &outTargets = statePort.prepare();
523 outTargets.clear();
524 outTargets.addString(state);
525 yDebug() << "outTarget:" << outTargets.toString().c_str();
526 statePort.write();
527 }
528 return !closing;
529 }
530};
531
532/********************************************************/
533int main(int argc, char *argv[])
534{
535 yarp::os::Network::init();
536
537 yarp::os::Network yarp;
538 if (!yarp.checkNetwork())
539 {
540 yError("YARP server not available!");
541 return 1;
542 }
543
544 Module module;
545 yarp::os::ResourceFinder rf;
546
547 rf.setVerbose( true );
548 rf.setDefaultContext( "googleSynthesis" );
549 rf.setDefaultConfigFile( "config.ini" );
550 rf.setDefault("name","googleSynthesis");
551 rf.configure(argc,argv);
552
553 return module.runModule(rf);
554}