speech
All Data Structures Functions Modules Pages
main.cpp
1 /*
2  * Copyright (C) 2018 iCub Facility - Istituto Italiano di Tecnologia
3  * Author: Vadim Tikhanoff Laura Cavaliere Ilaria Carlini
4  * email: vadim.tikhanoff@iit.it laura.cavaliere@iit.it ilaria.carlini@iit.it
5  * Permission is granted to copy, distribute, and/or modify this program
6  * under the terms of the GNU General Public License, version 2 or any
7  * later version published by the Free Software Foundation.
8  *
9  * A copy of the license can be found at
10  * http://www.robotcub.org/icub/license/gpl.txt
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
15  * Public License for more details
16  */
17 
18 #include <vector>
19 #include <iostream>
20 #include <deque>
21 #include <cstdio>
22 #include <cmath>
23 
24 #include <fstream>
25 #include <iterator>
26 #include <string>
27 #include <map>
28 
29 #include <yarp/os/BufferedPort.h>
30 #include <yarp/os/ResourceFinder.h>
31 #include <yarp/os/RFModule.h>
32 #include <yarp/os/Network.h>
33 #include <yarp/os/Time.h>
34 #include <yarp/os/Log.h>
35 #include <yarp/os/LogStream.h>
36 #include <yarp/os/Semaphore.h>
37 #include <yarp/sig/SoundFile.h>
38 #include <yarp/dev/PolyDriver.h>
39 
40 #include <grpc++/grpc++.h>
41 #include "google/cloud/language/v1/language_service.grpc.pb.h"
42 #include "google/cloud/texttospeech/v1/cloud_tts.grpc.pb.h"
43 
44 #include "googleSynthesis_IDL.h"
45 
46 using namespace google::cloud::language::v1;
47 using namespace google::cloud::texttospeech::v1;
// File-scope flag shared between the two classes below: it is written by
// Processing::checkState() and read (then reset to false) by
// Module::updateModule(), which publishes the state string when it is true.
48 bool is_changed;
49 using namespace std;
50 
51 
52 static const std::map<grpc::StatusCode, std::string> status_code_to_string {
53  {grpc::OK, "ok"},
54  {grpc::CANCELLED, "cancelled"},
55  {grpc::UNKNOWN, "unknown"},
56  {grpc::INVALID_ARGUMENT, "invalid_argument"},
57  {grpc::DEADLINE_EXCEEDED, "deadline_exceeded"},
58  {grpc::NOT_FOUND, "not_found"},
59  {grpc::ALREADY_EXISTS, "already_exists"},
60  {grpc::PERMISSION_DENIED, "permission_denied"},
61  {grpc::UNAUTHENTICATED, "unauthenticated"},
62  {grpc::RESOURCE_EXHAUSTED , "resource_exhausted"},
63  {grpc::FAILED_PRECONDITION, "failed_precondition"},
64  {grpc::ABORTED, "aborted"},
65  {grpc::OUT_OF_RANGE, "out_of_range"},
66  {grpc::UNIMPLEMENTED, "unimplemented"},
67  {grpc::INTERNAL, "internal"},
68  {grpc::UNAVAILABLE, "unavailable"},
69  {grpc::DATA_LOSS, "data_loss"},
70  {grpc::DO_NOT_USE, "do_not_use"}
71 };
72 /********************************************************/
73 class Processing : public yarp::os::BufferedPort<yarp::os::Bottle>
74 {
75  std::string moduleName;
76  std::string language;
77  std::string voice;
78  std::string &state;
79  double speed;
80  double pitch;
81  yarp::os::RpcServer handlerPort;
82  yarp::os::Port syncPort;
83  yarp::os::Port soundOutputPort;
84  enum playbackmode_t
85  {
86  playFromDisk=0,
87  sendToPort_compressed=1,
88  sendToPort_uncompressed=2
89  } playbackmode;
90 
91 public:
92  /********************************************************/
93 
94  Processing( const std::string &moduleName, const std::string &language, const std::string &voice, const double &speed, const double &pitch, std::string &state, string playmode ): state(state)
95  {
96  this->moduleName = moduleName;
97  this->language = language;
98  this->voice = voice;
99  this->speed = speed;
100  this->pitch = pitch;
101  if (playmode=="playFromDisk") {playbackmode=playbackmode_t::playFromDisk;}
102  else if (playmode=="sendToPort_compressed") {playbackmode=playbackmode_t::sendToPort_compressed;}
103  else if (playmode=="sendToPort_uncompressed") {playbackmode=playbackmode_t::sendToPort_uncompressed;}
104  }
105 
106  /********************************************************/
107  ~Processing()
108  {
109 
110  };
111 
112  /********************************************************/
113  bool open()
114  {
115  this->useCallback();
116  yarp::os::BufferedPort<yarp::os::Bottle >::open( "/" + moduleName + "/text:i" );
117  syncPort.open( "/" + moduleName + "/sync:o" );
118 
119  if (playbackmode==playbackmode_t::sendToPort_compressed ||
120  playbackmode==playbackmode_t::sendToPort_uncompressed)
121  {
122  soundOutputPort.open("/"+moduleName+"/sound:o");
123  }
124 
125  return true;
126  }
127 
128  /********************************************************/
129  void close()
130  {
131  yarp::os::BufferedPort<yarp::os::Bottle >::close();
132  syncPort.close();
133  if (playbackmode==playbackmode_t::sendToPort_compressed ||
134  playbackmode==playbackmode_t::sendToPort_uncompressed)
135  {
136  soundOutputPort.close();
137  }
138  }
139 
140  /********************************************************/
141  void sendDone()
142  {
143  yarp::os::Bottle syncBot;
144  syncBot.addString("done");
145  syncPort.write(syncBot);
146  yDebug() << "done querying google";
147  }
148  /********************************************************/
149  void onRead( yarp::os::Bottle &bot )
150  {
151  queryGoogleSynthesis(bot);
152  sendDone();
153  }
154 
155  /********************************************************/
156  void queryGoogleSynthesis(yarp::os::Bottle& text)
157  {
158  yDebug() << "in queryGoogleSynthesis";
159 
160  yDebug() << "Phrase is " << text.toString().c_str();
161 
162  std::string tmp = text.toString();
163 
164  tmp.erase(std::remove(tmp.begin(),tmp.end(),'\"'),tmp.end());
165 
166  yDebug() << "Phrase is now " << tmp.c_str();
167 
168  std::string content = tmp;
169 
170  if (content.size()>0)
171  {
172  SynthesizeSpeechRequest request;
173  SynthesizeSpeechResponse response;
174 
175  grpc::Status status;
176  grpc::ClientContext context;
177 
178  auto creds = grpc::GoogleDefaultCredentials();
179  auto channel = grpc::CreateChannel("texttospeech.googleapis.com", creds);
180  std::unique_ptr<TextToSpeech::Stub> tts(TextToSpeech::NewStub(channel));
181 
182  AudioConfig audio_config;
183  VoiceSelectionParams params;
184 
185  SynthesisInput input;
186  input.set_text(content);
187 
188  audio_config.set_audio_encoding(MP3);
189  params.set_language_code(language);
190  params.set_ssml_gender(NEUTRAL);
191  params.set_name(voice);
192  audio_config.set_speaking_rate(speed);
193  audio_config.set_pitch(pitch);
194 
195  request.set_allocated_input(&input);
196  request.set_allocated_voice(&params);
197  request.set_allocated_audio_config(&audio_config);
198 
199  checkState("Busy");
200  yarp::os::Time::delay(0.2);
201  grpc::Status tts_status = tts->SynthesizeSpeech(&context, request, &response);
202  std::string status_string = status_code_to_string.at(tts_status.error_code());
203  yInfo() << "Status string:" << status_string;
204  checkState("Done");
205 
206  if ( tts_status.ok() )
207  {
208  yInfo() << "Status returned OK";
209  yInfo() << "\n------Response------\n";
210 
211  if (playbackmode==playbackmode_t::playFromDisk)
212  {
213  std::string file = "test.mp3";
214  std::ofstream mp3File(file, std::ios::out | std::ios::binary);
215 
216  mp3File.write( response.audio_content().data(), response.audio_content().size());
217 
218  std::string command = "play test.mp3";// + file;
219 
220  system(command.c_str());
221  }
222  else if (playbackmode==playbackmode_t::sendToPort_compressed)
223  {
224  yarp::os::Value v (response.audio_content().data(), response.audio_content().size());
225  yarp::os::Bottle bot; bot.add(v);
226  soundOutputPort.write(bot);
227  }
228  else if (playbackmode==playbackmode_t::sendToPort_uncompressed)
229  {
230  yarp::sig::Sound snd;
231  yarp::sig::file::read_bytestream(snd, response.audio_content().data(), response.audio_content().size(), ".mp3");
232  soundOutputPort.write(snd);
233  }
234  else
235  {
236  yError() << "Invalid playbackmode";
237  }
238  }
239  else
240  {
241  yError() << "Status Returned Cancelled";
242  checkState("Failure_" + status_string);
243  yInfo() << tts_status.error_message();
244  }
245 
246  request.release_input();
247  request.release_voice();
248  request.release_audio_config();
249 
250  yInfo() << "\n------finished google query------\n";
251  }
252  else if (content.size()==0)
253  {
254  checkState("Empty_input");
255  }
256  }
257  /********************************************************/
258  bool start_acquisition()
259  {
260  return true;
261  }
262 
263  /********************************************************/
264  bool stop_acquisition()
265  {
266  return true;
267  }
268 
269 
270  /********************************************************/
271  bool setLanguageCode(const std::string &languageCode)
272  {
273  language = languageCode;
274  return true;
275  }
276 
277  /********************************************************/
278  bool setVoiceCode(const std::string &voiceCode)
279  {
280  voice = voiceCode;
281  return true;
282  }
283 
284  /********************************************************/
285  bool setPitch(const double pitchVal)
286  {
287  pitch = pitchVal;
288  return true;
289  }
290 
291  /********************************************************/
292  bool setSpeed(const double speedVal)
293  {
294  speed = speedVal;
295  return true;
296  }
297  /********************************************************/
298  std::string getLanguageCode()
299  {
300  return language;
301  }
302 
303  /********************************************************/
304  std::string getVoiceCode()
305  {
306  return voice;
307  }
308 
309  /********************************************************/
310  double getPitch()
311  {
312  return pitch;
313  }
314 
315  /********************************************************/
316  double getSpeed()
317  {
318  return speed;
319  }
320 
321  /********************************************************/
322  bool checkState(std::string new_state)
323  {
324  if(new_state!=state){
325  is_changed=true;
326  state=new_state;
327  }
328  else{
329  is_changed=false;
330  }
331  return is_changed;
332  }
333 };
334 
335 /********************************************************/
336 class Module : public yarp::os::RFModule, public googleSynthesis_IDL
337 {
338  yarp::os::ResourceFinder *rf;
339  yarp::os::RpcServer rpcPort;
340  yarp::os::BufferedPort<yarp::os::Bottle> statePort;
341  std::string state;
342 
343  Processing *processing;
344  friend class processing;
345 
346  bool closing;
347 
348  std::vector<std::string> allLanguageCodes;
349  std::vector<std::string> allVoiceCodes;
350 
351  /********************************************************/
352 
353 public:
354 
355  /********************************************************/
356  bool configure(yarp::os::ResourceFinder &rf)
357  {
358  this->rf=&rf;
359  this->state="Ready";
360  std::string moduleName = rf.check("name", yarp::os::Value("googleSynthesis"), "module name (string)").asString();
361 
362  std::string language = rf.check("language", yarp::os::Value("en-US"), "language to use (string)").asString();
363  std::string voice = rf.check("voice", yarp::os::Value("en-US-Wavenet-D"), "voice to use (string)").asString();
364 
365  double speed = rf.check("speed", yarp::os::Value(1.0), "speed to use (double)").asFloat64();
366  double pitch = rf.check("pitch", yarp::os::Value(0.0), "pitch to use (double)").asFloat64();
367 
368  string playmode_string = rf.check("playbackmode", yarp::os::Value("playFromDisk"), "can be one of the following: `playFromDisk`(default), `sendToPort_compressed`, `sendToPort_uncompressed`").asString();
369 
370  if (rf.check("languageCodes", "Getting language codes"))
371  {
372  yarp::os::Bottle &grp=rf.findGroup("languageCodes");
373  int sz=grp.size()-1;
374 
375  for (int i=0; i<sz; i++)
376  allLanguageCodes.push_back(grp.get(1+i).asString());
377  }
378 
379  if (rf.check("voiceCodes", "Getting voice codes"))
380  {
381  yarp::os::Bottle &grp=rf.findGroup("voiceCodes");
382  int sz=grp.size()-1;
383 
384  for (int i=0; i<sz; i++)
385  allVoiceCodes.push_back(grp.get(1+i).asString());
386  }
387 
388  setName(moduleName.c_str());
389 
390  rpcPort.open(("/"+getName("/rpc")).c_str());
391  statePort.open("/"+ moduleName + "/state:o");
392 
393  closing = false;
394 
395  processing = new Processing( moduleName, language, voice, speed, pitch, state, playmode_string);
396 
397  /* now start the thread to do the work */
398  processing->open();
399 
400  if(!attach(rpcPort)) {
401  yError()<<"Cannot attach to rpc port";
402  return false;
403  }
404 
405  return true;
406  }
407 
408  /************************************************************************/
409  bool attach(yarp::os::RpcServer &source)
410  {
411  return this->yarp().attachAsServer(source);
412  }
413 
414  /**********************************************************/
415  bool close()
416  {
417  statePort.close();
418  processing->close();
419  delete processing;
420  return true;
421  }
422 
423  /********************************************************/
424  double getPeriod()
425  {
426  return 0.1;
427  }
428 
429  /********************************************************/
430  bool say(const std::string& phrase)
431  {
432  yarp::os::Bottle bot;
433  bot.addString(phrase);
434  processing->queryGoogleSynthesis(bot);
435  processing->sendDone();
436  return true;
437  }
438 
439  /********************************************************/
440  std::string setLanguage(const std::string& languageCode, const std::string& voiceCode)
441  {
442  std::string returnVal = "Error, wrong language or voice code (eg: en-US en-US-Wavenet-A)";
443 
444  std::string language, voice;
445 
446  for (int i = 0; i < allLanguageCodes.size(); i++)
447  {
448  if (languageCode == allLanguageCodes[i])
449  {
450  for (int v = 0; v < allVoiceCodes.size(); v++)
451  {
452  if (voiceCode == allVoiceCodes[v])
453  {
454  language = languageCode;
455  voice = voiceCode;
456  returnVal = "[ok]";
457  break;
458  }
459  }
460  break;
461  }
462  }
463 
464  if(returnVal =="[ok]")
465  {
466  processing->setLanguageCode(languageCode);
467  processing->setVoiceCode(voiceCode);
468  }
469  return returnVal;
470  }
471 
472  /********************************************************/
473  bool setPitch(const double pitchVal)
474  {
475  processing->setPitch(pitchVal);
476  return true;
477  }
478 
479  /********************************************************/
480  bool setSpeed(const double speedVal)
481  {
482  processing->setSpeed(speedVal);
483  return true;
484  }
485 
486  /********************************************************/
487  std::string getLanguageCode()
488  {
489  return processing->getLanguageCode();
490  }
491 
492  /********************************************************/
493  std::string getVoiceCode()
494  {
495  return processing->getVoiceCode();
496  }
497 
498  /********************************************************/
499  double getPitch()
500  {
501  return processing->getPitch();
502  }
503 
504  /********************************************************/
505  double getSpeed()
506  {
507  return processing->getSpeed();
508  }
509 
510  /********************************************************/
511  bool quit()
512  {
513  closing=true;
514  return true;
515  }
516 
517  /********************************************************/
518  bool updateModule()
519  {
520  if(is_changed){
521  is_changed=false;
522  yarp::os::Bottle &outTargets = statePort.prepare();
523  outTargets.clear();
524  outTargets.addString(state);
525  yDebug() << "outTarget:" << outTargets.toString().c_str();
526  statePort.write();
527  }
528  return !closing;
529  }
530 };
531 
532 /********************************************************/
533 int main(int argc, char *argv[])
534 {
535  yarp::os::Network::init();
536 
537  yarp::os::Network yarp;
538  if (!yarp.checkNetwork())
539  {
540  yError("YARP server not available!");
541  return 1;
542  }
543 
544  Module module;
545  yarp::os::ResourceFinder rf;
546 
547  rf.setVerbose( true );
548  rf.setDefaultContext( "googleSynthesis" );
549  rf.setDefaultConfigFile( "config.ini" );
550  rf.setDefault("name","googleSynthesis");
551  rf.configure(argc,argv);
552 
553  return module.runModule(rf);
554 }