speech
All Data Structures Functions Modules Pages
start-ask.cpp
1 /*
2  * Copyright (C) 2017 iCub Facility - Istituto Italiano di Tecnologia
3  * Author: Vadim Tikhanoff
4  * email: vadim.tikhanoff@iit.it
5  * Permission is granted to copy, distribute, and/or modify this program
6  * under the terms of the GNU General Public License, version 2 or any
7  * later version published by the Free Software Foundation.
8  *
9  * A copy of the license can be found at
10  * http://www.robotcub.org/icub/license/gpl.txt
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
15  * Public License for more details
16  */
17 
18 #include "start-ask.h"
19 #include <yarp/os/Log.h>
20 #include <yarp/os/LogStream.h>
21 #include <vector>
22 
23 /**********************************************************/
24 bool STARTModule::configure(yarp::os::ResourceFinder &rf)
25 {
26  moduleName = rf.check("name", yarp::os::Value("start-ask"), "module name (string)").asString();
27 
28  setName(moduleName.c_str());
29 
30  handlerPortName = "/";
31  handlerPortName += getName();
32  handlerPortName += "/rpc:i";
33 
34  if (!rpcPort.open(handlerPortName.c_str()))
35  {
36  yError( "%s : Unable to open port %s\n", getName().c_str(), handlerPortName.c_str());
37  return false;
38  }
39 
40  attach(rpcPort);
41 
42  /* create the thread and pass pointers to the module parameters */
43  startManager = new STARTManager( moduleName );
44 
45  /* now start the thread to do the work */
46  startManager->open();
47 
48  return true ;
49 }
50 
51 /**********************************************************/
52 bool STARTModule::interruptModule()
53 {
54  rpcPort.interrupt();
55  return true;
56 }
57 
58 /**********************************************************/
59 bool STARTModule::close()
60 {
61  //rpcPort.close();
62  yInfo( "starting the shutdown procedure\n");
63  startManager->interrupt();
64  startManager->close();
65  yInfo( "deleting thread\n");
66  delete startManager;
67  yInfo( "done deleting thread\n");
68  return true;
69 }
70 
71 /**********************************************************/
72 bool STARTModule::updateModule()
73 {
74  return !closing;
75 }
76 
77 /**********************************************************/
78 double STARTModule::getPeriod()
79 {
80  return 0.1;
81 }
82 
83 /**********************************************************/
84 STARTManager::~STARTManager()
85 {
86 
87 }
88 
89 /**********************************************************/
90 STARTManager::STARTManager( const std::string &moduleName )
91 {
92  yInfo("initialising Variables\n");
93  this->moduleName = moduleName;
94 }
95 
96 /**********************************************************/
97 bool STARTManager::open()
98 {
99  this->useCallback();
100 
101  //create all ports
102  inSpeechPortName = "/" + moduleName + "/speech:i";
103  BufferedPort<yarp::os::Bottle >::open( inSpeechPortName.c_str() );
104 
105  outSTARTUrlName = "/" + moduleName + "/url";
106  portURL.openFake(outSTARTUrlName); // don't use a full, real port
107 
108  outSTARTPortName = "/" + moduleName + "/start:o";
109  startOutPort.open( outSTARTPortName.c_str() );
110 
111  inFacePortName = "/" + moduleName + "/faces:i";
112  faceInPort.open( inFacePortName.c_str() );
113 
114  return true;
115 }
116 
117 /**********************************************************/
118 void STARTManager::close()
119 {
120  yInfo("now closing ports...\n");
121  startOutPort.close();
122  faceInPort.close();
123  portURL.close();
124  yarp::os::BufferedPort<yarp::os::Bottle >::close();
125  yInfo("finished closing the read port...\n");
126 }
127 
128 /**********************************************************/
129 void STARTManager::interrupt()
130 {
131  yInfo("cleaning up...\n");
132  yInfo("attempting to interrupt ports\n");
133  yarp::os::BufferedPort<yarp::os::Bottle >::interrupt();
134  portURL.interrupt();
135  faceInPort.interrupt();
136  yInfo("finished interrupt ports\n");
137 }
138 
139 /**********************************************************/
140 void STARTManager::onRead(yarp::os::Bottle &bot)
141 {
142  yarp::os::Bottle &final = startOutPort.prepare();
143  final.clear();
144 
145  if (bot.size()>0){
146 
147  std::string question = bot.toString();
148 
149  if (isalpha(question[0]))
150  yInfo("avoiding erase\n");
151  else
152  question.erase(std::remove_if(question.begin(), question.end(), aZCheck), question.end());
153 
154  yInfo("cmd is %s", question.c_str());
155 
156  yarp::os::Bottle *faceList = faceInPort.read(false);
157  std::string name;
158 
159  if (faceList!=NULL)
160  {
161  yarp::os::Bottle *item=faceList->get(0).asList();
162  name = item->get(4).asString().c_str();
163  yInfo() << "NAME IS " << name;
164  }
165 
166  std::stringstream s(question);
167  std::string word;
168 
169  std::string speech = "";
170 
171  bool greeting = false;
172 
173  if (strcmp (question.c_str(), "how are you") == 0 || strcmp (question.c_str(), "how are you robot") == 0 || strcmp (question.c_str(), "how are you doing") == 0)
174  greeting = true;
175 
176  //format it correctly for START
177  for (int i = 0; s >> word; i++){
178  speech += word.c_str();
179 
180  if (strcmp (speech.c_str(), "hello" ) == 0 || strcmp (speech.c_str(), "hi" ) == 0)
181  greeting = true;
182 
183  speech += "+";
184  }
185 
186  //clean it
187  speech.erase(speech.size()-1,1);
188 
189  //string port = "http://start.csail.mit.edu:80/askstart.cgi?te=formatted-text&query=";
190  std::string port = "http://start.csail.mit.edu:80/justanswer.php?te=formatted-text&query=";
191 
192  std::string query = port + speech;
193 
194  portURL.addOutput(query.c_str());
195  yarp::os::Bottle cmd, reply;
196  cmd.addString("1"); // send any message you like, it is ignored
197  portURL.write(cmd,reply);
198 
199  std::string html;
200  std::vector<std::string> testtags;
201  std::vector<std::string> testtext;
202 
203  yInfo("query is: %s\n", query.c_str());
204 
205  yInfo() << "\n--------------------------------Start cleaning up session-------------------------------\n";
206 
207  if (reply.size()>0){
208 
209  html = reply.toString().c_str();
210 
211  //clean the start.
212  std::string tags = "<P><p>";
213  std::string::size_type initial = html.find(tags);
214  if (initial!=std::string::npos)
215  {
216  yInfo() << "Response found "<< tags << " @ " << initial << " with length " << tags.length();
217  html.erase (0,initial+tags.length());
218  }
219  else
220  {
221  tags.clear();
222  tags = "</P>\\n\\n";
223  std::string::size_type initial = html.find(tags);
224  if (initial!=std::string::npos)
225  {
226  yInfo() << "Response found " << tags << " @ " << initial << " with length " << tags.length();
227  html.erase (0,initial+tags.length());
228  }
229  else
230  {
231  tags.clear();
232  tags = "</P>";
233  std::string::size_type initial = html.find(tags);
234  if (initial!=std::string::npos)
235  {
236  yInfo() << "Response found " << tags << " @ " << initial << " with length " << tags.length();
237  html.erase (0,initial+tags.length());
238  }
239  else
240  yError() << "Cannot seem to find any info in the initial part of the response";
241  }
242  }
243 
244  //clean the end.
245  tags.clear();
246  tags="</p></P>";
247  std::string::size_type final = html.find(tags);
248  if (final!=std::string::npos)
249  {
250  yInfo() << "Complex response found "<< tags << " @ " << final << " with length " << tags.length();
251  html.erase (html.begin()+final, html.end());
252  }
253  else
254  {
255  tags.clear();
256  tags = "<STRONG>";
257  std::string::size_type final = html.find(tags);
258  if (final!=std::string::npos)
259  {
260  yInfo() << "Normal response found "<< tags << " @ " << final << " with length " << tags.length();
261  html.erase (html.begin()+final, html.end());
262  }
263  else
264  {
265  tags.clear();
266  tags = "</body>";
267  std::string::size_type final = html.find(tags);
268  if (final!=std::string::npos)
269  {
270  yInfo() << "Response found " << tags << " @ " << final << " with length " << tags.length();
271  html.erase (html.begin()+final, html.end());
272  }
273  else
274  yError() << "Cannot seem to find any info in the final part of the response";
275  }
276  }
277 
278  //cleaning "line feed".
279 
280  std::string::size_type n;
281  tags="\\n";
282  while ( (n = html.find(tags)) != html.npos)
283  {
284  yInfo() << "Cleaning up " << tags << " @ " << n;
285  html.replace(n,tags.length()," ");
286  }
287 
288  //cleaning "i know about".
289 
290  tags.clear();
291  tags="I know about";
292  std::string::size_type iknow = html.find(tags);
293  if (iknow!=std::string::npos)
294  {
295  yInfo() << "Response found " << tags << " @ " << iknow << " with length " << tags.length();
296  html.erase (html.begin()+iknow, html.end());
297  }
298 
299  //cleaning "numeric entry".
300  std::string::size_type numericEntry;
301  tags="&#160;";
302  while ( (numericEntry = html.find(tags)) != html.npos)
303  {
304  yInfo() << "Cleaning up " << tags << " @ " << numericEntry;
305  html.replace(numericEntry,tags.length()," ");
306  }
307 
308  //cleaning "en dash".
309  std::string::size_type enDash;
310  tags="&#8211;";
311  while ( (enDash = html.find(tags)) != html.npos)
312  {
313  yInfo() << "Cleaning up " << tags << " @ " << enDash;
314  html.replace(enDash,tags.length(),",");
315  }
316 
317  //cleaning "en dash".
318  std::string::size_type enDash2;
319  tags="&#8212;";
320  while ( (enDash2 = html.find(tags)) != html.npos)
321  {
322  yInfo() << "Cleaning up " << tags << " @ " << enDash2;
323  html.replace(enDash2,tags.length(),",");
324  }
325 
326  //to REMOVE &#1071; &#1090;&#1074;&#1086;&#1081; &#1089;&#1083;&#1091;&#1075;&#1072;
327  //and &#1071; &#1090;&#1074;&#1086;&#1081; &#1088;&#1072;&#1073;&#1086;&#1090;&#1085;&#1080;&#1082;
328 
329  //cleaning "nonSpace".
330  std::string::size_type nonSpace;
331  tags="&nbsp;";
332  while ( (nonSpace = html.find(tags)) != html.npos)
333  {
334  yInfo() << "Cleaning up " << tags << " @ " << nonSpace;
335  html.replace(nonSpace,tags.length(),",");
336  }
337 
338  //cleaning "quote".
339  std::string::size_type quote;
340  tags="\"";
341  while ( (quote = html.find(tags)) != html.npos)
342  {
343  yInfo() << "Cleaning up " << tags << " @ " << quote;
344  html.replace(quote,tags.length()," ");
345  }
346 
347  //cleaning "forward slashes".
348  std::string::size_type fslash;
349  tags="/";
350  while ( (fslash = html.find(tags)) != html.npos)
351  {
352  yInfo() << "Cleaning up " << tags << " @ " << fslash;
353  html.replace(fslash,tags.length()," ");
354  }
355 
356  //cleaning "back slashes".
357  std::string::size_type bslash;
358  tags="\\";
359  while ( (bslash = html.find(tags)) != html.npos)
360  {
361  yInfo() << "Cleaning up " << tags << " @ " << bslash;
362  html.replace(bslash,tags.length()," ");
363  }
364 
365  //cleaning "dashes".
366  std::string::size_type dash;
367  tags="-";
368  while ( (dash = html.find(tags)) != html.npos)
369  {
370  yInfo() << "Cleaning up " << tags << " @ " << dash;
371  html.replace(dash,tags.length(),"");
372  }
373 
374  //cleaning "dashes".
375  std::string::size_type amp;
376  tags="&amp;";
377  while ( (amp = html.find(tags)) != html.npos)
378  {
379  yInfo() << "Cleaning up " << tags << " @ " << amp;
380  html.replace(amp,tags.length(),"&");
381  }
382 
383  yInfo() << "---------------------------Done the first cleaning up session---------------------------";
384 
385  for(int i=0; i<html.length(); i++)
386  {
387  std::string::size_type startpos;
388 
389  startpos = html.find('<');
390  if(startpos == std::string::npos)
391  {
392  // no tags left only text!
393  testtext.push_back(html);
394  break;
395  }
396 
397  // handle the text before the tag
398  if(0 != startpos)
399  {
400  testtext.push_back(html.substr(0, startpos));
401  html = html.substr(startpos, html.size() - startpos);
402  startpos = 0;
403  }
404 
405  // skip all the text in the html tag
406  std::string::size_type endpos;
407  for(endpos = startpos; endpos < html.size() && html[endpos] != '>'; ++endpos)
408  {
409  // since '>' can appear inside of an attribute string we need
410  // to make sure we process it properly.
411  if(html[endpos] == '"')
412  {
413  endpos++;
414  while(endpos < html.size() && html[endpos] != '"')
415  endpos++;
416  }
417  }
418 
419  // Handle text and end of html that has beginning of tag but not the end
420  if(endpos == html.size())
421  {
422  html = html.substr(endpos, html.size() - endpos);
423  break;
424  }
425  else
426  {
427  // handle the entire tag
428  endpos++;
429  testtags.push_back(html.substr(startpos, endpos - startpos));
430  html = html.substr(endpos, html.size() - endpos);
431  }
432  }
433 
434  //yInfo() << "tags:\n-----------------";
435  //for(size_t i = 0; i < testtags.size(); i++)
436  //yInfo() << testtags[i];
437 
438  html.clear();
439  for(size_t i = 0; i < testtext.size(); i++)
440  html = html + testtext[i];
441 
442  yInfo() << "\n-------------------------------Starting specific cleaning-------------------------------";
443  yInfo() << "--------------------------------- check for [ ] removal ----------------------------------";
444 
445  std::string first = "[";
446  std::string second = "]";
447 
448  removetags(first, second, html);
449 
450  yInfo() << "--------------------------------- check for ( ) removal ----------------------------------";
451  first.clear();
452  second.clear();
453  first = "(";
454  second = ")";
455 
456  removetags(first, second, html);
457  }
458  else
459  yError("Something is wrong with the reply from START");
460  if (greeting && name.size()>0)
461  html = name + ", " + html;
462 
463  final.addString(html.c_str());
464  yInfo() << "\n\n\n";
465  yInfo() << "The original answer was:\n " << reply.toString().c_str();
466  yInfo() << "\n\n\n";
467  yInfo() << "The answer is:\n " << final.toString().c_str();
468  startOutPort.write();
469  }else
470  yError("Something is wrong with the query");
471 }
472 
473 /**********************************************************/
474 void STARTManager::removetags(std::string &first, std::string &second, std::string &text)
475 {
476  std::string tags = first;
477 
478  std::vector<size_t> startPos;
479  std::vector<size_t> endPos;
480 
481  size_t pos = text.find(tags);
482  while(pos != std::string::npos)
483  {
484  yInfo() << "found " << tags << " @ " << pos;
485  startPos.push_back(pos);
486  pos = text.find(tags, pos+1);
487  }
488 
489  tags.clear();
490  tags=second;
491  pos = 0;
492  pos = text.find(tags);
493  while(pos != std::string::npos)
494  {
495  yInfo() << "found " << tags << " @ " << pos;
496  endPos.push_back(pos);
497  pos = text.find(tags, pos+1);
498  }
499 
500  int shift = 0;
501 
502  std::vector<size_t> tmpstartPos = startPos;
503  std::vector<size_t> tmpendPos = endPos;
504 
505  if (tmpstartPos.size() == tmpendPos.size()) //check for consistencies
506  {
507  for (int i =0; i<tmpstartPos.size(); i++)
508  {
509  //check if tag occurs withing another tag
510 
511  if ( i != tmpstartPos.size()-1 && tmpendPos[i] > tmpstartPos[i+1] )
512  {
513  startPos.erase(startPos.begin()+1);
514  endPos.erase(endPos.begin());
515  }
516  }
517 
518  for (int i =0; i<startPos.size(); i++)
519  {
520  if (i>0)
521  shift = shift + (endPos[i-1] - startPos[i-1] + 1 );
522 
523  yInfo() << "deleting pos " << startPos[i] << " until " << endPos[i] << " with shift " << shift;
524 
525  text.erase (text.begin()+(startPos[i]-shift), text.begin()+(endPos[i] + 1 - shift));
526  }
527  }
528 }
529 
530 //empty line to make gcc happy