speech
Loading...
Searching...
No Matches
start-ask.cpp
1/*
2 * Copyright (C) 2017 iCub Facility - Istituto Italiano di Tecnologia
3 * Author: Vadim Tikhanoff
4 * email: vadim.tikhanoff@iit.it
5 * Permission is granted to copy, distribute, and/or modify this program
6 * under the terms of the GNU General Public License, version 2 or any
7 * later version published by the Free Software Foundation.
8 *
9 * A copy of the license can be found at
10 * http://www.robotcub.org/icub/license/gpl.txt
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
15 * Public License for more details
16 */
17
18#include "start-ask.h"
19#include <yarp/os/Log.h>
20#include <yarp/os/LogStream.h>
21#include <vector>
22
23/**********************************************************/
24bool STARTModule::configure(yarp::os::ResourceFinder &rf)
25{
26 moduleName = rf.check("name", yarp::os::Value("start-ask"), "module name (string)").asString();
27
28 setName(moduleName.c_str());
29
30 handlerPortName = "/";
31 handlerPortName += getName();
32 handlerPortName += "/rpc:i";
33
34 if (!rpcPort.open(handlerPortName.c_str()))
35 {
36 yError( "%s : Unable to open port %s\n", getName().c_str(), handlerPortName.c_str());
37 return false;
38 }
39
40 attach(rpcPort);
41
42 /* create the thread and pass pointers to the module parameters */
43 startManager = new STARTManager( moduleName );
44
45 /* now start the thread to do the work */
46 startManager->open();
47
48 return true ;
49}
50
51/**********************************************************/
52bool STARTModule::interruptModule()
53{
54 rpcPort.interrupt();
55 return true;
56}
57
58/**********************************************************/
59bool STARTModule::close()
60{
61 //rpcPort.close();
62 yInfo( "starting the shutdown procedure\n");
63 startManager->interrupt();
64 startManager->close();
65 yInfo( "deleting thread\n");
66 delete startManager;
67 yInfo( "done deleting thread\n");
68 return true;
69}
70
71/**********************************************************/
72bool STARTModule::updateModule()
73{
74 return !closing;
75}
76
77/**********************************************************/
78double STARTModule::getPeriod()
79{
80 return 0.1;
81}
82
83/**********************************************************/
84STARTManager::~STARTManager()
85{
86
87}
88
89/**********************************************************/
90STARTManager::STARTManager( const std::string &moduleName )
91{
92 yInfo("initialising Variables\n");
93 this->moduleName = moduleName;
94}
95
96/**********************************************************/
97bool STARTManager::open()
98{
99 this->useCallback();
100
101 //create all ports
102 inSpeechPortName = "/" + moduleName + "/speech:i";
103 BufferedPort<yarp::os::Bottle >::open( inSpeechPortName.c_str() );
104
105 outSTARTUrlName = "/" + moduleName + "/url";
106 portURL.openFake(outSTARTUrlName); // don't use a full, real port
107
108 outSTARTPortName = "/" + moduleName + "/start:o";
109 startOutPort.open( outSTARTPortName.c_str() );
110
111 inFacePortName = "/" + moduleName + "/faces:i";
112 faceInPort.open( inFacePortName.c_str() );
113
114 return true;
115}
116
117/**********************************************************/
118void STARTManager::close()
119{
120 yInfo("now closing ports...\n");
121 startOutPort.close();
122 faceInPort.close();
123 portURL.close();
124 yarp::os::BufferedPort<yarp::os::Bottle >::close();
125 yInfo("finished closing the read port...\n");
126}
127
128/**********************************************************/
129void STARTManager::interrupt()
130{
131 yInfo("cleaning up...\n");
132 yInfo("attempting to interrupt ports\n");
133 yarp::os::BufferedPort<yarp::os::Bottle >::interrupt();
134 portURL.interrupt();
135 faceInPort.interrupt();
136 yInfo("finished interrupt ports\n");
137}
138
139/**********************************************************/
140void STARTManager::onRead(yarp::os::Bottle &bot)
141{
142 yarp::os::Bottle &final = startOutPort.prepare();
143 final.clear();
144
145 if (bot.size()>0){
146
147 std::string question = bot.toString();
148
149 if (isalpha(question[0]))
150 yInfo("avoiding erase\n");
151 else
152 question.erase(std::remove_if(question.begin(), question.end(), aZCheck), question.end());
153
154 yInfo("cmd is %s", question.c_str());
155
156 yarp::os::Bottle *faceList = faceInPort.read(false);
157 std::string name;
158
159 if (faceList!=NULL)
160 {
161 yarp::os::Bottle *item=faceList->get(0).asList();
162 name = item->get(4).asString().c_str();
163 yInfo() << "NAME IS " << name;
164 }
165
166 std::stringstream s(question);
167 std::string word;
168
169 std::string speech = "";
170
171 bool greeting = false;
172
173 if (strcmp (question.c_str(), "how are you") == 0 || strcmp (question.c_str(), "how are you robot") == 0 || strcmp (question.c_str(), "how are you doing") == 0)
174 greeting = true;
175
176 //format it correctly for START
177 for (int i = 0; s >> word; i++){
178 speech += word.c_str();
179
180 if (strcmp (speech.c_str(), "hello" ) == 0 || strcmp (speech.c_str(), "hi" ) == 0)
181 greeting = true;
182
183 speech += "+";
184 }
185
186 //clean it
187 speech.erase(speech.size()-1,1);
188
189 //string port = "http://start.csail.mit.edu:80/askstart.cgi?te=formatted-text&query=";
190 std::string port = "http://start.csail.mit.edu:80/justanswer.php?te=formatted-text&query=";
191
192 std::string query = port + speech;
193
194 portURL.addOutput(query.c_str());
195 yarp::os::Bottle cmd, reply;
196 cmd.addString("1"); // send any message you like, it is ignored
197 portURL.write(cmd,reply);
198
199 std::string html;
200 std::vector<std::string> testtags;
201 std::vector<std::string> testtext;
202
203 yInfo("query is: %s\n", query.c_str());
204
205 yInfo() << "\n--------------------------------Start cleaning up session-------------------------------\n";
206
207 if (reply.size()>0){
208
209 html = reply.toString().c_str();
210
211 //clean the start.
212 std::string tags = "<P><p>";
213 std::string::size_type initial = html.find(tags);
214 if (initial!=std::string::npos)
215 {
216 yInfo() << "Response found "<< tags << " @ " << initial << " with length " << tags.length();
217 html.erase (0,initial+tags.length());
218 }
219 else
220 {
221 tags.clear();
222 tags = "</P>\\n\\n";
223 std::string::size_type initial = html.find(tags);
224 if (initial!=std::string::npos)
225 {
226 yInfo() << "Response found " << tags << " @ " << initial << " with length " << tags.length();
227 html.erase (0,initial+tags.length());
228 }
229 else
230 {
231 tags.clear();
232 tags = "</P>";
233 std::string::size_type initial = html.find(tags);
234 if (initial!=std::string::npos)
235 {
236 yInfo() << "Response found " << tags << " @ " << initial << " with length " << tags.length();
237 html.erase (0,initial+tags.length());
238 }
239 else
240 yError() << "Cannot seem to find any info in the initial part of the response";
241 }
242 }
243
244 //clean the end.
245 tags.clear();
246 tags="</p></P>";
247 std::string::size_type final = html.find(tags);
248 if (final!=std::string::npos)
249 {
250 yInfo() << "Complex response found "<< tags << " @ " << final << " with length " << tags.length();
251 html.erase (html.begin()+final, html.end());
252 }
253 else
254 {
255 tags.clear();
256 tags = "<STRONG>";
257 std::string::size_type final = html.find(tags);
258 if (final!=std::string::npos)
259 {
260 yInfo() << "Normal response found "<< tags << " @ " << final << " with length " << tags.length();
261 html.erase (html.begin()+final, html.end());
262 }
263 else
264 {
265 tags.clear();
266 tags = "</body>";
267 std::string::size_type final = html.find(tags);
268 if (final!=std::string::npos)
269 {
270 yInfo() << "Response found " << tags << " @ " << final << " with length " << tags.length();
271 html.erase (html.begin()+final, html.end());
272 }
273 else
274 yError() << "Cannot seem to find any info in the final part of the response";
275 }
276 }
277
278 //cleaning "line feed".
279
280 std::string::size_type n;
281 tags="\\n";
282 while ( (n = html.find(tags)) != html.npos)
283 {
284 yInfo() << "Cleaning up " << tags << " @ " << n;
285 html.replace(n,tags.length()," ");
286 }
287
288 //cleaning "i know about".
289
290 tags.clear();
291 tags="I know about";
292 std::string::size_type iknow = html.find(tags);
293 if (iknow!=std::string::npos)
294 {
295 yInfo() << "Response found " << tags << " @ " << iknow << " with length " << tags.length();
296 html.erase (html.begin()+iknow, html.end());
297 }
298
299 //cleaning "numeric entry".
300 std::string::size_type numericEntry;
301 tags="&#160;";
302 while ( (numericEntry = html.find(tags)) != html.npos)
303 {
304 yInfo() << "Cleaning up " << tags << " @ " << numericEntry;
305 html.replace(numericEntry,tags.length()," ");
306 }
307
308 //cleaning "en dash".
309 std::string::size_type enDash;
310 tags="&#8211;";
311 while ( (enDash = html.find(tags)) != html.npos)
312 {
313 yInfo() << "Cleaning up " << tags << " @ " << enDash;
314 html.replace(enDash,tags.length(),",");
315 }
316
317 //cleaning "en dash".
318 std::string::size_type enDash2;
319 tags="&#8212;";
320 while ( (enDash2 = html.find(tags)) != html.npos)
321 {
322 yInfo() << "Cleaning up " << tags << " @ " << enDash2;
323 html.replace(enDash2,tags.length(),",");
324 }
325
326 //to REMOVE &#1071; &#1090;&#1074;&#1086;&#1081; &#1089;&#1083;&#1091;&#1075;&#1072;
327 //and &#1071; &#1090;&#1074;&#1086;&#1081; &#1088;&#1072;&#1073;&#1086;&#1090;&#1085;&#1080;&#1082;
328
329 //cleaning "nonSpace".
330 std::string::size_type nonSpace;
331 tags="&nbsp;";
332 while ( (nonSpace = html.find(tags)) != html.npos)
333 {
334 yInfo() << "Cleaning up " << tags << " @ " << nonSpace;
335 html.replace(nonSpace,tags.length(),",");
336 }
337
338 //cleaning "quote".
339 std::string::size_type quote;
340 tags="\"";
341 while ( (quote = html.find(tags)) != html.npos)
342 {
343 yInfo() << "Cleaning up " << tags << " @ " << quote;
344 html.replace(quote,tags.length()," ");
345 }
346
347 //cleaning "forward slashes".
348 std::string::size_type fslash;
349 tags="/";
350 while ( (fslash = html.find(tags)) != html.npos)
351 {
352 yInfo() << "Cleaning up " << tags << " @ " << fslash;
353 html.replace(fslash,tags.length()," ");
354 }
355
356 //cleaning "back slashes".
357 std::string::size_type bslash;
358 tags="\\";
359 while ( (bslash = html.find(tags)) != html.npos)
360 {
361 yInfo() << "Cleaning up " << tags << " @ " << bslash;
362 html.replace(bslash,tags.length()," ");
363 }
364
365 //cleaning "dashes".
366 std::string::size_type dash;
367 tags="-";
368 while ( (dash = html.find(tags)) != html.npos)
369 {
370 yInfo() << "Cleaning up " << tags << " @ " << dash;
371 html.replace(dash,tags.length(),"");
372 }
373
374 //cleaning "dashes".
375 std::string::size_type amp;
376 tags="&amp;";
377 while ( (amp = html.find(tags)) != html.npos)
378 {
379 yInfo() << "Cleaning up " << tags << " @ " << amp;
380 html.replace(amp,tags.length(),"&");
381 }
382
383 yInfo() << "---------------------------Done the first cleaning up session---------------------------";
384
385 for(int i=0; i<html.length(); i++)
386 {
387 std::string::size_type startpos;
388
389 startpos = html.find('<');
390 if(startpos == std::string::npos)
391 {
392 // no tags left only text!
393 testtext.push_back(html);
394 break;
395 }
396
397 // handle the text before the tag
398 if(0 != startpos)
399 {
400 testtext.push_back(html.substr(0, startpos));
401 html = html.substr(startpos, html.size() - startpos);
402 startpos = 0;
403 }
404
405 // skip all the text in the html tag
406 std::string::size_type endpos;
407 for(endpos = startpos; endpos < html.size() && html[endpos] != '>'; ++endpos)
408 {
409 // since '>' can appear inside of an attribute string we need
410 // to make sure we process it properly.
411 if(html[endpos] == '"')
412 {
413 endpos++;
414 while(endpos < html.size() && html[endpos] != '"')
415 endpos++;
416 }
417 }
418
419 // Handle text and end of html that has beginning of tag but not the end
420 if(endpos == html.size())
421 {
422 html = html.substr(endpos, html.size() - endpos);
423 break;
424 }
425 else
426 {
427 // handle the entire tag
428 endpos++;
429 testtags.push_back(html.substr(startpos, endpos - startpos));
430 html = html.substr(endpos, html.size() - endpos);
431 }
432 }
433
434 //yInfo() << "tags:\n-----------------";
435 //for(size_t i = 0; i < testtags.size(); i++)
436 //yInfo() << testtags[i];
437
438 html.clear();
439 for(size_t i = 0; i < testtext.size(); i++)
440 html = html + testtext[i];
441
442 yInfo() << "\n-------------------------------Starting specific cleaning-------------------------------";
443 yInfo() << "--------------------------------- check for [ ] removal ----------------------------------";
444
445 std::string first = "[";
446 std::string second = "]";
447
448 removetags(first, second, html);
449
450 yInfo() << "--------------------------------- check for ( ) removal ----------------------------------";
451 first.clear();
452 second.clear();
453 first = "(";
454 second = ")";
455
456 removetags(first, second, html);
457 }
458 else
459 yError("Something is wrong with the reply from START");
460 if (greeting && name.size()>0)
461 html = name + ", " + html;
462
463 final.addString(html.c_str());
464 yInfo() << "\n\n\n";
465 yInfo() << "The original answer was:\n " << reply.toString().c_str();
466 yInfo() << "\n\n\n";
467 yInfo() << "The answer is:\n " << final.toString().c_str();
468 startOutPort.write();
469 }else
470 yError("Something is wrong with the query");
471}
472
473/**********************************************************/
474void STARTManager::removetags(std::string &first, std::string &second, std::string &text)
475{
476 std::string tags = first;
477
478 std::vector<size_t> startPos;
479 std::vector<size_t> endPos;
480
481 size_t pos = text.find(tags);
482 while(pos != std::string::npos)
483 {
484 yInfo() << "found " << tags << " @ " << pos;
485 startPos.push_back(pos);
486 pos = text.find(tags, pos+1);
487 }
488
489 tags.clear();
490 tags=second;
491 pos = 0;
492 pos = text.find(tags);
493 while(pos != std::string::npos)
494 {
495 yInfo() << "found " << tags << " @ " << pos;
496 endPos.push_back(pos);
497 pos = text.find(tags, pos+1);
498 }
499
500 int shift = 0;
501
502 std::vector<size_t> tmpstartPos = startPos;
503 std::vector<size_t> tmpendPos = endPos;
504
505 if (tmpstartPos.size() == tmpendPos.size()) //check for consistencies
506 {
507 for (int i =0; i<tmpstartPos.size(); i++)
508 {
509 //check if tag occurs withing another tag
510
511 if ( i != tmpstartPos.size()-1 && tmpendPos[i] > tmpstartPos[i+1] )
512 {
513 startPos.erase(startPos.begin()+1);
514 endPos.erase(endPos.begin());
515 }
516 }
517
518 for (int i =0; i<startPos.size(); i++)
519 {
520 if (i>0)
521 shift = shift + (endPos[i-1] - startPos[i-1] + 1 );
522
523 yInfo() << "deleting pos " << startPos[i] << " until " << endPos[i] << " with shift " << shift;
524
525 text.erase (text.begin()+(startPos[i]-shift), text.begin()+(endPos[i] + 1 - shift));
526 }
527 }
528}
529
530//empty line to make gcc happy