@comment{Sugiura et al. 2015, Advanced Robotics 29(7), pp. 449-456.
  The exported record had author-biography fragments pasted into `note`.
  They are preserved verbatim in the non-standard `internal-note` field so
  that `note` carries only the funding and copyright statements.}
@article{72c9c28d1fc4463f8345d2f07404f169,
  author        = {Sugiura, Komei and Shiga, Yoshinori and Kawai, Hisashi and Misu, Teruhisa and Hori, Chiori},
  title         = {A cloud robotics approach towards dialogue-oriented robot speech},
  journal       = {Advanced Robotics},
  volume        = {29},
  number        = {7},
  pages         = {449--456},
  year          = {2015},
  month         = apr,
  day           = {3},
  publisher     = {Taylor and Francis Ltd.},
  issn          = {0169-1864},
  doi           = {10.1080/01691864.2015.1009164},
  language      = {English},
  keywords      = {human-robot interaction, service robots, social robots, speech synthesis},
  abstract      = {Robot utterances generally sound monotonous, unnatural and unfriendly because their Text-to-Speech systems are not optimized for communication but for text reading. Here, we present a non-monologue speech synthesis for robots. The key novelty lies in speech synthesis based on Hidden Markov models (HMMs) using a non-monologue corpus: we collected a speech corpus in a non-monologue style in which two professional voice talents read scripted dialogues, and HMMs were then trained with the corpus and used for speech synthesis. We conducted experiments in which the proposed method was evaluated by 24 subjects in three scenarios: text reading, dialogue and domestic service robot (DSR) scenarios. In the DSR scenario, we used a physical robot and compared our proposed method with a baseline method using the standard Mean Opinion Score criterion. Our experimental results showed that our proposed method's performance was (1) at the same level as the baseline method in the text-reading scenario and (2) exceeded it in the DSR scenario. We deployed our proposed system as a cloud-based speech synthesis service so that it can be used without any cost.},
  note          = {Funding Information: This work was partially supported by MEXT/JSPS KAKENHI [grant number 24118710/24700188]. Publisher Copyright: {\textcopyright} 2015 Taylor \& Francis and The Robotics Society of Japan.},
  internal-note = {Text mis-scraped into the exported note field under "Funding Information:" (kept verbatim): 2003, ME degree in 2005 and PhD degree in 2008, all in information science, from Kyoto University, Kyoto, Japan. From 2005 to 2008, he was a research fellow (DC1) of the Japan Society for the Promotion of Science (JSPS). From 2008 to 2013, he was a researcher at NICT Spoken Language Communication Group. In 2013, he joined Honda Research Institute USA, 2011/11 to 2012/2, he was a visiting researcher at Chiori Hori received her BE and ME degrees in electrical and information engineering from Yamagata University, Yonezawa, Japan, in 1994 and 1997, respectively and received her PhD degree in Tokyo Institute of Technology, Tokyo, Japan, in 2002. She engaged in research on spoken language processing at the NTT Communication Science Laboratories, NTT Corporation, Kyoto, Japan, from 2002 to 2004. She researched at Carnegie Mellon University in Pittsburgh from 2004 to 2006. She joined ATR from 2007 and started to research NICT from 2008. She is currently the director of spoken language communication laboratory. She received the Paper Award from the Institute of Electronics, Information and Communication Engineers (IEICE) in 2003, the 24th TELECOM System Technology Award by the Telecommunications Advancement Foundation in 2009, International Cooperation Award from the ITU Association of Japan (ITU-AJ) in 2012, DOCOMO Mobile Science Award for Social Science Sector from Mobile Communication Fund in 2012 and the 58th Maejima Hisoka Award, Tsushinbunka Association, 2013. She is a member of the IEEE, the ISCA, the IEICE and the ASJ.},
}