% ICASSP 2022 conference paper (Matsubara et al.), cleaned up from an automated export.
% - The title is stored in Title Case; braces protect the words that must keep
%   their capitalisation when a style applies sentence casing.
% - The former free-text `note` (publisher copyright, event name, event dates) is
%   stored in the structured fields `eventtitle`, `eventdate` and `copyright`.
%   biblatex uses the event fields; classic BibTeX styles ignore fields they do
%   not know, so the boilerplate is no longer printed in the bibliography.
% - Author names use the unambiguous "Last, First" form.
@inproceedings{56eaefe98def40068476326e740d5ec3,
  author     = {Matsubara, Tomoya and Otsuki, Seitaro and Wada, Yuiga and Matsuo, Haruka and Komatsu, Takumi and Iioka, Yui and Sugiura, Komei and Saito, Hideo},
  title      = {Shared {Transformer} Encoder with Mask-Based {3D} Model Estimation for Container Mass Estimation},
  booktitle  = {2022 {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2022 -- Proceedings},
  series     = {{ICASSP}, {IEEE} International Conference on Acoustics, Speech and Signal Processing -- Proceedings},
  publisher  = {Institute of Electrical and Electronics Engineers Inc.},
  year       = {2022},
  pages      = {9142--9146},
  doi        = {10.1109/ICASSP43922.2022.9747110},
  language   = {English},
  keywords   = {Mask R-CNN, Transformer encoder, point cloud, visual hull},
  abstract   = {For human-safe robot control in human-to-robot handover, the physical properties of containers and fillings should be accurately estimated. In this paper, we propose a Transformer encoder that shares the same architecture and parameters for filling level and type estimation. We also propose a mask-based geometric algorithm to estimate 3D models of containers for the estimation of their capacity and dimensions. We further use these estimations to estimate their mass in a Convolutional Neural Network model. Experiments show that our Transformer model produced encouraging results in both estimations. While challenges remain in our mask-based algorithm and Convolutional Neural Network model, their results revealed several ways for improvement.},
  eventtitle = {47th {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2022},
  eventdate  = {2022-05-23/2022-05-27},
  copyright  = {{\textcopyright} 2022 IEEE},
}