% Symposium paper on grounding language in robot perception.
% Author names are stored in "Last, First" form so that BibTeX does not have
% to guess where a multi-word surname begins.
@inproceedings{dubba_aaai14,
  author    = {Dubba, Krishna S. R. and
               de Oliveira, Miguel R. and
               Lim, Gi Hyun and
               Kasaei, Hamidreza and
               Seabra Lopes, Luis and
               Tome, Ana and
               Cohn, Anthony G.},
  title     = {Grounding Language in Perception for Scene Conceptualization in Autonomous Robots},
  booktitle = {{AAAI} Spring Symposium on Qualitative Representations for Robots},
  publisher = {AAAI Press},
  year      = {2014},
  abstract  = {In order to behave autonomously, it is desirable for
    robots to have the ability to use human supervision and
    learn from different input sources (perception, gestures,
    verbal and textual descriptions etc). In many machine
    learning tasks, the supervision is directed specifically
    towards machines and hence is straight forward clearly
    annotated examples. But this is not always very practical
    and recently it was found that the most preferred
    interface to robots is natural language. Also the supervision
    might only be available in a rather indirect
    form, which may be vague and incomplete. This is frequently
    the case when humans teach other humans since
    they may assume a particular context and existing world
    knowledge. We explore this idea here in the setting of
    conceptualizing objects and scene layouts. Initially the
    robot undergoes training from a human in recognizing
    some objects in the world and armed with this acquired
    knowledge it sets out in the world to explore and learn
    more higher level concepts like static scene layouts and
    environment activities. Here it has to exploit its learned
    knowledge and ground language into perception to use
    inputs from different sources that might have overlapping
    as well as novel information. When exploring, we
    assume that the robot is given visual input, without explicit
    type labels for objects, and also that it has access
    to more or less generic linguistic descriptions of scene layout.
    Thus our task here is to learn the spatial structure of
    a scene layout and simultaneously visual object
    models it was not trained on. In this paper, we present a
    cognitive architecture and learning framework for robot
    learning through natural human supervision and using
    multiple input sources by grounding language in perception.},
}