@inproceedings{Saxen2014,
  author    = {Saxen, Frerk and Al-Hamadi, Ayoub},
  title     = {Color-based skin segmentation: An evaluation of the state of the art},
  booktitle = {IEEE International Conference on Image Processing (ICIP)},
  year      = {2014},
  month     = oct,
  pages     = {4467--4471},
  issn      = {1522-4880},
  doi       = {10.1109/ICIP.2014.7025906}
}
@inproceedings{gossen2014usability,
  author    = {Gossen, Tatiana and H{\"o}bel, Juliane and N{\"u}rnberger, Andreas},
  title     = {Usability and Perception of Young Users and Adults on Targeted Web Search Engines},
  booktitle = {Proceedings of the 5th Information Interaction in Context Symposium (IIiX'14)},
  year      = {2014},
  pages     = {18--27},
  location  = {Regensburg, Germany},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-4503-2976-7},
  doi       = {10.1145/2637002.2637007},
  keywords  = {children, eye-tracker, search engine, user study}
}
@inproceedings{gossen2014comparative,
  author    = {Gossen, Tatiana and H{\"o}bel, Juliane and N{\"u}rnberger, Andreas},
  title     = {A Comparative Study About Children's and Adults' Perception of Targeted Web Search Engines},
  booktitle = {Proceedings of the SIGCHI Conference on Human Factors in Computing Systems (CHI'14)},
  year      = {2014},
  location  = {Toronto, Ontario, Canada},
  publisher = {ACM},
  address   = {New York, NY, USA},
  pages     = {1821--1824},
  isbn      = {978-1-4503-2473-1},
  doi       = {10.1145/2556288.2557031},
  keywords  = {children, eye-tracker, search engine, user study}
}
@article{gossen2013graph,
  author    = {Gossen, Tatiana and Kotzyba, Michael and N{\"u}rnberger, Andreas},
  title     = {Graph Clusterings with Overlaps: Adapted Quality Indices and a Generation Model},
  journal   = {Neurocomputing},
  year      = {2014},
  month     = jan,
  volume    = {123},
  pages     = {13--22},
  issn      = {0925-2312},
  doi       = {10.1016/j.neucom.2012.09.046},
  publisher = {Elsevier Science Publishers B. V.},
  keywords  = {Clustered graph, Evaluation measures, Graph generation model, Overlapping graph clustering}
}
@inproceedings{low2014visual,
  author    = {Low, Thomas and Hentschel, Christian and Stober, Sebastian and Sack, Harald and N{\"u}rnberger, Andreas},
  title     = {Visual Berrypicking in Large Image Collections},
  booktitle = {Proceedings of the 8th Nordic Conference on Human-Computer Interaction: Fun, Fast, Foundational},
  year      = {2014},
  series    = {NordiCHI'14},
  pages     = {1043--1046},
  publisher = {ACM},
  address   = {New York, NY, USA},
  doi       = {10.1145/2639189.2670271},
  keywords  = {image retrieval, interactive exploration, multi-dimensional scaling, procrustes analysis}
}
@inproceedings{Stange2014search,
  author    = {Stange, Dominic and N{\"u}rnberger, Andreas},
  title     = {Search Maps: Enhancing Traceability and Overview in Collaborative Information Seeking},
  booktitle = {Advances in Information Retrieval: Proceedings of the 36th European Conference on IR Research (ECIR 2014)},
  year      = {2014},
  series    = {Lecture Notes in Computer Science},
  volume    = {8416},
  publisher = {Springer International Publishing},
  pages     = {763--766},
  isbn      = {978-3-319-06028-6},
  doi       = {10.1007/978-3-319-06028-6_91}
}
@inproceedings{DBLP:conf/acmidc/GossenMSN14,
  author    = {Gossen, Tatiana and M{\"u}ller, Ren{\'e} and Stober, Sebastian and N{\"u}rnberger, Andreas},
  title     = {Search Result Visualization with Characters for Children},
  booktitle = {Proceedings of the 2014 Conference on Interaction Design and Children (IDC '14)},
  year      = {2014},
  month     = jun,
  pages     = {125--134},
  location  = {Aarhus, Denmark},
  isbn      = {978-1-4503-2272-0},
  doi       = {10.1145/2593968.2593983},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {children, information retrieval, result visualization, search engine, user-centered design}
}
@INPROCEEDINGS {2014fn02, AUTHOR = {Nothdurft, Florian and Ultes, Stefan and Minker, Wolfgang}, TITLE = {Finding Appropriate Interaction Strategies for Proactive Dialogue Systems---An Open
Quest}, BOOKTITLE = {Proc. of the 2nd European and the 5th Nordic Symposium on Multimodal Communication}, YEAR = {2014}, MONTH = {aug}, PAGES = {73--80}, PUBLISHER = {LiU Electronic Press}, ISBN = {978-91-7519-074-7}, ISSN = {1650-3740}, LOCATION = {Tartu, Estonia}, URL = {http://www.ep.liu.se/ecp/110/010/ecp15110010.pdf} } @ARTICLE{kliegl2014duration, AUTHOR = {Kliegl, Katrin M. and Watrin, Luc and Huckauf, Anke}, TITLE = {Duration perception of emotional stimuli: Using evaluative conditioning to avoid sensory confounds}, JOURNAL = {Cognition and Emotion}, YEAR = {2014}, PAGES = {1--18}, DOI = {10.1080/02699931.2014.978841}, NOTE = {PMID: 25398048}, ABSTRACT = {It has been found that emotional pictures are estimated to last longer than neutral ones. However, emotional and neutral stimuli often differ in their physical characteristics, too. Since this might also affect time perception, we present a method disentangling a possible confounding regarding the processing of physically different stimulus material. In the evaluative condition paradigm, participants, at first, learnt the association of neutral images with a certain Landolt ring and of emotional images with another Landolt ring with a different gap position. The conditioned Landolt rings were subsequently used in a temporal bisection task. In two experiments, the results revealed a temporal overestimation of Landolt rings conditioned with emotional pictures compared to neutral pictures showing that the temporal overestimation of emotional stimuli cannot be attributed to perceptual differences between neutral and emotional stimuli. 
The method provides the potential for investigating emotional effects on various perceptual processes.} } @ARTICLE{TschechneNeumann2014b, AUTHOR = {Tschechne, Stephan and Neumann, Heiko}, TITLE = {Hierarchical representation of shapes in visual cortex - from localized features to figural shape segregation}, JOURNAL = {Frontiers in Computational Neuroscience}, YEAR = {2014}, VOLUME = {8}, NUMBER = {93}, URL = {http://www.frontiersin.org/computational_neuroscience/10.3389/fncom.2014.00093/abstract}, DOI = {10.3389/fncom.2014.00093}, ISSN = {1662-5188}, ABSTRACT = {Visual structures in the environment are segmented into image regions and those combined to a representation of surfaces and prototypical objects. Such a perceptual organization is performed by complex neural mechanisms in the visual cortex of primates. Multiple mutually connected areas in the ventral cortical pathway receive visual input and extract local form features that are subsequently grouped into increasingly complex, more meaningful image elements. Such a distributed network of processing must be capable to make accessible highly articulated changes in shape boundary as well as very subtle curvature changes that contribute to the perception of an object. We propose a recurrent computational network architecture that utilizes hierarchical distributed representations of shape features to encode surface and object boundary over different scales of resolution. Our model makes use of neural mechanisms that model the processing capabilities of early and intermediate stages in visual cortex, namely areas V1-V4 and IT. We suggest that multiple specialized component representations interact by feedforward hierarchical processing that is combined with feedback signals driven by representations generated at higher stages. Based on this, global configurational as well as local information is made available to distinguish changes in the object's contour. 
Once the outline of a shape has been established, contextual contour configurations are used to assign border ownership directions and thus achieve segregation of figure and ground. The model, thus, proposes how separate mechanisms contribute to distributed hierarchical cortical shape representation and combine with processes of figure-ground segregation. Our model is probed with a selection of stimuli to illustrate processing results at different processing stages. We especially highlight how modulatory feedback connections contribute to the processing of visual input at various stages in the processing hierarchy.} } @ARTICLE{LayherEtAl2014b, AUTHOR = {Layher, Georg and Schrodt, Fabian and Butz, Martin V and Neumann, Heiko}, TITLE = {Adaptive learning in a compartmental model of visual cortex - how feedback enables stable category learning and refinement}, JOURNAL = {Frontiers in Psychology}, YEAR = {2014}, VOLUME = {5}, NUMBER = {1287}, DOI = {10.3389/fpsyg.2014.01287}, ISSN = {1664-1078}, ABSTRACT = {The categorization of real world objects is often reflected in the similarity of their visual appearances. Such categories of objects do not necessarily form disjunct sets of objects, neither semantically nor visually. The relationship between categories can often be described in terms of a hierarchical structure. For instance, tigers and leopards build two separate mammalian categories, both of which are subcategories of the category Felidae. In the last decades, the unsupervised learning of categories of visual input stimuli has been addressed by numerous approaches in machine learning as well as in computational neuroscience. However, the question of what kind of mechanisms might be involved in the process of subcategory learning, or category refinement, remains a topic of active investigation. We propose a recurrent computational network architecture for the unsupervised learning of categorial and subcategorial visual input representations. 
During learning, the connection strengths of bottom-up weights from input to higher-level category representations are adapted according to the input activity distribution. In a similar manner, top-down weights learn to encode the characteristics of a specific stimulus category. Feedforward and feedback learning in combination realize an associative memory mechanism, enabling the selective top-down propagation of a category's feedback weight distribution. We suggest that the difference between the expected input encoded in the projective field of a category node and the current input pattern controls the amplification of feedforward-driven representations. Large enough differences trigger the recruitment of new representational resources and the establishment of additional (sub-) category representations. We demonstrate the temporal evolution of such learning and show how the proposed combination of an associative memory with a modulatory feedback integration successfully establishes category and subcategory representations.} } @ARTICLE{BroschNeumann2014b, AUTHOR = {Brosch, T. and Neumann, H.}, TITLE = {Computing with a Canonical Neural Circuits Model with Pool Normalization and Modulating Feedback}, JOURNAL = {Neural Computation}, YEAR = {2014}, PAGES = {2735--89}, VOLUME = {26}, NUMBER = {12}, ABSTRACT = {Evidence suggests that the brain uses an operational set of canonical computations like normalization, input filtering, and response gain enhancement via reentrant feedback. Here, we propose a three-stage columnar architecture of cascaded model neurons to describe a core circuit combining signal pathways of feedforward and feedback processing and the inhibitory pooling of neurons to normalize the activity. We present an analytical investigation of such a circuit by first reducing its detail through the lumping of initial feedforward response filtering and reentrant modulating signal amplification. 
The resulting excitatory-inhibitory pair of neurons is analyzed in a 2D phase-space. The inhibitory pool activation is treated as a separate mechanism exhibiting different effects. We analyze subtractive as well as divisive (shunting) interaction to implement center-surround mechanisms that include normalization effects in the characteristics of real neurons. Different variants of a core model architecture are derived and analyzed--in particular, individual excitatory neurons (without pool inhibition), the interaction with an inhibitory subtractive or divisive (i.e., shunting) pool, and the dynamics of recurrent self-excitation combined with divisive inhibition. The stability and existence properties of these model instances are characterized, which serve as guidelines to adjust these properties through proper model parameterization. The significance of the derived results is demonstrated by theoretical predictions of response behaviors in the case of multiple interacting hypercolumns in a single and in multiple feature dimensions. In numerical simulations, we confirm these predictions and provide some explanations for different neural computational properties. Among those, we consider orientation contrast-dependent response behavior, different forms of attentional modulation, contrast element grouping, and the dynamic adaptation of the silent surround in extraclassical receptive field configurations, using only slight variations of the same core reference model.} } @INPROCEEDINGS{SchrodtEtAl2014b, AUTHOR = {Schrodt, F. and Layher, G. and Neumann, H. and Butz, M.V.}, TITLE = {Modeling perspective-taking upon observation of 3D biological motion}, BOOKTITLE = {Development and Learning and Epigenetic Robotics (ICDL-Epirob), 2014 Joint IEEE International Conferences on}, YEAR = {2014}, MONTH = {Oct}, PAGES = {305--310}, DOI = {10.1109/DEVLRN.2014.6982998}, ABSTRACT = {It appears that the mirror neuron system plays a crucial role when learning by imitation. 
However, it remains unclear how mirror neuron properties develop in the first place. A likely prerequisite for developing mirror neurons may be the capability to transform observed motion into a sufficiently self-centered frame of reference. We propose an artificial neural network (NN) model that implements such a transformation capability by a highly embodied approach: The model first learns to correlate and predict self-induced motion patterns by associating egocentric visual and proprioceptive perceptions. Once these predictions are sufficiently accurate, a robust and invariant recognition of observed biological motion becomes possible by allowing a self-supervised, error-driven adaption of the visual frame of reference. The NN is a modified, dynamic, adaptive resonance model, which features self-supervised learning and adjustment, neural field normalization, and information-driven neural noise adaptation. The developed architecture is evaluated with a simulated 3D humanoid walker with 12 body landmarks and 10 angular DOF. The model essentially shows how an internal frame of reference adaptation for deriving the perspective of another person can be acquired by first learning about the own bodily motion dynamics and by then exploiting this self-knowledge upon the observation of other, relative, biological motion patterns. The insights gained by the model may have significant implications for the development of social capabilities and respective impairments.} } @INPROCEEDINGS{TschechneEtAl2014a, AUTHOR = {Tschechne, S. and Brosch, T. and Sailer, R. and von Egloffstein, N. and Abdul-Kreem, L. I. 
and Neumann, H.}, TITLE = {On Event-Based Motion Detection and Integration}, BOOKTITLE = {8th International Conference on Bio-inspired Information and Communications Technologies}, YEAR = {2014}, PUBLISHER = {ACM} } @INPROCEEDINGS{TschechneEtAl2014b, AUTHOR = {Tschechne, Stephan and Sailer, Roman and Neumann, Heiko}, TITLE = {Bio-Inspired Optic Flow from Event-Based Neuromorphic Sensor Input}, BOOKTITLE = {Proceedings of the 6th IAPR TC 3 International Workshop on Artificial Neural Networks in Pattern Recognition - Volume 8774}, YEAR = {2014}, PUBLISHER = {Springer-Verlag New York, Inc.}, ISBN = {978-3-319-11655-6}, PAGES = {171--182}, NUMPAGES = {12}, URL = {http://dx.doi.org/10.1007/978-3-319-11656-3_16}, DOI = {10.1007/978-3-319-11656-3_16}, ACMID = {2695075}, ADDRESS = {New York, NY, USA}, KEYWORDS = {Classification, Event-Vision, Neural Model, Optic Flow}, ABSTRACT = {Computational models of visual processing often use frame-based image acquisition techniques to process a temporally changing stimulus. This approach is unlike biological mechanisms that are spike-based and independent of individual frames. The neuromorphic Dynamic Vision Sensor (DVS) [Lichtsteiner et al., 2008] provides a stream of independent visual events that indicate local illumination changes, resembling spiking neurons at a retinal level. We introduce a new approach for the modelling of cortical mechanisms of motion detection along the dorsal pathway using this type of representation. Our model combines filters with spatio-temporal tunings also found in visual cortex to yield spatio-temporal and direction specificity. We probe our model with recordings of test stimuli, articulated motion and ego-motion. 
We show how our approach robustly estimates optic flow and also demonstrate how this output can be used for classification purposes.} } ------------------------------------------------------------------- @inproceedings{DBLP:conf/dlog/SteigmillerGL14, author = {Steigmiller, Andreas and Glimm, Birte and Liebig, Thorsten}, title = {Optimised Absorption for Expressive Description Logics}, booktitle = {Informal Proceedings of the 27th International Workshop on Description Logics}, editor = {Bienvenu, Meghyn and Ortiz, Magdalena and Rosati, Riccardo and Simkus, Mantas}, pages = {324--335}, year = {2014}, series = {{CEUR} Workshop Proceedings}, volume = {1193}, publisher = {CEUR-WS.org} } @article{DBLP:journals/ws/SteigmillerLG14, author = {Steigmiller, Andreas and Liebig, Thorsten and Glimm, Birte}, title = {Konclude: System description}, abstract = {This paper introduces Konclude, a high-performance reasoner for the Description Logic SROIQVSROIQV. The supported ontology language is a superset of the logic underlying OWL 2 extended by nominal schemas, which allows for expressing arbitrary DL-safe rules. Konclude’s reasoning core is primarily based on the well-known tableau calculus for expressive Description Logics. In addition, Konclude also incorporates adaptations of more specialised procedures, such as consequence-based reasoning, in order to support the tableau algorithm. Konclude is designed for performance and uses well-known optimisations such as absorption or caching, but also implements several new optimisation techniques. The system can furthermore take advantage of multiple CPU’s at several levels of its processing architecture. This paper describes Konclude’s interface options, reasoner architecture, processing workflow, and key optimisations. 
Furthermore, we provide results of a comparison with other widely used OWL 2 reasoning systems, which show that Konclude performs eminently well on ontologies from any language fragment of OWL 2.}, journal = {Journal of Web Semantics}, volume = {27}, pages = {78--85}, year = {2014}, doi = {10.1016/j.websem.2014.06.003} } @article{DBLP:journals/jar/SteigmillerGL14, author = {Steigmiller, Andreas and Glimm, Birte and Liebig, Thorsten}, title = {Reasoning with Nominal Schemas through Absorption}, abstract = {Nominal schemas have recently been introduced as a new approach for the integration of DL-safe rules into the Description Logic framework. The efficient processing of knowledge bases with nominal schemas remains, however, challenging. We address this by extending the well-known optimisation of absorption as well as the standard tableau calculus to directly handle the (absorbed) nominal schema axioms. We implement the resulting extension of standard tableau calculi in the novel reasoning system Konclude and present further optimisations. In our empirical evaluation, we show the effect of these optimisations and we find that the proposed nominal schema handling performs well even when compared to (hyper)tableau systems with dedicated rule support.}, journal = {Journal of Automated Reasoning}, volume = {53}, number = {4}, pages = {351--405}, year = {2014}, doi = {10.1007/s10817-014-9310-4} } @inproceedings{DBLP:conf/cade/SteigmillerGL14, author = {Steigmiller, Andreas and Glimm, Birte and Liebig, Thorsten}, title = {Coupling Tableau Algorithms for Expressive Description Logics with Completion-Based Saturation Procedures}, abstract = {Nowadays, saturation-based reasoners for the OWL EL profile are able to handle large ontologies such as SNOMED very efficiently. However, saturation-based reasoning procedures become incomplete if the ontology is extended with axioms that use features of more expressive Description Logics, e.g., disjunctions. 
Tableau-based procedures, on the other hand, are not limited to a specific OWL profile, but even highly optimised reasoners might not be efficient enough to handle large ontologies such as SNOMED. In this paper, we present an approach for tightly coupling tableau- and saturation-based procedures that we implement in the OWL DL reasoner Konclude. Our detailed evaluation shows that this combination significantly improves the reasoning performance on a wide range of ontologies.}, booktitle = {Proceedings of the 7th International Joint Conference on Automated Reasoning ({IJCAR} 2014)}, editor = {Demri, St{\'{e}}phane and Kapur, Deepak and Weidenbach, Christoph}, pages = {449--463}, year = {2014}, doi = {10.1007/978-3-319-08587-6_35}, series = {Lecture Notes in Computer Science}, volume = {8562}, publisher = {Springer} } @inproceedings{EmPo14a, author = {Emelyanov, Pavel and Ponomaryov, Denis}, title = {On Tractability of Disjoint {AND}-Decomposition of {B}oolean Formulas}, abstract = {Disjoint AND-decomposition of a boolean formula means its representation as a conjunction of two (or several) formulas having disjoint sets of variables. We show that deciding AND-decomposability is intractable in general for boolean formulas given in CNF or DNF and prove tractability of computing AND-decompositions of boolean formulas given in positive DNF, Full DNF, and ANF. 
The results follow from tractability of multilinear polynomial factorization over the finite field of order 2, for which we provide a polytime factorization algorithm based on identity testing for partial derivatives of multilinear polynomials.}, booktitle = {Proceedings of the 9th International Ershov Informatics Conference ({PSI} 2014)}, xeditor = {Voronkov, Andrei and Virbitskaite, Irina}, year = {2014}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, volume = {8974} } @article{RefWorks:618, author ={Schmidt, Miriam and Palm, G{\"u}nther and Schwenker, Friedhelm}, year ={2014}, title ={Spectral graph features for the classification of graphs and graph sequences}, abstract={In this paper, the classification power of the eigenvalues of six graph-associated matrices is investigated. Each matrix contains a certain type of geometric/ spatial information, which may be important for the classification process. The performances of the different feature types is evaluated on two data sets: first a benchmark data set for optical character recognition, where the extracted eigenvalues were utilized as feature vectors for multi-class classification using support vector machines. Classification results are presented for all six feature types, as well as for classifier combinations at decision level. For the decision level combination, probabilistic output support vector machines have been applied, with a performance up to 92.4 %. To investigate the power of the spectra for time dependent tasks, too, a second data set was investigated, consisting of human activities in video streams. 
To model the time dependency, hidden Markov models were utilized and the classification rate reached 98.3 %.}, journal ={Computational Statistics}, doi ={10.1007/s00180-012-0381-6}, url ={http://dx.doi.org/10.1007/s00180-012-0381-6}, volume ={29}, number ={1-2}, pages ={65--80} } @article{RefWorks:621, author ={Fau{\ss}er, Stefan and Schwenker, Friedhelm}, year ={2014}, title ={Semi-supervised clustering of large data sets with kernel methods}, abstract={Labelling real world data sets is a difficult problem. Often, the human expert is unsure about a class label of a specific sample point or, in case of very large data sets, it is impractical to label them manually. In semi-supervised clustering, the sample labels, which are external informations, are used to find better matching cluster partitions. Further, kernel-based clustering methods are able to partition the data with nonlinear boundaries in feature space. While these methods improve the clustering results, they have a quadratic computation time. In this paper, we propose a meta-algorithm that processes small-sized subsets of a large data set, clusters them with the sample labels and merges the points close to the resulting prototypes with the next points, until the whole data set has been processed. It has a linear computation time. The error function that this meta-algorithm minimizes is presented. Although we applied this meta-algorithm to Kernel Fuzzy C-Means, Relational Neural Gas and Kernel K-Means, it can be applied to a broad range of kernel-based clustering methods. 
The proposed method has been empirically evaluated on two real world benchmark data sets.}, journal ={Pattern Recognition Letters}, volume ={37}, pages ={78--84}, note ={Partially Supervised Learning for Pattern Recognition}, url ={http://dx.doi.org/10.1016/j.patrec.2013.01.00} } @inproceedings{Kachele2014a, Title = {Cascaded Fusion of Dynamic, Spatial, and Textural Feature Sets for Person-Independent Facial Emotion Recognition}, Author = {K{\"a}chele, Markus and Schwenker, Friedhelm}, Booktitle = {Proceedings of the International Conference on Pattern Recognition (ICPR)}, Year = {2014}, Pages = {4660--4665}, Doi = {10.1109/ICPR.2014.797}, Owner = {mkaechele}, Abstract = {Emotion recognition from facial expressions is a highly demanding task, especially in everyday life scenarios. Different sources of artifacts have to be considered in order to successfully extract the intended emotional nuances of the face. The exact and robust detection and orientation of faces impeded by occlusions, inhomogeneous lighting and fast movements is only one difficulty. Another one is the question of selecting suitable features for the application at hand. In the literature, a vast body of different visual features grouped into dynamic, spatial and textural families, has been proposed. These features exhibit different advantages/disadvantages over each other due to their inherent structure, and thus capture complementary information, which is a promising vantage point for fusion architectures. To combine different feature sets and exploit their respective advantages, an adaptive multilevel fusion architecture is proposed. The cascaded approach integrates information on different levels and time scales using artificial neural networks for adaptive weighting of propagated intermediate results. The performance of the proposed architecture is analysed on the GEMEP-FERA corpus as well as on a novel dataset obtained from an unconstrained, spontaneuous human-computer interaction scenario. 
The obtained performance is superior to single channels and basic fusion techniques.}, Timestamp = {2014.04.26} } @incollection{Refworks:667, author = {K{\"a}chele, Markus and Thiam, Patrick and Palm, G{\"u}nther and Schwenker, Friedhelm}, title = {Majority-Class Aware Support Vector Domain Oversampling for Imbalanced Classification Problems}, booktitle = {Artificial Neural Networks in Pattern Recognition}, publisher = {Springer International Publishing}, year = {2014}, abstract = {In this work, a method is presented to overcome the difficulties posed by imbalanced classification problems. The proposed algorithm fits a data description to the minority class but in contrast to many other algorithms, awareness of samples of the majority class is used to improve the estimation process. The majority samples are incorporated in the optimization procedure and the resulting domain descriptions are generally superior to those without knowledge about the majority class. Extensive experimental results support the validity of this approach}, editor = {El Gayar, Neamat and Schwenker, Friedhelm and Suen, Cheng}, volume = {8774}, series = {Lecture Notes in Computer Science}, pages = {83--92}, doi = {10.1007/978-3-319-11656-3_8}, isbn = {978-3-319-11655-6}, keywords = {Imbalanced classification; One-class SVM; Kernel methods}, language = {English}, url = {http://dx.doi.org/10.1007/978-3-319-11656-3_8} } @inproceedings{Kachele2014b, Title = {Prosodic, spectral and voice quality feature selection using a long-term stopping criterion for audio-based emotion recognition}, Author = {K{\"a}chele, Markus and Zharkov, Dimitrij and Meudt, Sascha and Schwenker, Friedhelm}, Booktitle = {Proceedings of the International Conference on Pattern Recognition (ICPR)}, Year = {2014}, Pages = {803--808}, Abstract = {Emotion recognition from speech is an important field of research in human-machine-interfaces, and has begun to influence everyday life by employment in different areas such as call 
centers or wearable companions in the form of smartphones. In the proposed classification architecture, different spectral, prosodic and the relatively novel voice quality features are extracted from the speech signals. These features are then used to represent long-term information of the speech, leading to utterance-wise suprasegmental features. The most promising of these features are selected using a forward-selection/backward-elimination algorithm with a novel long-term termination criterion for the selection. The overall system has been evaluated using recordings from the public Berlin emotion database. Utilizing the resulted features, a recognition rate of 88,97% has been achieved which surpasses the performance of humans on this database and is comparable to the state of the art performance on this dataset.}, Doi = {10.1109/ICPR.2014.148}, Owner = {mkaechele}, Timestamp = {2014.04.26} } @inproceedings{KaecheleAVEC2014, author = {K{\"a}chele, Markus and Schels, Martin and Schwenker, Friedhelm}, title = {Inferring Depression and Affect from Application Dependent Meta Knowledge}, booktitle = {Proceedings of the 4th International Workshop on Audio/Visual Emotion Challenge}, series = {AVEC '14}, year = {2014}, abstract = {This paper outlines our contribution to the 2014 edition of the AVEC competition. It comprises classification results and considerations for both the continuous affect recognition sub-challenge and also the depression recognition subchallenge. Rather than relying on statistical features that are normally extracted from the raw audio-visual data we propose an approach based on abstract meta information about individual subjects and also prototypical task and label dependent templates to infer the respective emotional states. The results of the approach that were submitted to both parts of the challenge significantly outperformed the baseline approaches. 
Further, we elaborate on several issues about the labeling of affective corpora and the choice of appropriate performance measures.}, isbn = {978-1-4503-3119-7}, location = {Orlando, Florida, USA}, pages = {41--48}, numpages = {8}, url = {http://doi.acm.org/10.1145/2661806.2661813}, doi = {10.1145/2661806.2661813}, acmid = {2661813}, publisher = {ACM}, address = {New York, NY, USA}, keywords = {AVEC 2014, affect recognition, depression recognition, meta knowledge} } @article{RefWorks:647, author = {Schels, Martin and K{\"a}chele, Markus and Glodek, Michael and Hrabal, David and Walter, Steffen and Schwenker, Friedhelm}, year = {2014}, editor = {B{\"o}ck, Ronald and Bonin, Francesca and Campbell, Nick and Edlund, Jens and Kok, Iwan de and Poppe, Ronald and Traum, David}, title = {Using unlabeled data to improve classification of emotional states in human computer interaction}, journal = {Journal on Multimodal User Interfaces (JMUI)}, abstract = {The individual nature of physiological measurements of human affective states makes it very difficult to transfer statistical classifiers from one subject to another. In this work, we propose an approach to incorporate unlabeled data into a supervised classifier training in order to conduct an emotion classification. The key idea of the method is to conduct a density estimation of all available data (labeled and unlabeled) to create a new encoding of the problem. Based on this a supervised classifier is constructed. 
Further, numerical evaluations on the EmoRec II corpus are given, examining to what extent additional data can improve classification and which parameters of the density estimation are optimal.}, volume = {8}, number = {1}, publisher = {Springer}, url = {http://dx.doi.org/10.1007/s12193-013-0133-0}, pages = {5--16} } @inproceedings{Meudt2014, Title = {Enhanced Autocorrelation in Real World Emotion Recognition}, Author = {Meudt, Sascha and Schwenker, Friedhelm}, Booktitle = {Proceedings of the 16th International Conference on Multimodal Interaction}, Abstract = {Multimodal emotion recognition in real world environments is still a challenging task of affective computing research. Recognizing the affective or physiological state of an individual is difficult for humans as well as for computer systems, and thus finding suitable discriminative features is the most promising approach in multimodal emotion recognition. In the literature numerous features have been developed or adapted from related signal processing tasks. But still, classifying emotional states in real world scenarios is difficult and the performance of automatic classifiers is rather limited. This is mainly due to the fact that emotional states can not be distinguished by a well defined set of discriminating features. In this work we present an enhanced autocorrelation feature as a multi pitch detection feature and compare its performance to feature well known, and state-of-the-art in signal and speech processing. Results of the evaluation show that the enhanced autocorrelation outperform other state-of-the-art features in case of the challenge data set. 
The complexity of this benchmark data set lies in between real world data sets showing naturalistic emotional utterances, and the widely applied and well-understood acted emotional data sets.}, Year = {2014}, Address = {New York, NY, USA}, Pages = {502--507}, Publisher = {ACM}, Series = {ICMI '14}, Doi = {10.1145/2663204.2666276}, ISBN = {978-1-4503-2885-2}, Keywords = {affective computing, audio features, emotion recognition, enhanced autocorrelation, human computer interaction}, Location = {Istanbul, Turkey}, Numpages = {6}, Url = {http://doi.acm.org/10.1145/2663204.2666276} } @inproceedings{Thiam2014, Title = {Detection of Emotional Events Utilizing Support Vector Methods in an Active Learning HCI Scenario}, Author = {Thiam, Patrick and Meudt, Sascha and K{\"a}chele, Markus and Palm, G{\"u}nther and Schwenker, Friedhelm}, Booktitle = {Proceedings of the 2014 Workshop on Emotion Representations and Modelling for HCI Systems}, Year = {2014}, Abstract = {In recent years the fields of affective computing and emotion recognition have experienced a steady increase in attention and especially the creation and analysis of multi-modal corpora has been the focus of intense research. Plausible annotation of this data, however is an enormous problem. In detail emotion annotation is very time consuming, cumbersome and sensitive with respect to the annotator. Furthermore emotional reactions are often very sparse in HCI scenarios resulting in a large annotation overhead to gather the interesting moments of a recording, which in turn are highly relevant for powerful features, classifiers and fusion architectures. Active learning techniques provide methods to improve the annotation processes since the annotator is asked to only label the relevant instances of a given dataset. In this work an unsupervised one-class Support Vector Machine is used to build a background model of non-emotional sequences on a novel HCI dataset. 
The human annotator is iteratively asked to label instances that are not well explained by the background model, which in turn renders them candidates for being interesting events such as emotional reactions that diverge from the norm. The outcome of the active learning procedure is a reduced dataset of only 14% the size of the original dataset that contains most of the significant information, in this case more than 75% of the emotional events.}, Address = {New York, NY, USA}, Pages = {31--36}, Publisher = {ACM}, Series = {ERM4HCI '14}, Acmid = {2668062}, Doi = {10.1145/2668056.2668062}, ISBN = {978-1-4503-0124-4}, Keywords = {active learning, affective computing, emotion recognition, hci, machine learning, one class svm}, Location = {Istanbul, Turkey}, Numpages = {6}, Url = {http://doi.acm.org/10.1145/2668056.2668062} } @incollection{Schwenker2014, Title = {A New Multi-class Fuzzy Support Vector Machine Algorithm}, Author = {Schwenker, Friedhelm and Frey, Markus and Glodek, Michael and K{\"a}chele, Markus and Meudt, Sascha and Schels, Martin and Schmidt, Miriam}, Booktitle = {Artificial Neural Networks in Pattern Recognition}, Publisher = {Springer International Publishing}, Abstract = {In this paper a novel approach to fuzzy support vector machines (SVM) in multi-class classification problems is presented. The proposed algorithm has the property to benefit from fuzzy labeled data in the training phase and can determine fuzzy memberships for input data. The algorithm can be considered as an extension of the traditional multi-class SVM for crisp labeled data, and it also extents the fuzzy SVM approach for fuzzy labeled training data in the two-class classification setting. 
Its behavior is demonstrated on three benchmark data sets, the achieved results motivate the inclusion of fuzzy labeled data into the training set for various tasks in pattern recognition and machine learning, such as the design of aggregation rules in multiple classifier systems, or in partially supervised learning.}, Year = {2014}, Editor = {El Gayar, Neamat and Schwenker, Friedhelm and Suen, Cheng}, Pages = {153--164}, Series = {Lecture Notes in Computer Science}, Volume = {8774}, Doi = {10.1007/978-3-319-11656-3_14}, ISBN = {978-3-319-11655-6}, Language = {English}, Url = {http://dx.doi.org/10.1007/978-3-319-11656-3_14} } @inproceedings{ehlers2014improving, author ={Ehlers, Jan and Georgi, Juliane and Huckauf, Anke}, title ={Improving voluntary pupil size changes for HCI}, booktitle ={Proceedings of the 8th International Conference on Pervasive Computing Technologies for Healthcare, REHAB’14}, publisher ={Institute of Computer Science and Technology}, abstract ={Previous research (Partala & Surakka, 2003) refers to pupil size as a passive information channel that provides insight into the affective state of the viewer but defies any voluntary control. However, since physiological arousal is influenced by various cognitive processes, we assume that pupil behavior can be brought under control by strategies of emotional regulation and cognitive processing. In the present paper we provide a methodological approach for examining the potentials and limits of active control of pupil dilation. Based on Ekman et al. (2008) we developed methods applying graphical feedback on systematic pupil diameter changes to utilize mechanisms of operant conditioning to gradually enable voluntary control over pupil size. Calculation models are introduced to carefully disentangle task relevant and irrelevant pupil dynamics. 
Based on mean values, single measuring and interpolation, we conceived computational rules to validate pupil data in real-time and determine criteria for artefact rejection. Extensive research based on the depicted methodology may shed further light on learning achievements related to emotional control and will reveal the potential of pupil-based input channels for the future development of affective Human-Computer Interfaces.}, pages ={343--346}, year ={2014}, month ={7}, keywords ={affective human-computer interface, pupil size, biofeedback, emotions, voluntary control}, doi ={10.4108/icst.pervasivehealth.2014.255312} } @article{HappelFrischknecht2014, author = {Happel, Max F K and Niekisch, Hartmut and Rivera, Laura L Castiblanco and Ohl, Frank W and Deliano, Matthias and Frischknecht, Renato}, title = {Enhanced cognitive flexibility in reversal learning induced by removal of the extracellular matrix in auditory cortex.}, journal = {Proc Natl Acad Sci U S A}, year = {2014}, volume = {111}, pages = {2800--2805}, number = {7}, month = {Feb}, abstract = {During brain maturation, the occurrence of the extracellular matrix (ECM) terminates juvenile plasticity by mediating structural stability. Interestingly, enzymatic removal of the ECM restores juvenile forms of plasticity, as for instance demonstrated by topographical reconnectivity in sensory pathways. However, to which degree the mature ECM is a compromise between stability and flexibility in the adult brain impacting synaptic plasticity as a fundamental basis for learning, lifelong memory formation, and higher cognitive functions is largely unknown. In this study, we removed the ECM in the auditory cortex of adult Mongolian gerbils during specific phases of cortex-dependent auditory relearning, which was induced by the contingency reversal of a frequency-modulated tone discrimination, a task requiring high behavioral flexibility. 
We found that ECM removal promoted a significant increase in relearning performance, without erasing already established-that is, learned-capacities when continuing discrimination training. The cognitive flexibility required for reversal learning of previously acquired behavioral habits, commonly understood to mainly rely on frontostriatal circuits, was enhanced by promoting synaptic plasticity via ECM removal within the sensory cortex. Our findings further suggest experimental modulation of the cortical ECM as a tool to open short-term windows of enhanced activity-dependent reorganization allowing for guided neuroplasticity.}, doi = {10.1073/pnas.1310272111}, institution = {Department of Systems Physiology of Learning and Department of Neurochemistry and Molecular Biology, Leibniz Institute for Neurobiology, D-39118 Magdeburg, Germany.}, keywords = {Acoustic Stimulation; Analysis of Variance; Animals; Auditory Cortex, physiology; Cognition, physiology; Discrimination Learning, physiology; Extracellular Matrix, metabolism; Fluorescence; Gerbillinae; Immunohistochemistry; Male; Memory, Long-Term, physiology; Neuronal Plasticity, physiology; Reversal Learning, physiology}, language = {eng}, medline-pst = {ppublish}, owner = {schulza}, pii = {1310272111}, pmid = {24550310}, timestamp = {2014.12.05} } @article{IlangoOhl2014, author = {Ilango, Anton and Shumake, Jason and Wetzel, Wolfram and Ohl, Frank W}, title = {Contribution of emotional and motivational neurocircuitry to cue-signaled active avoidance learning.}, journal = {Front Behav Neurosci}, year = {2014}, volume = {8}, pages = {372}, doi = {10.3389/fnbeh.2014.00372}, institution = {Leibniz Institute for Neurobiology , Magdeburg , Germany ; Institute of Biology, University of Magdeburg , Magdeburg , Germany ; Center for Behavioral Brain Sciences (CBBS) , Magdeburg , Germany.}, language = {eng}, medline-pst = {epublish}, owner = {schulza}, pmid = {25386127}, timestamp = {2014.12.05} } 
@article{MeyerAnemueller2014, author = {Meyer, Arne F and Diepenbrock, Jan-Philipp and Happel, Max F K and Ohl, Frank W and Anem{\"u}ller, J{\"o}rn}, title = {Discriminative learning of receptive fields from responses to non-Gaussian stimulus ensembles.}, journal = {PLoS One}, year = {2014}, volume = {9}, pages = {e93062}, number = {4}, __markedentry = {[schulza:]}, abstract = {Analysis of sensory neurons' processing characteristics requires simultaneous measurement of presented stimuli and concurrent spike responses. The functional transformation from high-dimensional stimulus space to the binary space of spike and non-spike responses is commonly described with linear-nonlinear models, whose linear filter component describes the neuron's receptive field. From a machine learning perspective, this corresponds to the binary classification problem of discriminating spike-eliciting from non-spike-eliciting stimulus examples. The classification-based receptive field (CbRF) estimation method proposed here adapts a linear large-margin classifier to optimally predict experimental stimulus-response data and subsequently interprets learned classifier weights as the neuron's receptive field filter. Computational learning theory provides a theoretical framework for learning from data and guarantees optimality in the sense that the risk of erroneously assigning a spike-eliciting stimulus example to the non-spike class (and vice versa) is minimized. Efficacy of the CbRF method is validated with simulations and for auditory spectro-temporal receptive field (STRF) estimation from experimental recordings in the auditory midbrain of Mongolian gerbils. Acoustic stimulation is performed with frequency-modulated tone complexes that mimic properties of natural stimuli, specifically non-Gaussian amplitude distribution and higher-order correlations. 
Results demonstrate that the proposed approach successfully identifies correct underlying STRFs, even in cases where second-order methods based on the spike-triggered average (STA) do not. Applied to small data samples, the method is shown to converge on smaller amounts of experimental recordings and with lower estimation variance than the generalized linear model and recent information theoretic methods. Thus, CbRF estimation may prove useful for investigation of neuronal processes in response to natural stimuli and in settings where rapid adaptation is induced by experimental design.}, doi = {10.1371/journal.pone.0093062}, institution = {Department of Medical Physics and Acoustics and Cluster of Excellence ''Hearing4all'', University of Oldenburg, Oldenburg, Germany.}, language = {eng}, medline-pst = {epublish}, owner = {schulza}, pii = {PONE-D-13-33547}, pmid = {24699631}, timestamp = {2014.12.05} } @article{MeyerAnemueller2014a, author = {Meyer, Arne F and Diepenbrock, Jan-Philipp and Ohl, Frank W and Anem{\"u}ller, J{\"o}rn}, title = {Temporal variability of spectro-temporal receptive fields in the anesthetized auditory cortex}, abstract = {Temporal variability of neuronal response characteristics during sensory stimulation is a ubiquitous phenomenon that may reflect processes such as stimulus-driven adaptation, top-down modulation or spontaneous fluctuations. It poses a challenge to functional characterization methods such as the receptive field, since these often assume stationarity. We propose a novel method for estimation of sensory neurons' receptive fields that extends the classic static linear receptive field model to the time-varying case. Here, the long-term estimate of the static receptive field serves as the mean of a probabilistic prior distribution from which the short-term temporally localized receptive field may deviate stochastically with time-varying standard deviation. 
The derived corresponding generalized linear model permits robust characterization of temporal variability in receptive field structure also for highly non-Gaussian stimulus ensembles. We computed and analyzed short-term auditory spectro-temporal receptive field (STRF) estimates with characteristic temporal resolution 5 s to 30 s based on model simulations and responses from in total 60 single-unit recordings in anesthetized Mongolian gerbil auditory midbrain and cortex. Stimulation was performed with short (100 ms) overlapping frequency-modulated tones. Results demonstrate identification of time-varying STRFs, with obtained predictive model likelihoods exceeding those from baseline static STRF estimation. Quantitative characterization of STRF variability reveals a higher degree thereof in auditory cortex compared to midbrain. Cluster analysis indicates that significant deviations from the long-term static STRF are brief, but reliably estimated. We hypothesize that the observed variability more likely reflects spontaneous or state-dependent internal fluctuations that interact with stimulus-induced processing, rather than experimental or stimulus design.}, journal = {Frontiers Computational Neuroscience}, year = {2014}, volume = {in press}, owner = {schulza}, timestamp = {2014.12.05} } @article{OhlOhl2014, author = {Ohl, Frank W}, title = {Role of cortical neurodynamics for understanding the neural basis of motivated behavior - lessons from auditory category learning.}, journal = {Curr Opin Neurobiol}, year = {2014}, volume = {31C}, pages = {88--94}, month = {Sep}, __markedentry = {[schulza:6]}, abstract = {Rhythmic activity appears in the auditory cortex in both microscopic and macroscopic observables and is modulated by both bottom-up and top-down processes. How this activity serves both types of processes is largely unknown. 
Here we review studies that have recently improved our understanding of potential functional roles of large-scale global dynamic activity patterns in auditory cortex. The experimental paradigm of auditory category learning allowed critical testing of the hypothesis that global auditory cortical activity states are associated with endogenous cognitive states mediating the meaning associated with an acoustic stimulus rather than with activity states that merely represent the stimulus for further processing.}, doi = {10.1016/j.conb.2014.08.014}, institution = {Leibniz Institute for Neurobiology, Department of Systems Physiology of Learning, Brenneckestr. 6, D-39118 Magdeburg, Germany. Electronic address: frank.ohl@lin-magdeburg.de.}, language = {eng}, medline-pst = {aheadofprint}, owner = {schulza}, pii = {S0959-4388(14)00176-7}, pmid = {25241212}, timestamp = {2014.12.05} } @inproceedings{Gugenheimer:2014:UIP:2669485.2669537, author = {Gugenheimer, Jan and Knierim, Pascal and Seifert, Julian and Rukzio, Enrico}, title = {UbiBeam: An Interactive Projector-Camera System for Domestic Deployment}, booktitle = {Proceedings of the Ninth ACM International Conference on Interactive Tabletops and Surfaces}, abstract = {Previous research on projector-camera systems has focused for a long time on interaction inside a lab environment. Currently there is no insight on how people would interact and use such a device in their everyday lives. We conducted an in-situ user study by visiting 22 households and exploring specific use cases and ideas of portable projector-camera systems in a domestic environment. Using a grounded theory approach, we identified several categories such as interaction techniques, presentation space, placement and use cases. Based on our observations, we designed and implemented UbiBeam, a domestically deployable projector-camera system. 
The system comprises a projector, a depth camera and two servomotors to transform every ordinary surface into a touch-sensitive information display.}, series = {ITS '14}, year = {2014}, isbn = {978-1-4503-2587-5}, location = {Dresden, Germany}, pages = {305--310}, numpages = {6}, doi = {10.1145/2669485.2669537}, acmid = {2669537}, publisher = {ACM}, address = {New York, NY, USA}, keywords = {domestic deployment, projector-camera system, steerable projection, ubiquitous computing} } @inproceedings{Rogers:2014:PFP:2669485.2669514, author = {Rogers, Katja and R{\"o}hlig, Amrei and Weing, Matthias and Gugenheimer, Jan and K{\"o}nings, Bastian and Klepsch, Melina and Schaub, Florian and Rukzio, Enrico and Seufert, Tina and Weber, Michael}, title = {P.I.A.N.O.: Faster Piano Learning with Interactive Projection}, booktitle = {Proceedings of the Ninth ACM International Conference on Interactive Tabletops and Surfaces}, abstract = {Learning to play the piano is a prolonged challenge for novices. It requires them to learn sheet music notation and its mapping to respective piano keys, together with articulation details. Smooth playing further requires correct finger postures. The result is a slow learning progress, often causing frustration and strain. To overcome these issues, we propose P.I.A.N.O., a piano learning system with interactive projection that facilitates a fast learning process. Note information in form of an enhanced piano roll notation is directly projected onto the instrument and allows mapping of notes to piano keys without prior sight-reading skills. Three learning modes support the natural learning process with live feedback and performance evaluation. We report the results of two user studies, which show that P.I.A.N.O. 
supports faster learning, requires significantly less cognitive load, provides better user experience, and increases perceived musical quality compared to sheet music notation and non-projected piano roll notation.}, series = {ITS '14}, year = {2014}, isbn = {978-1-4503-2587-5}, location = {Dresden, Germany}, pages = {149--158}, numpages = {10}, doi = {10.1145/2669485.2669514}, acmid = {2669514}, publisher = {ACM}, address = {New York, NY, USA}, keywords = {camit, instrument learning, interactive projection, musical expression, music, piano, piano roll notation} } @ARTICLE{Glodek2014b, author = {Glodek, Michael and Geier, Thomas and Biundo, Susanne and Palm, G{\"u}nther}, title = {A layered architecture for probabilistic complex pattern recognition to detect user preferences}, journal = {Journal of Biologically Inspired Cognitive Architectures}, abstract = {The recognition of complex patterns is nowadays one of the most challenging tasks in machine learning, and it promises to be of great benefit for many applications, e.g. by allowing advanced human computer interaction to access the user’s situative context. This work examines a layered architecture that operates on different temporal granularities to infer complex patterns of user preferences. Classical hidden Markov models (HMM), conditioned HMM (CHMM) and fuzzy CHMM (FCHMM) are compared to find the best configuration in the lower architecture layers. In the uppermost layer, a Markov logic network (MLN) is applied to infer the user preference in a probabilistic rule-based manner. For each layer a comprehensive evaluation is given. 
We provide empirical evidence showing that the layered architecture using FCHMM and MLN is well-suited to recognize patterns on different layers.}, year = {2014}, volume = {9}, pages = {46--56}, owner = {glodek}, timestamp = {2014.05.26} } @ARTICLE{Glodek2014, author = {Glodek, Michael and Schels, Martin and Schwenker, Friedhelm and Palm, G{\"u}nther}, title = {Combination of sequential class distributions from multiple channels using {M}arkov fusion networks}, journal = {Journal on Multimodal User Interfaces}, abstract = {The recognition of patterns in real-time scenarios has become an important trend in the field of multi-modal user interfaces in human computer interaction. Cognitive technical systems aim to improve the human computer interaction by means of recognizing the situative context, e.g. by activity recognition (Ahad et al. in IEEE, 1896–1901, 2008), or by estimating the affective state (Zeng et al., IEEE Trans Pattern Anal Mach Intell 31(1):39–58, 2009) of the human dialogue partner. Classifier systems developed for such applications must operate on multiple modalities and must integrate the available decisions over large time periods. We address this topic by introducing the Markov fusion network (MFN) which is a novel classifier combination approach, for the integration of multi-class and multi-modal decisions continuously over time. The MFN combines results while meeting real-time requirements, weighting decisions of the modalities dynamically, and dealing with sensor failures. The proposed MFN has been evaluated in two empirical studies: the recognition of objects involved in human activities, and the recognition of emotions where we successfully demonstrate its outstanding performance. 
Furthermore, we show how the MFN can be applied in a variety of different architectures and the several options to configure the model in order to meet the demands of a distinct problem.}, year = {2014}, volume = {8}, pages = {257--272}, number = {3}, doi = {10.1007/s12193-014-0149-0}, issn = {1783-7677}, keywords = {Markov fusion network; Multi-modal data; Temporal multi-class problems; Robust classifier fusion; Multiple classifier systems}, owner = {glodek}, publisher = {Springer Berlin Heidelberg}, timestamp = {2013.08.15} } @article{Saxen2014b, title = {Image-Based Methods for Interaction with Head-Worn Worker-Assistance Systems}, author = {Saxen, Frerk and Rashid, Omer and Al-Hamadi, Ayoub and Adler, Simon and Kernchen, Alexa and Mecke, R{\"u}diger}, abstract = {In this paper, a mobile assistance-system is described which supports users in performing manual working tasks in the context of assembling complex products. The assistance system contains a head-worn display for the visualization of information relevant for the workflow as well as a video camera to acquire the scene. This paper is focused on the interaction of the user with this system and describes work in progress and initial results from an industrial application scenario. We present image-based methods for robust recognition of static and dynamic hand gestures in realtime. These methods are used for an intuitive interaction with the assistance-system. The segmentation of the hand based on color information builds the basis of feature extraction for static and dynamic gestures. For the static gestures, the activation of particular sensitive regions in the camera image by the user’s hand is used for interaction. 
An HMM classifier is used to extract dynamic gestures depending on motion parameters determined based on the optical flow in the camera image.}, journal = {Journal of Intelligent Learning Systems and Applications}, month = {08}, year = {2014}, volume = {6}, pages = {141--152}, doi = {10.4236/jilsa.2014.63011} } @inproceedings{saxen2014superpixels, title = {Superpixels for skin segmentation}, author = {Saxen, Frerk and Al-Hamadi, Ayoub}, booktitle = {Workshop Farbbildverarbeitung}, abstract = {Using superpixels for color image segmentation is becoming increasingly popular, although it has not been utilized for skin segmentation so far. In this paper we investigate the applications of superpixels for skin segmentation using the SLIC (Simple Linear Iterative Clustering) algorithm. We propose a novel approach that uses superpixels with a straight forward feature derived from the well established Bayesian method. Our results are shown in comparison with state-of-the-art skin segmentation algorithms}, pages = {153--159}, month = {09}, year = {2014}, volume = {20}, doi = {10.13140/2.1.3293.3124}, web_url = {http://www-e.uni-magdeburg.de/saxen/pub/Saxen2014SuperpixelsForSkinSegmentation.pdf} } @article{Saeed2014frame, title = {Frame-Based Facial Expression Recognition Using Geometrical Features}, author = {Saeed, Anwar and Al-Hamadi, Ayoub and Niese, Robert and Elzobi, Moftah}, abstract = {To improve the human-computer interaction (HCI) to be as good as human-human interaction, building an efficient approach for human emotion recognition is required. These emotions could be fused from several modalities such as facial expression, hand gesture, acoustic data, and biophysiological data. In this paper, we address the frame-based perception of the universal human facial expressions (happiness, surprise, anger, disgust, fear, and sadness), with the help of several geometrical features. 
Unlike many other geometry-based approaches, the frame-based method does not rely on prior knowledge of a person-specific neutral expression; this knowledge is gained through human intervention and not available in real scenarios. Additionally, we provide a method to investigate the performance of the geometry-based approaches under various facial point localization errors. From an evaluation on two public benchmark datasets, we have found that using eight facial points, we can achieve the state-of-the-art recognition rate. However, this state-of-the-art geometry-based approach exploits features derived from 68 facial points and requires prior knowledge of the person-specific neutral expression. The expression recognition rate using geometrical features is adversely affected by the errors in the facial point localization, especially for the expressions with subtle facial deformations.}, journal = {Advances in Human-Computer Interaction}, volume = {2014}, number = {1}, pages = {1--13}, year = {2014} } @Inproceedings{Bercher14TDGHeuristics, title = {Hybrid Planning Heuristics Based on Task Decomposition Graphs}, year = {2014}, pages = {35--43}, abstract = {Hybrid Planning combines Hierarchical Task Network (HTN) planning with concepts known from Partial-Order Causal-Link (POCL) planning. We introduce novel heuristics for Hybrid Planning that estimate the number of necessary modifications to turn a partial plan into a solution. These estimates are based on the task decomposition graph that contains all decompositions of the abstract tasks in the planning domain. 
Our empirical evaluation shows that the proposed heuristics can significantly improve planning performance.}, url = {http://www.uni-ulm.de/fileadmin/website_uni_ulm/iui.inst.090/Publikationen/2014/Bercher14HybridHeuristics.pdf}, publisher = {AAAI Press}, booktitle = {Proceedings of the Seventh Annual Symposium on Combinatorial Search (SoCS 2014)}, author = {Bercher, Pascal and Keen, Shawn and Biundo, Susanne} } @Inproceedings{Pragst14CyberSecurity, title = {Introducing Hierarchy to Non-Hierarchical Planning Models -- A Case Study for Behavioral Adversary Models}, year = {2014}, abstract = {Hierarchical planning approaches are often pursued when it comes to a real-world application scenario, because they allow for incorporating additional expert knowledge into the domain. That knowledge can be used both for improving plan explanations and for reducing the explored search space. In case a non-hierarchical planning model is already available, for instance because a bottom-up modeling approach was used, one has to concern oneself with the question of how to introduce a hierarchy. This paper discusses the points to consider when adding a hierarchy to a non-hierarchical planning model using the example of the BAMS Cyber Security domain.}, url = {http://www.uni-ulm.de/fileadmin/website_uni_ulm/iui.inst.090/Publikationen/2014/Pragst14CyberSecurity.pdf}, booktitle = {28th PuK Workshop ``Planen, Scheduling und Konfigurieren, Entwerfen'' (PuK 2014)}, author = {Pragst, Louisa and Richter, Felix and Bercher, Pascal and Schattenberg, Bernd and Biundo, Susanne} } @Inproceedings{Hoeller14PlanLinearization, title = {Finding User-friendly Linearizations of Partially Ordered Plans}, year = {2014}, abstract = {Planning models usually do not discriminate between different possible execution orders of the actions within a plan, as long as the sequence remains executable. 
As the formal planning problem is an abstraction of the real world, it can very well occur that one linearization is more favorable than the other for reasons not captured by the planning model --- in particular if actions are performed by a human. Post-hoc linearization of plans is thus a way to improve the quality of a plan enactment. The cost of this transformation decouples from the planning process, and it allows to incorporate knowledge that cannot be expressed within the limitations of a certain planning formalism. In this paper we discuss the idea of finding useful plan linearizations within the formalism of hybrid planning (although the basic ideas are applicable to a broader class of planning models). We propose three concrete models for plan linearization, discuss their ramifications using the application domain of automated user-assistance, and sketch out ways how to empirically validate the assumptions underlying these user-centric models.}, keywords = {Hybrid Planning, POCL Planning, HTN Planning, Plan Linearization, User-centered Planning, User Assistance, Plan Execution}, url = {http://www.uni-ulm.de/fileadmin/website_uni_ulm/iui.inst.090/Publikationen/2014/Hoeller14PlanLinearization.pdf}, booktitle = {28th PuK Workshop ''Planen, Scheduling und Konfigurieren, Entwerfen'' ({PuK} 2014)}, author = {H{\"o}ller, Daniel and Bercher, Pascal and Richter, Felix and Schiller, Marvin and Geier, Thomas and Biundo, Susanne} } @Inproceedings{Prylipko:INTERSPEECH, author = {Prylipko, Dmytro and Egorow, Olga and Siegert, Ingo and Wendemuth, Andreas}, title = {Application of Image Processing Methods to Filled Pauses Detection from Spontaneous Speech}, booktitle = {Proceedings of the INTERSPEECH 2014}, abstract = {To obtain a more human-like interaction with technical systems, those have to be adaptable to the users individual preferences, and current emotional state. 
In human-human interaction the behaviour of the speaker is characterised by semantic and prosodic cues, given (among other indicators) as short feedback signals. These so called filled pauses minimally convey certain dialogue functions such as attention, understanding, confirmation, or other attitudinal reactions. These signals play a valuable role in the progress and coordination of interaction. Hereby, the first step enabling an automatic system to react on these signals is the detection of them within the users utterances. This is a quite complex task, as the filled pauses are phonetically short, consisting mostly only of one vowel and one consonant. In this paper we present our methods to detect filled pauses in a naturalistic interaction utilising the LAST MINUTE corpus. We used an SVM classifier and improved the results further, by applying a Gaussian filter to infer temporal context information and performing a morphological opening to filter false alarms. We obtained recall of 70%, precision of 55%, and AUC of 0.94. Index Terms: Fillers, filled pause detection, non-verbal vocalisations, morphological filters, erosion, dilation}, address = {Singapore}, year = {2014}, pages = {1816--1820} } @Inproceedings{Vlasenko:ICME2014, author = {Vlasenko, Bogdan and Wendemuth, Andreas}, title = {Location of an emotionally neutral region in valence-arousal space. Two-class vs. three-class cross corpora emotion recognition evaluations}, booktitle = {Proceedings of the IEEE International Conference on Multimedia and Expo (ICME)}, address = {Chengdu, China}, abstract = {There are two main emotion annotation techniques: multidimensional and categories based. In order to conduct experiments on emotional data annotated with different techniques, two-classes emotion mapping strategies (e.g. high-vs. low-arousal) are commonly used. The ”affective computing” community could not specify a location of emotionally neutral area in multi-dimensional emotional space (e.g. 
valence-arousal-dominance (VAD)). Nonetheless, in the current research a neutral state is added to the standard two-classes emotion classification task. Within experiments a possible location of a neutral arousal region in valence-arousal space was determined. We employed general and phonetic pattern dependent emotion classification techniques for cross-corpora experiments. Emotional models were trained on the VAM dataset (multi-dimensional annotation) and evaluated them on the EMO-DB dataset (categories based annotation).}, month = jul, date= {14-18}, year = {2014}, pages = {s.p.} } @article{Gluege:Neurocomp, title = {Learning long-term dependencies in segmented-memory recurrent neural networks with backpropagation of error}, journal = {Neurocomputing}, volume = {141}, number = {0}, pages = {54--64}, year = {2014}, issn = {0925-2312}, doi = {10.1016/j.neucom.2013.11.043}, author = {Gl{\"u}ge, Stefan and B{\"o}ck, Ronald and Palm, G{\"u}nther and Wendemuth, Andreas}, keywords = {Recurrent neural networks; Segmented-memory recurrent neural network; Vanishing Long-term dependencies; Unsupervised pre-training}, abstract = {In general, recurrent neural networks have difficulties in learning long-term dependencies. The segmented-memory recurrent neural network (SMRNN) architecture together with the extended real-time recurrent learning (eRTRL) algorithm was proposed to circumvent this problem. Due to its computational complexity eRTRL becomes impractical with increasing network size. Therefore, we introduce the less complex extended backpropagation through time (eBPTT) for \{SMRNN\} together with a layer-local unsupervised pre-training procedure. A comparison on the information latching problem showed that eRTRL is better able to handle the latching of information over longer periods of time, even though eBPTT guaranteed a better generalisation when training was successful. 
Further, pre-training significantly improved the ability to learn long-term dependencies with eBPTT. Therefore, the proposed eBPTT algorithm is suited for tasks that require big networks where eRTRL is impractical. The pre-training procedure itself is independent of the supervised learning algorithm and can improve learning in \{SMRNN\} in general.} } @Inproceedings{Wendemuth:Due2014, author = {Wendemuth, Andreas and B{\"o}ck, Ronald and Diedrich, Christian}, title = {Dialogs In Industrial Diagnosis}, booktitle = {Proceedings 3. Interdisziplin{\"a}rer Workshop Kognitive Systeme: Mensch, Teams, Systeme und Automaten}, address = {Magdeburg, Germany}, month = mar, date= {25-27}, year = {2014}, pages = {s.p.}, editor= {Wendemuth, A. and Jipp, M. and Kluge, A. and S{\"o}ffker, D.} } @inproceedings{seifert2014hover, author= {Seifert, Julian and Boring, Sebastian and Winkler, Christian and Schaub, Florian and Schwab, Fabian and Herrdum, Steffen and Maier, Fabian and Mayer, Daniel and Rukzio, Enrico}, title= {Hover Pad: Interacting with autonomous and self-actuated displays in space}, booktitle= {ACM Symposium on User Interface Software and Technology, (UIST)}, year= {2014}, url= {http://www.uni-ulm.de/fileadmin/website_uni_ulm/iui.inst.100/institut/Papers/Prof_Rukzio/2014/Seifert-et-al.-HoverPad.pdf}, web_url= {http://www.uni-ulm.de/en/in/mi/mi-forschung/mi-forschung-rukzio/projects/hoverpad.html}, note= {video: \url{https://www.youtube.com/watch?v=qAS6EC7cvU8}}, address= {New York, USA}, abstract= {Handheld displays enable flexible spatial exploration of information spaces – users can physically navigate through three-dimensional space to access information at specific locations. Having users constantly hold the display, however, has several limitations: (1) inaccuracies due to natural hand tremors; (2) fatigue over time; and (3) limited exploration within arm’s reach. 
We investigate autonomous,self-actuated displays that can freely move and hold their position and orientation in space without users having to hold them at all times. We illustrate various stages of such a display’s autonomy ranging from manual to fully autonomous, which – depending on the tasks – facilitate the interaction. Further, we discuss possible motion control mechanisms for these displays and present several interaction techniques enabled by such displays. Our Hover Pad toolkit enables exploring five degrees of freedom of self-actuated and autonomous displays and the developed control and interaction techniques. We illustrate the utility of our toolkit with five prototype applications, such as a volumetric medical data explorer.}, numpages= {9} } @inproceedings{Schussel:2014:MIH:2663204.2663255, author = {Sch{\"u}ssel, Felix and Honold, Frank and Schmidt, Miriam and Bubalo, Nikola and Huckauf, Anke and Weber, Michael}, title = {Multimodal Interaction History and Its Use in Error Detection and Recovery}, booktitle = {Proceedings of the 16th International Conference on Multimodal Interaction}, series = {ICMI '14}, year = {2014}, abstract = {Multimodal systems still tend to ignore the individual input behavior of users, and at the same time, suffer from erroneous sensor inputs. Although many researchers have described user behavior in specific settings and tasks, little to nothing is known about the applicability of such information, when it comes to increase the robustness of a system for multimodal inputs. We conducted a gamified experimental study to investigate individual user behavior and error types found in an actually running system. It is shown, that previous ways of describing input behavior by a simple classification scheme (like simultaneous and sequential) are not suited to build up an individual interaction history. Instead, we propose to use temporal distributions of different metrics derived from multimodal event timings. 
We identify the major errors that can occur in multimodal interactions and finally show how such an interaction history can practically be applied for error detection and recovery. Applying the proposed approach to the experimental data, the initial error rate is reduced from 4.9% to a minimum of 1.2%.}, isbn = {978-1-4503-2885-2}, location = {Istanbul, Turkey}, pages = {164--171}, numpages = {8}, doi = {10.1145/2663204.2663255}, acmid = {2663255}, publisher = {ACM}, address = {New York, NY, USA}, month = {November}, owner = {Felix}, timestamp = {2014.08.18}, keywords = {hci, input error detection and recovery, interaction history, multimodal interaction, user behavior} } @INPROCEEDINGS{schaub2014broken, author = {Schaub, Florian and Seifert, Julian and Honold, Frank and M{\"u}ller, Michael and Rukzio, Enrico and Weber, Michael}, title = {Broken Display= Broken Interface? The Impact of Display Damage on Smartphone Interaction}, booktitle = {Proceedings of the SIGCHI Conference on Human Factors in Computing Systems}, year = {2014}, series = {CHI '14}, pages = {2337--2346}, abstract= {This paper is the first to assess the impact of touchscreen damage on smartphone interaction. We gathered a dataset consisting of 95 closeup images of damaged smartphones and extensive information about a device’s usage history, damage severity, and impact on use. 88% of our participants continued to use their damaged smartphone for at least three months; 32% plan to use it for another year or more, mainly due to high repair and replacement costs. From the dataset, we identified three categories of damaged smartphone displays. Reading and text input were most affected. Further interviews (n=11) revealed that users adapt to damage with diverse coping strategies, closely tailored to specific interaction issues. In total, we identified 23 different strategies. 
Based on our results, we proposed guidelines for interaction design in order to provide a positive user experience when display damage occurs.}, address = {New York, NY, USA}, publisher = {ACM}, acmid = {2557067}, doi = {10.1145/2556288.2557067}, isbn = {978-1-4503-2473-1}, keywords = {broken display, display damage, mobile interaction, smartphone, user experience}, location = {Toronto, Ontario, Canada}, numpages = {10}, owner = {Frank}, timestamp = {2014.02.06}, web_url = {http://delivery.acm.org/10.1145/2560000/2557067/p2337-schaub.pdf?ip=134.60.12.218&id=2557067&acc=ACTIVE%20SERVICE&key=9B7742D6921DCF9C.4D4702B0C3E38B35.4D4702B0C3E38B35.4D4702B0C3E38B35&CFID=573922693&CFTOKEN=44610546&__acm__=1411568464_93c8ac39f7a0a1e12f5d95af2d86bbe9.pdf} } @INPROCEEDINGS{honold2014b, author = {Honold, Frank and Bercher, Pascal and Richter, Felix and Nothdurft, Florian and Geier, Thomas and Barth, Roland and H{\"o}rnle, Thilo and Sch{\"u}ssel, Felix and Reuter, Stephan and Rau, Matthias and Bertrand, Gregor and Seegebarth, Bastian and Kurzok, Peter and Schattenberg, Bernd and Minker, Wolfgang and Weber, Michael and Biundo, Susanne}, title = {Companion-Technology: Towards User- and Situation-Adaptive Functionality of Technical Systems}, booktitle = {Intelligent Environments (IE), 2014 10th International Conference on}, year = {2014}, pages = {378--381}, month = {July}, publisher = {IEEE}, note = {video: \url{http://companion.informatik.uni-ulm.de/ie2014/companion-system.mp4}}, abstract = {The properties of multimodality, individuality, adaptability, availability, cooperativeness and trustworthiness are at the focus of the investigation of Companion Systems. In this article, we describe the involved key components of such a system and the way they interact with each other. Along with the article comes a video, in which we demonstrate a fully functional prototypical implementation and explain the involved scientific contributions in a simplified manner. 
The realized technology considers the entire situation of the user and the environment in current and past states. The gained knowledge reflects the context of use and serves as basis for decision-making in the presented adaptive system.}, doi = {10.1109/IE.2014.60}, keywords = {Adaptive HCI; AI Planning; Dialog Management; Interaction Management; Companion Technology}, location = {Shanghai, China}, owner = {Frank}, url = {http://www.uni-ulm.de/fileadmin/website_uni_ulm/iui.inst.090/Publikationen/2014/Honold2014HomeTheaterVideo.pdf}, timestamp = {2014.04.28} } @INPROCEEDINGS{Honold2014a, author = {Honold, Frank and Sch{\"u}ssel, Felix and Weber, Michael}, title = {The Automated Interplay of Multimodal Fission and Fusion in Adaptive HCI}, booktitle = {Intelligent Environments (IE), 2014 10th International Conference on}, year = {2014}, abstract= {Present context-aware systems gather a lot of information to maximize their functionality but they predominantly use rather static ways to communicate. This paper motivates two components that serve as mediators between arbitrary components for multimodal fission and fusion, aiming to improve communication skills. Along with an exemplary selection scenario we describe the architecture for an automatic cooperation of fusion and fission in a model-driven realization. We describe how the approach supports user-initiative dialog requests as well as user-nominated UI configuration. 
Despite that, we show how multimodal input conflicts can be solved using a shortcut in the commonly used human-computer interaction loop (HCI loop).}, pages = {170--177}, address = {Shanghai, China}, month = jul, publisher = {IEEE}, doi = {10.1109/IE.2014.32}, owner = {Frank}, timestamp = {2014.04.15} } @INPROCEEDINGS{Nothdurft2014a, author= {Nothdurft, Florian and Honold, Frank and Zablotskaya, Kseniya and Diab, Amr and Minker, Wolfgang}, title = {Application of Verbal Intelligence in Dialog Systems for Multimodal Interaction}, booktitle = {Intelligent Environments (IE), 2014 10th International Conference on}, year = {2014}, pages = {361--364}, abstract = {In this paper we present a prototypical dialog system adaptive to verbal user intelligence. Verbal intelligence (VI) is the ability to analyze information and to solve problems using language-based reasoning. VI can be analyzed by the number of reused words, lemmas, n-grams, cosine similarity and other features. Here we concentrate on the application of VI in a human-computer interaction (HCI) and how this value can be used by the dialog management to adapt the dialog flow, and complexity at run-time. In our work complexity as well as informative value of presented information can be reduced or increased when encountering human-computer interaction by individually adapting to a lower or higher verbally intelligent user. Especially in intelligent environments, where users may rely on speech as their primary interaction modality, the adaptation of system instructions to the user’s VI could prove helpful. 
Index Terms—User-centered design, Human factors, User interfaces, Natural language.}, address = {Shanghai, China}, month = jul, publisher = {IEEE}, doi = {10.1109/IE.2014.59}, file = {Nothdurft2014a.pdf:Nothdurft2014a.pdf:PDF}, owner = {Frank}, timestamp = {2014.04.15} } @article{Prylipko:2014, year= {2014}, issn= {1783-7677}, journal= {Journal on Multimodal User Interfaces}, volume= {8}, number= {1}, doi= {10.1007/s12193-013-0144-x}, title= {Analysis of significant dialog events in realistic human-computer interaction}, publisher= {Springer Berlin Heidelberg}, keywords= {Human-computer interaction; Multimodal analysis; Companion technology}, author= {Prylipko, Dmytro and R{\"o}sner, Dietmar and Siegert, Ingo and G{\"u}nther, Stephan and Friesen, Rafael and Haase, Matthias and Vlasenko, Bogdan and Wendemuth, Andreas}, pages= {75--86}, abstract= {This paper addresses issues of automatically detecting significant dialog events (SDEs) in naturalistic HCI, and of deducing trait-specific conclusions relevant for the design of spoken dialog systems. We perform our investigations on the multimodal LAST MINUTE corpus with records from naturalistic interactions. First, we used textual transcripts to analyse interaction styles and discourse structures. We found indications that younger subjects prefer a more technical style in communication with dialog systems. Next, we model the subject's internal success state with a hidden Markov model trained using the observed sequences of system feedback. This reveals that younger subjects interact significantly more successful with technical systems. Aiming on automatic detection of specific subjects' reactions, we then semi-automatically annotate SDEs-phrases indicating an irregular, i.e. not-task-oriented subject behavior. We use both acoustic and linguistic features to build several trait-specific classifiers for dialog phases, which showed pronouncedly different accuracies for diverse age and gender groups. 
The presented investigations coherently support age-dependence of both expressiveness and problem-solving ability. This in turn induces design rules for future automatic designated "companion" systems.}, language= {English} } @inproceedings{herrschel2014facial, author= {Herrschel, L. and Weng, J. and Traue, H.C. and Hoffmann, H.}, title= {Facial Emotion Recognition in Peripheral Vision}, booktitle={Proceedings of the Inaugural Conference of the Society for Affective Sciences (SAS)}, year= {2014} } @article{jongen2014an, author= {Jongen,S. and Axmacher, N. and Kremers, N.A.W. and Hoffmann, H. and Limbrecht-Ecklundt, K. and Traue, H.C. and Kessler, H.}, title= {An investigation of facial emotion recognition impairments in alexithymia and its neural correlates}, journal= {Behavioural Brain Research}, abstract={Alexithymia is a personality trait that involves difficulties identifying emotions and describing feelings. It is hypothesized that this includes facial emotion recognition but limited knowledge exists about possible neural correlates of this assumed deficit. We hence tested thirty-seven healthy subjects with either a relatively high or low degree of alexithymia (HDA versus LDA), who performed in a reliable and standardized test of facial emotion recognition (FEEL, Facially Expressed Emotion Labeling) in the functional MRI. LDA subjects had significantly better emotion recognition scores and showed relatively more activity in several brain areas associated with alexithymia and emotional awareness (anterior cingulate cortex), and the extended system of facial perception concerned with aspects of social communication and emotion (amygdala, insula, striatum). Additionally, LDA subjects had more activity in the visual area of social perception (posterior part of the superior temporal sulcus) and the inferior frontal cortex. HDA subjects, on the other hand, exhibited greater activity in the superior parietal lobule. 
With differences in behaviour and brain responses between two groups of otherwise healthy subjects, our results indirectly support recent conceptualizations and epidemiological data, that alexithymia is a dimensional personality trait apparent in clinically healthy subjects rather than a categorical diagnosis only applicable to clinical populations.}, volume= {271}, pages= {129--139}, year= {2014} } @inproceedings{tisler2014the, author= {Tisler, C. and Hoffmann, H. and Traue, H.C.}, title= {The Effect of Very Brief Display Durations on Facial Emotion Recognition}, booktitle={Proceedings of the Inaugural Conference of the Society for Affective Sciences (SAS)}, year= {2014} } @inproceedings{walter2014multimodal, author= {Walter, S. and Gruss, S. and Traue, H.C. and Hazer, D. and Ma, X.Y. and Werner, P. and Al-Hamadi, A. and Moreira da Silva, G. and Andrade, A.O.}, title= {Multimodal Automatic Pain Recognition via Videosignals and Biopotentials}, booktitle={Proceedings of the XXIV Brazilian Congress on Biomedical Engineering (CBEB)}, year= {2014} } @inproceedings{weng2014visual, author= {Weng, J. and Herrschel, L. and Limbrecht-Ecklundt, K. and Traue, H.C. and Hoffmann, H.}, title= {Visual Scanning of Facially Expressed Emotions under Varying Head Pose}, booktitle={Proceedings of the Inaugural Conference of the Society for Affective Sciences (SAS)}, year= {2014} } @article{SiegertCOGN:2014, title= {Investigation of Speaker Group-Dependent Modelling for Recognition of Affective States from Speech}, author= {Siegert, Ingo and Philippou-H{\"u}bner, David and Hartmann, Kim and B{\"o}ck, Ronald and Wendemuth, Andreas}, year= {2014}, number = {4}, issn= {1866-9956}, journal= {Cognitive Computation}, abstract= {For successful human–machine-interaction (HCI) the pure textual information and the individual skills, preferences, and affective states of the user must be known. Therefore, as a starting point, the user’s actual affective state has to be recognized. 
In this work we investigated how additional knowledge, for example age and gender of the user, can be used to improve recognition of affective state. Two methods from automatic speech recognition are used to incorporate age and gender differences in recognition of affective state: speaker group-dependent (SGD) modelling and vocal tract length normalisation (VTLN). The investigations were performed on four corpora with acted and natural affected speech. Different features and two methods of classification (Gaussian mixture models (GMMs) and multi-layer perceptrons (MLPs)) were used. In addition, the effects of channel compensation and contextual characteristics were analysed. The results are compared with our own baseline results and with results reported in the literature. Two hypotheses were tested. First, incorporation of age information further improves speaker group-dependent modelling. Second, acoustic normalization does not achieve the same improvement as achieved by speaker group-dependent modelling, because the age and gender of a speaker affects the way emotions are expressed}, doi= {10.1007/s12559-014-9296-6}, publisher={Springer US}, keywords= {Affect recognition; Companion systems; Vocal tract length normalization; Speaker group-dependent classifiers}, pages= {892--913}, volume = {6}, language= {English} } @incollection{SiegertHCII:2014, year= {2014}, isbn= {978-3-319-07229-6}, booktitle={Human-Computer Interaction. Advanced Interaction Modalities and Techniques}, volume= {8511}, series= {Lecture Notes in Computer Science}, editor= {Kurosu, Masaaki}, doi= {10.1007/978-3-319-07230-2_47}, title= {Discourse Particles and User Characteristics in Naturalistic Human-Computer Interaction}, abstract= {In human-human interaction (HHI) the behaviour of the speaker is amongst others characterised by semantic and prosodic cues. 
These short feedback signals minimally communicate certain dialogue functions such as attention, understanding or other attitudinal reactions. Human-computer interaction (HCI) systems have failed to note and respond to these details so far, resulting in users trying to cope with and adapt to the machines behaviour. In order to enhance HCI, an adaptation to the user’s behaviour, individual skills, and the integration of a general human behaviour understanding is indispensable. Another issue is the question if the usage of feedback signals is influenced by the user’s individuality. In this paper, we investigate the influence of specific feedback signals, known as discourse particles (DPs), with communication style and psychological characteristics within a naturalistic HCI. This investigation showed that there is a significant difference in the usage of DPs for users of certain user characteristics.}, publisher={Springer International Publishing}, keywords= {human-machine-interaction; discourse particles; personality; user characteristics}, author= {Siegert, Ingo and Haase, Matthias and Prylipko, Dmytro and Wendemuth, Andreas}, pages= {492--501}, language= {English} } @article{Siegert:JMUI, year= {2014}, issn= {1783-7677}, journal= {Journal on Multimodal User Interfaces}, volume= {8}, number= {1}, doi= {10.1007/s12193-013-0129-9}, title= {Inter-rater reliability for emotion annotation in human-computer interaction: comparison and methodological improvements}, publisher={Springer Berlin Heidelberg}, keywords= {Affective state; Annotation; Context influence; Inter-rater agreement; Labelling}, author= {Siegert, Ingo and B{\"o}ck, Ronald and Wendemuth, Andreas}, pages= {17--28}, abstract= {To enable a naturalistic human-computer interaction the recognition of emotions and intentions experiences increased attention and several modalities are comprised to cover all human communication abilities. 
For this reason, naturalistic material is recorded, where the subjects are guided through an interaction with crucial points, but with the freedom to react individually. This material captures realistic user reactions but lacks of clear labels. So, a good transcription and annotation of the given material is essential. For that, the assignment of human annotators has become widely accepted. A good measurement for the reliability of labelled material is the inter-rater agreement. In this paper we investigate the achieved inter-rater agreement utilizing Krippendorff's alpha for emotional annotated interaction corpora and present methods to improve the reliability, we show that the reliabilities obtained with different methods does not differ much, so a choice could rely on other aspects. Furthermore, a multimodal presentation of the items in their natural order increases the reliability.} } @incollection{WIRN2013, year= {2014}, isbn= {978-3-319-04128-5}, booktitle= {Recent Advances of Neural Network Models and Applications}, volume= {26}, doi= {10.1007/978-3-319-04129-2_39}, series= {Smart Innovation, Systems and Technologies}, editor= {Simone Bassis and Anna Esposito and Francesco Carlo Morabito}, title= {Investigating the Form-Function-Relation of the Discourse Particle ``hm'' in a Naturalistic Human-Computer Interaction}, publisher= {Springer}, keywords= {Prosodic Analysis; Companion Systems; Human-Computer Interaction; Discourse Particle; Pitch Contour Classification}, author= {Siegert, Ingo and Prylipko, Dmytro and Hartmann, Kim and B{\"o}ck, Ronald and Wendemuth, Andreas}, pages= {387--394}, abstract = {For a successful speech-controlled human-computer interaction (HCI) the pure textual information as well as individual skills, preferences, and affective states of the user have to be known. However, verbal human interaction consists of several information layers. 
Apart from pure textual information, further details regarding the speaker's feelings, believes, and social relations are transmitted. The additional information is encoded through acoustics. Especially, the intonation reveals details about the speakers communicative relation and their attitude towards the ongoing dialogue. Since the intonation is influenced by semantic and grammatical information, it is advisable to investigate the intonation of so-called discourse particles (DPs) as "hm" or "uhm". They cannot be inflected but can be emphasised. DPs have the same intonation curves (pitch-contours) as whole sentences and thus may indicate the same functional meanings. For German language J. E. Schmidt empirically discovered seven types of form-function-concurrences on the isolated DP "hm"¯. To determine the function within the dialogue of the DPs, methods are needed that preserve pitch-contours and are feasible to assign defined form-prototypes. Furthermore, it must be investigated which pitch-contours occur in naturalistic HCI and whether these contours are congruent with the findings by linguists. In this paper we present first results on the extraction and correlation of the DP "hm". 
We investigate the different form-function-relations in the naturalistic LAST MINUTE corpus and determine expectable form-function relations in naturalistic HCI in general.} } @article{Wendemuth:2014, year= {2014}, journal= {VDI SA - Mitteldeutsche Mitteilungen}, title= {Companion-Systeme: interaktive, kognitive Informationstechnik}, publisher={VDI}, author= {Wendemuth, Andreas}, pages= {12--13}, month= {03}, note= {Special Issue IKT: Innovationstreiber und Wachstumsmotor} } @article{Poppe:JMUI2014, year= {2014}, issn= {1783-7677}, journal= {Journal on Multimodal User Interfaces}, volume= {8}, number= {1}, doi= {10.1007/s12193-014-0152-5}, title= {From multimodal analysis to real-time interactions with virtual agents}, publisher= {Springer Berlin Heidelberg}, author= {Poppe, Ronald and B{\"o}ck, Ronald and Bonin, Francesca and Campbell, Nick and de Kok, Iwan and Traum, David}, pages= {1--3}, language= {English} } @inproceedings{Bergmann2014, title= {EmoGest: Investigating the Impact of Emotions on Spontaneous Co-speech Gestures}, editor= {Edlund, J. and Heylen, D. and Paggio, P.}, booktitle= {Proceedings of the Workshop on Multimodal corpora 2013: Multimodal Corpora: Combining applied and basic research targets}, year= {2014}, author= {Bergmann, K. and B{\"o}ck, R. and Jaecks, P.}, abstract= {Spontaneous co-speech gestures are an integral part of human communicative behavior. Little is known, however, about how they reflect a speaker’s emotional state. In this paper, we describe the setup of a novel body movement database. 32 participants were primed with emotions (happy, sad, neutral) by listening to selected music pieces and, subsequently, fulfilled a gesture-eliciting task. We present our methodology of evaluating the effects of emotion priming with standardized questionnaires, and via automatic emotion recognition of the speech signal. 
First results suggest that emotional priming was successful, thus, paving the way for further analyses comparing the gestural behavior across the three experimental conditions.}, pages= {13--16}, address= {Reykjavik, Iceland}, publisher= {Language Resources and Evaluation (LREC)} } @article{TschechneNeumann2014a, journal = {Journal of Vision (VSS Abstract)}, author = {Tschechne, S. and Neumann, H.}, title = {Unified Representation of Motion and Motion Streak Patterns in a Model of Cortical Form-Motion Interaction}, abstract = {Problem. Direction selective neurons in visual cortex (V1) encode spatio-temporal movements of visual patterns. It has been suggested that motion directions are also spatially encoded in the form channel as oriented motion streaks (Burr, Curr. Biol., 2000), while only fast motions lead to motion streak patterns (Apthorp et al., Proc. Roy. Soc. London B, 2013). Geisler (Nature, 1999) proposed that motion streaks aid determining visual motion direction estimation while their awareness is suppressed in normal vision conditions (Wallis \& Arnold, Curr. Biol., 2009). The underlying neural mechanisms of such form-motion interaction are, however, still unknown. Method. We propose a neural model that acquires data from an event-based vision sensor that responds to temporal changes in the input intensity. Model area V1 uses spatio-temporal filters to detect visual motion and forwards activations to be integrated in model area MT. Orientation-selective contrast cells in model areas V1 and V2 spatially integrate recent visual events and respond to oriented structures parallel to movement direction when sufficiently fast motion is presented. Form cells' responses temporally cease already for slow motions. Results and Conclusion. We probed the model with dark/light random dot patterns moving at different directions and speeds, replicating experimental settings. 
For higher speeds oriented contrast-sensitive cells are co-activated along an orientation parallel to the motion direction, viz., signaling motion streaks or speedlines. For slow motions no such responses occur. Adaptation effects confirm experimental findings from psychophysics. The model suggests that motion streaks occur in the form channel as a direct consequence of fast coherent motions along single directions without the need to assume separate motion channel representations. The model makes predictions concerning the strength of the streak patterns and sheds new light upon mechanisms of computing motion from form.}, pages = {18}, volume = {14}, number = {10}, year = {2014}, doi = {10.1167/14.10.18} } @inproceedings{SchrodtEtAl2014a, title = {Modeling perspective-taking by correlating visual and proprioceptive dynamics}, author = {Schrodt, Fabian and Layher, Georg and Neumann, Heiko and Butz, MV}, abstract = {How do we manage to step into another person's shoes and eventually derive the intention behind observed behavior? We propose a connectionist neural network (NN) model that learns self-supervised a prerequisite of this social capability: it adapts its internal perspective in accordance to observed biological motion. The model first learns predictive correlations between proprioceptive motion and a corresponding visual motion perspective. When a novel view of a biological motion is presented, the model is able to transform this view to the closest perspective that was seen during training. In effect, the model realizes a translation-, scale-, and rotation-invariant recognition of biological motion. The NN is an extended adaptive resonance model that incorporates self-supervised error backpropagation and parameter bootstrapping by neural noise. It segments and correlates relative, visual and proprioceptive velocity kinematics, gradually refining the emerging representations from scratch. 
As a result, it is able to adjust its internal perspective to novel views of trained biological motion patterns. Thus, we show that it is possible to take the perspective of another person by correlating proprioceptive motion with relative, visual motion, and then allowing the adjustment of the visual frame of reference to other views of similar motion patterns.}, booktitle = {Proc. 36th annual meeting of the Cognitive Science Society (CogSci, 2014)}, pages = {1383--1388}, year = {2014}, editor = {Bello, P and Guarini, M and McShane, M and Scassellati, B}, url = {https://mindmodeling.org/cogsci2014/papers/243/}, web_url= {http://www.informatik.uni-ulm.de/ni/staff/HNeumann/publicationsYear/PDFs/CONFERENCES/CogSci14-SchrodtEtAl-modelingPerspectiveTakingVisualAndProprioceptiveDynamics.pdf} } @inproceedings{Granstroem2014, author = {Granstr{\"o}m, Karl and Reuter, Stephan and Meissner, Daniel and Scheel, Alexander}, title = {A multiple model {PHD} approach to tracking of cars under an assumed rectangular shape}, booktitle = {Proceedings of the 17th International Conference on Information Fusion}, year = {2014}, keywords = {driver assistance, tracking, SR, DM, AS}, abstract = {This paper presents an extended target tracking method for tracking cars in urban traffic using data from laser range sensors. Results are presented for three real world datasets that contain multiple cars, occlusions, and maneuver changes. The car's shape is approximated by a rectangle, and single track steering models are used for the target kinematics. A multiple model approach is taken for both the dynamics modeling and the measurement modeling. A comparison to ground truth shows that the estimation errors are generally very small: on average the absolute error is less than half a degree for the heading.
Multiple cars are handled using a multiple model PHD filter, where a variable probability of detection is integrated to enable tracking of occluded cars.}, file = {:2014/Fusion/Granstroem2014.pdf:PDF}, owner = {reuter}, timestamp = {2014.08.27} } @article{Reuter2014, author = {Reuter, Stephan and Vo, Ba-Tuong and Vo, Ba-Ngu and Dietmayer, Klaus}, title = {The Labeled Multi-{Bernoulli} Filter}, journal = {IEEE Transactions on Signal Processing}, year = {2014}, volume = {62}, pages = {3246--3260}, keywords = {driver assistance, SR1, SR, KD}, number = {12}, abstract = {This paper proposes a generalization of the multi-Bernoulli filter called the labeled multi-Bernoulli filter that outputs target tracks. Moreover, the labeled multi-Bernoulli filter does not exhibit a cardinality bias due to a more accurate update approximation compared to the multi-Bernoulli filter by exploiting the conjugate prior form for labeled Random Finite Sets. The proposed filter can be interpreted as an efficient approximation of the delta-Generalized Labeled Multi-Bernoulli filter. It inherits the advantages of the multi-Bernoulli filter in respect of particle implementation and state estimation. It also inherits advantages of the delta-Generalized Labeled Multi-Bernoulli filter in that it outputs (labeled) target tracks and achieves better performance.}, file = {:2014/TSP/Reuter2014.pdf:PDF}, owner = {reuter}, timestamp = {2014.03.17} } @Inproceedings{nothdurft-richter-minker:2014:W14-43, author = {Nothdurft, Florian and Richter, Felix and Minker, Wolfgang}, title = {Probabilistic Human-Computer Trust Handling}, abstract = {Human-computer trust has shown to be a critical factor in influencing the complexity and frequency of interaction in technical systems. Particularly incomprehensible situations in human-computer interaction may lead to a reduced users trust in the system and by that influence the style of interaction.
Analogous to human-human interaction, explaining these situations can help to remedy negative effects. In this paper we present our approach of augmenting task-oriented dialogs with selected explanation dialogs to foster the human-computer trust relationship in those kinds of situations. We have conducted a web-based study testing the effects of different goals of explanations on the components of human-computer trust. Subsequently, we show how these results can be used in our probabilistic trust handling architecture to augment pre-defined task-oriented dialogs}, booktitle = {Proceedings of the 15th Annual Meeting of the Special Interest Group on Discourse and Dialogue (SIGDIAL)}, month = {June}, year = {2014}, address = {Philadelphia, PA, U.S.A.}, publisher = {Association for Computational Linguistics}, pages = {51--59}, url = {http://www.aclweb.org/anthology/W14-4307} } @Inproceedings{Roesner14Lrec, author = {R{\"o}sner, Dietmar and Friesen, Rafael and G{\"u}nther, Stephan and Andrich, Rico}, title = {Modeling and Evaluating Dialog Success in the {LAST} {MINUTE} Corpus}, abstract = {The LAST MINUTE corpus comprises records and transcripts of naturalistic problem solving dialogs between N = 130 subjects and a companion system simulated in a Wizard of Oz experiment. Our goal is to detect dialog situations where subjects might break up the dialog with the system which might happen when the subject is unsuccessful. We present a dialog act based representation of the dialog courses in the problem solving phase of the experiment and propose and evaluate measures for dialog success or failure derived from this representation. This dialog act representation refines our previous coarse measure as it enables the correct classification of many dialog sequences that were ambiguous before. The dialog act representation is useful for the identification of different subject groups and the exploration of interesting dialog courses in the corpus. 
We find young females to be most successful in the challenging last part of the problem solving phase and young subjects to have the initiative in the dialog more often than the elderly.}, booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)}, year = {2014}, month = may, date = {26-31}, address = {Reykjavik, Iceland}, editor = {Nicoletta Calzolari and Khalid Choukri and Thierry Declerck and Hrafn Loftsson and Bente Maegaard and Joseph Mariani and Asuncion Moreno and Jan Odijk and Stelios Piperidis}, publisher = {European Language Resources Association (ELRA)}, isbn = {978-2-9517408-8-4}, language = {english} } @inproceedings{Roesner14KogSys, author = {R{\"o}sner, Dietmar and Andrich, Rico and Friesen, Rafael and G{\"u}nther, Stephan}, title = {In-depth Analyses of human companion interactions in the {LAST} {MINUTE} corpus -- a Computer Science perspective}, booktitle = {Proceedings 3. Interdisziplin{\"a}rer Workshop Kognitive Systeme: Mensch, Teams, Systeme und Automaten}, address = {Magdeburg, Germany}, date = {25-27}, month = mar, year = {2014}, editor = {A. Wendemuth and M. Jipp and A. Kluge and D. S{\"o}ffker} } @INPROCEEDINGS{Scheel2014, author = {Scheel, Alexander and Granstr{\"o}m, Karl and Meissner, Daniel and Reuter, Stephan and Dietmayer, Klaus}, title = {Tracking and Data Segmentation Using a {GGIW} Filter with Mixture Clustering}, booktitle = {Proceedings of the 17th International Conference on Information Fusion}, year = {2014}, keywords = {driver assistance, tracking, laser, SR, DM, AS, KD, laser scanner, clustering}, month = jul, abstract = {Common data preprocessing routines often introduce considerable flaws in laser-based tracking of extended objects. As an alternative, extended target tracking methods, such as the Gamma-Gaussian-Inverse Wishart (GGIW) probability hypothesis density (PHD) filter, work directly on raw data.
In this paper, the GGIW-PHD filter is applied to real world traffic scenarios. To cope with the large amount of data, a mixture clustering approach which reduces the combinatorial complexity and computation time is proposed. The effective segmentation of raw measurements with respect to spatial distribution and motion is demonstrated and evaluated on two different applications: pedestrian tracking from a vehicle and intersection surveillance.}, file = {:2014/Fusion/Scheel2014.pdf:PDF}, owner = {scheel}, timestamp ={2014.06.04} } @INPROCEEDINGS{Vo2014, author = {Vo, Ba-Ngu and Vo, Ba-Tuong and Reuter, Stephan and Lam, Quang and Dietmayer, Klaus}, title = {Towards large scale multi-target tracking}, booktitle = {Proc. SPIE 9085, Sensors and Systems for Space Applications VII}, year = {2014}, keywords = {driver assistance, tracking, SR, KD}, abstract = {Multi-target tracking is intrinsically an NP-hard problem and the complexity of multi-target tracking solutions do not scale gracefully with problem size. Multi-target tracking for on-line applications involving a large number of targets is extremely challenging. This article demonstrates the capability of the random finite set approach to provide large scale multi-target tracking algorithms. 
In particular it is shown that an approximate filter known as the labeled multi-Bernoulli filter can simultaneously track one thousand five hundred targets in clutter on a standard laptop computer.}, doi = {10.1117/12.2055002}, file = {:2014/Vo2014.pdf:PDF}, owner = {reuter}, timestamp = {2014.06.05} } @INPROCEEDINGS{Reuter2014c, author = {Reuter, Stephan and Vo, Ba-Tuong and Vo, Ba-Ngu and Dietmayer, Klaus}, title = {Multi-Object Tracking Using Labeled Multi-{Bernoulli} Random Finite Sets}, booktitle = {Proceedings of the 17th International Conference on Information Fusion}, year = {2014}, keywords = {driver assistance, tracking, SR, SR1, KD}, abstract = {In this paper, we propose the labeled multi-Bernoulli filter which explicitly estimates target tracks and provides a more accurate approximation of the multi-object Bayes update than the multi-Bernoulli filter. In particular, the labeled multi-Bernoulli filter is not prone to the biased cardinality estimate of the multi-Bernoulli filter. The utilization of the class of labeled random finite sets naturally incorporates the estimation of a targets identity or label. Compared to the delta-generalized labeled multi-Bernoulli filter, the labeled multi-Bernoulli filter is an efficient approximation which obtains almost the same accuracy at significantly lower computational cost. The performance of the labeled multi-Bernoulli filter is compared to the multi-Bernoulli filter using simulated data. 
Further, the real-time capability of the filter is illustrated using real-world sensor data of our experimental vehicle.}, file = {:2014/Fusion/Reuter2014c.pdf:PDF}, owner = {reuter}, timestamp = {2014.08.27} } @PHDTHESIS{Reuter2014b, author = {Reuter, Stephan}, title = {Multi-Object Tracking Using Random Finite Sets}, school = {Ulm University}, year = {2014}, keywords = {driver assistance, tracking, random finite sets, SR, SR1}, abstract = {The aim of multi-object tracking is the estimation of the number of objects and their individual states using a sequence of measurements. While state of the art algorithms use object individual single-object trackers, the multi-object Bayes filter models the multi-object state as well as the measurement process using random finite sets which naturally represent the uncertainty in the number of objects as well as in the state of the objects. Hence, a realization of a random finite set valued random variable represents the complete environment and facilitates the incorporation of object interactions. In the update of the multi-object Bayes filter, the multi-object likelihood function averages over all possible track to measurement associations which avoids error-prone association decisions. During the last decade, several tractable approximations of the multi-object Bayes filter have been proposed based on the statistical moments or parameterization. However, the approximations are prone to unstable cardinality estimates, the influence of missed detections on well-separated objects (''spooky effect''), or a biased cardinality estimate and do not allow for the incorporation of object interactions any more due to the required approximations.
In this thesis, the first real-time capable sequential Monte Carlo implementation of the multi-object Bayes filter and its application to real-world sensor data are presented. The proposed implementation of the multi-object Bayes filter is based on an approximation of the multi-object likelihood function which significantly reduces the computational complexity. Further, several methods to incorporate object interactions in the prediction step of the multi-object Bayes filter are proposed. Additionally, a novel multi-object tracking algorithm, the labeled multi-Bernoulli filter, is proposed in this thesis. The approximation of the multi-object posterior density using labeled multi-Bernoulli random finite sets results in an accurate and real-time capable tracking algorithm. The labeled multi-Bernoulli filter facilitates an implementation using Gaussian mixtures and is capable to track a significantly larger number of objects than the sequential Monte Carlo implementation of the multi-object Bayes filter. The proposed tracking algorithms are evaluated using simulated data as well as real-world sensor data. The performance of the algorithms is compared to other approximations of the multi-object Bayes filter like the cardinalized probability hypothesis density filter and the cardinality balanced multi-target multi-Bernoulli filter. Additionally, the labeled multi-Bernoulli filter is compared to the joint integrated probabilistic data association filter in the context of vehicle environment perception.}, file = {:2014/dissertations/Reuter2014b.pdf:PDF}, owner = {reuter}, timestamp = {2014.08.07} } @article{Meissner2014, author = {Meissner, D. and Reuter, S. and Strigel, E.
and Dietmayer, K.}, title = {Intersection-Based Road User Tracking Using a Classifying Multiple-Model {PHD} Filter}, journal = {IEEE Intelligent Transportation Systems Magazine}, year = {2014}, volume = {6}, pages = {21--33}, keywords = {driver assistance, tracking, intersection, DM, SR, ES, KD,}, number = {2}, abstract = {Abstract-The number of fatal accidents involving pedestrians and bikers at urban intersections is still increasing. Therefore, an intersection-based perception system provides a dynamic model of the intersection scene to the vehicles. Based on that, the intersection perception facilitates to discriminate occlusions which is expected to significantly reduce the number of accidents at intersections. Therefore this contribution presents a general purpose multi-sensor tracking algorithm, the classifying multiple-model probability hypothesis density (CMMPHD) filter, which facilitates the tracking and classification of relevant objects using a single filter. Due to the different motion characteristics, a multiple-model approach is required to obtain accurate state estimates and persistent tracks for all types of objects. Additionally, an extension of the PHD filter to handle contradictory measurements of different sensor types based on the Dempster-Shafer theory of evidence is proposed. The performance of tracking and classification is evaluated using real world sensor data of a public intersection.}, doi = {10.1109/MITS.2014.2304754}, file = {:2014/MITS/Meissner2014.pdf:PDF}, issn = {1939-1390}, owner = {reuter}, timestamp = {2014.05.21} } @article{BroschRoelfsemaNeumann2014, journal = {Journal of Vision (Abstract, VSS)}, author = {Brosch, T. and Roelfsema, P. and Neumann, H.}, title = {Learning of New Perceptual Groupings - A Biologically Plausible Recurrent Neural Network Model that Learns Contour Integration}, abstract ={Problem. 
Mechanisms of perceptual organization can be subdivided into base-grouping, operating in parallel over the visual field, and incremental grouping that operates sequentially and requires selective attention (Roelfsema, Ann. Rev. Neurosci, 2006). The underlying neural mechanisms recruit circuits and cortical subsystems that interact in feedforward and feedback streams (Poort et al., Neuron, 2012). Evidence suggests that the neural computational mechanisms are not inert but are influenced by perceptual learning (Li et al., Neuron, 2008). It is currently unknown what the underlying mechanisms are to implement such perceptual learning. Method. We propose the biologically inspired REinforcement LEarning Algorithm for Recurrent Neural Networks (RELEARNN). Our model consists of mutually connected model areas that include intra-areal and inter-areal excitatory, inhibitory and modulating connections that influence the mean firing rates of model neurons. Learning alters these connections and utilizes a biologically plausible Hebbian plasticity mechanism that is gated by two factors, a localized attentional feedback and a global reinforcement learning signal. The model is shown to provide a biologically plausible link to the Almeida-Pineda backpropagation scheme (Almeida, IEEE 1987; Pineda, Physical Review Letters, 1987). Results and Conclusion. We demonstrate how RELEARNN can account for the performance of the visual system in two different grouping tasks. The first is a curve-tracing task, and we demonstrate that a model trained in this task qualitatively reproduces the activity profile of neurons in the visual cortex of monkeys. Activation of neurons that are driven by feedforward signals are enhanced by sustained laterally propagated modulations that serve as grouping label. The second task demands the detection of a "snake" of collinearly aligned contour elements. Here, the model reproduces psychometric performance curves as well as neuronal activity in monkey area V1. 
The new findings suggest that multi-stage grouping operations in the brain may be learned by one common learning mechanism.}, pages = {941}, volume = {14}, number = {10}, year = {2014}, doi = {10.1167/14.10.941} } @article{kohrs2014delayed, title= {Delayed system response times affect immediate physiology and the dynamics of subsequent button press behavior}, author= {Kohrs, Christin and Hrabal, David and Angenstein, Nicole and Brechmann, Andr{\'e}}, abstract= {System response time research is an important issue in human-computer interactions. Experience with technical devices and general rules of human-human interactions determine the user's expectation, and any delay in system response time may lead to immediate physiological, emotional, and behavioral consequences. We investigated such effects on a trial-by-trial basis during a human-computer interaction by measuring changes in skin conductance (SC), heart rate (HR), and the dynamics of button press responses. We found an increase in SC and a deceleration of HR for all three delayed system response times (0.5, 1, 2 s). Moreover, the data on button press dynamics was highly informative since subjects repeated a button press with more force in response to delayed system response times.
Furthermore, the button press dynamics could distinguish between correct and incorrect decisions and may thus even be used to infer the uncertainty of a user's decision.}, journal= {Psychophysiology}, year= {2014}, publisher={Wiley Online Library}, url= {http://onlinelibrary.wiley.com/doi/10.1111/psyp.12253/abstract}, web_url= {http://www.uni-ulm.de/fileadmin/website_uni_ulm/iui.inst.090/Publikationen/2014/Kohrs_Psychophysiology_2014.pdf} } @ARTICLE{10.3389/fnhum.2014.00499, AUTHOR= {Bethmann, Anja and Brechmann, Andr{\'e}}, TITLE= {On the definition and interpretation of voice selective activation in the temporal cortex}, JOURNAL= {Frontiers in Human Neuroscience}, VOLUME= {8}, YEAR= {2014}, NUMBER= {499}, DOI= {10.3389/fnhum.2014.00499}, ISSN= {1662-5161} , ABSTRACT= {Regions along the superior temporal sulci and in the anterior temporal lobes have been found to be involved in voice processing. It has even been argued that parts of the temporal cortices serve as voice-selective areas. Yet, evidence for voice-selective activation in the strict sense is still missing. The current fMRI study aimed at assessing the degree of voice-specific processing in different parts of the superior and middle temporal cortices. To this end, voices of famous persons were contrasted with widely different categories, which were sounds of animals and musical instruments. The argumentation was that only brain regions with statistically proven absence of activation by the control stimuli may be considered as candidates for voice-selective areas. Neural activity was found to be stronger in response to human voices in all analyzed parts of the temporal lobes except for the middle and posterior STG. More importantly, the activation differences between voices and the other environmental sounds increased continuously from the mid-posterior STG to the anterior MTG. Here, only voices but not the control stimuli excited an increase of the BOLD response above a resting baseline level. 
The findings are discussed with reference to the function of the anterior temporal lobes in person recognition and the general question on how to define selectivity of brain regions for a specific class of stimuli or tasks. In addition, our results corroborate recent assumptions about the hierarchical organization of auditory processing building on a processing stream from the primary auditory cortices to anterior portions of the temporal lobes.} } @Inproceedings{Bercher14HybridPlanningApplication, title = {Plan, Repair, Execute, Explain - How Planning Helps to Assemble your Home Theater}, publisher = {AAAI Press}, pages = {386--394}, year = {2014}, abstract = {In various social, work-related, or educational contexts, an increasing demand for intelligent assistance systems can be observed. In this paper, we present a domain-independent approach that combines a number of planning and interaction components to realize advanced user assistance. Based on a hybrid planning formalism, the components provide facilities including the generation, execution, and repair as well as the presentation and explanation of plans. We demonstrate the feasibility of our approach by means of a system that aims to assist users in the assembly of their home theater. 
An empirical evaluation shows the benefit of such a supportive system, in particular for persons with a lack of domain expertise.}, url = {http://www.uni-ulm.de/fileadmin/website_uni_ulm/iui.inst.090/Publikationen/2014/Bercher14PlanRepairExecuteExplain.pdf}, booktitle = {Proceedings of the 24th International Conference on Automated Planning and Scheduling (ICAPS 2014)}, author = {Bercher, Pascal and Biundo, Susanne and Geier, Thomas and Hoernle, Thilo and Nothdurft, Florian and Richter, Felix and Schattenberg, Bernd} } @Inproceedings {Hoeller2014HTNLanguage, title = {Language Classification of Hierarchical Planning Problems}, year = {2014}, tags = {SFB-TRR-62,Planning}, volume = {263}, pages = {447--452}, abstract = {Theoretical results on HTN planning are mostly related to the plan existence problem. In this paper, we study the structure of the generated plans in terms of the language they produce. We show that such languages are always context-sensitive. Furthermore we identify certain subclasses of HTN planning problems which generate either regular or context-free languages. 
Most importantly we have discovered that HTN planning problems, where preconditions and effects are omitted, constitute a new class of languages that lies strictly between the context-free and context-sensitive languages.}, editor = {Schaub, Torsten and Friedrich, Gerhard and O'Sullivan, Barry}, publisher = {IOS Press}, address = {Amsterdam}, series = {Frontiers in Artificial Intelligence and Applications}, booktitle = {Proceedings of the 21st European Conference on Artificial Intelligence ({ECAI} 2014)}, event_place = {Prague, Czech Republic}, event_name = {21st European Conference on Artificial Intelligence}, event_date = {18-22 August 2014}, DOI = {10.3233/978-1-61499-419-0-447}, url = {http://www.uni-ulm.de/fileadmin/website_uni_ulm/iui.inst.090/Publikationen/2014/Hoeller2014HtnLanguages.pdf}, author = {H{\"o}ller, Daniel and Behnke, Gregor and Bercher, Pascal and Biundo, Susanne} } @INPROCEEDINGS{Kachele2014, author = {K{\"a}chele, Markus and Glodek, Michael and Zharkov, Dimitrij and Meudt, Sascha and Schwenker, Friedhelm}, title = {Fusion of audio-visual features using hierarchical classifier systems for the recognition of affective states and the state of depression}, booktitle = {Proceedings of the International Conference on Pattern Recognition Applications and Methods (ICPRAM)}, abstract = {Reliable prediction of affective states in real world scenarios is very challenging and a significant amount of ongoing research is targeted towards improvement of existing systems. Major problems include the unreliability of labels, variations of the same affective states amongst different persons and in different modalities as well as the presence of sensor noise in the signals. This work presents a framework for adaptive fusion of input modalities incorporating variable degrees of certainty on different levels. 
Using a strategy that starts with ensembles of weak learners, gradually, level by level, the discriminative power of the system is improved by adaptively weighting favorable decisions, while concurrently dismissing unfavorable ones. For the final decision fusion the proposed system leverages a trained Kalman filter. Besides its ability to deal with missing and uncertain values, in its nature, the Kalman filter is a time series predictor and thus a suitable choice to match input signals to a reference time series in the form of ground truth labels. In the case of affect recognition, the proposed system exhibits superior performance in comparison to competing systems on the analysed dataset.}, year = {2014}, editor = {De Marsico, Maria and Tabbone, Antoine and Fred, Ana}, pages = {671--678}, publisher = {SciTePress} } @article{BroschNeumann2014, journal = {Neural Networks}, author = {Brosch, T. and Neumann, H.}, title = {Interaction of Feedforward and Feedback Streams in Visual Cortex in a Firing-Rate Model of Columnar Computations}, abstract = {Visual sensory input stimuli are rapidly processed along bottom-up feedforward cortical streams. Beyond such driving streams neurons in higher areas provide information that is re-entered into the representations and responses at the earlier stages of processing. The precise mechanisms and underlying functionality of such associative feedforward/feedback interactions are not resolved. This work develops a neuronal circuit at a level mimicking cortical columns with response properties linked to single cell recordings. The proposed model constitutes a coarse-grained model with gradual firing-rate responses which accounts for physiological in vitro recordings from mammalian cortical cells. It is shown that the proposed population-based circuit with gradual firing-rate dynamics generates responses like those of detailed biophysically realistic multi-compartment spiking models.
The results motivate using a coarse-grained mechanism for large-scale neural network modeling and simulations of visual cortical mechanisms. They further provide insights about how local recurrent loops change the gain of modulating feedback signals.}, pages = {11--16}, volume = {54}, year = {2014} } @INPROCEEDINGS{Nothdurft14_1, author = {Nothdurft, Florian and Minker, Wolfgang}, month = jan, title = {Justification and Transparency Explanations in Dialogue Systems to Maintain Human-Computer Trust}, booktitle = {Proceedings of the 4th International Workshop On Spoken Dialogue Systems (IWSDS)}, abstract = {This paper describes a web-based study testing the effects of different explanations on the human-computer trust relationship. Human-computer trust has shown to be very important in keeping the user motivated and cooperative in a human-computer interaction. Especially unexpected or not understandable situations may decrease the trust and by that the way of interacting with a technical system. Analogous to human-human interaction providing explanations in these situations can help to remedy negative effects. However, selecting the appropriate explanation based on users' human-computer trust is an unprecedented approach because existing studies concentrate on trust as a one-dimensional concept. In this study we try to find a mapping between the bases of trust and the different goals of explanations. Our results show that transparency explanations seem to be the best way to influence the user's perceived understandability and reliability.}, year = {2014}, publisher = {Springer}, location = {Napa, California, USA} } @article{LayherEtAl2014a, title= {Learning Representations of Animated Motion Sequences---A Neural Model}, author= {Layher, Georg and Giese, Martin A and Neumann, Heiko}, journal= {Topics in Cognitive Science}, abstract= {The detection and categorization of animate motions is a crucial task underlying social interaction and perceptual decision-making.
Neural representations of perceived animate objects are built in the primate cortical region STS which is a region of convergent input from intermediate level form and motion representations. Populations of STS cells exist which are selectively responsive to specific animated motion sequences, such as walkers. It is still unclear how and to which extent form and motion information contribute to the generation of such representations and what kind of mechanisms are involved in the learning processes. The paper develops a cortical model architecture for the unsupervised learning of animated motion sequence representations. We demonstrate how the model automatically selects significant motion patterns as well as meaningful static form prototypes characterized by a high degree of articulation. Such key poses are selectively reinforced during learning through a cross-talk between the motion and form processing streams. Next, we show how sequence selective representations are learned in STS by fusing static form and motion input from the segregated bottom-up driving input streams. Cells in STS, in turn, feed their activities recurrently to their input sites along top-down signal pathways. We show how such learned feedback connections enable making predictions about future input as anticipation generated by sequence-selective STS cells. Network simulations demonstrate the computational capacity of the proposed model by reproducing several experimental findings from neurosciences and by accounting for recent behavioral data. Keywords: animated motion representation; implied motion; neural model; unsupervised learning; feedback.}, volume= {6}, number= {1}, pages= {170--182}, year= {2014}, doi= {10.1111/tops.12075}, publisher= {Wiley Online Library} }