@Proceedings{ICGI 20122012,
title = {Proceedings of the Eleventh International Conference on Grammatical Inference},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
editor = {Jeffrey Heinz and Colin Higuera and Tim Oates},
publisher = {PMLR},
series = {Proceedings of Machine Learning Research},
volume = 21
}
@InProceedings{pmlr-v21-heinz12a,
title = {Preface},
author = {Heinz, Jeffrey and de la Higuera, Colin and Oates, Tim},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {1--3},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/heinz12a/heinz12a.pdf},
url = {https://proceedings.mlr.press/v21/heinz12a.html},
abstract = {Preface to the Proceedings of the Eleventh International Conference on Grammatical Inference September 5-8, 2012, University of Maryland, College Park, United States.}
}
@InProceedings{pmlr-v21-aarts12a,
title = {Learning and Testing the Bounded Retransmission Protocol},
author = {Aarts, Fides and Kuppens, Harco and Tretmans, Jan and Vaandrager, Frits and Verwer, Sicco},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {4--18},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/aarts12a/aarts12a.pdf},
url = {https://proceedings.mlr.press/v21/aarts12a.html},
abstract = {Using a well-known industrial case study from the verification literature, the bounded retransmission protocol, we show how active learning can be used to establish the correctness of protocol implementation I relative to a given reference implementation R. Using active learning, we learn a model M_R of reference implementation R, which serves as input for a model based testing tool that checks conformance of implementation I to M_R. In addition, we also explore an alternative approach in which we learn a model M_I of implementation I, which is compared to model M_R using an equivalence checker. Our work uses a unique combination of software tools for model construction (Uppaal), active learning (LearnLib, Tomte), model-based testing (JTorX, TorXakis) and verification (CADP, MRMC). We show how these tools can be used for learning these models, analyzing the obtained results, and improving the learning performance.}
}
@InProceedings{pmlr-v21-akram12a,
title = {Actively Learning Probabilistic Subsequential Transducers},
author = {Akram, Hasan Ibne and La Higuera, Colin and Eckert, Claudia},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {19--33},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/akram12a/akram12a.pdf},
url = {https://proceedings.mlr.press/v21/akram12a.html},
abstract = {In this paper we investigate learning of probabilistic subsequential transducers in an active learning environment. In our learning algorithm the learner interacts with an oracle by asking probabilistic queries on the observed data. We prove our algorithm in an identification in the limit model. We also provide experimental evidence to show the correctness and to analyze the learnability of the proposed algorithm.}
}
@InProceedings{pmlr-v21-balle12a,
title = {Bootstrapping and Learning PDFA in Data Streams},
author = {Balle, Borja and Castro, Jorge and Gavaldà, Ricard},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {34--48},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/balle12a/balle12a.pdf},
url = {https://proceedings.mlr.press/v21/balle12a.html},
abstract = {Markovian models with hidden state are widely-used formalisms for modeling sequential phenomena. Learnability of these models has been well studied when the sample is given in batch mode, and algorithms with PAC-like learning guarantees exist for specific classes of models such as Probabilistic Deterministic Finite Automata (PDFA). Here we focus on PDFA and give an algorithm for infering models in this class under the stringent \emphdata stream scenario: unlike existing methods, our algorithm works incrementally and in one pass, uses memory sublinear in the stream length, and processes input items in amortized constant time. We provide rigorous PAC-like bounds for all of the above, as well as an evaluation on synthetic data showing that the algorithm performs well in practice. Our algorithm makes a key usage of several old and new sketching techniques. In particular, we develop a new sketch for implementing bootstrapping in a streaming setting which may be of independent interest. In experiments we have observed that this sketch yields important reductions in the examples required for performing some crucial statistical tests in our algorithm.}
}
@InProceedings{pmlr-v21-becerra12a,
title = {Speeding Up Syntactic Learning Using Contextual Information},
author = {Bonache, Leonor Becerra and Fromont, Elisa and Habrard, Amaury and Perrot, Michaël and Sebban, Marc},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {49--53},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/becerra12a/becerra12a.pdf},
url = {https://proceedings.mlr.press/v21/becerra12a.html},
abstract = {It has been shown in (Angluin and Becerra-Bonache, 2010, 2011) that interactions between a learner and a teacher can help language learning. In this paper, we make use of additional contextual information in a pairwise-based generative approach aiming at learning (situation,sentence)-pair-hidden markov models. We show that this allows a significant speed-up of the convergence of the syntactic learning. We apply our model on a toy natural language task in Spanish dealing with geometric objects.}
}
@InProceedings{pmlr-v21-cerno12a,
title = {Clearing Restarting Automata and Grammatical Inference},
author = {Černo, Peter},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {54--68},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/cerno12a/cerno12a.pdf},
url = {https://proceedings.mlr.press/v21/cerno12a.html},
abstract = {Clearing and subword-clearing restarting automata are linguistically motivated models of automata. We investigate the problem of grammatical inference for such automata based on the given set of positive and negative samples. We show that it is possible to identify these models in the limit. In this way we can learn a large class of languages. On the other hand, we prove that the task of finding a clearing restarting automaton consistent with a given set of positive and negative samples is NP-hard, provided that we impose an upper bound on the width of its instructions.}
}
@InProceedings{pmlr-v21-chandlee12a,
title = {Integrating Grammatical Inference into Robotic Planning},
author = {Chandlee, Jane and Fu, Jie and Karydis, Konstantinos and Koirala, Cesar and Heinz, Jeffrey and Tanner, Herbert},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {69--83},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/chandlee12a/chandlee12a.pdf},
url = {https://proceedings.mlr.press/v21/chandlee12a.html},
abstract = {This paper presents a method for the control synthesis of robotic systems in an unknown, dynamic, and adversarial environments. We (1) incorporate a grammatical inference module that identifies the governing dynamics of the adversarial environment and (2) utilize game theory to compute a motion plan for a system given a task specification. The framework is flexible and modular since different games can be formulated for different system objectives and different grammatical inference algorithms can be utilized depending on the abstract nature of the dynamic environment.}
}
@InProceedings{pmlr-v21-clark12a,
title = {Beyond Semilinearity: Distributional Learning of Parallel Multiple Context-free Grammars},
author = {Clark, Alexander and Yoshinaka, Ryo},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {84--96},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/clark12a/clark12a.pdf},
url = {https://proceedings.mlr.press/v21/clark12a.html},
abstract = {Semilinearity is widely held to be a linguistic invariant but, controversially, some linguistic phenomena in languages like Old Georgian and Yoruba seem to violate this constraint. In this paper we extend distributional learning to the class of parallel multiple context-free grammars, a class which as far as is known includes all attested natural languages, even taking an extreme view on these examples. These grammars may have a copying operation that can recursively copy constituents, allowing them to generate non-semilinear languages. We generalise the notion of a context to a class of functions that include copying operations. The congruential approach is ineffective at this level of the hierarchy; accordingly we extend this using dual approaches, defining nonterminals using sets of these generalised contexts. As a corollary we also extend the multiple context free grammars using the lattice based approaches.}
}
@InProceedings{pmlr-v21-coste12a,
title = {Locally Substitutable Languages for Enhanced Inductive Leaps},
author = {Coste, François and Garet, Gaëlle and Nicolas, Jacques},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {97--111},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/coste12a/coste12a.pdf},
url = {https://proceedings.mlr.press/v21/coste12a.html},
abstract = {Genomic banks are fed continuously by large sets of DNA or RNA sequences coming from high throughput machines. Protein annotation is a task of first importance with respect to these banks. It consists of retrieving the genes that code for proteins within the sequences and then predict the function of these new proteins in the cell by comparison with known families. Many methods have been designed to characterize protein families and discover new members, mainly based on subsets of regular expressions or simple Hidden Markov Models. We are interested in more expressive models that are able to capture the long-range characteristic interactions occurring in the spatial structure of the analyzed protein family. Starting from the work of Clark and Eyraud (2007) and Yoshinaka (2008) on inference of substitutable and \emphk,l-substitutable languages respectively, we introduce new classes of substitutable languages using local rather than global substitutability, a reasonable assumption with respect to protein structures to enhance inductive leaps performed by least generalized generalization approaches. The concepts are illustrated on a first experiment using a real proteic sequence set.}
}
@InProceedings{pmlr-v21-eisner12a,
title = {Grammar Induction: Beyond Local Search},
author = {Eisner, Jason},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {112--113},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/eisner12a/eisner12a.pdf},
url = {https://proceedings.mlr.press/v21/eisner12a.html},
abstract = {Many approaches to probabilistic grammar induction operate by iteratively improving a single grammar, beginning with an initial guess. These local search paradigms include (variational) EM, MCMC, and greedy model merging or splitting procedures. Unfortunately, local search methods tend to get caught in local optima, even with random restarts. Two approaches are outlined that try to avoid this problem. One uses branch-and-bound methods from mathematical programming to eliminate regions of parameter space that cannot contain the global optimum. The other is inspired by recent work on deep learning, and uses spectral methods to build up featural representations of all substrings, without premature commitment to which substrings are constituents.}
}
@InProceedings{pmlr-v21-eyraud12a,
title = {Learning Substitutable Binary Plane Graph Grammars},
author = {Eyraud, Rémi and Janodet, Jean-Christophe and Oates, Tim},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {114--128},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/eyraud12a/eyraud12a.pdf},
url = {https://proceedings.mlr.press/v21/eyraud12a.html},
abstract = {While some heuristics exist for the learning of graph grammars, few has been done on the theoretical side. Due to complexity issues, the class of graphs has to be restricted: this paper deals with the subclass of plane graphs, which correspond to drawings of planar graphs. This allows us to introduce a new kind of graph grammars, using a face-replacement mechanism. To learn them, we extend recent successful techniques developed for string grammars, and based on a property on target languages: the substitutability property. We show how this property can be extended to plane graph languages and finally state the first identification in the limit result for a class of graph grammars, as far as we know.}
}
@InProceedings{pmlr-v21-florencio12a,
title = {Learning Tree Adjoining Grammars from Structures and Strings},
author = {Costa Florêncio, Christophe},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {129--132},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/florencio12a/florencio12a.pdf},
url = {https://proceedings.mlr.press/v21/florencio12a.html},
abstract = {We investigate the learnability of certain subclasses of tree adjoining grammars (TAGs). TAGs are based on two tree-tree operations, and generate structures known as \emph{derived trees}. The corresponding strings form a \emph{mildly context-sensitive} language. We prove that even very constrained subclasses of TAGs are not learnable from structures (derived trees) or strings, demonstrating that this type of problem is far from trivial. We also demonstrate that a large (parameterized) family of classes of TAGs is learnable from strings.}
}
@InProceedings{pmlr-v21-irfan12a,
title = {Improving Model Inference of Black Box Components having Large Input Test Set},
author = {Irfan, Muhammad Naeem and Groz, Roland and Oriat, Catherine},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {133--138},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/irfan12a/irfan12a.pdf},
url = {https://proceedings.mlr.press/v21/irfan12a.html},
abstract = {The deterministic finite automata (DFA) learning algorithm $L*$ has been extended to learn Mealy machine models which are more succinct for \emph{input/output} (i/o) based systems. We propose an optimized learning algorithm $L_1$ to infer Mealy models of software black box components. The $L_1$ algorithm uses a modified observation table and avoids adding unnecessary elements to its columns and rows. The proposed improvements reduce the worst case time complexity. The $L_1$ algorithm is compared with the existing Mealy inference algorithms and the experiments conducted on a comprehensive set confirm the gain.}
}
@InProceedings{pmlr-v21-jayasri12a,
title = {Learning of Bi-$\omega$ Languages from Factors},
author = {Jayasrirani, M. and Begam, M. H. and Thomas, D. G. and Emerald, J. D.},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {139--144},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/jayasri12a/jayasri12a.pdf},
url = {https://proceedings.mlr.press/v21/jayasri12a.html},
abstract = {Higuera and Janodet (2001) gave a polynomial algorithm that identifies the class of safe $\omega$-languages which is a subclass of deterministic $\omega$-languages from positive and negative prefixes. As an extension of this work we study the learning of the family of bi-$\omega$ languages.}
}
@InProceedings{pmlr-v21-littman12a,
title = {Inducing Partially Observable Markov Decision Processes},
author = {Littman, Michael L.},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {145--148},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/littman12a/littman12a.pdf},
url = {https://proceedings.mlr.press/v21/littman12a.html},
abstract = {The partially observable Markov decision process (POMDP) model plays an important role in the field of reinforcement learning. It captures the problem of decision making when some important features of the environment are not visible to the decision maker. A number of approaches have been proposed for inducing POMDP models from data, a problem that has important parallels with grammar induction.}
}
@InProceedings{pmlr-v21-lopez12a,
title = {Inference of $k$-Testable Directed Acyclic Graph Languages},
author = {Lòpez, Damiàn and Calera-Rubio, Jorge and Gallego-Sànchez, Javier},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {149--163},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/lopez12a/lopez12a.pdf},
url = {https://proceedings.mlr.press/v21/lopez12a.html},
abstract = {In this paper, we tackle the task of graph language learning. We first extend the well-known classes of \emph{$k$-testability} and \emph{$k$-testability in the strict sense} languages to directed graph languages. Second, we propose a graph automata model for directed acyclic graph languages. This graph automata model is used to propose a grammatical inference algorithm to learn the class of directed acyclic $k$-testable in the strict sense graph languages. The algorithm runs in polynomial time and identifies this class of languages from positive data.}
}
@InProceedings{pmlr-v21-miclet12a,
title = {A Lattice of Sets of Alignments Built on the Common Subwords in a Finite Language},
author = {Miclet, Laurent and Barbot, Nelly and Jeudy, Baptiste},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {164--176},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/miclet12a/miclet12a.pdf},
url = {https://proceedings.mlr.press/v21/miclet12a.html},
abstract = {We define the locally maximal subwords and locally minimal superwords common to a finite set of words. We also define the corresponding sets of alignments. We give a partial order relation between such sets of alignments, as well as two operations between them. We show that the constructed family of sets of alignments has the lattice structure. We give hints to use this structure as a machine learning basis for inducing a generalization of the set of words.}
}
@InProceedings{pmlr-v21-otaki12a,
title = {Estimation of Generating Processes of Strings Represented with Patterns and Refinements},
author = {Otaki, Keisuke and Yamamoto, Akihiro},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {177--182},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/otaki12a/otaki12a.pdf},
url = {https://proceedings.mlr.press/v21/otaki12a.html},
abstract = {We formalize generating processes of strings based on patterns and substitutions, and give an algorithm to estimate a probability mass function on substitutions, which is an element of processes. Patterns are non-empty sequences of characters and variables. Variables indicate unknown substrings and are replaced with other patterns by substitutions. By introducing variables and substitutions, we can deal with the difficulty of preparing production rules in generative grammar and of representing context-sensitivity with them. Our key idea is to regard sequences of substitutions as generations of strings, and to give probabilities of substitutions like PCFG. In this study, after giving a problem to estimate a probability mass function from strings based on our formalization, we solve it by the Passing algorithm that runs in an iterative manner. Our experimental results with synthetic strings show that our method estimates probability mass functions with sufficient small errors.}
}
@InProceedings{pmlr-v21-schmid12a,
title = {Applying Grammar Inference To Identify Generalized Patterns of Facial Expressions of Pain},
author = {Schmid, Ute and Siebers, Michael and Seuß, Dominik and Kunz, Miriam and Lautenbacher, Stefan},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {183--188},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/schmid12a/schmid12a.pdf},
url = {https://proceedings.mlr.press/v21/schmid12a.html},
abstract = {We present an application of grammar inference in the domain of facial expression analysis. Typically, only sets of AUs which occur in a given time frame are used for expression analysis, the sequence in which these AUs occur is ignored. We wanted to explore whether the strucural patterns of AU appearances contain diagnostically relevant information. We applied alignment-based learning (ABL) to data of facial expressions of pain collected in a psychological study. To evaluate the quality of the induced grammars we applied cross-validation to estimate the generalization error. We can show that the learned grammars have reasonably high coverages for unseen pain sequences. However, the number of rules of the learned grammars cannot be reduced substantially without loss of generalization.}
}
@InProceedings{pmlr-v21-spitkovsky12a,
title = {Bootstrapping Dependency Grammar Inducers from Incomplete Sentence Fragments via Austere Models},
author = {Spitkovsky, Valentin I. and Alshawi, Hiyan and Jurafsky, Daniel},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {189--194},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/spitkovsky12a/spitkovsky12a.pdf},
url = {https://proceedings.mlr.press/v21/spitkovsky12a.html},
abstract = {Modern grammar induction systems often employ curriculum learning strategies that begin by training on a subset of all available input that is considered simpler than the full data. Traditionally, filtering has been at granularities of whole input units, e.g., discarding entire sentences with too many words or punctuation marks. We propose instead viewing inter-punctuation fragments as atoms, initially, thus making some simple phrases and clauses of complex sentences available to training sooner. Splitting input text at punctuation in this way improved our state-of-the-art grammar induction pipeline. We observe that resulting partial data, i.e., mostly incomplete sentence fragments, can be analyzed using reduced parsing models which, we show, can be easier to bootstrap than more nuanced grammars. Starting with a new, bare dependency-and-boundary model (DBM-0), our grammar inducer attained 61.2% directed dependency accuracy on Section 23 (all sentences) of the Wall Street Journal corpus: more than 2% higher than previous published results for this task.}
}
@InProceedings{pmlr-v21-steffen12a,
title = {Active Automata Learning: From DFAs to Interface Programs and Beyond},
author = {Steffen, Bernhard and Howar, Falk and Isberner, Malte},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {195--209},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/steffen12a/steffen12a.pdf},
url = {https://proceedings.mlr.press/v21/steffen12a.html},
abstract = {This paper reviews the development of active learning in the last decade under the perspective of treating of data, a major source of undecidability, and therefore a key problem to achieve practicality. Starting with the first case studies, in which data was completely disregarded, we revisit different steps towards dealing with data explicitly in active learning: We discuss Mealy Machines as a model for systems with (data) output, automated alphabet abstraction refinement as a two-dimensional extension of the partition-refinement based approach of active learning for inferring not only states but also optimal alphabet abstractions, and Register Mealy Machines, which can be regarded as programs restricted to data-independent data processing as it is typical for protocols or interface programs. We are convinced that this development has the potential to transform active automata learning into a technology of high practical importance.}
}
@InProceedings{pmlr-v21-unold12a,
title = {Fuzzy Grammar-based Prediction of Amyloidogenic Regions},
author = {Unold, Olgierd},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {210--219},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/unold12a/unold12a.pdf},
url = {https://proceedings.mlr.press/v21/unold12a.html},
abstract = {In this paper, we address the problem of predicting the location of amyloidogenic regions in proteins. The language of protein sequence can be described by using a formal system such as fuzzy context-free grammar, and the problem of amyloidogenic region recognition can be replaced by fuzzy grammar induction. The induced fuzzy grammar achieved 70.6% accuracy and 96.7% specificity on a recently published amyloidogenic dataset. Our results are comparable to other methods dedicated to recognize amyloid proteins.}
}
@InProceedings{pmlr-v21-vanzaanen12a,
title = {Learning Interpretations Using Sequence Classification},
author = {Zaanen, Menno and Loo, Janneke},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {220--223},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/vanzaanen12a/vanzaanen12a.pdf},
url = {https://proceedings.mlr.press/v21/vanzaanen12a.html},
abstract = {In this paper we present a system that assigns interpretations, in the form of shallow semantic frame descriptions, to natural language sentences. The system searches for relevant patterns, consisting of words from the sentences, to identify the correct semantic frame and associated slot values. For each of these choices, a separate classifier is trained. Each classifier learns the boundaries between different languages, which each correspond to a particular class. The different classifiers each have their own viewpoint on the data depending on which aspect needs to be identified.}
}
@InProceedings{pmlr-v21-vanzaanen12b,
title = {Model Merging Versus Model Splitting Context-Free Grammar Induction},
author = {Zaanen, Menno and Noord, Nanne},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {224--236},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/vanzaanen12b/vanzaanen12b.pdf},
url = {https://proceedings.mlr.press/v21/vanzaanen12b.html},
abstract = {When comparing different grammatical inference algorithms, it becomes evident that generic techniques have been used in different systems. Several finite-state learning algorithms use state-merging as their underlying technique and a collection of grammatical inference algorithms that aim to learn context-free grammars build on the concept of substitutability to identify potential grammar rules. When learning context-free grammars, there are essentially two approaches: model merging, which generalizes with more data, and model splitting, which specializes with more data. Both approaches can be combined sequentially in a generic framework. In this article, we investigate the impact of different approaches within the first phase of the framework on system performance.}
}
@InProceedings{pmlr-v21-wieczorek12a,
title = {Induction of Non-Deterministic Finite Automata on Supercomputers},
author = {Wieczorek, Wojciech},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {237--242},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/wieczorek12a/wieczorek12a.pdf},
url = {https://proceedings.mlr.press/v21/wieczorek12a.html},
abstract = {The problem of inducing automata of minimal size consistent with finite sets of examples and counter-examples is the combinatorial optimization task of grammatical inference and is known to be computationally hard. Both an exact and a heuristic method of finding a non-deterministic finite automaton (NFA) with a given number of states such that all examples are accepted and all counter-examples are rejected by the automaton will be evaluated. The methods are based on a translation of NFA identification into integer nonlinear programming.}
}
@InProceedings{pmlr-v21-verwer12a,
title = {Results of the PAutomaC Probabilistic Automaton Learning Competition},
author = {Verwer, Sicco and Eyraud, Rémi and Higuera, Colin},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {243--248},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/verwer12a/verwer12a.pdf},
url = {https://proceedings.mlr.press/v21/verwer12a.html},
abstract = {Approximating distributions over strings is a hard learning problem. Typical GI techniques involve using finite state machines as models and attempting to learn both the structure and the weights, simultaneously. The PAutomaC competition is the first challenge to allow comparison between methods and algorithms and builds a first state of the art for these techniques. Both artificial data and real data were proposed and contestants were to try to estimate the probabilities of test strings. The purpose of this paper is to provide an overview of the implementation details of PAutomaC and to report the final results of the competition.}
}
@InProceedings{pmlr-v21-hulden12a,
title = {Treba: Efficient Numerically Stable EM for PFA},
author = {Hulden, Mans},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {249--253},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/hulden12a/hulden12a.pdf},
url = {https://proceedings.mlr.press/v21/hulden12a.html},
abstract = {Training probabilistic finite automata with the EM/Baum-Welch algorithm is computationally very intensive, especially if random ergodic automata are used initially, and additional strategies such as deterministic annealing are used. In this paper we present some optimization and parallelization strategies to the Baum-Welch algorithm that often allow for training of much larger automata with a larger number of observations. The tool, \emphtreba, which implements the optimizations, is available open-source and its results were used to participate in the PAutomaC PFA/HMM competition.}
}
@InProceedings{pmlr-v21-kepler12a,
title = {Simple Variable Length N-grams for Probabilistic Automata Learning},
author = {Kepler, Fabio N. and Mergen, Sergio L. S. and Billa, Cleo Z.},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {254--258},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/kepler12a/kepler12a.pdf},
url = {https://proceedings.mlr.press/v21/kepler12a.html},
abstract = {This paper describes an approach used in the 2012 Probabilistic Automata Learning Competition. The main goal of the competition was to obtain insights about which techniques and approaches work best for sequence learning based on different kinds of automata generating machines. This paper proposes the usage of n-gram models with variable length. Experiments show that, using the test sets provided by the competition, the variable-length approach works better than fixed 3-grams.}
}
@InProceedings{pmlr-v21-shibata12a,
title = {Marginalizing Out Transition Probabilities for Several Subclasses of PFAs},
author = {Shibata, Chihiro and Yoshinaka, Ryo},
booktitle = {Proceedings of the Eleventh International Conference on Grammatical Inference},
pages = {259--263},
year = {2012},
editor = {Heinz, Jeffrey and Higuera, Colin and Oates, Tim},
volume = {21},
series = {Proceedings of Machine Learning Research},
address = {University of Maryland, College Park, MD, USA},
month = {05--08 Sep},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v21/shibata12a/shibata12a.pdf},
url = {https://proceedings.mlr.press/v21/shibata12a.html},
abstract = {A Bayesian manner which marginalizes transition probabilities can be generally applied to various kinds of probabilistic finite state machine models. Based on such a Bayesian manner, we implemented and compared three algorithms: variable-length gram, state merging method for PDFAs, and collapsed Gibbs sampling for PFAs. Among those, collapsed Gibbs sampling for PFAs performed the best on the data from the pre-competition stage of PAutomaC, although it consumes large computation resources.}
}