@Proceedings{ALT2018,
title = {Proceedings of Machine Learning Research},
booktitle = {Proceedings of Machine Learning Research},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
publisher = {PMLR},
series = {Proceedings of Machine Learning Research},
volume = 83
}
@InProceedings{mohri18a,
title = {{A}lgorithmic {L}earning {T}heory ({ALT}) 2017: {P}reface},
author = {Mehryar Mohri and Karthik Sridharan},
booktitle = {Proceedings of Algorithmic Learning Theory},
pages = {1--2},
year = {2018},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
volume = {83},
series = {Proceedings of Machine Learning Research},
address = {},
month = {07--09 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v83/mohri18a/mohri18a.pdf},
url = {http://proceedings.mlr.press/v83/mohri18a.html},
abstract = {}
}
@InProceedings{aziz18a,
title = {Pure Exploration in Infinitely-Armed Bandit Models with Fixed-Confidence},
author = {Maryam Aziz and Jesse Anderton and Emilie Kaufmann and Javed Aslam},
booktitle = {Proceedings of Algorithmic Learning Theory},
pages = {3--24},
year = {2018},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
volume = {83},
series = {Proceedings of Machine Learning Research},
address = {},
month = {07--09 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v83/aziz18a/aziz18a.pdf},
url = {http://proceedings.mlr.press/v83/aziz18a.html},
abstract = {We consider the problem of near-optimal arm identification in the fixed confidence setting of the infinitely armed bandit problem when nothing is known about the arm reservoir distribution. We (1) introduce a PAC-like framework within which to derive and cast results; (2) derive a sample complexity lower bound for near-optimal arm identification; (3) propose an algorithm that identifies a nearly-optimal arm with high probability and derive an upper bound on its sample complexity which is within a log factor of our lower bound; and (4) discuss whether our $\log^2 \frac{1}{δ}$ dependence is inescapable for “two-phase” (select arms first, identify the best later) algorithms in the infinite setting. This work permits the application of bandit models to a broader class of problems where fewer assumptions hold.}
}
@InProceedings{bassily18a,
title = {Learners that Use Little Information},
author = {Raef Bassily and Shay Moran and Ido Nachum and Jonathan Shafer and Amir Yehudayoff},
booktitle = {Proceedings of Algorithmic Learning Theory},
pages = {25--55},
year = {2018},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
volume = {83},
series = {Proceedings of Machine Learning Research},
address = {},
month = {07--09 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v83/bassily18a/bassily18a.pdf},
url = {http://proceedings.mlr.press/v83/bassily18a.html},
abstract = {
We study learning algorithms that are restricted to using a small amount of information from their input sample. We introduce a category of learning algorithms we term {\em $d$-bit information learners}, which are algorithms whose output conveys at most $d$ bits of information of their input. A central theme in this work is that such algorithms generalize.
We focus on the learning capacity of these algorithms, and prove sample complexity bounds with tight dependencies on the confidence and error parameters. We also observe connections with well studied notions such as sample compression schemes, Occam’s razor, PAC-Bayes and differential privacy.
We discuss an approach that allows us to prove upper bounds on the amount of information that algorithms reveal about their inputs, and also provide a lower bound by showing a simple concept class for which every (possibly randomized) empirical risk minimizer must reveal a lot of information. On the other hand, we show that in the distribution-dependent setting every VC class has empirical risk minimizers that do not reveal a lot of information.
}
}
@InProceedings{besson18a,
title = {{Multi-Player Bandits Revisited}},
author = {Lilian Besson and Emilie Kaufmann},
booktitle = {Proceedings of Algorithmic Learning Theory},
pages = {56--92},
year = {2018},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
volume = {83},
series = {Proceedings of Machine Learning Research},
address = {},
month = {07--09 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v83/besson18a/besson18a.pdf},
url = {http://proceedings.mlr.press/v83/besson18a.html},
abstract = {Multi-player Multi-Armed Bandits (MAB) have been extensively studied in the literature, motivated by applications to Cognitive Radio systems. Driven by such applications as well, we motivate the introduction of several levels of feedback for multi-player MAB algorithms. Most existing work assume that \emph{sensing information} is available to the algorithm. Under this assumption, we improve the state-of-the-art lower bound for the regret of any decentralized algorithms and introduce two algorithms, \emph{RandTopM} and \emph{MCTopM}, that are shown to empirically outperform existing algorithms. Moreover, we provide strong theoretical guarantees for these algorithms, including a notion of asymptotic optimality in terms of the number of selections of bad arms. We then introduce a promising heuristic, called \emph{Selfish}, that can operate without sensing information, which is crucial for emerging applications to Internet of Things networks. We investigate the empirical performance of this algorithm and provide some first theoretical elements for the understanding of its behavior.}
}
@InProceedings{bshouty18a,
title = {Adaptive Group Testing Algorithms to Estimate the Number of Defectives},
author = {Nader H. Bshouty and Vivian E. Bshouty-Hurani and George Haddad and Thomas Hashem and Fadi Khoury and Omar Sharafy},
booktitle = {Proceedings of Algorithmic Learning Theory},
pages = {93--110},
year = {2018},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
volume = {83},
series = {Proceedings of Machine Learning Research},
address = {},
month = {07--09 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v83/bshouty18a/bshouty18a.pdf},
url = {http://proceedings.mlr.press/v83/bshouty18a.html},
abstract = {We study the problem of estimating the number of defective
items in adaptive Group testing by using a minimum number of queries.
We improve the existing algorithm and prove a lower bound that shows that,
for constant estimation, the number of tests in our algorithm is optimal.}
}
@InProceedings{bubeck18a,
title = {Sparsity, variance and curvature in multi-armed bandits},
author = {Sébastien Bubeck and Michael Cohen and Yuanzhi Li},
booktitle = {Proceedings of Algorithmic Learning Theory},
pages = {111--127},
year = {2018},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
volume = {83},
series = {Proceedings of Machine Learning Research},
address = {},
month = {07--09 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v83/bubeck18a/bubeck18a.pdf},
url = {http://proceedings.mlr.press/v83/bubeck18a.html},
abstract = {In (online) learning theory the concepts of sparsity, variance and curvature are well-understood and are routinely used to obtain refined regret and generalization bounds. In this paper we further our understanding of these concepts in the more challenging limited feedback scenario. We consider the adversarial multi-armed bandit and linear bandit settings and solve several open problems pertaining to the existence of algorithms with favorable regret bounds under the following assumptions: (i) sparsity of the individual losses, (ii) small variation of the loss sequence, and (iii) curvature of the action set. Specifically we show that (i) for $s$-sparse losses one can obtain $\tilde{O}(\sqrt{s T})$-regret (solving an open problem by Kwon and Perchet), (ii) for loss sequences with variation bounded by $Q$ one can obtain $\tilde{O}(\sqrt{Q})$-regret (solving an open problem by Kale and Hazan), and (iii) for linear bandit on an $\ell_p^n$ ball one can obtain $\tilde{O}(\sqrt{n T})$-regret for $p ∈[1,2]$ and one has $\tilde{Ω}(n \sqrt{T})$-regret for $p>2$ (solving an open problem by Bubeck, Cesa-Bianchi and Kakade). A key new insight to obtain these results is to use regularizers satisfying more refined conditions than general self-concordance.}
}
@InProceedings{cesa-bianchi18a,
title = {Bandit Regret Scaling with the Effective Loss Range},
author = {Nicolò Cesa-Bianchi and Ohad Shamir},
booktitle = {Proceedings of Algorithmic Learning Theory},
pages = {128--151},
year = {2018},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
volume = {83},
series = {Proceedings of Machine Learning Research},
address = {},
month = {07--09 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v83/cesa-bianchi18a/cesa-bianchi18a.pdf},
url = {http://proceedings.mlr.press/v83/cesa-bianchi18a.html},
abstract = {We study how the regret guarantees of nonstochastic multi-armed
bandits can be improved, if the effective range of the losses in each round is
small (for example, the maximal difference between two losses or in a given
round). Despite a recent impossibility result, we show how this can be made
possible under certain mild additional assumptions, such as availability of
rough estimates of the losses, or knowledge of the loss of a single, possibly
unspecified arm, at the end of each round. Along the way, we develop a novel
technique which might be of independent interest, to convert any multi-armed
bandit algorithm with regret depending on the loss range, to an algorithm with
regret depending only on the effective range, while attaining better regret
bounds than existing approaches.}
}
@InProceedings{blanca18a,
title = {Structure Learning of ${H}$-colorings},
author = {Antonio Blanca and Zongchen Chen and Daniel Štefankovič and Eric Vigoda},
booktitle = {Proceedings of Algorithmic Learning Theory},
pages = {152--185},
year = {2018},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
volume = {83},
series = {Proceedings of Machine Learning Research},
address = {},
month = {07--09 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v83/blanca18a/blanca18a.pdf},
url = {http://proceedings.mlr.press/v83/blanca18a.html},
abstract = {We study the structure learning problem for $H$-colorings, an important class of Markov random fields that capture key combinatorial structures on graphs, including proper colorings and independent sets, as well as spin systems from statistical physics. The learning problem is as follows: for a fixed (and known) constraint graph $H$ with $q$ colors and an unknown graph $G=(V,E)$ with $n$ vertices, given uniformly random $H$-colorings of $G$, how many samples are required to learn the edges of the unknown graph $G$? We give a characterization of $H$ for which the problem is identifiable for every $G$, i.e., we can learn $G$ with an infinite number of samples. We also show that there are identifiable constraint graphs for which one cannot hope to learn every graph $G$ efficiently.
We focus particular attention on the case of proper vertex $q$-colorings of graphs of maximum degree $d$ where intriguing connections to statistical physics phase transitions appear. We prove that in the tree uniqueness region (i.e., when $q>d$) the problem is identifiable and we can learn $G$ in $\mathsf{poly}(d,q)\times O(n^2\log{n})$ time. In contrast for soft-constraint systems, such as the Ising model, the best possible running time is exponential in $d$. In the tree non-uniqueness region (i.e., when $q≤d$) we prove that the problem is not identifiable and thus $G$ cannot be learned. Moreover, when $q \frac{5}{2} n + \frac{4}{n - 2} + \frac{3}{2}$). The worst-case regret of the resulting distribution approaches the NML regret as the alphabet size grows.}
}
@InProceedings{jurgenson18a,
title = {Learning Decision Trees with Stochastic Linear Classifiers},
author = {Tom Jurgenson and Yishay Mansour},
booktitle = {Proceedings of Algorithmic Learning Theory},
pages = {489--528},
year = {2018},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
volume = {83},
series = {Proceedings of Machine Learning Research},
address = {},
month = {07--09 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v83/jurgenson18a/jurgenson18a.pdf},
url = {http://proceedings.mlr.press/v83/jurgenson18a.html},
abstract = {In this work we propose a top-down decision tree learning algorithm with a class of linear classifiers called stochastic linear classifiers as the internal nodes’ hypothesis class. To this end, we derive efficient algorithms for minimizing the Gini index for this class for each internal node, although the problem is non-convex. Moreover, the proposed algorithm has a theoretical guarantee under the weak stochastic hypothesis assumption.}
}
@InProceedings{kallus18a,
title = {Instrument-Armed Bandits},
author = {Nathan Kallus},
booktitle = {Proceedings of Algorithmic Learning Theory},
pages = {529--546},
year = {2018},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
volume = {83},
series = {Proceedings of Machine Learning Research},
address = {},
month = {07--09 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v83/kallus18a/kallus18a.pdf},
url = {http://proceedings.mlr.press/v83/kallus18a.html},
abstract = {We extend the classic multi-armed bandit (MAB) model to the setting of noncompliance, where the arm pull is a mere instrument and the treatment applied may differ from it, which gives rise to the instrument-armed bandit (IAB) problem. The IAB setting is relevant whenever the experimental units are human since free will, ethics, and the law may prohibit unrestricted or forced application of treatment. In particular, the setting is relevant in bandit models of dynamic clinical trials and other controlled trials on human interventions. Nonetheless, the setting has not been fully investigate in the bandit literature. We show that there are various and divergent notions of regret in this setting, all of which coincide only in the classic MAB setting. We characterize the behavior of these regrets and analyze standard MAB algorithms. We argue for a particular kind of regret that captures the causal effect of treatments but show that standard MAB algorithms cannot achieve sublinear control on this regret. Instead, we develop new algorithms for the IAB problem, prove new regret bounds for them, and compare them to standard MAB algorithms in numerical examples.}
}
@InProceedings{locatelli18a,
title = {An Adaptive Strategy for Active Learning with Smooth Decision Boundary},
author = {Andrea Locatelli and Alexandra Carpentier and Samory Kpotufe},
booktitle = {Proceedings of Algorithmic Learning Theory},
pages = {547--571},
year = {2018},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
volume = {83},
series = {Proceedings of Machine Learning Research},
address = {},
month = {07--09 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v83/locatelli18a/locatelli18a.pdf},
url = {http://proceedings.mlr.press/v83/locatelli18a.html},
abstract = {We present the first adaptive strategy for active learning in the setting of classification with smooth decision boundary. The problem of adaptivity (to unknown distributional parameters) has remained opened since the seminal work of Castro and Nowak (2007), which first established (active learning) rates for this setting. While some recent advances on this problem establish \emph{adaptive} rates in the case of univariate data, adaptivity in the more practical setting of multivariate data has so far remained elusive. Combining insights from various recent works, we show that, for the multivariate case, a careful reduction to univariate-adaptive strategies yield near-optimal rates without prior knowledge of distributional parameters.}
}
@InProceedings{mahloujifar18a,
title = {Learning under $p$-Tampering Attacks},
author = {Saeed Mahloujifar and Dimitrios I. Diochnos and Mohammad Mahmoody},
booktitle = {Proceedings of Algorithmic Learning Theory},
pages = {572--596},
year = {2018},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
volume = {83},
series = {Proceedings of Machine Learning Research},
address = {},
month = {07--09 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v83/mahloujifar18a/mahloujifar18a.pdf},
url = {http://proceedings.mlr.press/v83/mahloujifar18a.html},
abstract = {Recently, Mahloujifar and Mahmoody (TCC’17) studied attacks against learning algorithms using a special case of Valiant’s malicious noise, called $p$-tampering, in which the adversary gets to change any training example with independent probability $p$ but is limited to only choose ‘adversarial’ examples with correct labels. They obtained $p$-tampering attacks that increase the error probability in the so called ‘targeted’ poisoning model in which the adversary’s goal is to increase the loss of the trained hypothesis over a particular test example. At the heart of their attack was an efficient algorithm to bias the average output of any bounded real-valued function through $p$-tampering.
In this work, we present new biasing attacks for biasing the average output of bounded real-valued functions. Our new biasing attacks achieve in \emph{polynomial-time} the the best bias achieved by MM16 through an \emph{exponential} time $p$-tampering attack. Our improved biasing attacks, directly imply improved $p$-tampering attacks against learners in the targeted poisoning model. As a bonus, our attacks come with considerably simpler analysis compared to previous attacks.
We also study the possibility of PAC learning under $p$-tampering attacks in the \emph{non-targeted} (aka indiscriminate) setting where the adversary’s goal is to increase the risk of the generated hypothesis (for a random test example). We show that PAC learning is \emph{possible} under $p$-tampering poisoning attacks essentially whenever it is possible in the realizable setting without the attacks. We further show that PAC learning under ‘no-mistake’ adversarial noise is \emph{not} possible, if the adversary could choose the (still limited to only $p$ fraction of) tampered examples that she substitutes with adversarially chosen ones. Our formal model for such ‘bounded-budget’ tampering attackers is inspired by the notions of (strong) adaptive corruption in secure multi-party computation.}
}
@InProceedings{modi18a,
title = {Markov Decision Processes with Continuous Side Information},
author = {Aditya Modi and Nan Jiang and Satinder Singh and Ambuj Tewari},
booktitle = {Proceedings of Algorithmic Learning Theory},
pages = {597--618},
year = {2018},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
volume = {83},
series = {Proceedings of Machine Learning Research},
address = {},
month = {07--09 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v83/modi18a/modi18a.pdf},
url = {http://proceedings.mlr.press/v83/modi18a.html},
abstract = { We consider a reinforcement learning (RL) setting in which the agent interacts with a sequence of episodic MDPs. At the start of each episode the agent has access to some side-information or context that determines the dynamics of the MDP for that episode. Our setting is motivated by applications in healthcare where baseline measurements of a patient at the start of a treatment episode form the context that may provide information about how the patient might respond to treatment decisions.
We propose algorithms for learning in such Contextual Markov Decision Processes (CMDPs) under an assumption that the unobserved MDP parameters vary smoothly with the observed context. We give lower and upper PAC bounds under the smoothness assumption. Because our lower bound has an exponential dependence on the dimension, we also consider a tractable linear setting where the context creates linear combinations of a finite set of MDPs. For the linear setting, we give a PAC learning algorithm based on KWIK learning techniques.}
}
@InProceedings{nissim18a,
title = {Clustering Algorithms for the Centralized and Local Models},
author = {Kobbi Nissim and Uri Stemmer},
booktitle = {Proceedings of Algorithmic Learning Theory},
pages = {619--653},
year = {2018},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
volume = {83},
series = {Proceedings of Machine Learning Research},
address = {},
month = {07--09 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v83/nissim18a/nissim18a.pdf},
url = {http://proceedings.mlr.press/v83/nissim18a.html},
abstract = {We revisit the problem of finding a minimum enclosing ball with differential privacy: Given a set of $n$ points in the $d$-dimensional Euclidean space and an integer $t≤n$,
the goal is to find a ball of the smallest radius $r_{opt}$ enclosing at least $t$ input points. The problem is motivated by its various applications to differential privacy, including the sample and aggregate technique, private data exploration, and clustering.
Without privacy concerns, minimum enclosing ball has a polynomial time approximation scheme (PTAS), which computes a ball of radius almost $r_{opt}$ (the problem is NP-hard to solve exactly). In contrast, under differential privacy, until this work, only a $O(\sqrt{\log n})$-approximation algorithm was known.
We provide new constructions of differentially private algorithms for minimum enclosing ball achieving constant factor approximation to $r_{opt}$ both in the centralized model (where a trusted curator collects the sensitive information and analyzes it with differential privacy) and in the local model (where each respondent randomizes her answers to the data curator to protect her privacy).
We demonstrate how to use our algorithms as a building block for approximating $k$-means in both models.}
}
@InProceedings{pasteris18a,
title = {On Similarity Prediction and Pairwise Clustering},
author = {Stephen Pasteris and Fabio Vitale and Claudio Gentile and Mark Herbster},
booktitle = {Proceedings of Algorithmic Learning Theory},
pages = {654--681},
year = {2018},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
volume = {83},
series = {Proceedings of Machine Learning Research},
address = {},
month = {07--09 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v83/pasteris18a/pasteris18a.pdf},
url = {http://proceedings.mlr.press/v83/pasteris18a.html},
abstract = {We consider the problem of clustering a finite set of items from pairwise similarity information. Unlike what is done in the literature on this subject, we do so in a passive learning setting, and with no specific constraints on the cluster shapes other than their size. We investigate the problem in different settings: i. an online setting, where we provide a tight characterization of the prediction complexity in the mistake bound model, and ii. a standard stochastic batch setting, where we give tight upper and lower bounds on the achievable generalization error. Prediction performance is measured both in terms of the ability to recover the similarity function encoding the hidden clustering and in terms of how well we classify each item within the set. The proposed algorithms are time efficient.}
}
@InProceedings{pentina18a,
title = {Multi-task {K}ernel {L}earning Based on {P}robabilistic {L}ipschitzness},
author = {Anastasia Pentina and Shai Ben-David},
booktitle = {Proceedings of Algorithmic Learning Theory},
pages = {682--701},
year = {2018},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
volume = {83},
series = {Proceedings of Machine Learning Research},
address = {},
month = {07--09 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v83/pentina18a/pentina18a.pdf},
url = {http://proceedings.mlr.press/v83/pentina18a.html},
abstract = {In multi-task learning the learner is given data for a set of related learning tasks and aims to improve the overall learning performance by transferring information between them. A typical assumption exploited in this setting is that the tasks share a beneficial representation that can be learned form the joint training data of all tasks. This way, the training data of each task can be utilized to enhance the learning of other tasks in the set. Probabilistic Lipschitzness (PL) is a parameter that reflects one way in which some data representation can be beneficial for a classification learning task. In this work we propose to achieve multi-task learning by learning a kernel function relative to which each of the tasks in the set has a "high level" of probabilistic Lipschitzness. In order to be able to do that, we need to introduce a new variant of PL - one that allows reliable estimation of its value from finite size samples. We show that by having access to large amounts of training data in total (possibly the union of training sets for various tasks), the learner can identify a kernel function that would lead to fast learning rates per task when used for Nearest Neighbor classification or in a cluster-based active labeling procedure. }
}
@InProceedings{rahmanian18a,
title = {Online Learning of Combinatorial Objects via Extended Formulation},
author = {Holakou Rahmanian and David P. Helmbold and S. V. N. Vishwanathan},
booktitle = {Proceedings of Algorithmic Learning Theory},
pages = {702--724},
year = {2018},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
volume = {83},
series = {Proceedings of Machine Learning Research},
address = {},
month = {07--09 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v83/rahmanian18a/rahmanian18a.pdf},
url = {http://proceedings.mlr.press/v83/rahmanian18a.html},
abstract = {
The standard techniques for online learning of combinatorial objects
perform multiplicative updates followed by projections into the convex hull of all the objects.
However, this methodology can be expensive if the convex hull contains many facets.
For example, the convex hull of $n$-symbol Huffman trees is known to have exponentially many facets.
We get around this difficulty by exploiting extended formulations, which encode the polytope of combinatorial objects in a higher dimensional “extended” space with only polynomially many facets. We develop a general framework for converting extended formulations into efficient online algorithms with good relative loss bounds. We present applications of our framework to online learning of Huffman trees and permutations.
The regret bounds of the resulting algorithms are within a factor of $\Ocal(\sqrt{\log(n)})$
of the state-of-the-art specialized algorithms for permutations,
and depending on the loss regimes, improve on or match the state-of-the-art for Huffman trees.
Our method is general and can be applied to other combinatorial objects. }
}
@InProceedings{reeve18a,
title = {The K-Nearest Neighbour UCB Algorithm for Multi-Armed Bandits with Covariates},
author = {Henry Reeve and Joe Mellor and Gavin Brown},
booktitle = {Proceedings of Algorithmic Learning Theory},
pages = {725--752},
year = {2018},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
volume = {83},
series = {Proceedings of Machine Learning Research},
address = {},
month = {07--09 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v83/reeve18a/reeve18a.pdf},
url = {http://proceedings.mlr.press/v83/reeve18a.html},
abstract = {In this paper we propose and explore the $k$-Nearest Neighbour UCB algorithm for multi-armed bandits with covariates. We focus on a setting where covariates are supported on a metric space of low intrinsic dimension, such as a manifold embedded within a high dimensional ambient feature space. The algorithm is conceptually simple and straightforward to implement. Unlike previous methods such as the UCBogram and Adaptively Binned Successive Elimination, the $k$-Nearest Neighbour UCB algorithm does not require prior knowledge of the intrinsic dimension of the marginal distribution. It is also naturally anytime, without resorting to the doubling trick. We prove a regret bound for the $k$-Nearest Neighbour UCB algorithm which is minimax optimal up to logarithmic factors. In particular, the algorithm automatically takes advantage of both low intrinsic dimensionality of the marginal distribution over the covariates and low noise in the data, expressed as a margin condition. In addition, focusing on the case of bounded rewards, we give corresponding regret bounds for the $k$-Nearest Neighbour KL-UCB algorithm, which is an analogue of the KL-UCB algorithm adapted to the setting of multi-armed bandits with covariates. Finally, we present empirical results which demonstrate the ability of both the $k$-Nearest Neighbour UCB and $k$-Nearest Neighbour KL-UCB to take advantage of situations where the data is supported on an unknown sub-manifold of a high-dimensional feature space.}
}
@InProceedings{shkel18a,
title = {Sequential prediction with coded side information under logarithmic loss},
author = {Yanina Shkel and Maxim Raginsky and Sergio Verdú},
booktitle = {Proceedings of Algorithmic Learning Theory},
pages = {753--769},
year = {2018},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
volume = {83},
series = {Proceedings of Machine Learning Research},
address = {},
month = {07--09 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v83/shkel18a/shkel18a.pdf},
url = {http://proceedings.mlr.press/v83/shkel18a.html},
abstract = {We study the problem of sequential prediction with coded side information under logarithmic loss (log-loss). We show an operational equivalence between this setup and lossy compression with log-loss distortion. Using this insight, together with recent work on lossy compression with log-loss, we connect prediction strategies with distributions in a certain subset of the probability simplex. This allows us to derive a Shtarkov-like bound for regret and to evaluate the regret for several illustrative classes of experts. In the present work, we mainly focus on the “batch” side information setting with sequential prediction.}
}
@InProceedings{talebi18a,
title = {Variance-Aware Regret Bounds for Undiscounted Reinforcement Learning in MDPs},
author = {Mohammad Sadegh Talebi and Odalric-Ambrym Maillard},
booktitle = {Proceedings of Algorithmic Learning Theory},
pages = {770--805},
year = {2018},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
volume = {83},
series = {Proceedings of Machine Learning Research},
address = {},
month = {07--09 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v83/talebi18a/talebi18a.pdf},
url = {http://proceedings.mlr.press/v83/talebi18a.html},
abstract = {The problem of reinforcement learning in an unknown and discrete Markov Decision Process (MDP) under the average-reward criterion is considered, when
the learner interacts with the system in a single stream of observations, starting from an initial state without any reset.
We revisit the minimax lower bound for that problem by making appear the local variance of the bias function in place of the diameter of the MDP.
Furthermore, we provide a novel analysis of the \texttt{\textsc{KL-Ucrl}} algorithm establishing a high-probability regret bound scaling as
$\widetilde {\mathcal O}\Bigl({\textstyle \sqrt{S\sum_{s,a}{\bf V}^\star_{s,a}T}}\Big)$ for this algorithm for ergodic MDPs, where $S$ denotes the number of states and
where ${\bf V}^\star_{s,a}$ is the variance of the bias function with respect to the next-state distribution following action $a$ in state $s$.
The resulting bound improves upon the best previously known regret bound $\widetilde {\Ocal}(DS\sqrt{AT})$ for that algorithm, where $A$ and $D$ respectively denote
the maximum number of actions (per state) and the diameter of MDP. We finally compare the leading terms of the two bounds in some benchmark MDPs indicating
that the derived bound can provide an order of magnitude improvement in some cases. Our analysis leverages novel variations of the transportation lemma
combined with Kullback-Leibler concentration inequalities, that we believe to be of independent interest.
}
}
@InProceedings{wang18a,
title = {Efficient coordinate-wise leading eigenvector computation},
author = {Jialei Wang and Weiran Wang and Dan Garber and Nathan Srebro},
booktitle = {Proceedings of Algorithmic Learning Theory},
pages = {806--820},
year = {2018},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
volume = {83},
series = {Proceedings of Machine Learning Research},
address = {},
month = {07--09 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v83/wang18a/wang18a.pdf},
url = {http://proceedings.mlr.press/v83/wang18a.html},
abstract = {We develop and analyze efficient "coordinate-wise" methods for finding the leading eigenvector, where each step involves only a vector-vector product. We establish global convergence with overall runtime guarantees that are at least as good as Lanczos’s method and dominate it for slowly decaying spectrum. Our methods are based on combining a shift-and-invert approach with coordinate-wise algorithms for linear regression.}
}
@InProceedings{mao18a,
title = {Minimax Rates and Efficient Algorithms for Noisy Sorting},
author = {Cheng Mao and Jonathan Weed and Philippe Rigollet},
booktitle = {Proceedings of Algorithmic Learning Theory},
pages = {821--847},
year = {2018},
editor = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
volume = {83},
series = {Proceedings of Machine Learning Research},
address = {},
month = {07--09 Apr},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v83/mao18a/mao18a.pdf},
url = {http://proceedings.mlr.press/v83/mao18a.html},
abstract = {There has been a recent surge of interest in studying permutation-based models for ranking from pairwise comparison data. Despite being structurally richer and more robust than parametric ranking models, permutation-based models are less well understood statistically and generally lack efficient learning algorithms. In this work, we study a prototype of permutation-based ranking models, namely, the noisy sorting model. We establish the optimal rates of learning the model under two sampling procedures. Furthermore, we provide a fast algorithm to achieve near-optimal rates if the observations are sampled independently. Along the way, we discover properties of the symmetric group which are of theoretical interest.}
}