@Proceedings{ALT2018,
  title =     {Proceedings of Algorithmic Learning Theory},
  booktitle = {Proceedings of Algorithmic Learning Theory},
  editor =    {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
  publisher = {PMLR},
  series =    {Proceedings of Machine Learning Research},
  volume =    83
}


@InProceedings{pmlr-v83-mohri18a,
  title = 	 {{A}lgorithmic {L}earning {T}heory ({ALT}) 2018: {P}reface},
  author = 	 {Mohri, Mehryar and Sridharan, Karthik},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {1--2},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/mohri18a/mohri18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/mohri18a.html}
}


@InProceedings{pmlr-v83-aziz18a,
  title = 	 {Pure Exploration in Infinitely-Armed Bandit Models with Fixed-Confidence},
  author = 	 {Aziz, Maryam and Anderton, Jesse and Kaufmann, Emilie and Aslam, Javed},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {3--24},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/aziz18a/aziz18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/aziz18a.html},
  abstract = 	 {We consider the problem of near-optimal arm identification in the fixed confidence setting of the infinitely armed bandit problem when nothing is known about the arm reservoir distribution. We (1) introduce a PAC-like framework within which to derive and cast results; (2) derive a sample complexity lower bound for near-optimal arm identification; (3) propose an algorithm that identifies a nearly-optimal arm with high probability and derive an upper bound on its sample complexity which is within a log factor of our lower bound; and (4) discuss whether our $\log^2 \frac{1}{δ}$ dependence is inescapable for “two-phase” (select arms first, identify the best later) algorithms in the infinite setting. This work permits the application of bandit models to a broader class of problems where fewer assumptions hold.}
}


@InProceedings{pmlr-v83-bassily18a,
  title = 	 {Learners that Use Little Information},
  author = 	 {Bassily, Raef and Moran, Shay and Nachum, Ido and Shafer, Jonathan and Yehudayoff, Amir},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {25--55},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/bassily18a/bassily18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/bassily18a.html},
  abstract = 	 { We study learning algorithms that are restricted to using a small amount of information from their input sample. We introduce a category of learning algorithms we term {\em $d$-bit information learners}, which are algorithms whose output conveys at most $d$ bits of information of their input. A central theme in this work is that such algorithms generalize.  We focus on the  learning capacity of these algorithms, and prove sample complexity bounds with tight dependencies on the confidence and error parameters. We also observe connections with well studied notions such as sample compression schemes, Occam’s razor, PAC-Bayes and differential privacy. We discuss an approach that allows us to prove upper bounds on the amount of information that algorithms reveal about their inputs, and also provide a lower bound by showing a simple concept class for which every (possibly randomized) empirical risk minimizer must reveal a lot of information. On the other hand, we show that in the distribution-dependent setting every VC class has empirical risk minimizers that do not reveal a lot of information. }
}


@InProceedings{pmlr-v83-besson18a,
  title = 	 {{Multi-Player Bandits Revisited}},
  author = 	 {Besson, Lilian and Kaufmann, Emilie},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {56--92},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/besson18a/besson18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/besson18a.html},
  abstract = 	 {Multi-player Multi-Armed Bandits (MAB) have been extensively studied in the literature, motivated by applications to Cognitive Radio systems. Driven by such applications as well, we motivate the introduction of several levels of feedback for multi-player MAB algorithms. Most existing work assume that \emph{sensing information} is available to the algorithm. Under this assumption, we improve the state-of-the-art lower bound for the regret of any decentralized algorithms and introduce two algorithms, \emph{RandTopM} and \emph{MCTopM}, that are shown to empirically outperform existing algorithms. Moreover, we provide strong theoretical guarantees for these algorithms, including a notion of asymptotic optimality in terms of the number of selections of bad arms. We then introduce a promising heuristic, called \emph{Selfish}, that can operate without sensing information, which is crucial for emerging applications to Internet of Things networks. We investigate the empirical performance of this algorithm and provide some first theoretical elements for the understanding of its behavior.}
}


@InProceedings{pmlr-v83-bshouty18a,
  title = 	 {Adaptive Group Testing Algorithms to Estimate the Number of Defectives},
  author = 	 {Bshouty, Nader H. and E. Bshouty-Hurani, Vivian and Haddad, George and Hashem, Thomas and Khoury, Fadi and Sharafy, Omar},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {93--110},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/bshouty18a/bshouty18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/bshouty18a.html},
  abstract = 	 {We study the problem of estimating the number of defective items in adaptive Group testing by using a minimum number of queries. We improve the existing algorithm and prove a lower bound that shows that, for constant estimation, the number of tests in our algorithm is optimal.}
}


@InProceedings{pmlr-v83-bubeck18a,
  title = 	 {Sparsity, variance and curvature in multi-armed bandits},
  author = 	 {Bubeck, Sébastien and Cohen, Michael and Li, Yuanzhi},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {111--127},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/bubeck18a/bubeck18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/bubeck18a.html},
  abstract = 	 {In (online) learning theory the concepts of sparsity, variance and curvature are well-understood and are routinely used to obtain refined regret and generalization bounds. In this paper we further our understanding of these concepts in the more challenging limited feedback scenario. We consider the adversarial multi-armed bandit and linear bandit settings and solve several open problems pertaining to the existence of algorithms with favorable regret bounds under the following assumptions: (i) sparsity of the individual losses, (ii) small variation of the loss sequence, and (iii) curvature of the action set. Specifically we show that (i) for $s$-sparse losses one can obtain $\tilde{O}(\sqrt{s T})$-regret (solving an open problem by Kwon and Perchet), (ii) for loss sequences with variation bounded by $Q$ one can obtain $\tilde{O}(\sqrt{Q})$-regret (solving an open problem by Kale and Hazan), and (iii) for linear bandit on an $\ell_p^n$ ball one can obtain $\tilde{O}(\sqrt{n T})$-regret for $p ∈[1,2]$ and one has $\tilde{Ω}(n \sqrt{T})$-regret for $p>2$ (solving an open problem by Bubeck, Cesa-Bianchi and Kakade). A key new insight to obtain these results is to use regularizers satisfying more refined conditions than general self-concordance.}
}


@InProceedings{pmlr-v83-cesa-bianchi18a,
  title = 	 {Bandit Regret Scaling with the Effective Loss Range},
  author = 	 {Cesa-Bianchi, Nicolò and Shamir, Ohad},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {128--151},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/cesa-bianchi18a/cesa-bianchi18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/cesa-bianchi18a.html},
  abstract = 	 {We study how the regret guarantees of nonstochastic multi-armed  bandits can be improved, if the effective range of the losses in each round is  small (for example, the maximal difference between two losses or in a given  round). Despite a recent impossibility result, we show how this can be made  possible under certain mild additional assumptions, such as availability of  rough estimates of the losses, or knowledge of the loss of a single, possibly  unspecified arm, at the end of each round. Along the way, we develop a novel  technique which might be of independent interest, to convert any multi-armed  bandit algorithm with regret depending on the loss range, to an algorithm with  regret depending only on the effective range, while attaining better regret  bounds than existing approaches.}
}


@InProceedings{pmlr-v83-blanca18a,
  title = 	 {Structure Learning of ${H}$-colorings},
  author = 	 {Blanca, Antonio and Chen, Zongchen and Štefankovič, Daniel and Vigoda, Eric},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {152--185},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/blanca18a/blanca18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/blanca18a.html},
  abstract = 	 {We study the structure learning problem for $H$-colorings, an important class of Markov random fields that capture key combinatorial structures on graphs, including proper colorings and independent sets, as well as spin systems from statistical physics. The learning problem is as follows: for a fixed (and known) constraint graph $H$ with $q$ colors and an unknown graph $G=(V,E)$ with $n$ vertices, given uniformly random $H$-colorings of $G$, how many samples are required to learn the edges of the unknown graph $G$? We give a characterization of $H$ for which the problem is identifiable for every $G$, i.e., we can learn $G$ with an infinite number of samples. We also show that there are identifiable constraint graphs for which one cannot hope to learn every graph $G$ efficiently. We focus particular attention on the case of proper vertex $q$-colorings of graphs of maximum degree $d$ where intriguing connections to statistical physics phase transitions appear. We prove that in the tree uniqueness region (i.e., when $q>d$) the problem is identifiable and we can learn $G$ in $\mathsf{poly}(d,q)\times O(n^2\log{n})$ time. In contrast for soft-constraint systems, such as the Ising model, the best possible running time is exponential in $d$. In the tree non-uniqueness region (i.e., when $q≤d$) we prove that the problem is not identifiable and thus $G$ cannot be learned. Moreover, when $q<d-\sqrt{d} + Θ(1)$ we prove that even learning an equivalent graph (any graph with the same set of $H$-colorings) is computationally hard—sample complexity is exponential in $n$ in the worst case. We further explore the connection between the efficiency/hardness of the structure learning problem and the uniqueness/non-uniqueness phase transition for general $H$-colorings and prove that under a well-known condition in statistical physics, known as the Dobrushin uniqueness condition, we can learn $G$ in $\mathsf{poly}(d,q)\times O(n^2\log{n})$ time.}
}


@InProceedings{pmlr-v83-cheng18a,
  title = 	 {Convergence of Langevin MCMC in KL-divergence},
  author = 	 {Cheng, Xiang and Bartlett, Peter},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {186--211},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/cheng18a/cheng18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/cheng18a.html},
  abstract = 	 {Langevin diffusion is a commonly used tool for sampling from a given distribution. In this work, we establish that when the target density $\p^*$ is such that $\log \p^*$ is $L$ smooth and $m$ strongly convex, discrete Langevin diffusion produces a distribution $\p$ with $\KL{\p}{\p^*}≤ε$ in $\tilde{O}(\frac{d}{ε})$ steps, where $d$ is the dimension of the sample space.  We also study the convergence rate when the strong-convexity assumption is absent. By considering the Langevin diffusion as a gradient flow in the space of probability distributions, we obtain an elegant analysis that applies to the stronger property of convergence in KL-divergence and gives a conceptually simpler proof of the best-known convergence results in weaker metrics.}
}


@InProceedings{pmlr-v83-clemencon18a,
  title = 	 {Ranking Median Regression: Learning to Order through Local Consensus},
  author = 	 {Clémençon, Stephan and Korba, Anna and Sibony, Eric},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {212--245},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/clemencon18a/clemencon18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/clemencon18a.html},
  abstract = 	 {This article is devoted to the problem of predicting the value taken by a random permutation $Σ$, describing the preferences of an individual over a set of numbered items $\{1,; \ldots,;{n}\}$ say, based on the observation of an input/explanatory r.v. $X$ (\textit{e.g.} characteristics of the individual), when error is measured by the Kendall’s $τ$ distance. In the probabilistic formulation of the ’Learning to Order’ problem we propose, which extends the framework for statistical Kemeny ranking aggregation developped in \citet{CKS17}, this boils down to recovering conditional Kemeny medians of $Σ$ given $X$ from i.i.d. training examples $(X_1, \Sigma_1),; \ldots,; (X_N, \Sigma_N)$. For this reason, this statistical learning problem is referred to as \textit{ranking median regression} here. Our contribution is twofold. We first propose a probabilistic theory of ranking median regression: the set of optimal elements is characterized, the performance of empirical risk minimizers is investigated in this context and situations where fast learning rates can be achieved are also exhibited. Next we introduce the concept of local consensus/median, in order to derive efficient methods for ranking median regression. The major advantage of this local learning approach lies in its close connection with the widely studied Kemeny aggregation problem. From an algorithmic perspective, this permits to build predictive rules for ranking median regression by implementing efficient techniques for (approximate) Kemeny median computations at a local level in a tractable manner. In particular, versions of $k$-nearest neighbor and tree-based methods, tailored to ranking median regression, are investigated. Accuracy of piecewise constant ranking median regression rules is studied under a specific smoothness assumption for $Σ$’s conditional distribution given $X$. The results of various numerical experiments are also displayed for illustration purpose.}
}


@InProceedings{pmlr-v83-csiba18a,
  title = 	 {Coordinate Descent Faceoff: Primal or Dual?},
  author = 	 {Csiba, Dominik and Richtárik, Peter},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {246--267},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/csiba18a/csiba18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/csiba18a.html},
  abstract = 	 {Randomized coordinate descent (RCD) methods are state-of-the-art algorithms for training linear predictors via minimizing regularized empirical risk. When the number of examples (n) is much larger than the number of features (d), a common strategy is to apply RCD to the dual problem. On the other hand, when the number of features is much larger than the number of examples, it makes sense to apply RCD directly to the primal problem. In this paper we provide the first joint study of these two approaches when applied to L2-regularized linear ERM. First, we show through a rigorous analysis that for dense data, the above intuition is precisely correct. However, we find that for sparse and structured data,  primal RCD can significantly outperform dual RCD even if $d << n$, and vice versa, dual RCD can be much faster than primal RCD even if $n << d$. Moreover, we show that, surprisingly, a single sampling strategy minimizes both the (bound on the) number of iterations and the overall expected complexity of RCD. Note that the latter complexity measure also takes into account  the average cost of the iterations, which depends on the structure and sparsity of the data, and on the sampling strategy employed.  We confirm our theoretical predictions using extensive experiments with both synthetic and real data sets.  }
}


@InProceedings{pmlr-v83-dagan18a,
  title = 	 {A Better Resource Allocation Algorithm with Semi-Bandit Feedback},
  author = 	 {Dagan, Yuval and Koby, Crammer},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {268--320},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/dagan18a/dagan18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/dagan18a.html},
  abstract = 	 {We study a sequential resource allocation problem between a fixed number of arms. On each iteration the algorithm distributes a resource among the arms in order to maximize the expected success rate. Allocating more of the resource to a given arm increases the probability that it succeeds, yet with a cut-off. We follow Lattimore et al (2014) and assume that the probability increases linearly until it equals one, after which allocating more of the resource is wasteful. These cut-off values are fixed and unknown to the learner. We present an algorithm for this problem and prove a regret upper bound of $O(\log n)$ improving over the best known bound of $O(\log^2 n)$. Lower bounds we prove show that our upper bound is tight. Simulations demonstrate the superiority of our algorithm.}
}


@InProceedings{pmlr-v83-eldridge18a,
  title = 	 {Unperturbed: spectral analysis beyond Davis-Kahan},
  author = 	 {Eldridge, Justin and Belkin, Mikhail and Wang, Yusu},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {321--358},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/eldridge18a/eldridge18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/eldridge18a.html},
  abstract = 	 {Classical matrix perturbation results, such as Weyl’s theorem for eigenvalues and the Davis-Kahan theorem for eigenvectors, are general purpose. These classical bounds are tight in the worst case, but in many settings sub-optimal in the typical case.  In this paper, we present perturbation bounds which consider the nature of the perturbation and its interaction with the unperturbed structure in order to obtain significant improvements over the classical theory in many scenarios, such as when the perturbation is random. We demonstrate the utility of these new results by analyzing perturbations in the stochastic blockmodel where we derive much tighter bounds than provided by the classical theory.}
}


@InProceedings{pmlr-v83-ensign18a,
  title = 	 {Decision making with limited feedback},
  author = 	 {Ensign, Danielle and Sorelle, Frielder and Scott, Neville and Carlos, Scheidegger and Suresh, Venkatasubramanian},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {359--367},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/ensign18a/ensign18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/ensign18a.html},
  abstract = 	 {When models are trained for deployment in decision-making in various real-world settings, they are typically trained in batch mode. Historical data is used to train and validate the models prior to deployment. However, in many settings, \emph{feedback} changes the nature of the training process. Either the learner does not get full feedback on its actions, or the decisions made by the trained model influence what future training data it will see. In this paper, we focus on the problems of recidivism prediction and predictive policing. We present the first algorithms with provable regret for these problems, by  showing that both problems (and others like these) can be abstracted into a general reinforcement learning framework called partial monitoring. We also  discuss the policy implications of these solutions. }
}


@InProceedings{pmlr-v83-feige18a,
  title = 	 {Robust Inference for Multiclass Classification},
  author = 	 {Feige, Uriel and Mansour, Yishay and Schapire, Robert E.},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {368--386},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/feige18a/feige18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/feige18a.html},
  abstract = 	 {We consider the problem of robust inference in which inputs may be maliciously corrupted by a powerful adversary, and the learner’s goal is to accurately predict the original, uncorrupted input’s true label given only the adversarially corrupted version of the input. We specifically focus on the multiclass version of this problem in which more than two labels are possible. We substantially extend and generalize previous work which had only considered the binary case, thus uncovering stark differences between the two cases. We show how robust inference can be modeled as a zero-sum game between a learner who maximizes the expected accuracy, and an adversary. The value of this game is the best-attainable accuracy rate of any algorithm. We then show how the optimal policy for both the learner and adversary can be exactly characterized in terms of a particular hypergraph, specifically, as the hypergraph’s maximum fractional independent set and minimum fractional set cover, respectively. This characterization yields efficient algorithms in the size of the domain (number of possible inputs). For the typical setting that the domain is huge, we also design efficient local computation algorithms for approximating maximum fractional independent set in hypergraphs. This leads to a near optimal algorithm for the learner whose complexity is independent of the domain size, instead depending only on the rank and maximum degree of the underlying hypergraph, and on the desired approximation ratio.}
}


@InProceedings{pmlr-v83-gajane18a,
  title = 	 {Corrupt Bandits for Preserving Local Privacy},
  author = 	 {Gajane, Pratik and Urvoy, Tanguy and Kaufmann, Emilie},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {387--412},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/gajane18a/gajane18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/gajane18a.html},
  abstract = 	 {We study a variant of the stochastic multi-armed bandit (MAB) problem in which the rewards are corrupted. In this framework, motivated by privacy preservation in online recommender systems, the goal is to maximize the sum of the (unobserved) rewards, based on the observation of transformation of these rewards through a stochastic corruption process with known parameters. We provide a lower bound on the expected regret of any bandit algorithm in this corrupted setting. We devise a frequentist algorithm, KLUCB-CF, and a Bayesian algorithm, TS-CF and give upper bounds on their regret. We also provide the appropriate corruption parameters to guarantee a desired level of local privacy and analyze how this impacts the regret. Finally, we present some experimental results that confirm our analysis.}
}


@InProceedings{pmlr-v83-gao18a,
  title = 	 {On the Help of Bounded Shot Verifiers, Comparators and Standardisers for Learnability in Inductive Inference},
  author = 	 {Gao, Ziyuan and Jain, Sanjay and Stephan, Frank and Zeugmann, Thomas},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {413--437},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/gao18a/gao18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/gao18a.html},
  abstract = 	 {The present paper deals with the inductive inference of recursively enumerable languages from positive data (also called text).  It introduces the learning models of \emph{verifiability} and  \emph{comparability}. The input to a verifier is an index $e$ and a text of the target language $L$, and the learner has to \emph{verify} whether or not the index $e$ input is correct for the target language $L$. A comparator receives two indices of languages from the target class $\cL$ as input and has to decide in the limit whether or not these indices generate the same language. Furthermore, \emph{standardisability} is studied, where a \emph{standardiser} receives an index $j$ of some target language $L$ from the class $\cL$, and for every $L∈\cL$ there must be an index $e$ such that $e$ generates $L$ and the standardiser has to map every index $j$  for $L$ to $e$. Additionally, the common learning models of \emph{explanatory learning}, \emph{conservative explanatory learning}, and \emph{behaviourally correct learning} are considered. For almost all learning models mentioned above it is also appropriate to consider the number of times a learner changes its mind. In particular, if no mind change occurs then we obtain the \emph{finite} variant of the models considered. Occasionally, also learning with the help of an oracle is taken into consideration. The main goal of this paper is to figure out to what extent  verifiability, comparability, and standardisability are helpful for the inductive inference of classes of recursively enumerable languages.  Here we also distinguish between \emph{indexed families}, \emph{one-one enumerable classes}, and \emph{recursively enumerable classes}.  Our results are manyfold, and an almost complete picture is obtained. In particular, for indexed families and recursively enumerable classes finite comparability, finite standardisability, and finite verifiability always imply finite learnability. If at least one mind change is allowed, then there are  differences, i.e., for indexed families, comparability or verifiability imply conservative explanatory learning, but standardisability does not; still explanatory learning can be achieved.}
}


@InProceedings{pmlr-v83-gonem18a,
  title = 	 {Smooth Sensitivity Based Approach for Differentially Private PCA},
  author = 	 {Gonem, Alon and Gilad-Bachrach, Ram},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {438--450},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/gonem18a/gonem18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/gonem18a.html},
  abstract = 	 {We consider the challenge of differentially private PCA.  Currently known methods for this task either employ the computationally intensive exponential mechanism or require an access to the covariance matrix, and therefore fail to utilize potential sparsity of the data. The problem of designing simpler and more efficient methods for this task has been raised as an open problem in Kapralov et al.  In this paper we address this problem by employing the output perturbation mechanism. Despite being arguably the simplest and most straightforward technique, it has been overlooked due to the large global sensitivity associated with publishing the leading eigenvector. We tackle this issue by adopting a smooth sensitivity based  approach, which allows us to establish differential privacy (in a worst-case manner) and near-optimal sample complexity results under eigengap assumption. We consider both the pure and the approximate  notions of differential privacy, and demonstrate a tradeoff between privacy level and sample complexity. We conclude by suggesting how our results can be extended to related problems.}
}


@InProceedings{pmlr-v83-hsieh18a,
  title = 	 {Dimension-free Information Concentration via Exp-Concavity},
  author = 	 {Hsieh, Ya-ping and Cevher, Volkan},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {451--469},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/hsieh18a/hsieh18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/hsieh18a.html},
  abstract = 	 {Information concentration of probability measures have important implications in learning theory. Recently, it is discovered that the information content of a log-concave distribution concentrates around their differential entropy, albeit with an unpleasant dependence on the ambient dimension. In this work, we prove that if the potentials of the log-concave distribution are \emph{exp-concave}, which is a central notion for fast rates in online and statistical learning, then the concentration of information can be further improved to depend only on the exp-concavity parameter, and hence can be dimension independent.  Central to our proof is a novel yet simple application of the variance Brascamp-Lieb inequality. In the context of learning theory, concentration of information immediately implies high-probability results to many of the previous bounds that only hold in expectation.}
}


@InProceedings{pmlr-v83-jaasaari18a,
  title = 	 {Minimax Optimal Bayes Mixtures for Memoryless Sources over Large Alphabets},
  author = 	 {Jääsaari, Elias and Leppä-aho, Janne and Silander, Tomi and Roos, Teemu},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {470--488},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/jaasaari18a/jaasaari18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/jaasaari18a.html},
  abstract = 	 {The normalized maximum likelihood (NML) distribution achieves minimax log loss and coding regret for the multinomial model. In practice other nearly minimax distributions are used instead as calculating the sequential probabilities needed for coding and prediction takes exponential time with NML. The Bayes mixture obtained with the Dirichlet prior $\operatorname{Dir}(1/2, …, 1/2)$ and asymptotically minimax modifications of it have been widely studied in the context of large sample sizes. Recently there has also been interest in minimax optimal coding distributions for large alphabets.  We investigate Dirichlet priors that achieve minimax coding regret when the alphabet size $m$ is finite but large in comparison to the sample size $n$. We prove that a Bayes mixture with the Dirichlet prior $\operatorname{Dir}(1/3, …, 1/3)$ is optimal in this regime (in particular, when $m > \frac{5}{2} n + \frac{4}{n - 2} + \frac{3}{2}$).  The worst-case regret of the resulting distribution approaches the NML regret as the alphabet size grows.}
}


@InProceedings{pmlr-v83-jurgenson18a,
  title = 	 {Learning Decision Trees with Stochastic Linear Classifiers},
  author = 	 {Jurgenson, Tom and Mansour, Yishay},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {489--528},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/jurgenson18a/jurgenson18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/jurgenson18a.html},
  abstract = 	 {In this work we propose a top-down decision tree learning algorithm with a class of linear classifiers called stochastic linear classifiers as the internal nodes’ hypothesis class. To this end, we derive efficient algorithms for minimizing the Gini index for this class for each internal node, although the problem is non-convex. Moreover, the proposed algorithm has a theoretical guarantee under the weak stochastic hypothesis assumption.}
}


@InProceedings{pmlr-v83-kallus18a,
  title = 	 {Instrument-Armed Bandits},
  author = 	 {Kallus, Nathan},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {529--546},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/kallus18a/kallus18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/kallus18a.html},
  abstract = 	 {We extend the classic multi-armed bandit (MAB) model to the setting of noncompliance, where the arm pull is a mere instrument and the treatment applied may differ from it, which gives rise to the instrument-armed bandit (IAB) problem. The IAB setting is relevant whenever the experimental units are human since free will, ethics, and the law may prohibit unrestricted or forced application of treatment. In particular, the setting is relevant in bandit models of dynamic clinical trials and other controlled trials on human interventions. Nonetheless, the setting has not been fully investigate in the bandit literature. We show that there are various and divergent notions of regret in this setting, all of which coincide only in the classic MAB setting. We characterize the behavior of these regrets and analyze standard MAB algorithms. We argue for a particular kind of regret that captures the causal effect of treatments but show that standard MAB algorithms cannot achieve sublinear control on this regret. Instead, we develop new algorithms for the IAB problem, prove new regret bounds for them, and compare them to standard MAB algorithms in numerical examples.}
}


@InProceedings{pmlr-v83-locatelli18a,
  title = 	 {An Adaptive Strategy for Active Learning with Smooth Decision Boundary},
  author = 	 {Locatelli, Andrea and Carpentier, Alexandra and Kpotufe, Samory},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {547--571},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/locatelli18a/locatelli18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/locatelli18a.html},
  abstract = 	 {We present the first adaptive strategy for active learning in the setting of classification with smooth decision boundary. The problem of adaptivity (to unknown distributional parameters) has remained opened since the seminal work of Castro and Nowak (2007), which first established (active learning) rates for this setting. While some recent advances on this problem establish \emph{adaptive} rates in the case of univariate data, adaptivity in the more practical setting of multivariate data has so far remained elusive. Combining insights from various recent works, we show that, for the multivariate case, a careful reduction to univariate-adaptive strategies yield near-optimal rates without prior knowledge of distributional parameters.}
}


@InProceedings{pmlr-v83-mahloujifar18a,
  title = 	 {Learning under $p$-Tampering Attacks},
  author = 	 {Mahloujifar, Saeed and Diochnos, Dimitrios I. and Mahmoody, Mohammad},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {572--596},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/mahloujifar18a/mahloujifar18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/mahloujifar18a.html},
  abstract = 	 {Recently, Mahloujifar and Mahmoody (TCC’17) studied attacks against learning algorithms  using a special case of Valiant’s malicious noise, called $p$-tampering,  in which the adversary gets to change any training example with independent probability $p$ but  is limited to only choose ‘adversarial’ examples with correct labels. They obtained $p$-tampering attacks that increase the error probability in the so called ‘targeted’ poisoning model in which the adversary’s goal is to increase the loss of the trained hypothesis over a particular test example. At the heart of their attack was an efficient algorithm to bias the average output of any bounded real-valued function through $p$-tampering.  In this work, we present new biasing attacks for biasing the average output of bounded real-valued functions. Our new biasing attacks achieve in \emph{polynomial-time} the the best bias  achieved by MM16 through an \emph{exponential} time $p$-tampering attack.  Our improved biasing attacks, directly imply improved $p$-tampering attacks against learners in the targeted poisoning model. As a bonus, our attacks come with considerably simpler analysis compared to previous attacks.   We also  study the possibility of PAC learning under $p$-tampering attacks in the \emph{non-targeted} (aka indiscriminate) setting where the adversary’s goal is  to increase the risk of the generated hypothesis (for a random test example). We show that PAC learning is  \emph{possible} under $p$-tampering poisoning attacks essentially whenever it is possible in the realizable setting without the attacks. We further show that PAC learning  under ‘no-mistake’ adversarial noise is \emph{not} possible, if the adversary could choose the (still limited to only $p$ fraction of) tampered examples that  she substitutes with adversarially chosen ones. Our formal model for such ‘bounded-budget’ tampering attackers is  inspired by the notions of (strong) adaptive corruption in secure multi-party computation.}
}


@InProceedings{pmlr-v83-modi18a,
  title = 	 {Markov Decision Processes with Continuous Side Information},
  author = 	 {Modi, Aditya and Jiang, Nan and Singh, Satinder and Tewari, Ambuj},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {597--618},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/modi18a/modi18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/modi18a.html},
  abstract = 	 { We consider a reinforcement learning (RL) setting in which the agent interacts with a sequence of episodic MDPs. At the start of each episode the agent has access to some side-information or context that determines the dynamics of the MDP for that episode. Our setting is motivated by applications in healthcare where baseline measurements of a patient at the start of a treatment episode form the context that may provide information about how the patient might respond to treatment decisions.  We propose algorithms for learning in such Contextual Markov Decision Processes (CMDPs) under an assumption that the unobserved MDP parameters vary smoothly with the observed context. We give lower and upper PAC bounds under the smoothness assumption. Because our lower bound has an exponential dependence on the dimension, we also consider a tractable linear setting where the context creates linear combinations of a finite set of MDPs. For the linear setting, we give a PAC learning algorithm based on KWIK learning techniques.}
}


@InProceedings{pmlr-v83-nissim18a,
  title = 	 {Clustering Algorithms for the Centralized and Local Models},
  author = 	 {Nissim, Kobbi and Stemmer, Uri},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {619--653},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/nissim18a/nissim18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/nissim18a.html},
  abstract = 	 {We revisit the problem of finding a minimum enclosing ball with differential privacy: Given a set of $n$ points in the $d$-dimensional Euclidean space and an integer $t≤n$, the goal is to find a ball of the smallest radius $r_{opt}$ enclosing at least $t$ input points. The problem is motivated by its various applications to differential privacy, including the sample and aggregate technique, private data exploration, and clustering.  Without privacy concerns, minimum enclosing ball has a polynomial time approximation scheme (PTAS), which computes a ball of radius almost $r_{opt}$ (the problem is NP-hard to solve exactly). In contrast, under differential privacy, until this work, only a $O(\sqrt{\log n})$-approximation algorithm was known.  We provide new constructions of differentially private algorithms for minimum enclosing ball achieving constant factor approximation to $r_{opt}$ both in the centralized model (where a trusted curator collects the sensitive information and analyzes it with differential privacy) and in the local model (where each respondent randomizes her answers to the data curator to protect her privacy).  We demonstrate how to use our algorithms as a building block for approximating $k$-means in both models.}
}


@InProceedings{pmlr-v83-pasteris18a,
  title = 	 {On Similarity Prediction and Pairwise Clustering},
  author = 	 {Pasteris, Stephen and Vitale, Fabio and Gentile, Claudio and Herbster, Mark},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {654--681},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/pasteris18a/pasteris18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/pasteris18a.html},
  abstract = 	 {We consider the problem of clustering a finite set of items from pairwise similarity information. Unlike what is done in the literature on this subject,  we do so in a passive learning setting, and with no specific constraints on the cluster shapes other than their size. We investigate the problem in different settings: i. an online setting, where we provide a tight characterization of the prediction complexity in the mistake bound model, and ii. a standard stochastic batch setting, where we give tight upper and lower bounds on the achievable generalization error. Prediction performance is measured both in terms of the ability to recover the similarity function encoding the hidden clustering and in terms of how well we classify each item within the set. The proposed algorithms are time efficient.}
}


@InProceedings{pmlr-v83-pentina18a,
  title = 	 {Multi-task {K}ernel {L}earning Based on {P}robabilistic {L}ipschitzness},
  author = 	 {Pentina, Anastasia and Ben-David, Shai},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {682--701},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/pentina18a/pentina18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/pentina18a.html},
  abstract = 	 {In multi-task learning the learner is given data for a set of related learning tasks and aims to improve the overall learning performance by transferring information between them. A typical assumption exploited in this setting is that the tasks share a beneficial representation that can be learned form the joint training data of all tasks. This way, the training data of each task can be utilized to enhance the learning of other tasks in the set. Probabilistic Lipschitzness (PL) is a parameter that reflects one way in which some data representation can be beneficial for a classification learning task. In this work we propose to achieve multi-task learning by learning a kernel function relative to which each of the tasks in the set has a "high level" of probabilistic Lipschitzness. In order to be able to do that, we need to introduce a new variant of PL - one that allows reliable estimation of its value from finite size samples. We show that by having access to large amounts of training data in total (possibly the union of training sets for various tasks), the learner can identify a kernel function that would lead to fast learning rates per task when used for Nearest Neighbor classification or in a cluster-based active labeling procedure. }
}


@InProceedings{pmlr-v83-rahmanian18a,
  title = 	 {Online Learning of Combinatorial Objects via Extended Formulation},
  author = 	 {Rahmanian, Holakou and Helmbold, David P. and Vishwanathan, S. V. N.},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {702--724},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/rahmanian18a/rahmanian18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/rahmanian18a.html},
  abstract = 	 { The standard techniques for online learning of combinatorial objects perform multiplicative updates followed by projections into the convex hull of all the objects. However, this methodology can be expensive if the convex hull contains many facets. For example, the convex hull of $n$-symbol Huffman trees is known to have exponentially many facets. We get around this difficulty by exploiting extended formulations, which encode the polytope of combinatorial objects in a higher dimensional “extended” space with only polynomially many facets. We develop a general framework for converting extended formulations into efficient online algorithms with good relative loss bounds. We present applications of our framework to online learning of Huffman trees and permutations. The regret bounds of the resulting algorithms are within a factor of $\Ocal(\sqrt{\log(n)})$  of the state-of-the-art specialized algorithms for permutations,  and depending on the loss regimes, improve on or match the state-of-the-art for Huffman trees.  Our method is general and can be applied to other combinatorial objects. }
}


@InProceedings{pmlr-v83-reeve18a,
  title = 	 {The K-Nearest Neighbour UCB Algorithm for Multi-Armed Bandits with Covariates},
  author = 	 {Reeve, Henry and Mellor, Joe and Brown, Gavin},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {725--752},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/reeve18a/reeve18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/reeve18a.html},
  abstract = 	 {In this paper we propose and explore the $k$-Nearest Neighbour UCB algorithm for multi-armed bandits with covariates. We focus on a setting where covariates are supported on a metric space of low intrinsic dimension, such as a manifold embedded within a high dimensional ambient feature space. The algorithm is conceptually simple and straightforward to implement. Unlike previous methods such as the UCBogram and Adaptively Binned Successive Elimination, the $k$-Nearest Neighbour UCB algorithm does not require prior knowledge of the intrinsic dimension of the marginal distribution. It is also naturally anytime, without resorting to the doubling trick. We prove a regret bound for the $k$-Nearest Neighbour UCB algorithm which is minimax optimal up to logarithmic factors. In particular, the algorithm automatically takes advantage of both low intrinsic dimensionality of the marginal distribution over the covariates and low noise in the data, expressed as a margin condition. In addition, focusing on the case of bounded rewards, we give corresponding regret bounds for the $k$-Nearest Neighbour KL-UCB algorithm, which is an analogue of the KL-UCB algorithm adapted to the setting of multi-armed bandits with covariates. Finally, we present empirical results which demonstrate the ability of both the $k$-Nearest Neighbour UCB and $k$-Nearest Neighbour KL-UCB to take advantage of situations where the data is supported on an unknown sub-manifold of a high-dimensional feature space.}
}


@InProceedings{pmlr-v83-shkel18a,
  title = 	 {Sequential prediction with coded side information under logarithmic loss},
  author = 	 {Shkel, Yanina and Raginsky, Maxim and Verdú, Sergio},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {753--769},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/shkel18a/shkel18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/shkel18a.html},
  abstract = 	 {We study the problem of sequential prediction with coded side information under logarithmic loss (log-loss). We show an operational equivalence between this setup and lossy compression with log-loss distortion. Using this insight, together with recent work on lossy compression with log-loss, we connect prediction strategies with distributions in a certain subset of the probability simplex. This allows us to derive a Shtarkov-like bound for regret and to evaluate the regret for several illustrative classes of experts. In the present work, we mainly focus on the “batch” side information  setting with sequential prediction.}
}


@InProceedings{pmlr-v83-talebi18a,
  title = 	 {Variance-Aware Regret Bounds for Undiscounted Reinforcement Learning in MDPs},
  author = 	 {Talebi, Mohammad Sadegh and Maillard, Odalric-Ambrym},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {770--805},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/talebi18a/talebi18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/talebi18a.html},
  abstract = 	 {The problem of reinforcement learning in an unknown and discrete Markov Decision Process (MDP) under the average-reward criterion is considered, when the learner interacts with the system in a single stream of observations, starting from an initial state without any reset. We revisit the minimax lower bound for that problem by making appear the local variance of the bias function  in place of the diameter of the MDP. Furthermore, we provide a novel analysis of the \texttt{\textsc{KL-Ucrl}} algorithm establishing a high-probability regret bound scaling as $\widetilde {\mathcal O}\Bigl({\textstyle \sqrt{S\sum_{s,a}{\bf V}^\star_{s,a}T}}\Big)$ for this algorithm for ergodic MDPs, where $S$ denotes the number of states and where ${\bf V}^\star_{s,a}$ is the variance of the bias function with respect to the next-state distribution following action $a$ in state $s$. The resulting bound improves upon the best previously known regret bound $\widetilde {\Ocal}(DS\sqrt{AT})$ for that algorithm, where $A$ and $D$ respectively denote the maximum number of actions (per state) and the diameter of MDP. We finally compare the leading terms of the two bounds in some benchmark MDPs indicating that the derived bound can provide an order of magnitude improvement in some cases. Our analysis leverages novel variations of the transportation lemma combined with Kullback-Leibler concentration inequalities, that we believe to be of independent interest. }
}


@InProceedings{pmlr-v83-wang18a,
  title = 	 {Efficient coordinate-wise leading eigenvector computation},
  author = 	 {Wang, Jialei and Wang, Weiran and Garber, Dan and Srebro, Nathan},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {806--820},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/wang18a/wang18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/wang18a.html},
  abstract = 	 {We develop and analyze efficient "coordinate-wise" methods for finding the leading eigenvector, where each step involves only a vector-vector product.  We establish global convergence with overall runtime guarantees that are at least as good as Lanczos’s method and dominate it for slowly decaying spectrum. Our methods are based on combining a shift-and-invert approach with coordinate-wise algorithms for linear regression.}
}


@InProceedings{pmlr-v83-mao18a,
  title = 	 {Minimax Rates and Efficient Algorithms for Noisy Sorting},
  author = 	 {Mao, Cheng and Weed, Jonathan and Rigollet, Philippe},
  booktitle = 	 {Proceedings of Algorithmic Learning Theory},
  pages = 	 {821--847},
  year = 	 {2018},
  editor = 	 {Janoos, Firdaus and Mohri, Mehryar and Sridharan, Karthik},
  volume = 	 {83},
  series = 	 {Proceedings of Machine Learning Research},
  month = 	 {07--09 Apr},
  publisher =    {PMLR},
  pdf = 	 {http://proceedings.mlr.press/v83/mao18a/mao18a.pdf},
  url = 	 {https://proceedings.mlr.press/v83/mao18a.html},
  abstract = 	 {There has been a recent surge of interest in studying permutation-based models for ranking from pairwise comparison data. Despite being structurally richer and more robust than parametric ranking models, permutation-based models are less well understood statistically and generally lack efficient learning algorithms. In this work, we study a prototype of permutation-based ranking models, namely, the noisy sorting model. We establish the optimal rates of learning the model under two sampling procedures. Furthermore, we provide a fast algorithm to achieve near-optimal rates if the observations are sampled independently. Along the way, we discover properties of the symmetric group which are of theoretical interest.}
}