@proceedings{COST2018,
  title     = {Proceedings of the International Workshop on Cost-Sensitive Learning},
  booktitle = {Proceedings of the International Workshop on Cost-Sensitive Learning},
  editor    = {Torgo, Lu{\'\i}s and Matwin, Stan and Weiss, Gary and Moniz, Nuno and Branco, Paula},
  year      = {2018},
  publisher = {PMLR},
  series    = {Proceedings of Machine Learning Research},
  volume    = {88},
}
@inproceedings{pmlr-v88-torgo18a,
  title     = {Cost-Sensitive Learning: Preface},
  author    = {Torgo, Lu{\'\i}s and Matwin, Stan and Weiss, Gary and Moniz, Nuno and Branco, Paula},
  booktitle = {Proceedings of the International Workshop on Cost-Sensitive Learning},
  pages     = {1--3},
  year      = {2018},
  editor    = {Torgo, Lu{\'\i}s and Matwin, Stan and Weiss, Gary and Moniz, Nuno and Branco, Paula},
  volume    = {88},
  series    = {Proceedings of Machine Learning Research},
  month     = may,
  publisher = {PMLR},
  pdf       = {http://proceedings.mlr.press/v88/torgo18a/torgo18a.pdf},
  url       = {https://proceedings.mlr.press/v88/torgo18a.html},
}
@inproceedings{pmlr-v88-miller18a,
  title     = {Classifier Performance Estimation with Unbalanced, Partially Labeled Data},
  author    = {Miller, Benjamin A. and Vila, Jeremy and Kirn, Malina and Zipkin, Joseph R.},
  booktitle = {Proceedings of the International Workshop on Cost-Sensitive Learning},
  pages     = {4--16},
  year      = {2018},
  editor    = {Torgo, Lu{\'\i}s and Matwin, Stan and Weiss, Gary and Moniz, Nuno and Branco, Paula},
  volume    = {88},
  series    = {Proceedings of Machine Learning Research},
  month     = may,
  publisher = {PMLR},
  pdf       = {http://proceedings.mlr.press/v88/miller18a/miller18a.pdf},
  url       = {https://proceedings.mlr.press/v88/miller18a.html},
  abstract  = {Class imbalance and lack of ground truth are two significant problems in modern machine learning research. These problems are especially pressing in operational contexts where the total number of data points is extremely large and the cost of obtaining labels is very high. In the face of these issues, accurate estimation of the performance of a detection or classification system is crucial to inform decisions based on the observations. This paper presents a framework for estimating performance of a binary classifier in such a context. We focus on the scenario where each set of measurements has been reduced to a score, and the operator only investigates data when the score exceeds a threshold. The operator is blind to the number of missed detections, so performance estimation targets two quantities: recall and the derivative of precision with respect to recall. Measuring with respect to error in these two metrics, simulations in this context demonstrate that labeling outliers not only outperforms random labeling, but often matches performance of an adaptive method that attempts to choose the optimal data for labeling. Application to real anomaly detection data confirms the utility of the approach, and suggests direction for future work.},
}
@inproceedings{pmlr-v88-meekins18a,
  title     = {Cost-sensitive Classifier Selection when there is Additional Cost Information},
  author    = {Meekins, Ryan and Adams, Stephen and Beling, Peter A. and Farinholt, Kevin and Hipwell, Nathan and Chaudhry, Ali and Polter, Sherwood and Dong, Qing},
  booktitle = {Proceedings of the International Workshop on Cost-Sensitive Learning},
  pages     = {17--30},
  year      = {2018},
  editor    = {Torgo, Lu{\'\i}s and Matwin, Stan and Weiss, Gary and Moniz, Nuno and Branco, Paula},
  volume    = {88},
  series    = {Proceedings of Machine Learning Research},
  month     = may,
  publisher = {PMLR},
  pdf       = {http://proceedings.mlr.press/v88/meekins18a/meekins18a.pdf},
  url       = {https://proceedings.mlr.press/v88/meekins18a.html},
  abstract  = {Machine learning models are increasing in popularity in many domains as they are shown to be able to solve difficult problems. However, selecting a model to implement when there are various alternatives is a difficult problem. Receiver operating characteristic (ROC) curves are useful for selecting binary classification models for real world problems. However, ROC curves only consider the misclassification cost of the classifier. The total cost of a classification system includes various other types of cost including implementation, computation, and feature costs. To extend the ROC analysis to include this additional cost information, the ROC Convex Hull with Cost (ROCCHC) method is introduced. This method extends the ROC Convex Hull (ROCCH) method, which is used to select potentially optimal classifiers in the ROC space using misclassification cost, by selecting potentially optimal classifiers using this additional cost information. The ROCCHC method is tested using three binary classification data sets, each of which include real feature costs as the additional cost information. Competing classifiers are created with the CART algorithm by using each combination of features or sensors for each data set. The ROCCHC method reduces the classifier decision space to 4%, 9%, and 0.02%. These results are compared to the current ROCCH method, which misses 91%, 58%, and 6% of potentially optimal classifiers because the method does not include the additional cost information.},
}
@inproceedings{pmlr-v88-kriege18a,
  title     = {Recognizing Cuneiform Signs Using Graph Based Methods},
  author    = {Kriege, Nils M. and Fey, Matthias and Fisseler, Denis and Mutzel, Petra and Weichert, Frank},
  booktitle = {Proceedings of the International Workshop on Cost-Sensitive Learning},
  pages     = {31--44},
  year      = {2018},
  editor    = {Torgo, Lu{\'\i}s and Matwin, Stan and Weiss, Gary and Moniz, Nuno and Branco, Paula},
  volume    = {88},
  series    = {Proceedings of Machine Learning Research},
  month     = may,
  publisher = {PMLR},
  pdf       = {http://proceedings.mlr.press/v88/kriege18a/kriege18a.pdf},
  url       = {https://proceedings.mlr.press/v88/kriege18a.html},
  abstract  = {The cuneiform script constitutes one of the earliest systems of
writing and is realized by wedge-shaped marks on clay tablets. A
tremendous number of cuneiform tablets have already been discovered
and are incrementally digitalized and made available to automated
processing. As reading cuneiform script is still a manual task, we
address the real-world application of recognizing cuneiform signs by
two graph based methods with complementary runtime
characteristics. We present a graph model for cuneiform signs
together with a tailored distance measure based on the concept of
the graph edit distance. We propose efficient heuristics for its
computation and demonstrate its effectiveness in classification
tasks experimentally. To this end, the distance measure is used to
implement a nearest neighbor classifier leading to a high
computational cost for the prediction phase with increasing training
set size. In order to overcome this issue, we propose to use CNNs
adapted to graphs as an alternative approach shifting the
computational cost to the training phase. We demonstrate the
practicability of both approaches in an experimental comparison
regarding runtime and prediction accuracy. Although currently
available annotated real-world data is still limited, we obtain a
high accuracy using CNNs, in particular, when the training set is
enriched by augmented examples.},
}