% Conference paper published in a Lecture Notes in Computer Science proceedings
% volume. Typed as inproceedings (not inbook) so that styles print the paper
% title and the proceedings title separately. The address field holds the
% publisher's city, not the conference venue. NOTE(review): booktitle and
% volume were filled in from the publisher record for this DOI -- confirm.
@inproceedings{cb59af173a944445aeee26b46538584d,
  author    = {Delany, Sarah Jane and Cunningham, P{\'a}draig},
  title     = {An Analysis of Case-Base Editing in a Spam Filtering System},
  editor    = {Funk, Peter and Gonzalez-Calero, Pedro A.},
  booktitle = {Advances in Case-Based Reasoning: 7th European Conference, {ECCBR} 2004},
  series    = {Lecture Notes in Computer Science},
  volume    = {3155},
  pages     = {128--141},
  publisher = {Springer},
  address   = {Berlin, Heidelberg},
  year      = {2004},
  doi       = {10.1007/978-3-540-28631-8_11},
  isbn      = {3540228829},
  language  = {English},
  abstract  = {Because of the volume of spam email and its evolving nature, any deployed Machine Learning-based spam filtering system will need to have procedures for case-base maintenance. Key to this will be procedures to edit the case-base to remove noise and eliminate redundancy. In this paper we present a two stage process to do this. We present a new noise reduction algorithm called Blame-Based Noise Reduction that removes cases that are observed to cause misclassification. We also present an algorithm called Conservative Redundancy Reduction that is much less aggressive than the state-of-the-art alternatives and has significantly better generalisation performance in this domain. These new techniques are evaluated against the alternatives in the literature on four datasets of 1000 emails each (50\% spam and 50\% non spam).},
}