% Conference paper by Chen, McKeever and Delany in the SocInfo 2018 proceedings.
% Cleaned from an automated export: author/editor names use "Last, First" form
% with no escaped literal braces around given names, and the DOI is stored bare.
% NOTE(review): address was "Germany" (a country, not a publisher city); set to
% the seat implied by the copyright holder named in the note field. Confirm
% against the volume front matter. The series volume number is not recorded
% here; add a volume field once verified.
@inproceedings{3c7167f6521f4e6fa498de5b22b38940,
  author    = {Chen, Hao and McKeever, Susan and Delany, Sarah Jane},
  title     = {A comparison of classical versus deep learning techniques for abusive content detection on social media sites},
  editor    = {Koltsova, Olessia and Ignatov, Dmitry I. and Staab, Steffen},
  booktitle = {Social Informatics - 10th International Conference, {SocInfo} 2018, Proceedings},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer},
  address   = {Cham},
  year      = {2018},
  pages     = {117--133},
  isbn      = {9783030011284},
  doi       = {10.1007/978-3-030-01129-1_8},
  language  = {English},
  keywords  = {Abuse detection, Deep learning, Text classification},
  abstract  = {The automated detection of abusive content on social media websites faces a variety of challenges including imbalanced training sets, the identification of an appropriate feature representation and the selection of optimal classifiers. Classifiers such as support vector machines (SVM), combined with bag of words or ngram feature representation, have traditionally dominated in text classification for decades. With the recent emergence of deep learning and word embeddings, an increasing number of researchers have started to focus on deep neural networks. In this paper, our aim is to explore cutting-edge techniques in automated abusive content detection. We use two deep learning approaches: Convolutional neural networks (CNNs) and recurrent neural networks (RNNs). We apply these to 9 public datasets derived from various social media websites. Firstly, we show that word embeddings pre-trained on the same data source as the subsequent classification task improves the prediction accuracy of deep learning models. Secondly, we investigate the impact of different levels of training set imbalances on classifier types. In comparison to the traditional SVM classifier, we identify that although deep learning models can outperform the classification results of the traditional SVM classifier when the associated training dataset is seriously imbalanced, the performance of the SVM classifier can be dramatically improved through the use of oversampling, surpassing the deep learning models. Our work can inform researchers in selecting appropriate text classification strategies in the detection of abusive content, including scenarios where the training datasets suffer from class imbalance.},
  note      = {Publisher Copyright: {\textcopyright} Springer Nature Switzerland AG 2018.; 10th Conference on Social Informatics, SocInfo 2018 ; Conference date: 25-09-2018 Through 28-09-2018},
}