% Conference paper in the ICWE 2024 proceedings (Lecture Notes in Computer Science).
% Cleaned-up version of an auto-exported record; the citation key is kept as-is
% so existing citations continue to resolve.
%
% Changes relative to the exported record:
%   - author/editor: every name is in the unambiguous "Last, First" form.
%   - title/booktitle: acronyms are brace-protected against style recasing.
%   - series: reduced to the plain series name (exporter boilerplate removed).
%   - publisher/address: the export gave a country ("Germany") as address.
%     Now set to the Springer Nature Switzerland imprint named in the record's
%     own copyright line. NOTE(review): "Cham" as the publisher city should be
%     confirmed against the publisher's landing page for this DOI.
%   - note: removed so it is no longer printed in the bibliography; its content
%     is preserved in eventtitle / eventdate / copyright. Classic BibTeX styles
%     ignore those fields; biblatex understands eventtitle and eventdate.
%   - isbn: hyphenated, same digits.
% TODO: the LNCS volume number is missing from the exported record; add a
%       volume field once verified.
@inproceedings{ce4e433689e143be9087b2afc430d512,
  author     = {Qureshi, M. Atif and Younus, Arjumand and Caton, Simon},
  title      = {Inclusive Counterfactual Generation: Leveraging {LLMs} in Identifying Online Hate},
  booktitle  = {Web Engineering -- 24th International Conference, {ICWE} 2024, Proceedings},
  editor     = {Stefanidis, Kostas and Syst{\"a}, Kari and Matera, Maristella and Heil, Sebastian and Kondylakis, Haridimos and Quintarelli, Elisa},
  series     = {Lecture Notes in Computer Science},
  publisher  = {Springer Nature Switzerland},
  address    = {Cham},
  year       = {2024},
  pages      = {34--48},
  doi        = {10.1007/978-3-031-62362-2_3},
  isbn       = {978-3-031-62361-5},
  language   = {English},
  keywords   = {ChatGPT, counterfactuals, inclusivity, model robustness, out-of-domain testing},
  abstract   = {Counterfactually augmented data has recently been proposed as a successful solution for socially situated NLP tasks such as hate speech detection. The chief component within the existing counterfactual data augmentation pipeline, however, involves manually flipping labels and making minimal content edits to training data. In a hate speech context, these forms of editing have been shown to still retain offensive hate speech content. Inspired by the recent success of large language models (LLMs), especially the development of ChatGPT, which have demonstrated improved language comprehension abilities, we propose an inclusivity-oriented approach to automatically generate counterfactually augmented data using LLMs. We show that hate speech detection models trained with LLM-produced counterfactually augmented data can outperform both state-of-the-art and human-based methods.},
  eventtitle = {24th International Conference on Web Engineering, {ICWE} 2024},
  eventdate  = {2024-06-17/2024-06-20},
  copyright  = {{\textcopyright} The Author(s), under exclusive license to Springer Nature Switzerland AG 2024},
}