% Conference paper (Shushkevich, Alexandrov, Cardiff) in the TSD 2022
% proceedings, published in the Lecture Notes in Computer Science series.
%
% Conventions used in this record:
%   - author/editor names are in "Last, First" form;
%   - acronyms in titles are brace-protected against style recasing;
%   - the DOI is stored bare, without TeX escapes;
%   - event details live in eventtitle/eventdate (used by biblatex, ignored
%     by classic BibTeX styles); eventdate is the ISO 8601 form of the
%     exporter's day-first range "06-09-2022 Through 09-09-2022";
%   - copyright is a non-standard, non-printing field kept for provenance.
%
% NOTE(review): address holds a country rather than the publisher's city,
% and the publisher name differs from the copyright holder (Springer Nature
% Switzerland AG). Confirm both against the DOI landing page. The LNCS
% volume number is not recorded here and should be added once verified.
@inproceedings{b30a1bfff2dc4e66a464acd46a33b16d,
  author     = {Shushkevich, Elena and Alexandrov, Mikhail and Cardiff, John},
  title      = {{BERT}-based Classifiers for Fake News Detection on Short and Long Texts with Noisy Data: A Comparative Analysis},
  booktitle  = {Text, Speech, and Dialogue -- 25th International Conference, {TSD} 2022, Proceedings},
  editor     = {Sojka, Petr and Hor{\'a}k, Ale{\v s} and Kope{\v c}ek, Ivan and Pala, Karel},
  series     = {Lecture Notes in Computer Science},
  publisher  = {Springer Science and Business Media Deutschland GmbH},
  address    = {Germany},
  year       = {2022},
  pages      = {263--274},
  doi        = {10.1007/978-3-031-16270-1_22},
  isbn       = {9783031162695},
  language   = {English},
  eventtitle = {25th International Conference on Text, Speech, and Dialogue, {TSD} 2022},
  eventdate  = {2022-09-06/2022-09-09},
  copyright  = {{\textcopyright} 2022, Springer Nature Switzerland AG},
  keywords   = {BERT, Ensembling, Fake News, MNLI RoBERTa, Noise Immunity, RoBERTa},
  abstract   = {Free uncontrolled access to the Internet is the main reason for fake news propagation on the Internet both in social media and in regular Internet publications. In this paper we study the potential of several BERT-based models to detect fake news related to politics. Our contribution to the area consists of testing BERT, RoBERTa and MNLI RoBERTa models with (a) short and long texts; (b) ensembling with the best models; (c) noisy texts. To improve ensembling, we introduce an additional class {\textquoteleft}Doubtful news{\textquoteright}. To create noisy data we use cross-translation. For the experiments we consider the well-known FRN (Fake vs. Real News, long texts) and LIAR (short texts) datasets. The results we obtained on the long texts dataset are higher than the results we obtained on the short texts dataset. The proposed approach to ensembling provided significant improvement of the results. The experiments with noisy data demonstrated high noise immunity of the BERT model with long news and the RoBERTa model with short news.},
}