% Conference paper (PACLING 2019 revised selected papers, Springer CCIS series).
% Names are stored in "Last, First" form without braces around given names so
% that styles can abbreviate initials; acronyms are brace-protected against
% sentence-casing. The citation key is kept as exported so existing cites resolve.
@inproceedings{e4260520a2ab4700835aefd1e48c8ccd,
  author    = {Wang, Fei and Ross, Robert J. and Kelleher, John D.},
  title     = {Update Frequency and Background Corpus Selection in Dynamic {TF-IDF} Models for First Story Detection},
  editor    = {Nguyen, Le-Minh and Tojo, Satoshi and Phan, Xuan-Hieu and Hasida, K{\^o}iti},
  booktitle = {Computational Linguistics - 16th International Conference of the Pacific Association for Computational Linguistics, {PACLING} 2019, Revised Selected Papers},
  series    = {Communications in Computer and Information Science},
  publisher = {Springer},
  year      = {2020},
  pages     = {206--217},
  doi       = {10.1007/978-981-15-6168-9_18},
  isbn      = {9789811561672},
  language  = {English},
  keywords  = {Background corpus, First Story Detection, Nearest neighbour, Novelty detection, TF-IDF, Update frequency},
  note      = {Publisher Copyright: {\textcopyright} 2020, Springer Nature Singapore Pte Ltd.; 16th International Conference of the Pacific Association for Computational Linguistics, PACLING 2019 ; Conference date: 11-10-2019 Through 13-10-2019},
  abstract  = {First Story Detection (FSD) requires a system to detect the very first story that mentions an event from a stream of stories. Nearest neighbour-based models, using the traditional term vector document representations like TF-IDF, currently achieve the state of the art in FSD. Because of its online nature, a dynamic term vector model that is incrementally updated during the detection process is usually adopted for FSD instead of a static model. However, very little research has investigated the selection of hyper-parameters and the background corpora for a dynamic model. In this paper, we analyse how a dynamic term vector model works for FSD, and investigate the impact of different update frequencies and background corpora on FSD performance. Our results show that dynamic models with high update frequencies outperform static model and dynamic models with low update frequencies; and that the FSD performance of dynamic models does not always increase with higher update frequencies, but instead reaches steady state after some update frequency threshold is reached. In addition, we demonstrate that different background corpora have very limited influence on the dynamic models with high update frequencies in terms of FSD performance.},
}