@comment{Paper in the BIS 2021 workshops volume (Revised Selected Papers), pages 240--250. Person names use the unambiguous Last, First form. The citation key is kept unchanged so existing citations continue to resolve.}
@inproceedings{a635f81176d24ac5874d4c64b94636b6,
  author    = {Nayak, Aparna and Bo{\v z}i{\'c}, Bojan and Longo, Luca},
  title     = {Data Quality Assessment of Comma Separated Values Using Linked Data Approach},
  booktitle = {Business Information Systems Workshops - BIS 2021 International Workshops, Revised Selected Papers},
  editor    = {Abramowicz, Witold and Auer, S{\"o}ren and Str{\'o}{\.z}yna, Milena},
  series    = {Lecture Notes in Business Information Processing},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  address   = {Germany},
  year      = {2022},
  pages     = {240--250},
  doi       = {10.1007/978-3-031-04216-4_22},
  isbn      = {9783031042157},
  language  = {English},
  keywords  = {CSV, Data quality, Knowledge graphs, Linked data, Quality assessment, Root cause analysis},
  abstract  = {With an increasing amount of structured data on the web, the need to understand and convert it into linked data is growing. One of the most frequent data formats is Comma Separated Value (CSV). However, it is not easy to describe metadata such as the datatype, data quality and data provenance along with it. Therefore, to publish CSV on the web, it is required to convert CSV into linked data format. Many approaches exist to facilitate the conversion process from structured data to linked data. However, all methods require additional domain knowledge for the conversion process. The goal of this research is to assist publishers in converting CSV files into linked data without human intervention whilst understanding its quality and root causes of data quality violations. The proposed framework consists of two modules. The first module converts the given CSV file into a knowledge graph based on a proposed ontology which is appended with data quality information. In the second module, triples that have violated the data quality constraints are identified. The results show that it is possible to convert a CSV to a knowledge graph by adding its quality information without the help of external mappings.},
  note      = {Publisher Copyright: {\textcopyright} 2022, Springer Nature Switzerland AG.; 24th International Conference on Business Information Systems, BIS 2021 ; Conference date: 14-06-2021 Through 17-06-2021},
}