% Conference paper from the COMPASS 2020 proceedings (ACM), pp. 305--306.
% Event dates and the copyright notice are recorded in the note field.
@inproceedings{18ea02a2190b4dc08d74dfe918ea2ff4,
  author    = {Milusheva, Sveta and Marty, Robert and Bedoya, Guadalupe and Resor, Elizabeth and Williams, Sarah and Legovini, Arianna},
  title     = {Can crowdsourcing create the missing crash data?},
  booktitle = {{COMPASS} 2020 - Proceedings of the 2020 3rd {ACM} {SIGCAS} Conference on Computing and Sustainable Societies},
  pages     = {305--306},
  year      = {2020},
  month     = jun,
  day       = {15},
  publisher = {Association for Computing Machinery (ACM)},
  address   = {New York, NY, USA},
  doi       = {10.1145/3378393.3402264},
  language  = {English},
  keywords  = {geoparse, natural language processing, road safety, twitter},
  abstract  = {UPDATED - -June 1, 2020. Road traffic crashes (RTCs) are the primary cause of death among children and young adults. Yet data on RTCs is incomplete, hindering effective road safety policymaking in many developing countries where mortality is purportedly highest. We web-scrape 850,000 tweets to create crash data and develop a machine learning algorithm to geolocate RTCs. Our algorithm is nearly twice as precise as a standard geoparsing algorithm in identifying the set of locations that include the crash location. Above and beyond, it identifies the unique location of a crash from the set of possible locations in a majority of cases. We dispatch a set of motorcycle drivers to the site of the presumed crash in real time to verify the validity of the crowdsourced data and document the performance of the algorithm. The study can be used as a proof of concept for countries interested to improve RTC data at low cost through a machine learning approach and substantially increase the data available to analyze RTCs and prioritize road safety policies.},
  note      = {Publisher Copyright: {\textcopyright} 2020 Owner/Author.; 3rd ACM SIGCAS Conference on Computing and Sustainable Societies, COMPASS 2020 ; Conference date: 15-06-2020 Through 17-06-2020},
}