% Interspeech 2020 conference paper proposing NSIP, a time-domain fully
% convolutional speech intelligibility predictor. Names are stored in
% "Last, First" form so every style parses and abbreviates them consistently.
@inproceedings{34806093aeca46db8133f49202c5d6a9,
  author    = {Pedersen, Mathias and Kolb{\ae}k, Morten and Andersen, Asger Heidemann and Jensen, S{\o}ren Holdt and Jensen, Jesper},
  title     = {End-to-end Speech Intelligibility Prediction Using Time-Domain Fully Convolutional Neural Networks},
  booktitle = {{INTERSPEECH} 2020},
  series    = {Proceedings of the International Conference on Spoken Language Processing},
  publisher = {International Speech Communication Association},
  year      = {2020},
  pages     = {1151--1155},
  doi       = {10.21437/Interspeech.2020-1740},
  language  = {English},
  keywords  = {Taleforst{\aa}elighed, foldningsnetv{\ae}rk, Deep Learning, speech intelligibility prediction, fully convolutional neural networks},
  abstract  = {Data-driven speech intelligibility prediction has been slow to take off. Datasets of measured speech intelligibility are scarce, and so current models are relatively small and rely on hand-picked features. Classical predictors based on psychoacoustic models and heuristics are still the state-of-the-art. This work proposes a U-Net inspired fully convolutional neural network architecture, NSIP, trained and tested on ten datasets to predict intelligibility of time-domain speech. The architecture is compared to a frequency domain data-driven predictor and to the classical state-of-the-art predictors STOI, ESTOI, HASPI and SIIB. The performance of NSIP is found to be superior for datasets seen in the training phase. On unseen datasets NSIP reaches performance comparable to classical predictors.},
  note      = {Interspeech 2020; Conference date: 25-10-2020 through 29-10-2020},
}