MSS BIBLIOGRAPHY

Author	Title	Year	Journal/Proceedings	Reftype	DOI/URL
Ward, D., Wierstorf, H., Mason, R., Grais, E.M. and Plumbley, M.	BSS Eval or PEASS? Predicting the perception of singing-voice separation [BibTeX]	2018	To appear in Proc. ICASSP 2018, Calgary, Canada	misc
BibTeX: @misc{Ward2018, author = {D. Ward and H. Wierstorf and R. D. Mason and E. M. Grais and M. D. Plumbley}, title = {{BSS Eval} or {PEASS}? Predicting the perception of singing-voice separation}, howpublished = {To appear in Proc.\ ICASSP 2018, Calgary, Canada}, year = {2018} }
Virtanen, T. and Klapuri, A.	Separation of harmonic sound sources using sinusoidal modeling [BibTeX]	2000	Vol. 2, 2000 IEEE International Conference on Acoustics, Speech, and Signal Processing. Proceedings (ICASSP), pp. II765-II768	inproceedings	DOI
BibTeX: @inproceedings{Virtanen2000, author = {T. Virtanen and A. Klapuri}, title = {Separation of harmonic sound sources using sinusoidal modeling}, booktitle = {2000 IEEE International Conference on Acoustics, Speech, and Signal Processing. Proceedings (ICASSP)}, year = {2000}, volume = {2}, pages = {II765--II768}, doi = {10.1109/ICASSP.2000.859072} }
Virtanen, T.	Monaural Sound Source Separation by Nonnegative Matrix Factorization With Temporal Continuity and Sparseness Criteria [BibTeX]	2007	IEEE Transactions on Audio, Speech, and Language Processing Vol. 15(3), pp. 1066-1074	article	DOI
BibTeX: @article{Virtanen2007, author = {T. Virtanen}, title = {Monaural Sound Source Separation by Nonnegative Matrix Factorization With Temporal Continuity and Sparseness Criteria}, journal = {IEEE Transactions on Audio, Speech, and Language Processing}, year = {2007}, volume = {15}, number = {3}, pages = {1066--1074}, doi = {10.1109/TASL.2006.885253} }
E. Vincent, R. Gribonval and C. Fevotte	Performance measurement in blind audio source separation [BibTeX]	2006	IEEE Transactions on Audio Speech Language Process. Vol. 14(4), pp. 1462-1469	article	DOI
BibTeX: @article{Vincent2006, author = {E. Vincent and R. Gribonval and C. Fevotte}, title = {Performance measurement in blind audio source separation}, journal = {IEEE Transactions on Audio, Speech, and Language Processing}, year = {2006}, volume = {14}, number = {4}, pages = {1462--1469}, doi = {10.1109/TSA.2005.858005} }
Uhlich, S., Giron, F. and Mitsufuji, Y.	Deep neural network based instrument extraction from music [BibTeX]	2015	2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 2135-2139	inproceedings	DOI
BibTeX: @inproceedings{Uhlich2015, author = {S. Uhlich and F. Giron and Y. Mitsufuji}, title = {Deep neural network based instrument extraction from music}, booktitle = {2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, year = {2015}, pages = {2135--2139}, doi = {10.1109/ICASSP.2015.7178348} }
Smaragdis, P., Fevotte, C., Mysore, G.J., Mohammadiha, N. and Hoffman, M.	Static and Dynamic Source Separation Using Nonnegative Factorizations: A unified view [BibTeX]	2014	IEEE Signal Processing Magazine Vol. 31(3), pp. 66-75	article	DOI
BibTeX: @article{Smaragdis2014, author = {P. Smaragdis and C. Fevotte and G. J. Mysore and N. Mohammadiha and M. Hoffman}, title = {Static and Dynamic Source Separation Using Nonnegative Factorizations: A unified view}, journal = {IEEE Signal Processing Magazine}, year = {2014}, volume = {31}, number = {3}, pages = {66--75}, doi = {10.1109/MSP.2013.2297715} }
Serra, X.	Musical Sound Modeling with Sinusoids plus Noise [Abstract] [BibTeX]	1997	Musical Signal Processing, pp. 91-122	inbook
Abstract: When generating musical sound on a digital computer, it is important to have a good model whose parameters provide a rich source of meaningful sound transformations. Three basic model types are in prevalent use today for musical sound generation: instrument models, spectrum models, and abstract models. Instrument models attempt to parametrize a sound at its source, such as a violin, clarinet, or vocal tract. Spectrum models attempt to parametrize a sound at the basilar membrane of the ear, discarding whatever information the ear seems to discard in the spectrum. Abstract models, such as FM, attempt to provide musically useful parameters in an abstract formula. This article addresses the second category of synthesis techniques: spectrum modeling. The main advantage of this group of techniques is the existence of analysis procedures that extract the synthesis parameters out of real sounds, thus being able to reproduce and modify actual sounds. Our particular approach is based on modeling sounds as stable sinusoids (partials) plus noise (residual component), therefore analyzing sounds with this model and generating new sounds from the analyzed data. The analysis procedure detects partials by studying the time-varying spectral characteristics of a sound and represents them with time-varying sinusoids. These partials are then subtracted from the original sound and the remaining "residual" is represented as a time-varying filtered white noise component. The synthesis procedure is a combination of additive synthesis for the sinusoidal part, and subtractive synthesis for the noise part.
BibTeX: @inbook{Serra1997, author = {X. Serra}, title = {Musical Sound Modeling with Sinusoids plus Noise}, booktitle = {Musical Signal Processing}, publisher = {Swets \& Zeitlinger}, year = {1997}, pages = {91--122} }
Rickard, S.	The DUET Blind Source Separation Algorithm [Abstract] [BibTeX]	2007	Blind Speech Separation, pp. 217-241	inbook	DOI URL
Abstract: This chapter presents a tutorial on the DUET Blind Source Separation method which can separate any number of sources using only two mixtures. The method is valid when sources are W-disjoint orthogonal, that is, when the supports of the windowed Fourier transform of the signals in the mixture are disjoint. For anechoic mixtures of attenuated and delayed sources, the method allows one to estimate the mixing parameters by clustering relative attenuation-delay pairs extracted from the ratios of the time-frequency representations of the mixtures. The estimates of the mixing parameters are then used to partition the time-frequency representation of one mixture to recover the original sources. The technique is valid even in the case when the number of sources is larger than the number of mixtures. The method is particularly well suited to speech mixtures because the time-frequency representation of speech is sparse and this leads to W-disjoint orthogonality. The algorithm is easily coded and a simple Matlab® implementation is presented. Additionally in this chapter, two strategies which allow DUET to be applied to situations where the microphones are far apart are presented; this removes a major limitation of the original method.
BibTeX: @inbook{Rickard2007, author = {Rickard, Scott}, title = {The {DUET} Blind Source Separation Algorithm}, booktitle = {Blind Speech Separation}, publisher = {Springer Netherlands}, year = {2007}, pages = {217--241}, url = {https://doi.org/10.1007/978-1-4020-6479-1_8}, doi = {10.1007/978-1-4020-6479-1_8} }
BibTeX: @article{Rafii2013, author = {Z. Rafii and B. Pardo}, title = {{REpeating} Pattern Extraction Technique ({REPET}): A Simple Method for Music/Voice Separation}, journal = {IEEE Transactions on Audio, Speech, and Language Processing}, year = {2013}, volume = {21}, number = {1}, pages = {73--84} }
Rafii, Z. and Pardo, B.	REpeating Pattern Extraction Technique (REPET): A Simple Method for Music / Voice Separation [BibTeX]	2013	IEEE Transactions on Audio, Speech and Language Processing Vol. 21(1), pp. 73-84	article
BibTeX: @article{Rafii2013a, author = {Rafii, Z. and Pardo, B.}, title = {{REpeating} Pattern Extraction Technique ({REPET}): A Simple Method for Music / Voice Separation}, journal = {IEEE Transactions on Audio, Speech, and Language Processing}, year = {2013}, volume = {21}, number = {1}, pages = {73--84} }
Ozerov, A. and Févotte, C.	Multichannel nonnegative matrix factorization in convolutive mixtures for audio source separation [BibTeX]	2010	IEEE Transactions on Audio, Speech and Language Processing Vol. 18(3), pp. 550-563	article
BibTeX: @article{Ozerov2010, author = {A. Ozerov and C. F{\'e}votte}, title = {Multichannel nonnegative matrix factorization in convolutive mixtures for audio source separation}, journal = {IEEE Transactions on Audio, Speech, and Language Processing}, year = {2010}, volume = {18}, number = {3}, pages = {550--563} }
Nugraha, A.A., Liutkus, A. and Vincent, E.	Multichannel Audio Source Separation With Deep Neural Networks [BibTeX]	2016	IEEE/ACM Transactions on Audio, Speech, and Language Processing Vol. 24(9), pp. 1652-1664	article	DOI
BibTeX: @article{Nugraha2016, author = {A. A. Nugraha and A. Liutkus and E. Vincent}, title = {Multichannel Audio Source Separation With Deep Neural Networks}, journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing}, year = {2016}, volume = {24}, number = {9}, pages = {1652--1664}, doi = {10.1109/TASLP.2016.2580946} }
Luo, Y., Chen, Z., Hershey, J.R., Le Roux, J. and Mesgarani, N.	Deep Clustering and Conventional Networks for Music Separation: Stronger Together [BibTeX]	2017	2017 IEEE International Conference on Acoustics, Speech, and Signal Processing. Proceedings (ICASSP)	inproceedings
BibTeX: @inproceedings{Luo16, author = {Luo, Y. and Chen, Z. and Hershey, J. R. and Le Roux, J. and Mesgarani, N.}, title = {Deep Clustering and Conventional Networks for Music Separation: Stronger Together}, booktitle = {2017 IEEE International Conference on Acoustics, Speech, and Signal Processing. Proceedings (ICASSP)}, year = {2017} }
Liutkus, A., Stöter, F.-R., Rafii, Z., Kitamura, D., Rivet, B., Ito, N., Ono, N. and Fontecave, J.	The 2016 signal separation evaluation campaign [BibTeX]	2017	International Conference on Latent Variable Analysis and Signal Separation, pp. 323-332	inproceedings
BibTeX: @inproceedings{sisec2016, author = {A. Liutkus and F.-R. St{\"o}ter and Z. Rafii and D. Kitamura and B. Rivet and N. Ito and N. Ono and J. Fontecave}, title = {The 2016 signal separation evaluation campaign}, booktitle = {International Conference on Latent Variable Analysis and Signal Separation}, year = {2017}, pages = {323--332} }
Liutkus, A., Fitzgerald, D., Rafii, Z., Pardo, B. and Daudet, L.	Kernel Additive Models for Source Separation [BibTeX]	2014	IEEE Transactions on Signal Processing Vol. 62(16), pp. 4298-4310	article	DOI
BibTeX: @article{Liutkus2014, author = {A. Liutkus and D. Fitzgerald and Z. Rafii and B. Pardo and L. Daudet}, title = {Kernel Additive Models for Source Separation}, journal = {IEEE Transactions on Signal Processing}, year = {2014}, volume = {62}, number = {16}, pages = {4298--4310}, doi = {10.1109/TSP.2014.2332434} }
Lee, D.D. and Seung, H.S.	Algorithms for Non-negative Matrix Factorization [BibTeX]	2001	Advances in Neural Information Processing Systems 13, pp. 556-562	inproceedings
BibTeX: @inproceedings{Lee2001, author = {Lee, D. D. and Seung, H. S.}, title = {Algorithms for Non-negative Matrix Factorization}, booktitle = {Advances in Neural Information Processing Systems 13}, publisher = {MIT Press}, year = {2001}, pages = {556--562} }
ITU-R	Recommendation BS.1534-3: Method for the subjective assessment of intermediate quality levels of coding systems [BibTeX]	10/2015	(BS.1534)	misc
BibTeX: @misc{ITUR.062014, author = {{ITU-R}}, title = {Recommendation {BS.1534-3}: Method for the subjective assessment of intermediate quality levels of coding systems}, year = {2015}, month = oct, number = {BS.1534}, edition = {3} }
Huang, P.-S., Chen, S.D., Smaragdis, P. and Hasegawa-Johnson, M.	Singing-Voice Separation From Monaural Recordings Using Robust Principal Component Analysis [BibTeX]	2012	IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 57-60	inproceedings
BibTeX: @inproceedings{Huang2012, author = {Huang, P.-S. and Chen, S. D. and Smaragdis, P. and Hasegawa-Johnson, M.}, title = {Singing-Voice Separation From Monaural Recordings Using Robust Principal Component Analysis}, booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, pages = {57--60}, year = {2012} }
Huang, P.S., Kim, M., Hasegawa-Johnson, M. and Smaragdis, P.	Joint Optimization of Masks and Deep Recurrent Neural Networks for Monaural Source Separation [BibTeX]	2015	IEEE/ACM Transactions on Audio, Speech, and Language Processing Vol. 23(12), pp. 2136-2147	article	DOI
BibTeX: @article{Huang2015, author = {P. S. Huang and M. Kim and M. Hasegawa-Johnson and P. Smaragdis}, title = {Joint Optimization of Masks and Deep Recurrent Neural Networks for Monaural Source Separation}, journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing}, year = {2015}, volume = {23}, number = {12}, pages = {2136--2147}, doi = {10.1109/TASLP.2015.2468583} }
FitzGerald, D., Liutkus, A. and Badeau, R.	Projection-Based Demixing of Spatial Audio [BibTeX]	2016	IEEE/ACM Transactions on Audio, Speech, and Language Processing Vol. 24(9), pp. 1560-1572	article	DOI
BibTeX: @article{Fitzgerald2016, author = {D. FitzGerald and A. Liutkus and R. Badeau}, title = {Projection-Based Demixing of Spatial Audio}, journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing}, year = {2016}, volume = {24}, number = {9}, pages = {1560--1572}, doi = {10.1109/TASLP.2016.2570945} }
FitzGerald, D.	Harmonic/Percussive Separation Using Median Filtering [BibTeX]	2010	Proceedings of the 13th International Conference on Digital Audio Effects (DAFx)	inproceedings	URL
BibTeX: @inproceedings{FitzGerald2010, author = {FitzGerald, D.}, title = {Harmonic/Percussive Separation Using Median Filtering}, booktitle = {Proceedings of the 13th International Conference on Digital Audio Effects (DAFx)}, year = {2010}, url = {http://dafx10.iem.at/proceedings/papers/DerryFitzGerald_DAFx10_P15.pdf} }
Fitzgerald, D.	Harmonic/percussive separation using median filtering [BibTeX]	2010	13th Int. Conference on Digital Audio Effects (DAFx-10)	inproceedings
BibTeX: @inproceedings{Fitzgerlad2010, author = {D. FitzGerald}, title = {Harmonic/percussive separation using median filtering}, booktitle = {13th Int. Conference on Digital Audio Effects (DAFx-10)}, year = {2010} }
Ewert, S., Pardo, B., Mueller, M. and Plumbley, M.D.	Score-Informed Source Separation for Musical Audio Recordings: An overview [BibTeX]	2014	IEEE Signal Processing Magazine Vol. 31(3), pp. 116-124	article	DOI
BibTeX: @article{Ewert2014, author = {S. Ewert and B. Pardo and M. Mueller and M. D. Plumbley}, title = {Score-Informed Source Separation for Musical Audio Recordings: An overview}, journal = {IEEE Signal Processing Magazine}, year = {2014}, volume = {31}, number = {3}, pages = {116--124}, doi = {10.1109/MSP.2013.2296076} }
Emiya, V., Vincent, E., Harlander, N. and Hohmann, V.	Subjective and Objective Quality Assessment of Audio Source Separation [BibTeX]	2011	IEEE Transactions on Audio, Speech, and Language Processing Vol. 19(7), pp. 2046-2057	article	DOI
BibTeX: @article{Emiya2011, author = {Emiya, V. and Vincent, E. and Harlander, N. and Hohmann, V.}, title = {Subjective and Objective Quality Assessment of Audio Source Separation}, journal = {IEEE Transactions on Audio, Speech, and Language Processing}, year = {2011}, volume = {19}, number = {7}, pages = {2046--2057}, doi = {10.1109/TASL.2011.2109381} }
Durrieu, J.L., Richard, G., David, B. and Fevotte, C.	Source/Filter Model for Unsupervised Main Melody Extraction From Polyphonic Audio Signals [BibTeX]	2010	IEEE Transactions on Audio, Speech, and Language Processing Vol. 18(3), pp. 564-575	article	DOI
BibTeX: @article{Durrieu2010, author = {J. L. Durrieu and G. Richard and B. David and C. Fevotte}, title = {Source/Filter Model for Unsupervised Main Melody Extraction From Polyphonic Audio Signals}, journal = {IEEE Transactions on Audio, Speech, and Language Processing}, year = {2010}, volume = {18}, number = {3}, pages = {564--575}, doi = {10.1109/TASL.2010.2041114} }
Duong, N., Vincent, E. and Gribonval, R.	Under-Determined Reverberant Audio Source Separation Using a Full-Rank Spatial Covariance Model [BibTeX]	2010	IEEE Transactions on Audio, Speech, and Language Processing Vol. 18(7), pp. 1830-1840	article	DOI
BibTeX: @article{Duong2010, author = {Duong, N. Q. K. and Vincent, E. and Gribonval, R.}, title = {Under-Determined Reverberant Audio Source Separation Using a Full-Rank Spatial Covariance Model}, journal = {IEEE Transactions on Audio, Speech, and Language Processing}, year = {2010}, volume = {18}, number = {7}, pages = {1830--1840}, doi = {10.1109/TASL.2010.2050716} }
Chan, T.-S., Yeh, T.-C., Fan, Z.-C., Chen, H.-W., Su, L., Yang, Y.-H. and Jang, R.	Vocal activity informed singing voice separation with the iKala dataset [BibTeX]	2015	2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 718-722	inproceedings
BibTeX: @inproceedings{Chan2015, author = {T.-S. Chan and T.-C. Yeh and Z.-C. Fan and H.-W. Chen and L. Su and Y.-H. Yang and R. Jang}, title = {Vocal activity informed singing voice separation with the {iKala} dataset}, booktitle = {2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, year = {2015}, pages = {718--722} }
Cano, E., Plumbley, M. and Dittmar, C.	Phase-based harmonic/percussive separation [BibTeX]	2014	15th Annual Conference of the International Speech Communication Association (2014). Interspeech	inproceedings
BibTeX: @inproceedings{Cano2014, title = {Phase-based harmonic/percussive separation}, author = {Cano, E. and Plumbley, M. and Dittmar, C.}, year = {2014}, booktitle = {15th Annual Conference of the International Speech Communication Association (2014). Interspeech} }
Cano, E., FitzGerald, D. and Brandenburg, K.	Evaluation of quality of sound source separation algorithms: Human perception vs quantitative metrics [BibTeX]	2016	2016 24th European Signal Processing Conference (EUSIPCO), pp. 1758-1762	inproceedings
BibTeX: @inproceedings{Cano2016, title = {Evaluation of quality of sound source separation algorithms: Human perception vs quantitative metrics}, author = {Cano, E. and FitzGerald, D. and Brandenburg, K.}, year = {2016}, booktitle = {2016 24th European Signal Processing Conference (EUSIPCO)}, pages = {1758--1762} }
Bregman, A.S.	Auditory Scene Analysis: The Perceptual Organization of Sound [BibTeX]	1990		book
BibTeX: @book{Bregman:1990:BOOK, title = {Auditory Scene Analysis: The Perceptual Organization of Sound}, author = {Bregman, Albert S.}, year = {1990}, publisher = {MIT Press/Bradford Books} }
Barry, D., Lawlor, B. and Coyle, E.	Real-time Sound Source Separation using Azimuth Discrimination and Resynthesis [BibTeX]	2004	117th Audio Engineering Society (AES) Convention	inproceedings
BibTeX: @inproceedings{Barry2004, author = {Barry, D. and Lawlor, B. and Coyle, E.}, title = {Real-time Sound Source Separation using Azimuth Discrimination and Resynthesis}, year = {2004}, booktitle = {117th Audio Engineering Society (AES) Convention} }