Skip to content

Latest commit

 

History

History
1341 lines (1188 loc) · 48.9 KB

bib.md

File metadata and controls

1341 lines (1188 loc) · 48.9 KB
  • Bibliography

@article{kumar2017weight,
  author  = {Kumar, Siddharth Krishna},
  title   = {On weight initialization in deep neural networks},
  journal = {arXiv preprint arXiv:1704.08863},
  year    = {2017},
}

@article{hori2017advances,
  author  = {Hori, Takaaki and Watanabe, Shinji and Zhang, Yu and Chan, William},
  title   = {Advances in joint CTC-attention based end-to-end speech recognition with a deep CNN encoder and RNN-LM},
  journal = {arXiv preprint arXiv:1706.02737},
  year    = {2017},
}

@article{landahl1943statistical,
  author    = {Landahl, HD and McCulloch, Warren S and Pitts, Walter},
  title     = {A statistical consequence of the logical calculus of nervous nets},
  journal   = {The bulletin of mathematical biophysics},
  volume    = {5},
  number    = {4},
  pages     = {135--137},
  publisher = {Springer},
  year      = {1943},
}

@inproceedings{he2015delving,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Delving deep into rectifiers: Surpassing human-level performance on imagenet classification},
  booktitle = {Proceedings of the IEEE international conference on computer vision},
  pages     = {1026--1034},
  year      = {2015},
}

@inproceedings{glorot2010understanding,
  author    = {Glorot, Xavier and Bengio, Yoshua},
  title     = {Understanding the difficulty of training deep feedforward neural networks},
  booktitle = {Proceedings of the thirteenth international conference on artificial intelligence and statistics},
  pages     = {249--256},
  year      = {2010},
}

@inproceedings{kim2017joint,
  author       = {Kim, Suyoun and Hori, Takaaki and Watanabe, Shinji},
  title        = {Joint CTC-attention based end-to-end speech recognition using multi-task learning},
  booktitle    = {2017 IEEE international conference on acoustics, speech and signal processing (ICASSP)},
  pages        = {4835--4839},
  organization = {IEEE},
  year         = {2017},
}

@article{ardila2019common,
  author  = {Ardila, Rosana and Branson, Megan and Davis, Kelly and Henretty, Michael and Kohler, Michael and Meyer, Josh and Morais, Reuben and Saunders, Lindsay and Tyers, Francis M and Weber, Gregor},
  title   = {Common voice: A massively-multilingual speech corpus},
  journal = {arXiv preprint arXiv:1912.06670},
  year    = {2019},
}

@article{bahdanau2014neural,
  author  = {Bahdanau, Dzmitry and Cho, Kyunghyun and Bengio, Yoshua},
  title   = {Neural machine translation by jointly learning to align and translate},
  journal = {arXiv preprint arXiv:1409.0473},
  year    = {2014},
}

@article{goodfellow2013multi,
  author  = {Goodfellow, Ian J and Bulatov, Yaroslav and Ibarz, Julian and Arnoud, Sacha and Shet, Vinay},
  title   = {Multi-digit number recognition from street view imagery using deep convolutional neural networks},
  journal = {arXiv preprint arXiv:1312.6082},
  year    = {2013},
}

@article{luong2015effective,
  author  = {Luong, Minh-Thang and Pham, Hieu and Manning, Christopher D},
  title   = {Effective approaches to attention-based neural machine translation},
  journal = {arXiv preprint arXiv:1508.04025},
  year    = {2015},
}

@misc{foxvorge2019,
  title = {Voxforge},
  url   = {http://www.voxforge.org},
  year  = {2019},
}

@misc{karpathy2015unreasonable,
  author       = {Karpathy, Andrej},
  title        = {The Unreasonable Effectiveness of Recurrent Neural Networks},
  howpublished = {Andrej Karpathy Blog},
  url          = {http://karpathy.github.io/2015/05/21/rnn-effectiveness/},
  year         = {2015},
}

@inproceedings{salazar2019self,
  author       = {Salazar, Julian and Kirchhoff, Katrin and Huang, Zhiheng},
  title        = {Self-attention networks for connectionist temporal classification in speech recognition},
  booktitle    = {ICASSP 2019-2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages        = {7115--7119},
  organization = {IEEE},
  year         = {2019},
}

@inproceedings{hwang2017character,
  author       = {Hwang, Kyuyeon and Sung, Wonyong},
  title        = {Character-level language modeling with hierarchical recurrent neural networks},
  booktitle    = {2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages        = {5720--5724},
  organization = {IEEE},
  year         = {2017},
}

@book{geron2019hands,
  author    = {G{\'e}ron, Aur{\'e}lien},
  title     = {Hands-on machine learning with Scikit-Learn, Keras, and TensorFlow: Concepts, tools, and techniques to build intelligent systems},
  publisher = {O'Reilly Media},
  year      = {2019},
}

@inproceedings{papineni2002bleu,
  author       = {Papineni, Kishore and Roukos, Salim and Ward, Todd and Zhu, Wei-Jing},
  title        = {BLEU: a method for automatic evaluation of machine translation},
  booktitle    = {Proceedings of the 40th annual meeting on association for computational linguistics},
  pages        = {311--318},
  organization = {Association for Computational Linguistics},
  year         = {2002},
}

@inproceedings{toshniwal2018multilingual,
  author       = {Toshniwal, Shubham and Sainath, Tara N and Weiss, Ron J and Li, Bo and Moreno, Pedro and Weinstein, Eugene and Rao, Kanishka},
  title        = {Multilingual speech recognition with a single end-to-end model},
  booktitle    = {2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages        = {4904--4908},
  organization = {IEEE},
  year         = {2018},
}

@misc{renals2017multilingual,
  author       = {Renals, Steve},
  title        = {Multilingual Speech Recognition},
  howpublished = {Automatic Speech Recognition ({ASR}), Lecture 16},
  year         = {2017},
}

@article{koushik2016understanding,
  author  = {Koushik, Jayanth},
  title   = {Understanding convolutional neural networks},
  journal = {arXiv preprint arXiv:1605.09081},
  year    = {2016},
}

@article{zeghidour2018end,
  author  = {Zeghidour, Neil and Usunier, Nicolas and Synnaeve, Gabriel and Collobert, Ronan and Dupoux, Emmanuel},
  title   = {End-to-end speech recognition from the raw waveform},
  journal = {arXiv preprint arXiv:1806.07098},
  year    = {2018},
}

@book{ogunfunmi2015speech,
  author    = {Ogunfunmi, Tokunbo and Togneri, Roberto and Narasimha, Madihally},
  title     = {Speech and audio processing for coding, enhancement and recognition},
  publisher = {Springer},
  address   = {New York},
  year      = {2015},
  isbn      = {9781493914555},
  keywords  = {Mechanical speech recognizer; Speech processing systems; Speech recognition, Automatic; Automatic speech recognition},
  language  = {English},
}

@inproceedings{szegedy2015going,
  author    = {Szegedy, Christian and Liu, Wei and Jia, Yangqing and Sermanet, Pierre and Reed, Scott and Anguelov, Dragomir and Erhan, Dumitru and Vanhoucke, Vincent and Rabinovich, Andrew},
  title     = {Going deeper with convolutions},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {1--9},
  year      = {2015},
}

@inproceedings{lasserre2006principled,
  author       = {Lasserre, Julia A and Bishop, Christopher M and Minka, Thomas P},
  title        = {Principled hybrids of generative and discriminative models},
  booktitle    = {2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR'06)},
  volume       = {1},
  pages        = {87--94},
  organization = {IEEE},
  year         = {2006},
}

@article{hendrycks2019using,
  author  = {Hendrycks, Dan and Lee, Kimin and Mazeika, Mantas},
  title   = {Using pre-training can improve model robustness and uncertainty},
  journal = {arXiv preprint arXiv:1901.09960},
  year    = {2019},
}

@article{ramachandran2016unsupervised,
  author  = {Ramachandran, Prajit and Liu, Peter J and Le, Quoc V},
  title   = {Unsupervised pretraining for sequence to sequence learning},
  journal = {arXiv preprint arXiv:1611.02683},
  year    = {2016},
}

@book{Goodfellow-et-al-2016,
  author    = {Ian Goodfellow and Yoshua Bengio and Aaron Courville},
  title     = {Deep Learning},
  publisher = {MIT Press},
  year      = {2016},
  note      = {\url{http://www.deeplearningbook.org}},
}

@inproceedings{ochiai2017multichannel,
  author       = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R},
  title        = {Multichannel end-to-end speech recognition},
  booktitle    = {Proceedings of the 34th International Conference on Machine Learning-Volume 70},
  pages        = {2632--2641},
  organization = {JMLR.org},
  year         = {2017},
}

@inproceedings{chorowski2015attention,
  author    = {Chorowski, Jan K and Bahdanau, Dzmitry and Serdyuk, Dmitriy and Cho, Kyunghyun and Bengio, Yoshua},
  title     = {Attention-based models for speech recognition},
  booktitle = {Advances in neural information processing systems},
  pages     = {577--585},
  year      = {2015},
}

@inproceedings{acero1990acoustical,
  author    = {Acero, Alejandro},
  title     = {Acoustical and environmental robustness in automatic speech recognition},
  booktitle = {Proc. of ICASSP},
  year      = {1990},
}

@inproceedings{paul1992design,
  author       = {Paul, Douglas B and Baker, Janet M},
  title        = {The design for the Wall Street Journal-based CSR corpus},
  booktitle    = {Proceedings of the workshop on Speech and Natural Language},
  pages        = {357--362},
  organization = {Association for Computational Linguistics},
  year         = {1992},
}

@article{wang2019bridging,
  author  = {Wang, Chengyi and Wu, Yu and Liu, Shujie and Yang, Zhenglu and Zhou, Ming},
  title   = {Bridging the gap between pre-training and fine-tuning for end-to-end speech translation},
  journal = {arXiv preprint arXiv:1909.07575},
  year    = {2019},
}

@phdthesis{sifre2014rigid,
  author = {Sifre, Laurent},
  title  = {Rigid-motion scattering for image classification},
  school = {Ecole Polytechnique, CMAP},
  year   = {2014},
  note   = {Supervised by St{\'e}phane Mallat},
}

@inproceedings{sifre2013rotation,
  author    = {Sifre, Laurent and Mallat, St{\'e}phane},
  title     = {Rotation, scaling and deformation invariant scattering for texture discrimination},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {1233--1240},
  year      = {2013},
}

@inproceedings{sturm2012analysis,
  author       = {Sturm, Bob L},
  title        = {An analysis of the GTZAN music genre dataset},
  booktitle    = {Proceedings of the second international ACM workshop on Music information retrieval with user-centered and multimodal strategies},
  pages        = {7--12},
  organization = {ACM},
  year         = {2012},
}

@inproceedings{povey2011kaldi,
  author       = {Povey, Daniel and Ghoshal, Arnab and Boulianne, Gilles and Burget, Lukas and Glembek, Ondrej and Goel, Nagendra and Hannemann, Mirko and Motlicek, Petr and Qian, Yanmin and Schwarz, Petr and others},
  title        = {The {Kaldi} speech recognition toolkit},
  booktitle    = {IEEE 2011 workshop on automatic speech recognition and understanding},
  organization = {IEEE Signal Processing Society},
  year         = {2011},
}

@techreport{walker2004sphinx,
  author      = {Walker, Willie and Lamere, Paul and Kwok, Philip and Raj, Bhiksha and Singh, Rita and Gouvea, Evandro and Wolf, Peter and Woelfel, Joe},
  title       = {Sphinx-4: A flexible open source framework for speech recognition},
  institution = {Sun Microsystems, Inc.},
  year        = {2004},
}

@misc{Lamere03thecmu,
  author = {Paul Lamere and Philip Kwok and Evandro Gouv{\^e}a and Bhiksha Raj and Rita Singh and William Walker and Manfred Warmuth and Peter Wolf},
  title  = {The {CMU} {SPHINX-4} Speech Recognition System},
  year   = {2003},
  url    = {https://github.com/cmusphinx/sphinx4/blob/master/sphinx4-samples/src/main/java/edu/cmu/sphinx/demo/transcriber/TranscriberDemo.java},
  note   = {[Online; accessed ]},
}

@article{al2016theano,
  author  = {Al-Rfou, Rami and Alain, Guillaume and Almahairi, Amjad and Angermueller, Christof and Bahdanau, Dzmitry and Ballas, Nicolas and Bastien, Fr{\'e}d{\'e}ric and Bayer, Justin and Belikov, Anatoly and Belopolsky, Alexander and others},
  title   = {Theano: A Python framework for fast computation of mathematical expressions},
  journal = {arXiv preprint arXiv:1605.02688},
  year    = {2016},
}

@article{goldsborough2016tour,
  author  = {Goldsborough, Peter},
  title   = {A tour of tensorflow},
  journal = {arXiv preprint arXiv:1610.01178},
  year    = {2016},
}

@article{abadi2016tensorflow,
  author  = {Abadi, Mart{\'\i}n and Agarwal, Ashish and Barham, Paul and Brevdo, Eugene and Chen, Zhifeng and Citro, Craig and Corrado, Greg S and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and others},
  title   = {Tensorflow: Large-scale machine learning on heterogeneous distributed systems},
  journal = {arXiv preprint arXiv:1603.04467},
  year    = {2016},
}

@inproceedings{abadi2017computational,
  author       = {Abadi, Mart{\'\i}n and Isard, Michael and Murray, Derek G},
  title        = {A computational model for TensorFlow: an introduction},
  booktitle    = {Proceedings of the 1st ACM SIGPLAN International Workshop on Machine Learning and Programming Languages},
  pages        = {1--7},
  organization = {ACM},
  year         = {2017},
}

@inproceedings{goldman2011easyalign,
  author    = {Goldman, Jean-Philippe},
  title     = {{EasyAlign}: an automatic phonetic alignment tool under {Praat}},
  booktitle = {Interspeech},
  year      = {2011},
}

@inproceedings{krizhevsky2012imagenet,
  author    = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E},
  title     = {Imagenet classification with deep convolutional neural networks},
  booktitle = {Advances in neural information processing systems},
  pages     = {1097--1105},
  year      = {2012},
}

@misc{numpy,
  author       = {Travis Oliphant},
  title        = {{NumPy}: A guide to {NumPy}},
  howpublished = {USA: Trelgol Publishing},
  year         = {2006--},
  url          = {http://www.numpy.org/},
  note         = {[Online; accessed ]},
}

@inproceedings{novotney2009analysis,
  author    = {Novotney, Scott and Schwartz, Richard},
  title     = {Analysis of low-resource acoustic model self-training},
  booktitle = {Tenth Annual Conference of the International Speech Communication Association},
  year      = {2009},
}

@incollection{ketkar2017introduction,
  author    = {Ketkar, Nikhil},
  title     = {Introduction to pytorch},
  booktitle = {Deep learning with python},
  pages     = {195--208},
  publisher = {Springer},
  year      = {2017},
}

@article{schluter2001model,
  author    = {Schluter, Ralf and Ney, Hermann},
  title     = {Model-based MCE bound to the true Bayes' error},
  journal   = {IEEE Signal Processing Letters},
  volume    = {8},
  number    = {5},
  pages     = {131--133},
  publisher = {IEEE},
  year      = {2001},
}

@article{kamper2016unsupervised,
  author    = {Kamper, Herman and Jansen, Aren and Goldwater, Sharon},
  title     = {Unsupervised word segmentation and lexicon discovery using acoustic word embeddings},
  journal   = {IEEE/ACM Transactions on Audio, Speech and Language Processing (TASLP)},
  volume    = {24},
  number    = {4},
  pages     = {669--679},
  publisher = {IEEE Press},
  year      = {2016},
}

@inproceedings{ragni2018automatic,
  author    = {Ragni, Anton and Gales, Mark JF},
  title     = {Automatic Speech Recognition System Development in the ``Wild''},
  booktitle = {Interspeech},
  pages     = {2217--2221},
  year      = {2018},
}

@inproceedings{watanabe2018espnet,
  author    = {Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson {Enrique Yalta Soplin} and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
  title     = {ESPnet: End-to-End Speech Processing Toolkit},
  booktitle = {Interspeech},
  pages     = {2207--2211},
  year      = {2018},
  doi       = {10.21437/Interspeech.2018-1456},
  url       = {http://dx.doi.org/10.21437/Interspeech.2018-1456},
}

@inproceedings{pot2009choregraphe,
  author       = {Pot, Emmanuel and Monceaux, J{\'e}r{\^o}me and Gelin, Rodolphe and Maisonnier, Bruno},
  title        = {Choregraphe: a graphical tool for humanoid robot programming},
  booktitle    = {RO-MAN 2009-The 18th IEEE International Symposium on Robot and Human Interactive Communication},
  pages        = {46--51},
  organization = {IEEE},
  year         = {2009},
}

@misc{cwt_lecture,
  title        = {Continuous Wavelet Transform},
  howpublished = {Lecture notes, University of Huddersfield},
  note         = {Unpublished},
  year         = {2015},
}

@inproceedings{pennington-etal-2014-glove,
  author    = {Pennington, Jeffrey and Socher, Richard and Manning, Christopher},
  title     = {{GloVe}: Global Vectors for Word Representation},
  booktitle = {Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing ({EMNLP})},
  month     = oct,
  year      = {2014},
  address   = {Doha, Qatar},
  publisher = {Association for Computational Linguistics},
  pages     = {1532--1543},
  doi       = {10.3115/v1/D14-1162},
}

@book{becchetti1998,
  author    = {Becchetti, Claudio and Ricotti, Lucio P.},
  title     = {Speech recognition: theory and {C++} implementation},
  publisher = {Wiley},
  address   = {New York},
  year      = {1998},
  isbn      = {9780471977308},
  keywords  = {C (Computer program language); Automatic speech recognition},
  language  = {English},
}

@article{stan2016alisa,
  author    = {Stan, Adriana and Mamiya, Yoshitaka and Yamagishi, Junichi and Bell, Peter and Watts, Oliver and Clark, Robert AJ and King, Simon},
  title     = {ALISA: An automatic lightly supervised speech segmentation and alignment tool},
  journal   = {Computer Speech \& Language},
  volume    = {35},
  pages     = {116--133},
  publisher = {Elsevier},
  year      = {2016},
}

@article{besacier2014introduction,
  author  = {Besacier, Laurent and Barnard, Etienne and Karpov, Alexey and Schultz, Tanja},
  title   = {Introduction to the special issue on processing under-resourced languages},
  journal = {Speech Communication},
  year    = {2014},
}

@inproceedings{panayotov2015librispeech,
  author       = {Panayotov, Vassil and Chen, Guoguo and Povey, Daniel and Khudanpur, Sanjeev},
  title        = {Librispeech: an ASR corpus based on public domain audio books},
  booktitle    = {2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages        = {5206--5210},
  organization = {IEEE},
  year         = {2015},
}

@misc{mozilla_2019,
  title        = {Mozilla Deepspeech},
  howpublished = {Voice.mozilla.org},
  url          = {https://voice.mozilla.org/en},
  year         = {2019},
}

@misc{lyons_2012,
  author       = {Lyons, J},
  title        = {Mel Frequency Cepstral Coefficient ({MFCC}) tutorial},
  howpublished = {Practicalcryptography.com},
  url          = {http://practicalcryptography.com/miscellaneous/machine-learning/guide-mel-frequency-cepstral-coefficients-mfccs/},
  year         = {2012},
}

@article{gales2008application,
  author    = {Gales, Mark and Young, Steve and others},
  title     = {The application of hidden Markov models in speech recognition},
  journal   = {Foundations and Trends{\textregistered} in Signal Processing},
  volume    = {1},
  number    = {3},
  pages     = {195--304},
  publisher = {Now Publishers, Inc.},
  year      = {2008},
}

@article{becchetti1999behaviour,
  author  = {Becchetti, L},
  title   = {The behaviour of financial time series: stylised features, theoretical interpretations and proposals for Hidden Markov Model applications},
  journal = {Speech recognition. Theory and C++ implementation},
  year    = {1999},
}

@article{stevens1937scale,
  author    = {Stevens, Stanley Smith and Volkmann, John and Newman, Edwin B},
  title     = {A scale for the measurement of the psychological magnitude pitch},
  journal   = {The Journal of the Acoustical Society of America},
  volume    = {8},
  number    = {3},
  pages     = {185--190},
  publisher = {ASA},
  year      = {1937},
}

@inproceedings{Heafield-estimate,
  author    = {Kenneth Heafield and Ivan Pouzyrevsky and Jonathan H. Clark and Philipp Koehn},
  title     = {Scalable Modified {Kneser-Ney} Language Model Estimation},
  booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics},
  address   = {Sofia, Bulgaria},
  pages     = {690--696},
  month     = aug,
  year      = {2013},
  url       = {https://kheafield.com/papers/edinburgh/estimate\_paper.pdf},
}

@article{srivastava2014dropout,
  author    = {Srivastava, Nitish and Hinton, Geoffrey and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
  title     = {Dropout: a simple way to prevent neural networks from overfitting},
  journal   = {The journal of machine learning research},
  volume    = {15},
  number    = {1},
  pages     = {1929--1958},
  publisher = {JMLR.org},
  year      = {2014},
}

@article{saito2017statistical,
  author    = {Saito, Yuki and Takamichi, Shinnosuke and Saruwatari, Hiroshi},
  title     = {Statistical parametric speech synthesis incorporating generative adversarial networks},
  journal   = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  volume    = {26},
  number    = {1},
  pages     = {84--96},
  publisher = {IEEE},
  year      = {2017},
}

@article{picone1996fundamentals,
  author  = {Picone, Joseph},
  title   = {Fundamentals of speech recognition: A short course},
  journal = {Institute for Signal and Information Processing, Mississippi State University},
  year    = {1996},
}

@inproceedings{ragni2014data,
  author    = {Ragni, Anton and Knill, Katherine Mary and Rath, Shakti P and Gales, Mark John},
  title     = {Data augmentation for low resource languages},
  booktitle = {Interspeech},
  year      = {2014},
}

@inproceedings{gales2014speech,
  author    = {Gales, Mark JF and Knill, Kate M and Ragni, Anton and Rath, Shakti P},
  title     = {Speech recognition and keyword spotting for low-resource languages: BABEL project research at CUED},
  booktitle = {Spoken Language Technologies for Under-Resourced Languages},
  year      = {2014},
}

@article{kingma2014adam,
  author  = {Kingma, Diederik P and Ba, Jimmy},
  title   = {Adam: A method for stochastic optimization},
  journal = {arXiv preprint arXiv:1412.6980},
  year    = {2014},
}

@book{graves_2014,
  author    = {Graves, Alex},
  title     = {Supervised sequence labelling with recurrent neural networks},
  publisher = {Springer},
  year      = {2014},
}

@inproceedings{graves2013speech,
  author       = {Graves, Alex and Mohamed, Abdel-rahman and Hinton, Geoffrey},
  title        = {Speech recognition with deep recurrent neural networks},
  booktitle    = {2013 IEEE international conference on acoustics, speech and signal processing},
  pages        = {6645--6649},
  organization = {IEEE},
  year         = {2013},
}

@misc{mozilla/deepspeech_2019,
  title        = {Common voice},
  howpublished = {GitHub},
  url          = {https://github.com/mozilla/DeepSpeech#common-voice-training-data},
  year         = {2019},
}

@article{sak2014long,
  author  = {Sak, Ha{\c{s}}im and Senior, Andrew and Beaufays, Fran{\c{c}}oise},
  title   = {Long short-term memory based recurrent neural network architectures for large vocabulary speech recognition},
  journal = {arXiv preprint arXiv:1402.1128},
  year    = {2014},
}

@misc{anden2014scatnet,
  author       = {And{\'e}n, J and Sifre, L and Mallat, S and Kapoko, M and Lostanlen, V and Oyallon, E},
  title        = {{ScatNet} (v0.2)},
  howpublished = {Computer software},
  url          = {http://www.di.ens.fr/data/software/scatnet/},
  note         = {Accessed: December 10, 2013},
  year         = {2014},
}

@inproceedings{gehring2017convolutional,
  author       = {Gehring, Jonas and Auli, Michael and Grangier, David and Yarats, Denis and Dauphin, Yann N},
  title        = {Convolutional sequence to sequence learning},
  booktitle    = {Proceedings of the 34th International Conference on Machine Learning-Volume 70},
  pages        = {1243--1252},
  organization = {JMLR.org},
  year         = {2017},
}

@article{kalchbrenner2016neural,
  author  = {Kalchbrenner, Nal and Espeholt, Lasse and Simonyan, Karen and van den Oord, Aaron and Graves, Alex and Kavukcuoglu, Koray},
  title   = {Neural machine translation in linear time},
  journal = {arXiv preprint arXiv:1610.10099},
  year    = {2016},
}

@inproceedings{kaiser2016can,
  author    = {Kaiser, {\L}ukasz and Bengio, Samy},
  title     = {Can active memory replace attention?},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {3781--3789},
  year      = {2016},
}

@inproceedings{vaswani2017attention,
  author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
  title     = {Attention is all you need},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {5998--6008},
  year      = {2017},
}

@book{mcloughlin2009applied,
  author    = {McLoughlin, Ian},
  title     = {Applied speech and audio processing: with Matlab examples},
  publisher = {Cambridge University Press},
  year      = {2009},
}

@article{shen2016combination,
  author    = {Shen, Peng and Lu, Xugang and Hu, Xinhui and Kanda, Naoyuki and Saiko, Masahiro and Hori, Chiori and Kawai, Hisashi},
  title     = {Combination of multiple acoustic models with unsupervised adaptation for lecture speech transcription},
  journal   = {Speech Communication},
  volume    = {82},
  pages     = {1--13},
  publisher = {Elsevier},
  year      = {2016},
}

@article{dines2010measuring,
  author    = {Dines, John and Yamagishi, Junichi and King, Simon},
  title     = {Measuring the gap between HMM-based ASR and TTS},
  journal   = {IEEE Journal of Selected Topics in Signal Processing},
  volume    = {4},
  number    = {6},
  pages     = {1046--1058},
  publisher = {IEEE},
  year      = {2010},
}

@article{mallat1989theory,
  author    = {Mallat, Stephane G},
  title     = {A theory for multiresolution signal decomposition: the wavelet representation},
  journal   = {IEEE transactions on pattern analysis and machine intelligence},
  volume    = {11},
  number    = {7},
  pages     = {674--693},
  publisher = {IEEE},
  year      = {1989},
}

@article{cowan1990discussion,
  author    = {Cowan, Jack D},
  title     = {Discussion: McCulloch-Pitts and related neural nets from 1943 to 1989},
  journal   = {Bulletin of mathematical biology},
  volume    = {52},
  number    = {1-2},
  pages     = {73--97},
  publisher = {Springer},
  year      = {1990},
}

@article{boden2002guide,
  author  = {Boden, Mikael},
  title   = {A guide to recurrent neural networks and backpropagation},
  journal = {the Dallas project},
  year    = {2002},
}

@book{jaeger2002tutorial,
  author    = {Jaeger, Herbert},
  title     = {Tutorial on training recurrent neural networks, covering {BPPT}, {RTRL}, {EKF} and the ``echo state network'' approach},
  volume    = {5},
  publisher = {GMD-Forschungszentrum Informationstechnik Bonn},
  year      = {2002},
}

@inproceedings{mohamed2009deep,
  author    = {Mohamed, Abdel-rahman and Dahl, George and Hinton, Geoffrey},
  title     = {Deep belief networks for phone recognition},
  booktitle = {Nips workshop on deep learning for speech recognition and related applications},
  volume    = {1},
  number    = {9},
  pages     = {39},
  address   = {Vancouver, Canada},
  year      = {2009},
}

@inproceedings{yu2010roles,
  author    = {Yu, Dong and Deng, Li and Dahl, George},
  title     = {Roles of pre-training and fine-tuning in context-dependent DBN-HMMs for real-world speech recognition},
  booktitle = {Proc. NIPS Workshop on Deep Learning and Unsupervised Feature Learning},
  year      = {2010},
}

@article{dahl2012context,
  author    = {Dahl, George E and Yu, Dong and Deng, Li and Acero, Alex},
  title     = {Context-dependent pre-trained deep neural networks for large-vocabulary speech recognition},
  journal   = {IEEE Transactions on audio, speech, and language processing},
  volume    = {20},
  number    = {1},
  pages     = {30--42},
  publisher = {IEEE},
  year      = {2012},
}

@inproceedings{yu2012conversational,
  author    = {Yu, Dong and Seide, Frank and Li, Gang},
  title     = {Conversational Speech Transcription Using Context-Dependent Deep Neural Networks},
  booktitle = {ICML},
  year      = {2012},
}

@book{yu2016automatic,
  author    = {Yu, Dong and Deng, Li},
  title     = {Automatic Speech Recognition},
  publisher = {Springer},
  year      = {2016},
}

@article{sutton2012introduction,
  author    = {Sutton, Charles and McCallum, Andrew and others},
  title     = {An introduction to conditional random fields},
  journal   = {Foundations and Trends{\textregistered} in Machine Learning},
  volume    = {4},
  number    = {4},
  pages     = {267--373},
  publisher = {Now Publishers, Inc.},
  year      = {2012},
}

@article{maas2017building,
  author    = {Maas, Andrew L and Qi, Peng and Xie, Ziang and Hannun, Awni Y and Lengerich, Christopher T and Jurafsky, Daniel and Ng, Andrew Y},
  title     = {Building DNN acoustic models for large vocabulary speech recognition},
  journal   = {Computer Speech \& Language},
  volume    = {41},
  pages     = {195--213},
  publisher = {Elsevier},
  year      = {2017},
}

@inproceedings{sainath2014deep,
  author    = {Sainath, Tara N and Peddinti, Vijayaditya and Kingsbury, Brian and Fousek, Petr and Ramabhadran, Bhuvana and Nahamoo, David},
  title     = {Deep scattering spectra with deep neural networks for LVCSR tasks},
  booktitle = {Fifteenth Annual Conference of the International Speech Communication Association},
  year      = {2014},
}

@inproceedings{zeghidour2016deep,
  author       = {Zeghidour, Neil and Synnaeve, Gabriel and Versteegh, Maarten and Dupoux, Emmanuel},
  title        = {A deep scattering spectrum---deep siamese network pipeline for unsupervised acoustic modeling},
  booktitle    = {Acoustics, Speech and Signal Processing (ICASSP), 2016 IEEE International Conference on},
  pages        = {4965--4969},
  organization = {IEEE},
  year         = {2016},
}

@article{mallat2016understanding,
  author    = {Mallat, St{\'e}phane},
  title     = {Understanding deep convolutional networks},
  journal   = {Phil. Trans. R. Soc. A},
  volume    = {374},
  number    = {2065},
  pages     = {20150203},
  publisher = {The Royal Society},
  year      = {2016},
}

@article{lecun1998gradient,
  author    = {LeCun, Yann and Bottou, L{\'e}on and Bengio, Yoshua and Haffner, Patrick},
  title     = {Gradient-based learning applied to document recognition},
  journal   = {Proceedings of the IEEE},
  volume    = {86},
  number    = {11},
  pages     = {2278--2324},
  publisher = {IEEE},
  year      = {1998},
}

@techreport{smolensky1986information,
  author      = {Smolensky, Paul},
  title       = {Information processing in dynamical systems: Foundations of harmony theory},
  institution = {COLORADO UNIV AT BOULDER DEPT OF COMPUTER SCIENCE},
  year        = {1986},
}

@inproceedings{grezl2008optimizing,
  author    = {Grezl, Frantisek and Fousek, Petr},
  title     = {Optimizing bottle-neck features for {LVCSR}},
  booktitle = {ICASSP},
  volume    = {8},
  pages     = {4729--4732},
  year      = {2008},
}

@inproceedings{bengio2007greedy,
  author    = {Bengio, Yoshua and Lamblin, Pascal and Popovici, Dan and Larochelle, Hugo},
  title     = {Greedy layer-wise training of deep networks},
  booktitle = {Advances in neural information processing systems},
  pages     = {153--160},
  year      = {2007},
}

@article{kuhn1990cache,
  author   = {R. Kuhn and R. De Mori},
  title    = {A cache-based natural language model for speech recognition},
  journal  = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume   = {12},
  number   = {6},
  pages    = {570--583},
  year     = {1990},
  issn     = {0162-8828},
  doi      = {10.1109/34.56193},
  language = {English},
  abstract = {Speech-recognition systems must often decide between competing ways of breaking up the acoustic input into strings of words. Since the possible strings may be acoustically similar, a language model is required; given a word string, the model returns its linguistic probability. Several Markov language models are discussed. A novel kind of language model which reflects short-term patterns of word use by means of a cache component (analogous to cache memory in hardware terminology) is presented. The model also contains a 3g-gram component of the traditional type. The combined model and a pure 3g-gram model were tested on samples drawn from the Lancaster-Oslo/Bergen (LOB) corpus of English text. The relative performance of the two models is examined, and suggestions for the future improvements are made.},
}

@article{Brown1992class,
  author  = {Peter F. Brown and Peter V. Desouza and Robert L. Mercer and Vincent J. Della Pietra and Jenifer C. Lai},
  title   = {Class-based n-gram models of natural language},
  journal = {Computational linguistics},
  volume  = {18},
  number  = {4},
  pages   = {467--479},
  year    = {1992},
  url     = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.13.9919&rep=rep1&type=pdf},
}

@article{juang2000automatic,
  author   = {Bing-Hwang Juang and S. Furui},
  title    = {Automatic recognition and understanding of spoken language - a first step toward natural human-machine communication},
  journal  = {Proceedings of the IEEE},
  volume   = {88},
  number   = {8},
  pages    = {1142--1165},
  year     = {2000},
  issn     = {0018-9219},
  doi      = {10.1109/5.880077},
  url      = {http://ieeexplore.ieee.org/document/880077},
  language = {English},
  abstract = {The promise of a powerful computing device to help people in productivity as well as in recreation can only be realized with proper human-machine communication. Automatic recognition and understanding of spoken language is the first step toward natural human-machine interaction. Research in this field has produced remarkable results, leading to many exciting expectations and new challenges. We summarize the development of the spoken language technology from both a vertical (chronology) and a horizontal (spectrum of technical approaches) perspective. We highlight the introduction of statistical methods in dealing with language-related problems, as this represents a paradigm shift in the research field of spoken language processing. Statistical methods are designed to allow the machine to learn structural regularities in the speech signal, directly from data, for the purpose of automatic speech recognition and understanding. Research results in spoken language processing have led to a number of successful applications, ranging from dictation software for personal computers and telephone-call processing systems for automatic call routing, to automatic sub-captioning for television broadcasts. We analyze the technical successes that support these applications. Along with an assessment of the state of the art in this broad technical field, we also discuss the limitations of the current technology, and point out the challenges that are ahead. This paper presents an accurate overview of spoken language technology as a basis to inspire future advances.},
}

% Young (1996): review of large-vocabulary continuous-speech recognition systems.
% The exported title was cut off before its last word; restored here.
% NOTE(review): pages holds only the first page -- confirm the full range against the DOI.
@article{1996YoungA,
  author   = {Young, Steve},
  title    = {A review of large-vocabulary continuous-speech recognition},
  journal  = {IEEE Signal Processing Magazine},
  volume   = {13},
  number   = {5},
  pages    = {45},
  year     = {1996},
  abstract = {Considerable progress has been made in speech-recognition technology over the last few years and nowhere has this progress been more evident than in the area of large-vocabulary recognition (LVR). Current laboratory systems are capable of transcribing continuous speech from any speaker with average word-error rates between 5\% and 10\%. If speaker adaptation is allowed, then after 2 or 3 minutes of speech, the error rate will drop well below 5\% for most speakers. LVR systems had been limited to dictation applications since the systems were speaker dependent and required words to be spoken with a short pause between them. However, the capability to recognize natural continuous-speech input from any speaker opens up many more applications. As a result, LVR technology appears to be on the brink of widespread deployment across a range of information technology (IT) systems. This article discusses the principles and architecture of current LVR systems and identifies the key issues affecting their future deployment. To illustrate the various points raised, the Cambridge University HTK system is described. This system is a modern design that gives state-of-the-art performance, and it is typical of the current generation of recognition systems.},
  issn     = {1053-5888},
  language = {English},
  doi      = {10.1109/79.536824}
}

% Jelinek (1976): statistical methods for continuous speech recognition.
% 0018-9219 is an ISSN, so it is stored in issn rather than isbn.
@article{1976jelinekcontinuous,
  author   = {Jelinek, Frederick},
  title    = {Continuous speech recognition by statistical methods},
  journal  = {Proceedings of the IEEE},
  volume   = {64},
  number   = {4},
  pages    = {532--556},
  year     = {1976},
  abstract = {Statistical methods useful in automatic recognition of continuous speech are described. They concern modeling of a speaker and of an acoustic processor, extraction of the models' statistical parameters and hypothesis search procedures and likelihood computations of linguistic decoding. Experimental results are presented that indicate the power of the methods.},
  issn     = {0018-9219},
  language = {English},
  doi      = {10.1109/PROC.1976.10159}
}

% Nunamaker, Chen and Purdin (1990): systems development as an IS research method.
% The ampersand in the publisher name is escaped so LaTeX does not read it as an alignment tab.
% The suffix uses BibTeX's "Last, Jr, First" form.
@article{nunamaker1990systems,
  author    = {Nunamaker, Jr., Jay F. and Chen, Minder and Purdin, Titus D. M.},
  title     = {Systems development in information systems research},
  journal   = {Journal of Management Information Systems},
  volume    = {7},
  number    = {3},
  pages     = {89--106},
  year      = {1990},
  publisher = {Taylor \& Francis}
}

% Mallat (2016): mathematical analysis of deep convolutional networks.
% The accent macro had lost its backslash; the journal name is spelled out in full.
@article{mallat2016understanding,
  author    = {Mallat, St{\'e}phane},
  title     = {Understanding deep convolutional networks},
  journal   = {Philosophical Transactions of the Royal Society A: Mathematical, Physical and Engineering Sciences},
  volume    = {374},
  number    = {2065},
  pages     = {20150203},
  year      = {2016},
  publisher = {The Royal Society}
}

% Anden and Mallat (2011): multiscale scattering features for audio classification.
% "Miami, FL" is the conference location, not a sponsoring organization, so it moves to address.
% The trailing period is dropped from the title because styles add their own punctuation.
@inproceedings{anden2011multiscale,
  author    = {And{\'e}n, Joakim and Mallat, St{\'e}phane},
  title     = {Multiscale Scattering for Audio Classification},
  booktitle = {ISMIR},
  pages     = {657--662},
  year      = {2011},
  address   = {Miami, FL}
}

% Furui (1986): dynamic spectral features for speaker-independent isolated word recognition.
@article{furui1986speaker,
  author    = {Furui, Sadaoki},
  title     = {Speaker-independent isolated word recognition using dynamic features of speech spectrum},
  journal   = {IEEE Transactions on Acoustics, Speech, and Signal Processing},
  publisher = {IEEE},
  year      = {1986},
  volume    = {34},
  number    = {1},
  pages     = {52--59}
}

% Hermansky and Morgan (1994): RASTA processing of speech.
% The acronym is brace-protected so sentence-casing styles do not lowercase it.
@article{hermansky1994rasta,
  author    = {Hermansky, Hynek and Morgan, Nelson},
  title     = {{RASTA} processing of speech},
  journal   = {IEEE Transactions on Speech and Audio Processing},
  volume    = {2},
  number    = {4},
  pages     = {578--589},
  year      = {1994},
  publisher = {IEEE}
}

% Hermansky (1990): perceptual linear predictive (PLP) analysis of speech.
% The page range uses a double hyphen; the acronym is brace-protected.
@article{hermansky1990perceptual,
  author  = {Hermansky, Hynek},
  title   = {Perceptual linear predictive ({PLP}) analysis of speech},
  journal = {The Journal of the Acoustical Society of America},
  volume  = {87},
  number  = {4},
  pages   = {1738--1752},
  year    = {1990}
}

% Davis and Mermelstein (1980): comparison of parametric representations for word recognition.
% The journal name uses the same capitalisation as the other entry for this journal in the file.
@article{davis1980comparison,
  author    = {Davis, Steven and Mermelstein, Paul},
  title     = {Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences},
  journal   = {IEEE Transactions on Acoustics, Speech, and Signal Processing},
  volume    = {28},
  number    = {4},
  pages     = {357--366},
  year      = {1980},
  publisher = {IEEE}
}

% Anden and Mallat (2014): deep scattering spectrum.
% Accent macros are restored; they had lost their backslashes.
@article{anden2014deep,
  author    = {And{\'e}n, Joakim and Mallat, St{\'e}phane},
  title     = {Deep scattering spectrum},
  journal   = {IEEE Transactions on Signal Processing},
  volume    = {62},
  number    = {16},
  pages     = {4114--4128},
  year      = {2014},
  publisher = {IEEE}
}

% Rosenberg et al. (2017): end-to-end ASR and keyword search for low-resource languages.
% The empty volume/number fields are removed because they trigger "empty field" warnings.
% The month uses the predefined macro.
@inproceedings{Rosenberg2017end,
  author    = {Rosenberg, Andrew and Audhkhasi, Kartik and Sethy, Abhinav and Ramabhadran, Bhuvana and Picheny, Michael},
  title     = {End-to-end speech recognition and keyword search on low-resource languages},
  booktitle = {2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages     = {5280--5284},
  year      = {2017},
  month     = mar,
  keywords  = {natural language processing;speech recognition;end-to-end speech recognition systems;keyword search;low-resource languages;ASR frameworks;orthographic query;speech corpus;automatic speech recognition;Connectionist Temporal Classification;CTC networks;recurrent encoder-decoders;ASR systems;IARPA BABEL OP3 languages;evaluation framework;Speech recognition;Hidden Markov models;Acoustics;Training;Decoding;Keyword search;Indexes;keyword search;end-to-end speech recognition;CTC;attention networks},
  doi       = {10.1109/ICASSP.2017.7953164},
  issn      = {2379-190X}
}

% Amodei et al. (2016): Deep Speech 2.
% The system name and language names are capitalised and brace-protected.
@inproceedings{amodei2016deep,
  author    = {Amodei, Dario and Ananthanarayanan, Sundaram and Anubhai, Rishita and Bai, Jingliang and Battenberg, Eric and Case, Carl and Casper, Jared and Catanzaro, Bryan and Cheng, Qiang and Chen, Guoliang and others},
  title     = {{Deep Speech 2}: End-to-end speech recognition in {English} and {Mandarin}},
  booktitle = {International Conference on Machine Learning},
  pages     = {173--182},
  year      = {2016}
}

% Peddinti et al. (2014): deep scattering spectrum features with deep neural networks.
% The glued middle initial is separated. The inverted booktitle is rewritten in natural
% word order, matching the file's other ICASSP entries.
@inproceedings{peddinti2014deep,
  author       = {Peddinti, Vijayaditya and Sainath, Tara N. and Maymon, Shay and Ramabhadran, Bhuvana and Nahamoo, David and Goel, Vaibhava},
  title        = {Deep scattering spectrum with deep neural networks},
  booktitle    = {2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages        = {210--214},
  year         = {2014},
  organization = {IEEE}
}

% Kunze et al. (2017): transfer learning for speech recognition (arXiv preprint).
@article{kunze2017transfer,
  author  = {Kunze, Julius and Kirsch, Louis and Kurenkov, Ilia and Krug, Andreas and Johannsmeier, Jens and Stober, Sebastian},
  title   = {Transfer learning for speech recognition on a budget},
  year    = {2017},
  journal = {arXiv preprint arXiv:1706.00290}
}

% Collobert, Puhrsch and Synnaeve (2016): Wav2letter (arXiv preprint).
@article{collobert2016wav2letter,
  author  = {Collobert, Ronan and Puhrsch, Christian and Synnaeve, Gabriel},
  title   = {Wav2letter: an end-to-end convnet-based speech recognition system},
  year    = {2016},
  journal = {arXiv preprint arXiv:1609.03193}
}

% Graves et al. (2006): connectionist temporal classification (CTC).
% Accent macros are restored; the acute and umlaut commands had lost their backslashes.
@inproceedings{graves2006connectionist,
  author       = {Graves, Alex and Fern{\'a}ndez, Santiago and Gomez, Faustino and Schmidhuber, J{\"u}rgen},
  title        = {Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks},
  booktitle    = {Proceedings of the 23rd International Conference on Machine Learning},
  pages        = {369--376},
  year         = {2006},
  organization = {ACM}
}

% Graves and Jaitly (2014): end-to-end speech recognition with recurrent neural networks.
@inproceedings{graves2014towards,
  author    = {Graves, Alex and Jaitly, Navdeep},
  title     = {Towards end-to-end speech recognition with recurrent neural networks},
  year      = {2014},
  booktitle = {International Conference on Machine Learning},
  pages     = {1764--1772}
}

% Mohamed, Dahl and Hinton (2012): acoustic modelling with deep belief networks.
% The journal name is spelled out to match the other entries for this journal; this also
% removes the unescaped ampersand.
% NOTE(review): "and others" was dropped because the paper appears to have exactly these three authors -- confirm.
@article{mohamed2012acoustic,
  author  = {Mohamed, Abdel-rahman and Dahl, George E. and Hinton, Geoffrey},
  title   = {Acoustic modeling using deep belief networks},
  journal = {IEEE Transactions on Audio, Speech, and Language Processing},
  volume  = {20},
  number  = {1},
  pages   = {14--22},
  year    = {2012}
}

% Woodland and Povey (2000): large-scale discriminative training for speech recognition.
% The glued initials "PC" are separated so abbreviating styles keep both initials.
@inproceedings{woodland2000large,
  author    = {Woodland, P. C. and Povey, Daniel},
  title     = {Large scale discriminative training for speech recognition},
  booktitle = {ASR2000-Automatic Speech Recognition: Challenges for the new Millenium ISCA Tutorial and Research Workshop (ITRW)},
  year      = {2000}
}

% Povey et al. (2011): the subspace Gaussian mixture model.
% Acute-accent macros are restored, the Unicode em dash becomes "---", and the ampersand is escaped.
@article{povey2011subspace,
  author    = {Povey, Daniel and Burget, Luk{\'a}{\v{s}} and Agarwal, Mohit and Akyazi, Pinar and Kai, Feng and Ghoshal, Arnab and Glembek, Ond{\v{r}}ej and Goel, Nagendra and Karafi{\'a}t, Martin and Rastrow, Ariya and others},
  title     = {The subspace {Gaussian} mixture model---A structured model for speech recognition},
  journal   = {Computer Speech \& Language},
  volume    = {25},
  number    = {2},
  pages     = {404--439},
  year      = {2011},
  publisher = {Elsevier}
}

% Ghoshal, Swietojanski and Renals (2013): multilingual training of deep neural networks.
% The inverted booktitle is rewritten in natural word order, matching the file's other ICASSP entries.
@inproceedings{ghoshal2013multilingual,
  author       = {Ghoshal, Arnab and Swietojanski, Pawel and Renals, Steve},
  title        = {Multilingual training of deep neural networks},
  booktitle    = {2013 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages        = {7319--7323},
  year         = {2013},
  organization = {IEEE}
}

% Vu and Schultz (2013): multilingual MLP for rapid language adaptation.
% The trailing period is removed from the title; styles add terminal punctuation themselves.
@inproceedings{vu2013multilingual,
  author    = {Vu, Ngoc Thang and Schultz, Tanja},
  title     = {Multilingual multilayer perceptron for rapid language adaptation between and across language families},
  booktitle = {Interspeech},
  pages     = {515--519},
  year      = {2013}
}

% Young et al. (2002): The HTK Book.
% This is a book, not a journal article: the issuing department moves from journal to publisher.
% NOTE(review): volume and pages are carried over unchanged from the export -- confirm they are meaningful.
@book{young2002htk,
  author    = {Young, Steve and Evermann, Gunnar and Gales, Mark and Hain, Thomas and Kershaw, Dan and Liu, Xunying and Moore, Gareth and Odell, Julian and Ollason, Dave and Povey, Dan and others},
  title     = {The {HTK} book},
  publisher = {Cambridge University Engineering Department},
  volume    = {3},
  pages     = {175},
  year      = {2002}
}

% Simons and Fennig (2018): Ethnologue, twenty-first edition.
% The trailing dot that broke the URL is removed. The title loses its leading space and
% trailing period, and the edition moves into note.
% NOTE(review): the exported volume=2018 / number=11/11/ looked like an access date and is
% recorded as urldate -- confirm.
@misc{ethnologue,
  author  = {Simons, Gary F. and Fennig, Charles D.},
  title   = {Ethnologue: Languages of the World},
  note    = {Twenty-first edition},
  year    = {2018},
  url     = {http://www.ethnologue.com},
  urldate = {2018-11-11}
}

% Ogan (2008): Okrika, a kingdom of the Niger Delta.
% NOTE(review): the export gave the name as "Charles Ogan D. S.", which BibTeX parses with
% surname "S."; "Ogan" is presumed to be the surname -- confirm against the book.
@book{wakirike,
  author    = {Ogan, Charles D. S.},
  title     = {{Okrika}: A kingdom of the {Niger Delta}},
  publisher = {Onyoma Research Publications},
  address   = {Port Harcourt, Rivers State, Nigeria},
  edition   = {First},
  pages     = {27},
  year      = {2008}
}

% Berment (2004): PhD thesis on computerising under-resourced languages.
% The acute-accent macros in the title and school are restored; they had lost their backslashes.
@phdthesis{berment2004methodes,
  author = {Berment, Vincent},
  title  = {M{\'e}thodes pour informatiser les langues et les groupes de langues {\guillemotleft}peu dot{\'e}es{\guillemotright}},
  school = {Universit{\'e} Joseph-Fourier-Grenoble I},
  year   = {2004}
}

% Hannun et al. (2014): Deep Speech (arXiv preprint).
@article{hannun2014deep,
  author  = {Hannun, Awni and Case, Carl and Casper, Jared and Catanzaro, Bryan and Diamos, Greg and Elsen, Erich and Prenger, Ryan and Satheesh, Sanjeev and Sengupta, Shubho and Coates, Adam and others},
  title   = {Deep speech: Scaling up end-to-end speech recognition},
  year    = {2014},
  journal = {arXiv preprint arXiv:1412.5567}
}

% Hannun et al. (2014): first-pass LVCSR with bi-directional recurrent DNNs (arXiv preprint).
% The acronym is brace-protected so sentence-casing styles do not lowercase it.
@article{hannun2014first,
  author  = {Hannun, Awni Y. and Maas, Andrew L. and Jurafsky, Daniel and Ng, Andrew Y.},
  title   = {First-pass large vocabulary continuous speech recognition using bi-directional recurrent {DNNs}},
  journal = {arXiv preprint arXiv:1408.2873},
  year    = {2014}
}

% Saon et al. (2015): IBM English conversational telephone ASR system (arXiv preprint).
% The company and language names are brace-protected against sentence-casing styles.
@article{saon2015ibm,
  author  = {Saon, George and Kuo, Hong-Kwang J. and Rennie, Steven and Picheny, Michael},
  title   = {The {IBM} 2015 {English} conversational telephone speech recognition system},
  journal = {arXiv preprint arXiv:1505.05899},
  year    = {2015}
}

% Deng and Yu (2014): deep learning methods and applications.
% This entry was fused onto the previous entry's line and is now separate.
% NOTE(review): "and others" was dropped because the monograph appears to have exactly these two authors -- confirm.
@article{deng2014deep,
  author    = {Deng, Li and Yu, Dong},
  title     = {Deep learning: methods and applications},
  journal   = {Foundations and Trends{\textregistered} in Signal Processing},
  volume    = {7},
  number    = {3--4},
  pages     = {197--387},
  year      = {2014},
  publisher = {Now Publishers, Inc.}
}

% Watanabe and Chien (2015): Bayesian speech and language processing.
% The export residue "(. e." is removed from the author field. A single ISBN-13 is kept and
% the dangling comma is dropped.
@book{2015watanabe,
  author    = {Watanabe, Shinji and Chien, Jen-Tzung},
  title     = {{Bayesian} speech and language processing},
  publisher = {Cambridge University Press},
  address   = {Cambridge},
  year      = {2015},
  keywords  = {Mechanical speech recognizer; Speech processing systems; Mathematical models; Speech recognition, Automatic; Automatic speech recognition},
  isbn      = {9781107055575},
  language  = {English}
}

% Deng and Li (2013): overview of machine learning paradigms for speech recognition.
@article{deng2013machine,
  author    = {Deng, Li and Li, Xiao},
  title     = {Machine learning paradigms for speech recognition: An overview},
  journal   = {IEEE Transactions on Audio, Speech, and Language Processing},
  publisher = {IEEE},
  year      = {2013},
  volume    = {21},
  number    = {5},
  pages     = {1060--1089}
}

% Gales, Watanabe and Fosler-Lussier (2012): overview of structured discriminative models for ASR.
@article{gales2012structured,
  author    = {Gales, Mark John Francis and Watanabe, Shinji and Fosler-Lussier, Eric},
  title     = {Structured discriminative models for speech recognition: An overview},
  journal   = {IEEE Signal Processing Magazine},
  publisher = {IEEE},
  year      = {2012},
  volume    = {29},
  number    = {6},
  pages     = {70--81}
}

% Goodfellow et al. (2014): generative adversarial nets.
@inproceedings{goodfellow2014generative,
  author    = {Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua},
  title     = {Generative adversarial nets},
  year      = {2014},
  booktitle = {Advances in neural information processing systems},
  pages     = {2672--2680}
}

% Cho et al. (2014): RNN encoder-decoder for statistical machine translation (arXiv preprint).
% The diaeresis macro is restored. The lowercase particle "van" lets BibTeX parse it as the
% von part of the name. The acronym is brace-protected.
@article{cho2014learning,
  author  = {Cho, Kyunghyun and van Merri{\"e}nboer, Bart and Gulcehre, Caglar and Bahdanau, Dzmitry and Bougares, Fethi and Schwenk, Holger and Bengio, Yoshua},
  title   = {Learning phrase representations using {RNN} encoder-decoder for statistical machine translation},
  journal = {arXiv preprint arXiv:1406.1078},
  year    = {2014}
}

% Geron (2017): Hands-on machine learning with Scikit-Learn and TensorFlow.
% The raw Unicode accents become LaTeX escapes, as used elsewhere in this file. A single
% ISBN-13 is kept and the product names are brace-protected.
@book{geron2017,
  author    = {G{\'e}ron, Aur{\'e}lien},
  title     = {Hands-on machine learning with {Scikit-Learn} and {TensorFlow}: concepts, tools, and techniques to build intelligent systems},
  publisher = {O'Reilly},
  address   = {Beijing},
  edition   = {First},
  year      = {2017},
  keywords  = {Computers and IT; Machine learning},
  isbn      = {9781491962299},
  language  = {English}
}

% Marsland (2009): Machine learning, an algorithmic perspective.
% The stray extra closing brace after the entry is removed and the ampersand is escaped.
% The address and ISBN lose their stray semicolons.
% NOTE(review): the url is an institution-specific library discovery link -- consider dropping it.
@book{marsland2009,
  author    = {Marsland, Stephen},
  title     = {Machine learning: an algorithmic perspective},
  publisher = {Chapman \& Hall/CRC},
  address   = {Boca Raton and London},
  year      = {2009},
  keywords  = {Algorithms; Machine learning},
  isbn      = {9781420067187},
  language  = {English},
  url       = {http://hud.summon.serialssolutions.com/2.0.0/link/0/eLvHCXMwbV25DsIwDLU4FiSkcopT6g8UaNK0ZEYgFjYkxspJU2Bhgv8nblNAwJgMTiI5tny8ZwDOFqvgyyYQz3csNK41ZpqFaP1IFiVa2fhC5OuCdvuDmOlFu10l3i6PzEGrNN6psrlUV7WMmRRS1KGeJKTb4Ym_syuRkDySJZKLLLI9qCJ4cmvyR1buhzfZdaBBCIMu1MytB141V8F336wP3qHocDS-G-lwHsB4tz1u9oGVlLqES1reiw2hjdSlfrsXaLZsBP5KIU94bkMqKYh7CzlHI-IoVBLRMDWG7q-gyb_NKbTKugYlA2bQzK2ymnnxoidGUGLj}
}

% Snoek, Larochelle and Adams (2012): Bayesian optimisation of ML hyperparameters.
% "Bayesian" is capitalised and brace-protected because it derives from a proper name.
@inproceedings{snoek2012practical,
  author    = {Snoek, Jasper and Larochelle, Hugo and Adams, Ryan P.},
  title     = {Practical {Bayesian} optimization of machine learning algorithms},
  booktitle = {Advances in neural information processing systems},
  pages     = {2951--2959},
  year      = {2012}
}

% Xu and Fung (2013): cross-lingual language modelling for low-resource ASR.
@article{xu2013cross,
  author    = {Xu, Ping and Fung, Pascale},
  title     = {Cross-lingual language modeling for low-resource speech recognition},
  journal   = {IEEE Transactions on Audio, Speech, and Language Processing},
  publisher = {IEEE},
  year      = {2013},
  volume    = {21},
  number    = {6},
  pages     = {1134--1144}
}

% Kim et al. (2016): character-aware neural language models.
% The trailing period is removed from the title; styles add terminal punctuation themselves.
@inproceedings{kim2016character,
  author    = {Kim, Yoon and Jernite, Yacine and Sontag, David and Rush, Alexander M.},
  title     = {Character-Aware Neural Language Models},
  booktitle = {AAAI},
  pages     = {2741--2749},
  year      = {2016}
}

% Chen and Goodman (1996): empirical study of smoothing techniques for language modelling.
@inproceedings{chen1996empirical,
  author       = {Chen, Stanley F and Goodman, Joshua},
  title        = {An empirical study of smoothing techniques for language modeling},
  year         = {1996},
  booktitle    = {Proceedings of the 34th annual meeting on Association for Computational Linguistics},
  organization = {Association for Computational Linguistics},
  pages        = {310--318}
}

% Bengio et al. (2003): a neural probabilistic language model.
% The accent macro is restored. "Feb" is a month rather than an issue number, so it moves
% to month using the predefined macro.
@article{bengio2003neural,
  author  = {Bengio, Yoshua and Ducharme, R{\'e}jean and Vincent, Pascal and Jauvin, Christian},
  title   = {A neural probabilistic language model},
  journal = {Journal of Machine Learning Research},
  volume  = {3},
  pages   = {1137--1155},
  month   = feb,
  year    = {2003}
}

% Mikolov et al. (2011): evaluation and combination of advanced language modelling techniques.
% Accent macros are restored; they had lost their backslashes.
% NOTE(review): the final y in the last author's surname is set with an acute accent;
% the export had a bare grave mark -- confirm.
@inproceedings{mikolov2011empirical,
  author    = {Mikolov, Tom{\'a}{\v{s}} and Deoras, Anoop and Kombrink, Stefan and Burget, Luk{\'a}{\v{s}} and {\v{C}}ernock{\'y}, Jan},
  title     = {Empirical evaluation and combination of advanced language modeling techniques},
  booktitle = {Twelfth Annual Conference of the International Speech Communication Association},
  year      = {2011}
}

% Sutskever, Vinyals and Le (2014): sequence-to-sequence learning with neural networks.
@inproceedings{sutskever2014sequence,
  author    = {Sutskever, Ilya and Vinyals, Oriol and Le, Quoc V},
  title     = {Sequence to sequence learning with neural networks},
  year      = {2014},
  booktitle = {Advances in neural information processing systems},
  pages     = {3104--3112}
}

% Luong, Socher and Manning (2013): word representations with recursive networks for morphology.
% The trailing period is removed from the title; styles add terminal punctuation themselves.
@inproceedings{luong2013better,
  author    = {Luong, Thang and Socher, Richard and Manning, Christopher D.},
  title     = {Better word representations with recursive neural networks for morphology},
  booktitle = {CoNLL},
  pages     = {104--113},
  year      = {2013}
}

% Versteegh et al. (2015): the Zero Resource Speech Challenge 2015.
@inproceedings{versteegh2015zero,
  author    = {Versteegh, Maarten and Thiolliere, Roland and Schatz, Thomas and Cao, Xuan Nga and Anguera, Xavier and Jansen, Aren and Dupoux, Emmanuel},
  title     = {The zero resource speech challenge 2015},
  year      = {2015},
  booktitle = {Sixteenth Annual Conference of the International Speech Communication Association}
}

% Besacier et al. (2014): survey of ASR for under-resourced languages.
@article{besacier2014automatic,
  author    = {Besacier, Laurent and Barnard, Etienne and Karpov, Alexey and Schultz, Tanja},
  title     = {Automatic speech recognition for under-resourced languages: A survey},
  journal   = {Speech Communication},
  publisher = {Elsevier},
  year      = {2014},
  volume    = {56},
  pages     = {85--100}
}

% Allen (1995): Natural language understanding (book).
@book{allen1995natural,
  author    = {Allen, James},
  title     = {Natural language understanding},
  publisher = {Pearson},
  year      = {1995}
}

% Graves, Jaitly and Mohamed (2013): hybrid speech recognition with deep bidirectional LSTM.
% The inverted booktitle is rewritten in natural word order and the acronym in the title is brace-protected.
@inproceedings{graves2013hybrid,
  author       = {Graves, Alex and Jaitly, Navdeep and Mohamed, Abdel-rahman},
  title        = {Hybrid speech recognition with deep bidirectional {LSTM}},
  booktitle    = {2013 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
  pages        = {273--278},
  year         = {2013},
  organization = {IEEE}
}