
Commit 9f301c1

Commit message: "Version used by prototype."

1 parent: 0d5b565

114 files changed (+4761, -4504036 lines)

.gitignore

Lines changed: 3 additions & 11 deletions

```diff
@@ -34,17 +34,6 @@ wheels/
 .installed.cfg
 *.egg
 
-# Logs
-logs/
-
-# Testes
-test/
-train/
-eduamf/
-data/
-models/
-embeddings/
-
 # PyInstaller
 # Usually these files are written by a python script from a template
 # before PyInstaller builds the exe, so as to inject date/other infos into it.
@@ -106,6 +95,9 @@ env/
 venv/
 ENV/
 
+# My changes
+eduamf/
+
 # Spyder project settings
 .spyderproject
 .spyproject
```

.travis.yml

Lines changed: 5 additions & 5 deletions

```diff
@@ -17,7 +17,7 @@ before_install:
 # Useful for debugging any issues with conda
 - conda info -a
 # freeze the supported pytorch version for consistency
-- conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION pytorch=0.4.0 -c soumith
+- conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION pytorch=0.4.1 cuda92 -c pytorch
 - source activate test-environment
 # use requirements.txt for dependencies
 - pip install -r requirements.txt
@@ -32,15 +32,15 @@ install:
 script:
 - wget -O /tmp/im2text.tgz http://lstm.seas.harvard.edu/latex/im2text_small.tgz; tar zxf /tmp/im2text.tgz -C /tmp/; head /tmp/im2text/src-train.txt > /tmp/im2text/src-train-head.txt; head /tmp/im2text/tgt-train.txt > /tmp/im2text/tgt-train-head.txt; head /tmp/im2text/src-val.txt > /tmp/im2text/src-val-head.txt; head /tmp/im2text/tgt-val.txt > /tmp/im2text/tgt-val-head.txt
 - wget -O /tmp/speech.tgz http://lstm.seas.harvard.edu/latex/speech.tgz; tar zxf /tmp/speech.tgz -C /tmp/; head /tmp/speech/src-train.txt > /tmp/speech/src-train-head.txt; head /tmp/speech/tgt-train.txt > /tmp/speech/tgt-train-head.txt; head /tmp/speech/src-val.txt > /tmp/speech/src-val-head.txt; head /tmp/speech/tgt-val.txt > /tmp/speech/tgt-val-head.txt
-- wget -O /tmp/test_model_speech.pt http://lstm.seas.harvard.edu/latex/test_model_speech.pt
+- wget -O /tmp/test_model_speech.pt http://lstm.seas.harvard.edu/latex/model_step_2760.pt
 - wget -O /tmp/test_model_im2text.pt http://lstm.seas.harvard.edu/latex/test_model_im2text.pt
 - python -m unittest discover
 # test nmt preprocessing
 - python preprocess.py -train_src data/src-train.txt -train_tgt data/tgt-train.txt -valid_src data/src-val.txt -valid_tgt data/tgt-val.txt -save_data /tmp/data -src_vocab_size 1000 -tgt_vocab_size 1000 && rm -rf /tmp/data*.pt
 # test im2text preprocessing
-- python preprocess.py -data_type img -src_dir /tmp/im2text/images -train_src /tmp/im2text/src-train.txt -train_tgt /tmp/im2text/tgt-train.txt -valid_src /tmp/im2text/src-val.txt -valid_tgt /tmp/im2text/tgt-val.txt -save_data /tmp/im2text/data && rm -rf /tmp/im2text/data*.pt
+- python preprocess.py -data_type img -shard_size 3 -src_dir /tmp/im2text/images -train_src /tmp/im2text/src-train.txt -train_tgt /tmp/im2text/tgt-train.txt -valid_src /tmp/im2text/src-val.txt -valid_tgt /tmp/im2text/tgt-val.txt -save_data /tmp/im2text/data && rm -rf /tmp/im2text/data*.pt
 # test speech2text preprocessing
-- python preprocess.py -data_type audio -src_dir /tmp/speech/an4_dataset -train_src /tmp/speech/src-train.txt -train_tgt /tmp/speech/tgt-train.txt -valid_src /tmp/speech/src-val.txt -valid_tgt /tmp/speech/tgt-val.txt -save_data /tmp/speech/data && rm -rf /tmp/speech/data*.pt
+- python preprocess.py -data_type audio -shard_size 300 -src_dir /tmp/speech/an4_dataset -train_src /tmp/speech/src-train.txt -train_tgt /tmp/speech/tgt-train.txt -valid_src /tmp/speech/src-val.txt -valid_tgt /tmp/speech/tgt-val.txt -save_data /tmp/speech/data && rm -rf /tmp/speech/data*.pt
 # test nmt translation
 - head data/src-test.txt > /tmp/src-test.txt; python translate.py -model onmt/tests/test_model.pt -src /tmp/src-test.txt -verbose
 # test im2text translation
@@ -50,7 +50,7 @@ script:
 # test nmt preprocessing and training
 - head data/src-val.txt > /tmp/src-val.txt; head data/tgt-val.txt > /tmp/tgt-val.txt; python preprocess.py -train_src /tmp/src-val.txt -train_tgt /tmp/tgt-val.txt -valid_src /tmp/src-val.txt -valid_tgt /tmp/tgt-val.txt -save_data /tmp/q -src_vocab_size 1000 -tgt_vocab_size 1000; python train.py -data /tmp/q -rnn_size 2 -batch_size 10 -word_vec_size 5 -report_every 5 -rnn_size 10 -train_steps 10 && rm -rf /tmp/q*.pt
 # test nmt preprocessing w/ sharding and training w/copy
-- head data/src-val.txt > /tmp/src-val.txt; head data/tgt-val.txt > /tmp/tgt-val.txt; python preprocess.py -train_src /tmp/src-val.txt -train_tgt /tmp/tgt-val.txt -valid_src /tmp/src-val.txt -valid_tgt /tmp/tgt-val.txt -max_shard_size 1 -dynamic_dict -save_data /tmp/q -src_vocab_size 1000 -tgt_vocab_size 1000; python train.py -data /tmp/q -rnn_size 2 -batch_size 10 -word_vec_size 5 -report_every 5 -rnn_size 10 -copy_attn -train_steps 10 && rm -rf /tmp/q*.pt
+- head data/src-val.txt > /tmp/src-val.txt; head data/tgt-val.txt > /tmp/tgt-val.txt; python preprocess.py -train_src /tmp/src-val.txt -train_tgt /tmp/tgt-val.txt -valid_src /tmp/src-val.txt -valid_tgt /tmp/tgt-val.txt -shard_size 1 -dynamic_dict -save_data /tmp/q -src_vocab_size 1000 -tgt_vocab_size 1000; python train.py -data /tmp/q -rnn_size 2 -batch_size 10 -word_vec_size 5 -report_every 5 -rnn_size 10 -copy_attn -train_steps 10 && rm -rf /tmp/q*.pt
 
 # test im2text preprocessing and training
 - head /tmp/im2text/src-val.txt > /tmp/im2text/src-val-head.txt; head /tmp/im2text/tgt-val.txt > /tmp/im2text/tgt-val-head.txt; python preprocess.py -data_type img -src_dir /tmp/im2text/images -train_src /tmp/im2text/src-val-head.txt -train_tgt /tmp/im2text/tgt-val-head.txt -valid_src /tmp/im2text/src-val-head.txt -valid_tgt /tmp/im2text/tgt-val-head.txt -save_data /tmp/im2text/q; python train.py -model_type img -data /tmp/im2text/q -rnn_size 2 -batch_size 10 -word_vec_size 5 -report_every 5 -rnn_size 10 -train_steps 10 && rm -rf /tmp/im2text/q*.pt
```
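These CI changes swap the old `-max_shard_size` flag (measured in bytes) for `-shard_size` (measured in examples). As a rough illustration of what example-count sharding does during preprocessing, here is a minimal sketch; `shard` is a hypothetical helper for this note, not OpenNMT-py's actual implementation:

```python
def shard(examples, shard_size):
    """Split a sequence of examples into consecutive shards.

    Each shard holds at most shard_size examples, so every shard can be
    serialized on its own (e.g. data.train.0.pt, data.train.1.pt) rather
    than keeping the whole corpus in memory at once.
    """
    for start in range(0, len(examples), shard_size):
        yield examples[start:start + shard_size]


# 7 examples with shard_size=3 split into shards of 3, 3, and 1 examples.
sentences = ["sentence %d" % i for i in range(7)]
print([len(s) for s in shard(sentences, 3)])
```

Counting in examples rather than bytes makes shard boundaries predictable regardless of sentence length, which is why the tiny CI corpora above can use values like 3 or 300.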

CHANGELOG.md

Lines changed: 38 additions & 1 deletion

```diff
@@ -3,10 +3,47 @@
 
 
 ## [Unreleased]
+### Fixes and improvements
+
+## [0.8.2](https://github.com/OpenNMT/OpenNMT-py/tree/0.8.2) (2019-02-16)
+* Update documentation and Library example
+* Revamp args
+* Bug fixes, save moving average in FP32
+* Allow FP32 inference for FP16 models
+
+## [0.8.1](https://github.com/OpenNMT/OpenNMT-py/tree/0.8.1) (2019-02-12)
+* Update documentation
+* Random sampling scores fixes
+* Bug fixes
+
+## [0.8.0](https://github.com/OpenNMT/OpenNMT-py/tree/0.8.0) (2019-02-09)
+* Many fixes and code cleaning thanks @flauted, @guillaumekln
+* Datasets code refactor (thanks @flauted); you need to re-preprocess datasets
 
 ### New features
+* FP16 support: experimental, using Apex; checkpoints may break in a future version
+* Continuous exponential moving average (thanks @francoishernandez, and Marian)
+* Relative position encoding (thanks @francoishernandez, and Google T2T)
+* Deprecate the old beam search; the fast batched beam search supports all options
 
-### Fixes and improvements
+
+## [0.7.2](https://github.com/OpenNMT/OpenNMT-py/tree/0.7.2) (2019-01-31)
+* Many fixes and code cleaning thanks @bpopeters, @flauted, @guillaumekln
+
+### New features
+* Multilevel fields for better handling of text feature embeddings
+
+
+## [0.7.1](https://github.com/OpenNMT/OpenNMT-py/tree/0.7.1) (2019-01-24)
+* Many fixes and code refactoring thanks @bpopeters, @flauted, @guillaumekln
+
+### New features
+* Random sampling thanks @daphnei
+* Enable sharding for huge files at translation
+
+## [0.7.0](https://github.com/OpenNMT/OpenNMT-py/tree/0.7.0) (2019-01-02)
+* Many fixes and code refactoring thanks @benopeters
+* Migrated to PyTorch 1.0
 
 ## [0.6.0](https://github.com/OpenNMT/OpenNMT-py/tree/0.6.0) (2018-11-28)
 * Many fixes and code improvements
```

CONTRIBUTING.md

Lines changed: 82 additions & 5 deletions

````diff
@@ -1,11 +1,88 @@
+# Contributors
+
 OpenNMT-py is a community developed project and we love developer contributions.
 
+## Guidelines
 Before sending a PR, please do this checklist first:
 
-- Please run `onmt/tests/pull_request_chk.sh` and fix any errors. When adding new functionality, also add tests to this script. Included checks:
-    1. flake8 and pep8-naming check for coding style;
+- Please run `tools/pull_request_chk.sh` and fix any errors. When adding new functionality, also add tests to this script. Included checks:
+    1. flake8 check for coding style;
     2. unittest;
     3. continuous integration tests listed in `.travis.yml`.
-- When adding/modifying a class constructor, please use the same argument naming style as its superclass in pytorch.
-- If your change is based on a paper, please include a clear comment and reference in the code.
-- If your function takes/returns tensor arguments, please include assertions to document the sizes. See `GlobalAttention.py` for examples.
+- When adding/modifying a class constructor, please use the same argument naming style as its superclass in PyTorch.
+- If your change is based on a paper, please include a clear comment and reference in the code (more on that below).
+
+### Docstrings
+Above all, try to follow the Google docstring format
+([Napoleon example](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html),
+[Google styleguide](http://google.github.io/styleguide/pyguide.html)).
+This makes it easy to include your contributions in the Sphinx documentation. And, do feel free
+to autodoc your contributions in the API ``.rst`` files in the `docs/source` folder! If you do, check that
+your additions look right.
+
+```bash
+cd docs
+# install some dependencies if necessary:
+# recommonmark, sphinx_rtd_theme, sphinxcontrib-bibtex
+make html
+firefox build/html/main.html  # or your browser of choice
+```
+
+Some particular advice:
+- Try to follow the Python 3 [``typing`` module](https://docs.python.org/3/library/typing.html) conventions when documenting types.
+    - Exception: use "or" instead of unions for more readability.
+- For external types, use the full "import name". Common abbreviations (e.g. ``np``) are acceptable.
+  For ``torch.Tensor`` types, the ``torch.`` is optional.
+- Please don't use tics like `` (`str`) `` or rst directives like `` (:obj:`str`) ``. Napoleon handles types
+  very well without additional help, so avoid the clutter.
+- [Google docstrings don't support multiple returns](https://stackoverflow.com/questions/29221551/can-sphinx-napoleon-document-function-returning-multiple-arguments).
+  For multiple returns, the following works well with Sphinx and is still very readable.
+  ```python
+  def foo(a, b):
+      """This is my docstring.
+
+      Args:
+          a (object): Something.
+          b (class): Another thing.
+
+      Returns:
+          (object, class):
+
+          * a: Something or rather with a long
+            description that spills over.
+          * b: And another thing.
+      """
+
+      return a, b
+  ```
+- When citing a paper, avoid directly linking in the docstring! Add a BibTeX entry to `docs/source/refs.bib`.
+  E.g., to cite "Attention Is All You Need", visit [arXiv](https://arxiv.org/abs/1706.03762), choose the
+  [bibtex](https://dblp.uni-trier.de/rec/bibtex/journals/corr/VaswaniSPUJGKP17) link, search `docs/source/refs.bib`
+  using `CTRL-F` for `DBLP:journals/corr/VaswaniSPUJGKP17`, and if you do not find it then copy-paste the
+  citation into `refs.bib`. Then, in your docstring, use ``:cite:`DBLP:journals/corr/VaswaniSPUJGKP17` ``.
+    - However, a link is better than nothing.
+- Please document tensor shapes. Prefer the format
+  ``(a, b, c)``. This style is easy to read, allows using ``x`` for multiplication, and is common
+  (PyTorch uses a few variations on the parentheses format, AllenNLP uses exactly this format, Fairseq uses
+  the parentheses format with single ticks).
+    - Again, a different style is better than no shape documentation.
+- Please avoid unnecessary space characters, try to capitalize, and try to punctuate.
+
+  For multi-line docstrings, add a blank line after the closing ``"""``.
+  Don't use a blank line before the closing quotes.
+
+  ``""" not this """`` ``"""This."""``
+
+  ```python
+  """
+  Not this.
+  """
+  ```
+  ```python
+  """This."""
+  ```
+
+  This note is the least important. Focus on content first, but remember that consistent docs look good.
+- Be sensible about the first line. Generally, one stand-alone summary line (per the Google guidelines) is good.
+  Sometimes, it's better to cut directly to the args or an extended description. It's always acceptable to have a
+  "trailing" citation.
````
README.md

Lines changed: 4 additions & 5 deletions

````diff
@@ -34,7 +34,7 @@ All dependencies can be installed via:
 pip install -r requirements.txt
 ```
 
-Note that we currently only support PyTorch 0.4.1
+Note that we currently only support PyTorch 1.0.0
 
 ## Features
 
@@ -50,11 +50,9 @@ Note that we currently only support PyTorch 0.4.1
 - ["Attention is all you need"](http://opennmt.net/OpenNMT-py/FAQ.html#how-do-i-use-the-transformer-model)
 - [Multi-GPU](http://opennmt.net/OpenNMT-py/FAQ.html##do-you-support-multi-gpu)
 - Inference time loss functions.
-
-Beta Features (committed):
-- Structured attention
 - [Conv2Conv convolution model]
 - SRU "RNNs faster than CNN" paper
+- FP16 training (mixed-precision with Apex)
 
 ## Quickstart
 
@@ -122,7 +120,7 @@ Click this button to open a Workspace on [FloydHub](https://www.floydhub.com/?ut
 
 ## Pretrained embeddings (e.g. GloVe)
 
-Go to tutorial: [How to use GloVe pre-trained embeddings in OpenNMT-py](http://forum.opennmt.net/t/how-to-use-glove-pre-trained-embeddings-in-opennmt-py/1011)
+Please see the FAQ: [How to use GloVe pre-trained embeddings in OpenNMT-py](http://opennmt.net/OpenNMT-py/FAQ.html#how-do-i-use-pretrained-embeddings-e-g-glove)
 
 ## Pretrained Models
 
@@ -145,6 +143,7 @@ Major contributors are:
 [Paul Tardy](https://github.com/pltrdy) (Ubiqus / Lium)
 [François Hernandez](https://github.com/francoishernandez) (Ubiqus)
 [Jianyu Zhan](http://github.com/jianyuzhan) (Shanghai)
+[Dylan Flaute](http://github.com/flauted) (University of Dayton)
 and more !
 
 OpenNMT-py belongs to the OpenNMT project along with OpenNMT-Lua and OpenNMT-tf.
````
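The README change above redirects GloVe users from the forum tutorial to the FAQ. Independent of OpenNMT-py's own tooling, the GloVe text format itself is simple: one token per line followed by its whitespace-separated vector components. A minimal reader can be sketched as follows; `load_glove` is a hypothetical helper for illustration, not part of the project:

```python
def load_glove(path):
    """Parse a GloVe-format text file into a dict of token -> vector.

    Each line holds a token followed by space-separated floats, e.g.
    ``the 0.418 0.24968 -0.41242 ...``. Hypothetical helper; OpenNMT-py
    ships its own embedding conversion tooling (see the linked FAQ).
    """
    vectors = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            parts = line.rstrip("\n").split(" ")
            if len(parts) < 2:
                continue  # skip blank or malformed lines
            vectors[parts[0]] = [float(x) for x in parts[1:]]
    return vectors
```

In practice the parsed vectors would then be matched against the preprocessed vocabulary to build the embedding matrix, which is exactly what the FAQ entry walks through.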

data/README.md

Lines changed: 0 additions & 7 deletions
This file was deleted.
