diff --git a/fairseq/options.py b/fairseq/options.py index 920591635a..8c60c6210e 100644 --- a/fairseq/options.py +++ b/fairseq/options.py @@ -305,6 +305,9 @@ def add_preprocess_args(parser): help="number of parallel workers") group.add_argument("--dict-only", action='store_true', help="if true, only builds a dictionary and then exits") + group.add_argument("--no-eos-append", action="store_false", + dest="append_eos", default=True, + help="Do not append EOS to the end of each document") # fmt: on return parser diff --git a/fairseq_cli/preprocess.py b/fairseq_cli/preprocess.py index 2ba9e09338..deb510177f 100644 --- a/fairseq_cli/preprocess.py +++ b/fairseq_cli/preprocess.py @@ -110,7 +110,7 @@ def _make_binary_dataset( binarizer = VocabularyDatasetBinarizer( vocab, - append_eos=True, + append_eos=args.append_eos, ) input_file = "{}{}".format(input_prefix, ("." + lang) if lang is not None else "")