diff --git a/.github/stale.yml b/.github/stale.yml new file mode 100644 index 0000000..2c71efd --- /dev/null +++ b/.github/stale.yml @@ -0,0 +1,17 @@ +# Number of days of inactivity before an issue becomes stale +daysUntilStale: 365 +# Number of days of inactivity before a stale issue is closed +daysUntilClose: 30 +# Issues with these labels will never be considered stale +exemptLabels: + - pinned + - security +# Label to use when marking an issue as stale +staleLabel: wontfix +# Comment to post when marking an issue as stale. Set to `false` to disable +markComment: > + This issue has been automatically marked as stale because it has not had + recent activity. It will be closed if no further activity occurs. Thank you + for your contributions. +# Comment to post when closing a stale issue. Set to `false` to disable +closeComment: false \ No newline at end of file diff --git a/dataset/prepare_data.py b/dataset/prepare_data.py index 1f095ab..46da38a 100644 --- a/dataset/prepare_data.py +++ b/dataset/prepare_data.py @@ -185,7 +185,7 @@ def buffered_and_sliding_window_article_iterator(tokenizer, final_desired_size=1 train_file = args.base_fn + 'train_wiki19_{:04d}.tfrecord'.format(args.fold) with S3TFRecordWriter(train_file) as train_writer: for article in buffered_and_sliding_window_article_iterator(tokenizer, - final_desired_size=max(args.max_seq_length + 1, 1025)): + final_desired_size=args.max_seq_length + 1): writer2use = train_writer assert len(article['input_ids']) == (args.max_seq_length + 1)