Skip to content

Commit

Permalink
dump: Add chunksize
Browse files Browse the repository at this point in the history
  • Loading branch information
revsic committed Jul 17, 2022
1 parent 465fb53 commit 43053b7
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions utils/dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,16 @@ def dumper(speechset: SpeechSet, outdir: str, i: int) -> int:
return i


def mp_dump(speechset: SpeechSet, outdir: str, num_proc: int) -> int:
def mp_dump(speechset: SpeechSet,
outdir: str,
num_proc: int,
chunksize: int = 1) -> int:
"""Dump dataset.
Args:
speechset: target dataset.
outdir: path to the output directory.
num_proc: the number of the process for multiprocessing.
chunksize: size of the imap_unordered chunk.
Returns:
the number of the written data.
"""
Expand All @@ -101,7 +105,8 @@ def mp_dump(speechset: SpeechSet, outdir: str, num_proc: int) -> int:

length = len(speechset)
with mp.Pool(num_proc) as pool:
for _ in tqdm(pool.imap_unordered(partial, range(length)), total=length):
worker = pool.imap_unordered(partial, range(length), chunksize=chunksize)
for _ in tqdm(worker, total=length):
pass

return length

0 comments on commit 43053b7

Please sign in to comment.