PyTorchKorea · hrxorxm · Jul 2, 2022 · Jul 5, 2022 · Taeyoung96 · Jul 4, 2022
diff --git a/beginner_source/audio_io_tutorial.py b/beginner_source/audio_io_tutorial.py
@@ -1,13 +1,13 @@
 # -*- coding: utf-8 -*-
 """
-Audio I/O
+오디오 I/O
-=========
+==========
 
-``torchaudio`` integrates ``libsox`` and provides a rich set of audio I/O.
+``torchaudio`` 는 ``libsox`` 를 통합하고 풍부한 오디오 I/O 를 제공합니다.
 """
 
-# When running this tutorial in Google Colab, install the required packages
-# with the following.
+# 이번 튜토리얼을 구글 코랩(Google Colab)에서 실행할 때,
+# 필요한 패키지들을 아래와 같이 설치해주세요.
 # !pip install torchaudio boto3
 
 import torch
@@ -17,16 +17,16 @@
 print(torchaudio.__version__)
 
 ######################################################################
-# Preparing data and utility functions (skip this section)
+# 데이터 및 유용한 함수 준비하기 (이번 섹션 생략)
 # --------------------------------------------------------
 #
 
-#@title Prepare data and utility functions. {display-mode: "form"}
+#@title 데이터 및 유용한 함수 준비하기 {display-mode: "form"}
 #@markdown
-#@markdown You do not need to look into this cell.
-#@markdown Just execute once and you are good to go.
+#@markdown 이 부분을 자세히 보실 필요는 없습니다.
+#@markdown 한번만 실행하면 충분합니다.
 #@markdown
-#@markdown In this tutorial, we will use a speech data from [VOiCES dataset](https://iqtlabs.github.io/voices/), which is licensed under Creative Commos BY 4.0.
+#@markdown 이번 튜토리얼에서는 [VOiCES dataset](https://iqtlabs.github.io/voices/) 의 음성 데이터를 사용하며, 이 데이터는 Creative Commons BY 4.0 라이선스를 따릅니다.
 
 
 import io
@@ -166,54 +166,53 @@ def inspect_file(path):
   print(f" - {torchaudio.info(path)}")
 
 ######################################################################
-# Quering audio metadata
-# ----------------------
+# 오디오 메타데이터 쿼리하기
+# --------------------------
 #
-# Function ``torchaudio.info`` fetches audio metadata. You can provide
-# a path-like object or file-like object.
+# 함수 ``torchaudio.info`` 는 오디오 메타데이터를 가져옵니다. 
+# 경로 혹은 파일 형식의 객체를 파라미터로 넣을 수 있습니다.
 #
 
 metadata = torchaudio.info(SAMPLE_WAV_PATH)
 print(metadata)
 
 ######################################################################
-# Where
-#
-# -  ``sample_rate`` is the sampling rate of the audio
-# -  ``num_channels`` is the number of channels
-# -  ``num_frames`` is the number of frames per channel
-# -  ``bits_per_sample`` is bit depth
-# -  ``encoding`` is the sample coding format
-#
-# ``encoding`` can take on one of the following values:
-#
-# -  ``"PCM_S"``: Signed integer linear PCM
-# -  ``"PCM_U"``: Unsigned integer linear PCM
-# -  ``"PCM_F"``: Floating point linear PCM
-# -  ``"FLAC"``: Flac, `Free Lossless Audio
-#    Codec <https://xiph.org/flac/>`__
-# -  ``"ULAW"``: Mu-law,
+# 위 결과에서
+#
+# -  ``sample_rate`` 는 오디오의 샘플링 비율입니다.
+# -  ``num_channels`` 는 채널의 개수입니다.
+# -  ``num_frames`` 는 채널별 프레임의 개수입니다.
+# -  ``bits_per_sample`` 은 샘플당 비트 수(bit depth)입니다.
+# -  ``encoding`` 은 샘플 코딩 형식입니다.
+#
+# ``encoding`` 은 다음 값들 중 하나가 될 수 있습니다:
+#
+# -  ``"PCM_S"``: 부호가 있는 정수 선형 PCM
+# -  ``"PCM_U"``: 부호가 없는 정수 선형 PCM
+# -  ``"PCM_F"``: 부동소수점 선형 PCM
+# -  ``"FLAC"``: Flac, `자유 무손실 오디오 코덱(Free Lossless Audio
+#    Codec) <https://xiph.org/flac/>`__
+# -  ``"ULAW"``: 뮤 법칙(Mu-law),
 #    [`wikipedia <https://en.wikipedia.org/wiki/%CE%9C-law_algorithm>`__]
-# -  ``"ALAW"``: A-law
+# -  ``"ALAW"``: A 법칙(A-law)
 #    [`wikipedia <https://en.wikipedia.org/wiki/A-law_algorithm>`__]
-# -  ``"MP3"`` : MP3, MPEG-1 Audio Layer III
+# -  ``"MP3"`` : MP3, MPEG-1 오디오 레이어 III
 # -  ``"VORBIS"``: OGG Vorbis [`xiph.org <https://xiph.org/vorbis/>`__]
-# -  ``"AMR_NB"``: Adaptive Multi-Rate
+# -  ``"AMR_NB"``: 적응형 다중 속도(Adaptive Multi-Rate)
 #    [`wikipedia <https://en.wikipedia.org/wiki/Adaptive_Multi-Rate_audio_codec>`__]
-# -  ``"AMR_WB"``: Adaptive Multi-Rate Wideband
+# -  ``"AMR_WB"``: 적응형 다중 속도 광대역(Adaptive Multi-Rate Wideband)
 #    [`wikipedia <https://en.wikipedia.org/wiki/Adaptive_Multi-Rate_Wideband>`__]
 # -  ``"OPUS"``: Opus [`opus-codec.org <https://opus-codec.org/>`__]
 # -  ``"GSM"``: GSM-FR
 #    [`wikipedia <https://en.wikipedia.org/wiki/Full_Rate>`__]
-# -  ``"UNKNOWN"`` None of above
+# -  ``"UNKNOWN"``: 위의 어느 것에도 해당하지 않음
 #
 
 ######################################################################
-# **Note**
+# **참고**
 #
-# -  ``bits_per_sample`` can be ``0`` for formats with compression and/or
-#    variable bit rate (such as MP3).
-# -  ``num_frames`` can be ``0`` for GSM-FR format.
+# -  압축 및/또는 가변 비트 전송률 형식(예: MP3)의 경우 ``bits_per_sample`` 은 ``0`` 일 수 있습니다.
+# -  GSM-FR 형식의 경우 ``num_frames`` 는 ``0`` 일 수 있습니다.
 #
 
 metadata = torchaudio.info(SAMPLE_MP3_PATH)
@@ -224,10 +223,10 @@ def inspect_file(path):
 
 
 ######################################################################
-# Querying file-like object
+# 파일 형식의 객체 쿼리하기
 # ~~~~~~~~~~~~~~~~~~~~~~~~~
 #
-# ``info`` works on file-like objects.
+# ``info`` 는 파일 형식의 객체에서 동작합니다.
 #
 
 print("Source:", SAMPLE_WAV_URL)
@@ -236,15 +235,14 @@ def inspect_file(path):
 print(metadata)
 
 ######################################################################
-# **Note** When passing a file-like object, ``info`` does not read
-# all of the underlying data; rather, it reads only a portion
-# of the data from the beginning.
-# Therefore, for a given audio format, it may not be able to retrieve the
-# correct metadata, including the format itself.
-# The following example illustrates this.
+# **참고** 파일 형식의 객체를 넘길 때, ``info`` 는 모든 기본 데이터를 읽는 
+# 것이 아니라 처음부터 데이터의 일부만 읽습니다.
+# 따라서 오디오 형식에 따라서는 형식 자체를 포함한
+# 올바른 메타데이터를 가져오지 못할 수 있습니다.
+# 다음 예시에서 이를 보여줍니다.
 #
-# -  Use argument ``format`` to specify the audio format of the input.
-# -  The returned metadata has ``num_frames = 0``
+# -  ``format`` 인자를 사용하여 입력의 오디오 형식을 지정합니다.
+# -  반환된 메타데이터에서는 ``num_frames = 0`` 으로 나타납니다.
 #
 
 print("Source:", SAMPLE_MP3_URL)
@@ -255,21 +253,20 @@ def inspect_file(path):
 print(metadata)
 
 ######################################################################
-# Loading audio data into Tensor
+# 텐서에 오디오 데이터 로드하기
 # ------------------------------
 #
-# To load audio data, you can use ``torchaudio.load``.
+# 오디오 데이터를 로드하기 위해, ``torchaudio.load`` 를 사용할 수 있습니다.
 #
-# This function accepts a path-like object or file-like object as input.
+# 이 함수는 경로 혹은 파일 형식의 객체를 입력으로 받아들입니다.
 #
-# The returned value is a tuple of waveform (``Tensor``) and sample rate
-# (``int``).
+# 반환되는 값은 파형 (``Tensor``) 과 샘플링 비율 (``int``) 의 튜플입니다.
 #
-# By default, the resulting tensor object has ``dtype=torch.float32`` and
-# its value range is normalized within ``[-1.0, 1.0]``.
+# 기본적으로, 결과 텐서 객체는 ``dtype=torch.float32`` 이고,
+# 값의 범위는 ``[-1.0, 1.0]`` 내에서 정규화됩니다.
 #
-# For the list of supported format, please refer to `the torchaudio
-# documentation <https://pytorch.org/audio>`__.
+# 지원되는 형식의 목록은 `torchaudio 문서 <https://pytorch.org/audio>`__
+# 를 참고하세요.
 #
 
 waveform, sample_rate = torchaudio.load(SAMPLE_WAV_SPEECH_PATH)
@@ -281,59 +278,57 @@ def inspect_file(path):
 
 
 ######################################################################
-# Loading from file-like object
+# 파일 형식의 객체에서 로드하기
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #
-# ``torchaudio``\ ’s I/O functions now support file-like objects. This
-# allows for fetching and decoding audio data from locations
-# within and beyond the local file system.
-# The following examples illustrate this.
+# ``torchaudio`` 의 I/O 함수는 이제 파일 형식의 객체를 지원합니다.
+# 이를 통해 로컬 파일 시스템 내부 및 외부 위치에서 오디오 데이터를
+# 가져오고 디코딩할 수 있습니다.
+# 다음 예제는 이를 보여줍니다.
 #
 
-# Load audio data as HTTP request
+# HTTP 요청으로 오디오 데이터 로드
 with requests.get(SAMPLE_WAV_SPEECH_URL, stream=True) as response:
   waveform, sample_rate = torchaudio.load(response.raw)
 plot_specgram(waveform, sample_rate, title="HTTP datasource")
 
-# Load audio from tar file
+# tar 파일에서 오디오 로드
 with tarfile.open(SAMPLE_TAR_PATH, mode='r') as tarfile_:
   fileobj = tarfile_.extractfile(SAMPLE_TAR_ITEM)
   waveform, sample_rate = torchaudio.load(fileobj)
 plot_specgram(waveform, sample_rate, title="TAR file")
 
-# Load audio from S3
+# S3에서 오디오 로드
 client = boto3.client('s3', config=Config(signature_version=UNSIGNED))
 response = client.get_object(Bucket=S3_BUCKET, Key=S3_KEY)
 waveform, sample_rate = torchaudio.load(response['Body'])
 plot_specgram(waveform, sample_rate, title="From S3")
 
 
 ######################################################################
-# Tips on slicing
-# ~~~~~~~~~~~~~~~
+# 슬라이싱(slicing)을 위한 팁
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #
-# Providing ``num_frames`` and ``frame_offset`` arguments restricts
-# decoding to the corresponding segment of the input.
+# ``num_frames`` 와 ``frame_offset`` 인자를 지정하면
+# 디코딩이 입력의 해당 세그먼트로 제한됩니다.
 #
-# The same result can be achieved using vanilla Tensor slicing,
-# (i.e. ``waveform[:, frame_offset:frame_offset+num_frames]``). However,
-# providing ``num_frames`` and ``frame_offset`` arguments is more
-# efficient.
+# 평범한 텐서 슬라이싱(즉, ``waveform[:, frame_offset:frame_offset+num_frames]``)
+# 을 사용하여 동일한 결과를 얻을 수 있습니다.
+# 하지만 ``num_frames`` 와 ``frame_offset`` 인자를 지정하는 것이 더 효율적입니다.
 #
-# This is because the function will end data acquisition and decoding
-# once it finishes decoding the requested frames. This is advantageous
-# when the audio data are transferred via network as the data transfer will
-# stop as soon as the necessary amount of data is fetched.
+# 이는 요청된 프레임의 디코딩이 완료되면 함수가 데이터 수집 및 디코딩을 
+# 종료하기 때문입니다. 필요한 양의 데이터를 가져오는 즉시 데이터 전송이 중지되기 때문에,
+# 오디오 데이터가 네트워크를 통해 전송될 때 유리합니다.
 #
-# The following example illustrates this.
+# 다음 예제에서 이를 보여줍니다.
 #
 
-# Illustration of two different decoding methods.
-# The first one will fetch all the data and decode them, while
-# the second one will stop fetching data once it completes decoding.
-# The resulting waveforms are identical.
+# 두 가지 다른 디코딩 방법입니다.
+# 첫번째 방법은 모든 데이터를 가져온 후 디코딩합니다.
+# 두번째 방법은 디코딩이 완료되면 데이터를 가져오는 것을 중지합니다.
+# 결과로 나오는 파형은 동일합니다.
 
-frame_offset, num_frames = 16000, 16000  # Fetch and decode the 1 - 2 seconds
+frame_offset, num_frames = 16000, 16000  # 1-2 초 가져오기 및 디코딩
 
 print("Fetching all the data...")
 with requests.get(SAMPLE_WAV_SPEECH_URL, stream=True) as response:
@@ -353,42 +348,40 @@ def inspect_file(path):
 
 
 ######################################################################
-# Saving audio to file
-# --------------------
+# 오디오를 파일에 저장하기
+# ------------------------
 #
-# To save audio data in formats interpretable by common applications,
-# you can use ``torchaudio.save``.
+# 일반적인 응용 프로그램에서 해석 가능한 형식으로 오디오 데이터를 저장하려면
+# ``torchaudio.save`` 를 사용할 수 있습니다.
 #
-# This function accepts a path-like object or file-like object.
+# 이 함수는 경로 혹은 파일 형식의 객체를 입력으로 받아들입니다.
 #
-# When passing a file-like object, you also need to provide argument ``format``
-# so that the function knows which format it should use. In the
-# case of a path-like object, the function will infer the format from
-# the extension. If you are saving to a file without an extension, you need
-# to provide argument ``format``.
+# 파일 형식의 객체를 전달할 때, 함수에서 사용할 형식을 알 수 있도록 
+# ``format`` 인자를 넣어줘야 합니다. 경로 형식의 객체의 경우, 
+# 함수에서 그 경로의 확장자로부터 형식을 추론하게 됩니다. 
+# 확장자가 없는 파일에 저장하는 경우에는 ``format`` 인자를 넣어줘야 합니다.
 #
-# When saving WAV-formatted data, the default encoding for ``float32`` Tensor
-# is 32-bit floating-point PCM. You can provide arguments ``encoding`` and
-# ``bits_per_sample`` to change this behavior. For example, to save data
-# in 16-bit signed integer PCM, you can do the following.
+# WAV 형식의 데이터를 저장할 때, ``float32`` 텐서의 기본 인코딩은 32비트 
+# 부동소수점 PCM 입니다. ``encoding`` 과 ``bits_per_sample`` 인자를 넣어서
+# 이 동작을 변경할 수 있습니다. 예를 들어, 데이터를 16비트 부호 있는 정수 PCM으로
+# 저장하려면, 다음과 같이 작업합니다.
 #
-# **Note** Saving data in encodings with lower bit depth reduces the
-# resulting file size but also precision.
+# **참고** 비트 깊이가 낮은 인코딩으로 데이터를 저장하면 결과 파일 크기가
+# 줄어들 뿐만 아니라 정확도도 떨어집니다.
 #
 
 
 waveform, sample_rate = get_sample()
 print_stats(waveform, sample_rate=sample_rate)
 
-# Save without any encoding option.
-# The function will pick up the encoding which
-# the provided data fit
+# 인코딩 옵션 없이 저장하기
+# 이 함수는 제공된 데이터에 적합한 인코딩을 선택합니다.
 path = "save_example_default.wav"
 torchaudio.save(path, waveform, sample_rate)
 inspect_file(path)
 
-# Save as 16-bit signed integer Linear PCM
-# The resulting file occupies half the storage but loses precision
+# 16비트 부호 있는 정수 선형 PCM으로 저장
+# 결과 파일은 저장 공간을 절반만 차지하지만 정확도가 떨어집니다.
 path = "save_example_PCM_S16.wav"
 torchaudio.save(
     path, waveform, sample_rate,
@@ -397,7 +390,7 @@ def inspect_file(path):
 
 
 ######################################################################
-# ``torchaudio.save`` can also handle other formats. To name a few:
+# ``torchaudio.save`` 는 다른 형식도 처리할 수 있습니다. 몇 가지 예를 들면:
 #
 
 waveform, sample_rate = get_sample(resample=8000)
@@ -419,18 +412,17 @@ def inspect_file(path):
 
 
 ######################################################################
-# Saving to file-like object
+# 파일 형식 객체에 저장하기
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~
 #
-# Similar to the other I/O functions, you can save audio to file-like
-# objects. When saving to a file-like object, argument ``format`` is
-# required.
+# 다른 I/O 함수와 마찬가지로, 오디오를 파일 형식의 객체로 저장할 수 있습니다.
+# 파일 형식의 객체로 저장할 때는 ``format`` 인자가 필요합니다.
 #
 
 
 waveform, sample_rate = get_sample()
 
-# Saving to bytes buffer
+# 바이트 버퍼에 저장하기
 buffer_ = io.BytesIO()
 torchaudio.save(buffer_, waveform, sample_rate, format="wav")