From e2fbba9cea9e060da4844558cefa960764b28a13 Mon Sep 17 00:00:00 2001
From: AK391 <ahsengradio@gmail.com>
Date: Mon, 14 Jun 2021 17:48:46 +0000
Subject: [PATCH 01/18] gradio

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)
diff --git a/requirements.txt b/requirements.txt
index ecacbad3..7dd1b352 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,3 +8,4 @@ tensorboard==2.3.0
 torch==1.6.0
 torchvision==0.7.0
 Unidecode==1.1.1
+gradio

From 91c1dbe1475d68a7d486986ee055f93a6122961e Mon Sep 17 00:00:00 2001
From: AK391 <ahsengradio@gmail.com>
Date: Mon, 14 Jun 2021 17:50:46 +0000
Subject: [PATCH 02/18] gradio

---
 gradiodemo.py | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 gradiodemo.py

diff --git a/gradiodemo.py b/gradiodemo.py
new file mode 100644
index 00000000..6f440740
--- /dev/null
+++ b/gradiodemo.py
@@ -0,0 +1,60 @@
+%matplotlib inline
+import matplotlib.pyplot as plt
+import IPython.display as ipd
+
+import os
+import json
+import math
+import torch
+from torch import nn
+from torch.nn import functional as F
+from torch.utils.data import DataLoader
+
+import commons
+import utils
+from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate
+from models import SynthesizerTrn
+from text.symbols import symbols
+from text import text_to_sequence
+
+from scipy.io.wavfile import write
+import gradio as gr
+import scipy.io.wavfile
+import numpy as np
+
+
+def get_text(text, hps):
+    text_norm = text_to_sequence(text, hps.data.text_cleaners)
+    if hps.data.add_blank:
+        text_norm = commons.intersperse(text_norm, 0)
+    text_norm = torch.LongTensor(text_norm)
+    return text_norm
+
+hps = utils.get_hparams_from_file("./configs/ljs_base.json")
+net_g = SynthesizerTrn(
+    len(symbols),
+    hps.data.filter_length // 2 + 1,
+    hps.train.segment_size // hps.data.hop_length,
+    **hps.model)
+_ = net_g.eval()
+
+_ = utils.load_checkpoint("pretrained_ljs.pth", net_g, None)
+def inference(text):
+    stn_tst = get_text(text, hps)
+    with torch.no_grad():
+        x_tst = stn_tst.unsqueeze(0)
+        x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
+        audio = net_g.infer(x_tst, x_tst_lengths, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][0,0].data.float().numpy()
+        scipy.io.wavfile.write("out.wav", hps.data.sampling_rate, audio)
+        return "./out.wav"
+
+
+inputs = gr.inputs.Textbox(label="Input Text")
+outputs =  gr.outputs.File(label="Output Audio")
+
+
+title = "VITS"
+description = "demo for VITS: Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech. To use it, simply upload your text, or click one of the examples to load them. Read more at the links below."
+article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2106.06103'>Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech</a> | <a href='https://github.com/jaywalnut310/vits'>Github Repo</a></p>"
+
+gr.Interface(inference, inputs, outputs, title=title, description=description, article=article).launch(debug=True)
\ No newline at end of file

From 9c36366a7131d2c18f05b6b1f206e54e92434dfa Mon Sep 17 00:00:00 2001
From: AK391 <ahsengradio@gmail.com>
Date: Mon, 14 Jun 2021 18:13:01 +0000
Subject: [PATCH 03/18] syntax fix

---
 gradiodemo.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gradiodemo.py b/gradiodemo.py
index 6f440740..5b5ee3b5 100644
--- a/gradiodemo.py
+++ b/gradiodemo.py
@@ -1,4 +1,3 @@
-%matplotlib inline
 import matplotlib.pyplot as plt
 import IPython.display as ipd
 

From 3c858286c48bd8cb417e3879ed708a44ba0051fb Mon Sep 17 00:00:00 2001
From: AK391 <81195143+AK391@users.noreply.github.com>
Date: Mon, 14 Jun 2021 14:25:20 -0400
Subject: [PATCH 04/18] remove

---
 gradiodemo.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gradiodemo.py b/gradiodemo.py
index 5b5ee3b5..90467ef9 100644
--- a/gradiodemo.py
+++ b/gradiodemo.py
@@ -1,5 +1,4 @@
 import matplotlib.pyplot as plt
-import IPython.display as ipd
 
 import os
 import json
@@ -56,4 +55,4 @@ def inference(text):
 description = "demo for VITS: Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech. To use it, simply upload your text, or click one of the examples to load them. Read more at the links below."
 article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2106.06103'>Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech</a> | <a href='https://github.com/jaywalnut310/vits'>Github Repo</a></p>"
 
-gr.Interface(inference, inputs, outputs, title=title, description=description, article=article).launch(debug=True)
\ No newline at end of file
+gr.Interface(inference, inputs, outputs, title=title, description=description, article=article).launch(debug=True)

From 1b8777f9449fe27f32ff62ec3aa69e926855ab13 Mon Sep 17 00:00:00 2001
From: AK391 <81195143+AK391@users.noreply.github.com>
Date: Mon, 14 Jun 2021 14:39:15 -0400
Subject: [PATCH 05/18] add model

---
 gradiodemo.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gradiodemo.py b/gradiodemo.py
index 90467ef9..34f31d7a 100644
--- a/gradiodemo.py
+++ b/gradiodemo.py
@@ -19,6 +19,9 @@
 import gradio as gr
 import scipy.io.wavfile
 import numpy as np
+import torchtext
+
+torchtext.utils.download_from_url("https://drive.google.com/uc?id=1RILKwUdjjBBngB17JHwhZNBEaW4Mr-Ml", root=".")
 
 
 def get_text(text, hps):

From 3b8ea149a9042ddda4bc09ca4f4194f7e669975a Mon Sep 17 00:00:00 2001
From: AK391 <81195143+AK391@users.noreply.github.com>
Date: Mon, 14 Jun 2021 14:39:30 -0400
Subject: [PATCH 06/18] torchtext

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 7dd1b352..45241668 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,3 +9,4 @@ torch==1.6.0
 torchvision==0.7.0
 Unidecode==1.1.1
 gradio
+torchtext

From 2d91da9425f13cc368d54922d406176b752edf12 Mon Sep 17 00:00:00 2001
From: AK391 <81195143+AK391@users.noreply.github.com>
Date: Mon, 14 Jun 2021 14:40:14 -0400
Subject: [PATCH 07/18] replace link

---
 gradiodemo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gradiodemo.py b/gradiodemo.py
index 34f31d7a..eed2579b 100644
--- a/gradiodemo.py
+++ b/gradiodemo.py
@@ -21,7 +21,7 @@
 import numpy as np
 import torchtext
 
-torchtext.utils.download_from_url("https://drive.google.com/uc?id=1RILKwUdjjBBngB17JHwhZNBEaW4Mr-Ml", root=".")
+torchtext.utils.download_from_url("https://drive.google.com/uc?id=1q86w74Ygw2hNzYP9cWkeClGT5X25PvBT", root=".")
 
 
 def get_text(text, hps):

From 36e73b56cea2cf25f449f9b638004a98dfe071c1 Mon Sep 17 00:00:00 2001
From: AK391 <81195143+AK391@users.noreply.github.com>
Date: Mon, 14 Jun 2021 14:48:01 -0400
Subject: [PATCH 08/18] subprocess

---
 gradiodemo.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/gradiodemo.py b/gradiodemo.py
index eed2579b..61a8c18e 100644
--- a/gradiodemo.py
+++ b/gradiodemo.py
@@ -21,6 +21,25 @@
 import numpy as np
 import torchtext
 
+import sys
+from subprocess import call
+
+def run_cmd(command):
+    try:
+        print(command)
+        call(command, shell=True)
+    except KeyboardInterrupt:
+        print("Process interrupted")
+        sys.exit(1)
+        
+
+
+run_cmd("cd monotonic_align")
+run_cmd("sudo python setup.py build_ext --inplace")
+run_cmd("sudo apt-get install espeak -y")
+run_cmd("cd ..")
+
+
 torchtext.utils.download_from_url("https://drive.google.com/uc?id=1q86w74Ygw2hNzYP9cWkeClGT5X25PvBT", root=".")
 
 

From c2ab5d82fedbd1244855657f86fef0f6e1d6c201 Mon Sep 17 00:00:00 2001
From: AK391 <81195143+AK391@users.noreply.github.com>
Date: Mon, 14 Jun 2021 15:08:07 -0400
Subject: [PATCH 09/18] dir change

---
 gradiodemo.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gradiodemo.py b/gradiodemo.py
index 61a8c18e..e820192b 100644
--- a/gradiodemo.py
+++ b/gradiodemo.py
@@ -33,11 +33,12 @@ def run_cmd(command):
         sys.exit(1)
         
 
-
+os.chdir("./monotonic_align")
 run_cmd("cd monotonic_align")
 run_cmd("sudo python setup.py build_ext --inplace")
 run_cmd("sudo apt-get install espeak -y")
-run_cmd("cd ..")
+os.chdir("../vits")
+
 
 
 torchtext.utils.download_from_url("https://drive.google.com/uc?id=1q86w74Ygw2hNzYP9cWkeClGT5X25PvBT", root=".")

From 206fe321e863b9d6335496438e93257ed5dedec8 Mon Sep 17 00:00:00 2001
From: AK391 <81195143+AK391@users.noreply.github.com>
Date: Mon, 14 Jun 2021 15:08:44 -0400
Subject: [PATCH 10/18] remove

---
 gradiodemo.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gradiodemo.py b/gradiodemo.py
index e820192b..6d145e30 100644
--- a/gradiodemo.py
+++ b/gradiodemo.py
@@ -34,7 +34,6 @@ def run_cmd(command):
         
 
 os.chdir("./monotonic_align")
-run_cmd("cd monotonic_align")
 run_cmd("sudo python setup.py build_ext --inplace")
 run_cmd("sudo apt-get install espeak -y")
 os.chdir("../vits")

From a46fa2dc7ab13b2600c1c4298d54efd71b77044b Mon Sep 17 00:00:00 2001
From: AK391 <81195143+AK391@users.noreply.github.com>
Date: Mon, 14 Jun 2021 15:38:34 -0400
Subject: [PATCH 11/18] change

---
 gradiodemo.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/gradiodemo.py b/gradiodemo.py
index 6d145e30..1ddb5a15 100644
--- a/gradiodemo.py
+++ b/gradiodemo.py
@@ -11,15 +11,6 @@
 import commons
 import utils
 from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate
-from models import SynthesizerTrn
-from text.symbols import symbols
-from text import text_to_sequence
-
-from scipy.io.wavfile import write
-import gradio as gr
-import scipy.io.wavfile
-import numpy as np
-import torchtext
 
 import sys
 from subprocess import call
@@ -36,7 +27,19 @@ def run_cmd(command):
 os.chdir("./monotonic_align")
 run_cmd("sudo python setup.py build_ext --inplace")
 run_cmd("sudo apt-get install espeak -y")
-os.chdir("../vits")
+os.chdir("..")
+
+from models import SynthesizerTrn
+from text.symbols import symbols
+from text import text_to_sequence
+
+from scipy.io.wavfile import write
+import gradio as gr
+import scipy.io.wavfile
+import numpy as np
+import torchtext
+
+
 
 
 

From 63314da1ab8cf2494c52b02f14c8b9e9ef5babbe Mon Sep 17 00:00:00 2001
From: AK391 <81195143+AK391@users.noreply.github.com>
Date: Mon, 14 Jun 2021 16:39:24 -0400
Subject: [PATCH 12/18] dir change

---
 gradiodemo.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/gradiodemo.py b/gradiodemo.py
index 1ddb5a15..898ed777 100644
--- a/gradiodemo.py
+++ b/gradiodemo.py
@@ -22,9 +22,12 @@ def run_cmd(command):
     except KeyboardInterrupt:
         print("Process interrupted")
         sys.exit(1)
-        
-
-os.chdir("./monotonic_align")
+  
+current = os.getcwd()
+print(current)
+full = current + "/monotonic_align"
+print(full)
+os.chdir(full)
 run_cmd("sudo python setup.py build_ext --inplace")
 run_cmd("sudo apt-get install espeak -y")
 os.chdir("..")

From db48911c5a13eee4f1018bb1de780e3e1ff3558e Mon Sep 17 00:00:00 2001
From: AK391 <81195143+AK391@users.noreply.github.com>
Date: Mon, 14 Jun 2021 17:59:16 -0400
Subject: [PATCH 13/18] print

---
 gradiodemo.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gradiodemo.py b/gradiodemo.py
index 898ed777..b2ba690c 100644
--- a/gradiodemo.py
+++ b/gradiodemo.py
@@ -28,9 +28,11 @@ def run_cmd(command):
 full = current + "/monotonic_align"
 print(full)
 os.chdir(full)
+print(os.getcwd())
 run_cmd("sudo python setup.py build_ext --inplace")
 run_cmd("sudo apt-get install espeak -y")
 os.chdir("..")
+print(os.getcwd())
 
 from models import SynthesizerTrn
 from text.symbols import symbols

From 2020ee3a6d0b34291ded148248cf372760cb60b3 Mon Sep 17 00:00:00 2001
From: AK391 <81195143+AK391@users.noreply.github.com>
Date: Mon, 14 Jun 2021 18:14:38 -0400
Subject: [PATCH 14/18] remove sudo

---
 gradiodemo.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gradiodemo.py b/gradiodemo.py
index b2ba690c..2aa6fee3 100644
--- a/gradiodemo.py
+++ b/gradiodemo.py
@@ -29,8 +29,8 @@ def run_cmd(command):
 print(full)
 os.chdir(full)
 print(os.getcwd())
-run_cmd("sudo python setup.py build_ext --inplace")
-run_cmd("sudo apt-get install espeak -y")
+run_cmd("python3 setup.py build_ext --inplace")
+run_cmd("apt-get install espeak -y")
 os.chdir("..")
 print(os.getcwd())
 

From 5c88a50de3de0a3c3eb5a1b78d11c56060f89924 Mon Sep 17 00:00:00 2001
From: AK391 <81195143+AK391@users.noreply.github.com>
Date: Mon, 14 Jun 2021 18:54:00 -0400
Subject: [PATCH 15/18] upgrade

---
 requirements.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 45241668..9c1a41b3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,8 +5,8 @@ numpy==1.18.5
 phonemizer==2.2.1
 scipy==1.5.2
 tensorboard==2.3.0
-torch==1.6.0
-torchvision==0.7.0
+torch
+torchvision
 Unidecode==1.1.1
 gradio
 torchtext

From b28873598889e9db3369168cf246fda7214a1527 Mon Sep 17 00:00:00 2001
From: AK391 <81195143+AK391@users.noreply.github.com>
Date: Mon, 14 Jun 2021 19:08:14 -0400
Subject: [PATCH 16/18] add examples

---
 gradiodemo.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/gradiodemo.py b/gradiodemo.py
index 2aa6fee3..495b2b5c 100644
--- a/gradiodemo.py
+++ b/gradiodemo.py
@@ -82,7 +82,12 @@ def inference(text):
 
 
 title = "VITS"
-description = "demo for VITS: Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech. To use it, simply upload your text, or click one of the examples to load them. Read more at the links below."
+description = "demo for VITS: Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech. To use it, simply add your text, or click one of the examples to load them. Read more at the links below."
 article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2106.06103'>Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech</a> | <a href='https://github.com/jaywalnut310/vits'>Github Repo</a></p>"
 
-gr.Interface(inference, inputs, outputs, title=title, description=description, article=article).launch(debug=True)
+examples = [
+ ["We propose VITS, Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech."],
+ ["Our method adopts variational inference augmented with normalizing flows and an adversarial training process, which improves the expressive power of generative modeling."]   
+]
+
+gr.Interface(inference, inputs, outputs, title=title, description=description, article=article, examples=examples).launch()

From 9f47bc9f4700d46cffb1604e9497cf82b8b3082f Mon Sep 17 00:00:00 2001
From: AK391 <81195143+AK391@users.noreply.github.com>
Date: Mon, 14 Jun 2021 19:22:57 -0400
Subject: [PATCH 17/18] lines 5

---
 gradiodemo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gradiodemo.py b/gradiodemo.py
index 495b2b5c..681f3065 100644
--- a/gradiodemo.py
+++ b/gradiodemo.py
@@ -77,7 +77,7 @@ def inference(text):
         return "./out.wav"
 
 
-inputs = gr.inputs.Textbox(label="Input Text")
+inputs = gr.inputs.Textbox(lines=5, label="Input Text")
 outputs =  gr.outputs.File(label="Output Audio")
 
 

From 1b6db7437cd5224baae9776139f890ca315fad8a Mon Sep 17 00:00:00 2001
From: AK391 <81195143+AK391@users.noreply.github.com>
Date: Mon, 14 Jun 2021 19:23:56 -0400
Subject: [PATCH 18/18] link

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index f7883f8c..b1c7890c 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,8 @@ Visit our [demo](https://jaywalnut310.github.io/vits-demo/index.html) for audio
 
 We also provide the [pretrained models](https://drive.google.com/drive/folders/1ksarh-cJf3F5eKJjLVWY0X1j1qsQqiS2?usp=sharing).
 
+** Update note: [Gradio Web Demo](https://gradio.app/hub/AK391/vits)
+
 ** Update note: Thanks to [Rishikesh (ऋषिकेश)](https://github.com/jaywalnut310/vits/issues/1), our interactive TTS demo is now available on [Colab Notebook](https://colab.research.google.com/drive/1CO61pZizDj7en71NQG_aqqKdGaA_SaBf?usp=sharing).
 
 <table style="width:100%">