diff --git a/README.md b/README.md
index f7883f8c..b1c7890c 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,8 @@ Visit our [demo](https://jaywalnut310.github.io/vits-demo/index.html) for audio
We also provide the [pretrained models](https://drive.google.com/drive/folders/1ksarh-cJf3F5eKJjLVWY0X1j1qsQqiS2?usp=sharing).
+** Update note: A [Gradio Web Demo](https://gradio.app/hub/AK391/vits) is now available.
+
** Update note: Thanks to [Rishikesh (ऋषिकेश)](https://github.com/jaywalnut310/vits/issues/1), our interactive TTS demo is now available on [Colab Notebook](https://colab.research.google.com/drive/1CO61pZizDj7en71NQG_aqqKdGaA_SaBf?usp=sharing).
diff --git a/gradiodemo.py b/gradiodemo.py
new file mode 100644
index 00000000..681f3065
--- /dev/null
+++ b/gradiodemo.py
@@ -0,0 +1,89 @@
+import os
+import sys
+from subprocess import call
+
+import torch
+
+import commons
+import utils
+
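+# Run a shell command; bail out if the user interrupts it.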
+def run_cmd(command):
+ try:
+ print(command)
+ call(command, shell=True)
+ except KeyboardInterrupt:
+ print("Process interrupted")
+ sys.exit(1)
+
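+# Build the monotonic alignment search extension in place, then install
+# espeak, the backend phonemizer uses for text normalization.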
+os.chdir(os.path.join(os.getcwd(), "monotonic_align"))
+run_cmd("python3 setup.py build_ext --inplace")
+run_cmd("apt-get install espeak -y")
+os.chdir("..")
+
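+# Imported only after the build step, since models depends on the
+# compiled monotonic_align extension.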
+from models import SynthesizerTrn
+from text.symbols import symbols
+from text import text_to_sequence
+
+import gradio as gr
+import scipy.io.wavfile
+import torchtext
+
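+# Fetch the pretrained LJ Speech checkpoint from Google Drive; it is saved
+# as pretrained_ljs.pth, which load_checkpoint expects below.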
+torchtext.utils.download_from_url("https://drive.google.com/uc?id=1q86w74Ygw2hNzYP9cWkeClGT5X25PvBT", root=".")
+
+
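+# Convert raw text into the sequence of symbol IDs the model expects,
+# optionally interspersing blank tokens between symbols.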
+def get_text(text, hps):
+ text_norm = text_to_sequence(text, hps.data.text_cleaners)
+ if hps.data.add_blank:
+ text_norm = commons.intersperse(text_norm, 0)
+ text_norm = torch.LongTensor(text_norm)
+ return text_norm
+
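+# Build the single-speaker synthesizer from the LJ Speech config and
+# load the pretrained weights in eval mode.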
+hps = utils.get_hparams_from_file("./configs/ljs_base.json")
+net_g = SynthesizerTrn(
+ len(symbols),
+ hps.data.filter_length // 2 + 1,
+ hps.train.segment_size // hps.data.hop_length,
+ **hps.model)
+_ = net_g.eval()
+
+_ = utils.load_checkpoint("pretrained_ljs.pth", net_g, None)
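+
+# Synthesize speech for the input text and write it to out.wav.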
+def inference(text):
+    stn_tst = get_text(text, hps)
+    with torch.no_grad():
+        x_tst = stn_tst.unsqueeze(0)
+        x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
+        audio = net_g.infer(x_tst, x_tst_lengths, noise_scale=.667,
+                            noise_scale_w=0.8, length_scale=1)[0][0, 0].data.float().numpy()
+    scipy.io.wavfile.write("out.wav", hps.data.sampling_rate, audio)
+    return "./out.wav"
+
+
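+# Gradio interface: a text box in, a playable audio file out.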
+inputs = gr.inputs.Textbox(lines=5, label="Input Text")
+outputs = gr.outputs.Audio(type="file", label="Output Audio")
+
+
+title = "VITS"
+description = "demo for VITS: Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech. To use it, simply add your text, or click one of the examples to load them. Read more at the links below."
+article = "Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech | Github Repo
"
+
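+# Example sentences (phrased after the VITS abstract) users can load with one click.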
+examples = [
+ ["We propose VITS, Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech."],
+ ["Our method adopts variational inference augmented with normalizing flows and an adversarial training process, which improves the expressive power of generative modeling."]
+]
+
+gr.Interface(inference, inputs, outputs, title=title, description=description, article=article, examples=examples).launch()
diff --git a/requirements.txt b/requirements.txt
index ecacbad3..9c1a41b3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,6 +5,8 @@ numpy==1.18.5
phonemizer==2.2.1
scipy==1.5.2
tensorboard==2.3.0
-torch==1.6.0
-torchvision==0.7.0
+torch
+torchvision
Unidecode==1.1.1
+gradio
+torchtext