SeamlessOnDevice

Running

Tonic committed on Nov 20, 2023

Commit

3eefcff

•

1 Parent(s): e4eae4e

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import functools
+import tempfile
+
+import gradio as gr
+import torch
+import torchaudio
+def speech_to_text(audio_file):
+    audio_input, _ = torchaudio.load(audio_file.name)
+    s2t_model = torch.jit.load("unity_on_device_s2t.ptl")
+    with torch.no_grad():
+        text = s2t_model(audio_input, tgt_lang=TGT_LANG)
+    return text
+def speech_to_speech_translation(audio_file):
+    audio_input, _ = torchaudio.load(audio_file.name)
+    s2st_model = torch.jit.load("unity_on_device.ptl")
+    with torch.no_grad():
+        text, units, waveform = s2st_model(audio_input, tgt_lang=TGT_LANG)
+    output_file = "/tmp/result.wav"
+    torchaudio.save(output_file, waveform.unsqueeze(0), sample_rate=16000)
+    return text, output_file
+# Gradio interfaces
+iface_s2t = gr.Interface(
+    fn=speech_to_text,
+    inputs=gr.inputs.Audio(type="file", label="Upload Audio for Speech to Text"),
+    outputs="text",
+    title="Speech to Text"
+)
+iface_s2st = gr.Interface(
+    fn=speech_to_speech_translation,
+    inputs=gr.inputs.Audio(type="file", label="Upload Audio for Speech to Speech Translation"),
+    outputs=["text", "audio"],
+    title="Speech to Speech Translation"
+)
+# Combine into a tabbed interface
+iface = gr.TabbedInterface([iface_s2t, iface_s2st], ["Speech to Text", "Speech to Speech Translation"])
+iface.launch()