import streamlit as st import os import os os.system('pip install transformers') import os os.system('pip install torch') import os os.system('pip install tensorflow') import os os.system('pip install soundfile') import soundfile as sf from transformers import VitsModel, AutoTokenizer import numpy as np import io import torch print(torch.__version__) # Load model and tokenizer model = VitsModel.from_pretrained("facebook/mms-tts-eng") tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng") def generate_speech(text): inputs = tokenizer(text, return_tensors="pt") with torch.no_grad(): output = model(**inputs).waveform # Corrected typo: waveform (not waveformform) # Convert the waveform tensor to a NumPy array waveform = output.squeeze().cpu().numpy() # Convert the waveform to bytes audio_bytes_io = io.BytesIO() sf.write(audio_bytes_io, waveform, samplerate=22050, format='WAV') audio_bytes_io.seek(0) return audio_bytes_io st.title("Text-to-Speech Converter") st.write("Developed by Hiba Bayz") st.write("Enter text below and click 'Generate Speech' to convert it to audio.") # Text input text_input = st.text_area("Text to convert:", "Some example text in the English language") if st.button("Generate Speech"): if text_input: st.write("Generating speech...") audio_bytes_io = generate_speech(text_input) # Display audio in Streamlit st.audio(audio_bytes_io, format="audio/wav") else: st.write("Please enter some text.")