import datetime
from zoneinfo import ZoneInfo

import librosa
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer, MarianMTModel, MarianTokenizer


def preprocess_audio(file_path):
    # Wav2Vec2 consumes raw 16 kHz waveforms (not mel spectrograms),
    # so resample on load and return the waveform directly.
    y, sr = librosa.load(file_path, sr=16000)
    return y, sr


# Load pre-trained models.
# Note: "facebook/wav2vec2-large-xlsr-53" is a pretrained-only checkpoint;
# for usable English transcription, substitute a CTC-fine-tuned checkpoint
# (e.g. "facebook/wav2vec2-base-960h").
speech_to_text_model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-xlsr-53")
speech_to_text_tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-large-xlsr-53")
translation_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-en-hi")
translation_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-hi")


def translate_audio(file_path):
    # Preprocess the audio into a 16 kHz waveform
    waveform, sampling_rate = preprocess_audio(file_path)

    # Convert speech to text: the tokenizer takes the raw waveform,
    # not the file path
    audio_input = speech_to_text_tokenizer(waveform, return_tensors="pt").input_values
    with torch.no_grad():
        logits = speech_to_text_model(audio_input).logits
    predicted_ids = logits.argmax(dim=-1)
    transcription = speech_to_text_tokenizer.batch_decode(predicted_ids)[0]

    # Translate the English transcription to Hindi
    translation_input = translation_tokenizer(transcription, return_tensors="pt")
    translated_output = translation_model.generate(**translation_input)
    translation = translation_tokenizer.batch_decode(translated_output, skip_special_tokens=True)[0]
    return translation


def should_translate():
    # Check against Indian Standard Time explicitly, not the server's local time
    now = datetime.datetime.now(ZoneInfo("Asia/Kolkata"))
    return now.hour >= 18


def handle_translation(file_path):
    if should_translate():
        return translate_audio(file_path)
    return "Translation is only available after 6 PM IST."
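

# Minimal usage sketch: the audio path below is hypothetical and assumes a
# short English recording sampled (or resampleable) to 16 kHz; the model
# checkpoints above must have downloaded successfully before this call.
if __name__ == "__main__":
    print(handle_translation("sample_english_clip.wav"))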