deepugaur committed on
Commit
aa35b70
·
verified ·
1 Parent(s): 3b8f19e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import librosa
2
+ import numpy as np
3
+
4
def extract_features(audio_path):
    """Return the time-averaged MFCC vector for an audio file.

    The file at ``audio_path`` is loaded with librosa, resampled to 16 kHz,
    and 13 MFCC coefficients are computed. The result is the mean of each
    coefficient taken across the transposed matrix's first axis, i.e. one
    averaged value per coefficient.
    """
    waveform, sample_rate = librosa.load(audio_path, sr=16000)
    coefficients = librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=13)
    # Transpose first so that averaging over axis 0 collapses the frame axis.
    per_frame = coefficients.T
    return np.mean(per_frame, axis=0)
8
+
9
# Example usage: compute the averaged MFCC vector for a sample clip.
# NOTE(review): this runs at import time and the path is a placeholder --
# confirm a real file is expected here.
features = extract_features(audio_path="path/to/audio/file.wav")
11
+
12
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer, MarianMTModel, MarianTokenizer
13
+
14
# Checkpoint identifiers for the pre-trained speech-recognition and
# translation models loaded below.
_ASR_CHECKPOINT = "facebook/wav2vec2-large-960h"
_MT_CHECKPOINT = "Helsinki-NLP/opus-mt-en-hi"

# Each model is paired with the tokenizer published under the same checkpoint.
speech_recognition_model = Wav2Vec2ForCTC.from_pretrained(_ASR_CHECKPOINT)
speech_recognition_tokenizer = Wav2Vec2Tokenizer.from_pretrained(_ASR_CHECKPOINT)
translation_model = MarianMTModel.from_pretrained(_MT_CHECKPOINT)
translation_tokenizer = MarianTokenizer.from_pretrained(_MT_CHECKPOINT)
19
+
20
+ from transformers import pipeline
21
+
22
+ # Example inference pipeline
23
def translate_audio(audio_path):
    """Transcribe an audio file and translate the transcription.

    The audio is transcribed with the module-level Wav2Vec2 CTC model and the
    resulting text is passed through the module-level Marian translation
    model (loaded from the ``opus-mt-en-hi`` checkpoint).

    Args:
        audio_path: Path to an audio file readable by ``librosa.load``.

    Returns:
        The first decoded translation string, with special tokens removed.
    """
    # Imported locally: the module used ``torch`` without ever importing it,
    # which raised NameError on the first call.
    import torch

    # Speech recognition.
    # Wav2Vec2 consumes the raw waveform, not summary features: feeding it the
    # 13 averaged MFCC values from extract_features() gives the model a
    # 13-sample "signal" and a meaningless transcription. Load the audio at
    # 16 kHz, the same rate extract_features() uses.
    waveform, _ = librosa.load(audio_path, sr=16000)
    speech_input = speech_recognition_tokenizer(
        waveform, return_tensors="pt"
    ).input_values

    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        logits = speech_recognition_model(speech_input).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = speech_recognition_tokenizer.batch_decode(predicted_ids)[0]

    # Translation.
    # Call the tokenizer directly; ``prepare_seq2seq_batch`` is deprecated.
    batch = translation_tokenizer(
        [transcription], return_tensors="pt", padding=True
    )
    translated = translation_model.generate(**batch)
    translation = translation_tokenizer.batch_decode(
        translated, skip_special_tokens=True
    )[0]

    return translation
34
+
35
# Save the models and tokenizers to local directories.
speech_recognition_model.save_pretrained("path/to/save/wav2vec2")
speech_recognition_tokenizer.save_pretrained("path/to/save/wav2vec2")
translation_model.save_pretrained("path/to/save/opus-mt-en-hi")
translation_tokenizer.save_pretrained("path/to/save/opus-mt-en-hi")

# Upload to Hugging Face.
# The original lines used IPython "!" shell escapes, which are a SyntaxError
# in a plain .py module. Run the same commands through subprocess instead,
# with argument lists so no shell is involved. As with the shell escapes, a
# non-zero exit status is not turned into an exception.
# NOTE(review): `transformers-cli upload` may be unavailable in recent
# transformers releases -- confirm, and consider `push_to_hub` instead.
import subprocess

subprocess.run(["huggingface-cli", "login"])
subprocess.run(["transformers-cli", "upload", "path/to/save/wav2vec2"])
subprocess.run(["transformers-cli", "upload", "path/to/save/opus-mt-en-hi"])
45
+
46
+ from datetime import datetime
47
+ import pytz
48
+
49
def is_after_6_pm_ist():
    """Return True when the current hour in Asia/Kolkata is 18 or later."""
    kolkata_tz = pytz.timezone('Asia/Kolkata')
    hour_now = datetime.now(kolkata_tz).hour
    return not hour_now < 18
53
+
54
# Gate the translation on the time check: outside the allowed window only
# the availability notice is printed.
if not is_after_6_pm_ist():
    print("The translation service is available after 6 PM IST.")
else:
    translation = translate_audio("path/to/audio/file.wav")
    print(translation)