deepugaur committed on
Commit e26c7a0 · verified · 1 Parent(s): 8bc1ae5

Update app.py

Files changed (1)
  1. app.py +44 -52
app.py CHANGED
@@ -1,59 +1,51 @@
- from flask import Flask, request, jsonify
- from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, MarianMTModel, MarianTokenizer
- import torch
- import pytz
- from datetime import datetime
- from pydub import AudioSegment
- import io
-
- app = Flask(__name__)
-
- # Load pre-trained models and tokenizers
- asr_model_name = "facebook/wav2vec2-large-960h"
- translation_model_name = "Helsinki-NLP/opus-mt-en-hi"
-
- asr_processor = Wav2Vec2Processor.from_pretrained(asr_model_name)
- asr_model = Wav2Vec2ForCTC.from_pretrained(asr_model_name)
- translator = MarianMTModel.from_pretrained(translation_model_name)
- tokenizer = MarianTokenizer.from_pretrained(translation_model_name)
-
- # Function to convert audio file to text
- def audio_to_text(audio_file):
-     audio_input = AudioSegment.from_file(audio_file)
-     audio_array = np.array(audio_input.get_array_of_samples())
-     inputs = asr_processor(audio_array, sampling_rate=16000, return_tensors="pt", padding=True)
-     with torch.no_grad():
-         logits = asr_model(inputs.input_values).logits
-     predicted_ids = torch.argmax(logits, dim=-1)
-     transcription = asr_processor.batch_decode(predicted_ids)[0]
-     return transcription
-
- # Function to translate text from English to Hindi
- def translate_text(text):
      inputs = tokenizer(text, return_tensors="pt", padding=True)
-     translated = translator.generate(**inputs)
-     translated_text = tokenizer.batch_decode(translated, skip_special_tokens=True)[0]
      return translated_text

- # Function to check if the current time is after 6 PM IST
- def is_after_6pm_ist():
-     ist = pytz.timezone('Asia/Kolkata')
-     current_time = datetime.now(ist)
-     return current_time.hour >= 18
-
- @app.route('/translate', methods=['POST'])
- def translate_audio():
-     if not is_after_6pm_ist():
-         return jsonify({'error': 'Service available only after 6 PM IST'}), 403

-     if 'audio' not in request.files:
-         return jsonify({'error': 'No audio file provided'}), 400

-     audio_file = request.files['audio']
-     text = audio_to_text(audio_file)
-     translated_text = translate_text(text)
-     return jsonify({'translation': translated_text})

- if __name__ == '__main__':
-     app.run(host='0.0.0.0', port=5000)
+ import librosa
+ import numpy as np
+ import tensorflow as tf
+
+ def load_audio(file_path):
+     audio, sr = librosa.load(file_path, sr=16000)
+     return audio, sr
+
+ def extract_features(audio, sr):
+     mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
+     return mfccs
+
+ # Load pre-trained model (example: DeepSpeech)
+ model = tf.keras.models.load_model('deepspeech_model.h5')
+
+ def speech_to_text(audio):
+     features = extract_features(audio, 16000)
+     features = np.expand_dims(features, axis=0)  # Model expects batch dimension
+     text = model.predict(features)
+     return text
+
+ from transformers import MarianMTModel, MarianTokenizer
+
+ # Load pre-trained translation model
+ model_name = 'Helsinki-NLP/opus-mt-en-hi'
+ tokenizer = MarianTokenizer.from_pretrained(model_name)
+ translation_model = MarianMTModel.from_pretrained(model_name)
+
+ def translate_text(text, tokenizer, model):
      inputs = tokenizer(text, return_tensors="pt", padding=True)
+     translated = model.generate(**inputs)
+     translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
      return translated_text

+ import datetime

+ def process_audio_for_translation(audio_file_path):
+     current_time = datetime.datetime.now().time()
+     if current_time < datetime.time(18, 0):
+         return "Translation service is available only after 6 PM IST"

+     audio, sr = load_audio(audio_file_path)
+     english_text = speech_to_text(audio)
+     hindi_text = translate_text(english_text, tokenizer, translation_model)
+     return hindi_text

+ # Example usage
+ audio_file_path = 'path_to_audio_file.wav'
+ translated_text = process_audio_for_translation(audio_file_path)
+ print(translated_text)