File size: 6,534 Bytes
b817ab5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import requests
import soundfile as sf
import time
import streamlit as st

# UI language names mapped to the two-letter codes sent as "sourceLanguage".
_LANGUAGE_CODES = {
    "telugu": "te",
    "hindi": "hi",
    "marathi": "mr",
    "bengali": "bn",
}

# Seconds to wait on each HTTP call; without a timeout `requests` can block
# forever on an unresponsive server.
_REQUEST_TIMEOUT = 60


def _translation_task(lang):
    """Return the pipeline task describing a `lang` -> English translation."""
    return {
        "taskType": "translation",
        "config": {
            "language": {
                "sourceLanguage": lang,
                "targetLanguage": "en",
            },
        },
    }


def speech_translation(audio_file_path, language):
    """Transcribe an audio file and translate the transcript to English.

    The audio is sent to the ASR service, the returned transcript is stripped
    of "।" and "." characters, and the result is translated through the ULCA
    pipeline (one request to look up the inference endpoint, one to run it).

    Args:
        audio_file_path: Path to the audio file, or None. Paths that do not
            end in ".wav" are re-encoded to "temp_audio.wav" with soundfile
            before upload.
        language: Source language name; one of the keys of `_LANGUAGE_CODES`.

    Returns:
        The translated text on success, otherwise a human-readable error
        message (this function reports service failures through its return
        value rather than by raising).

    Raises:
        OSError: If the audio file cannot be opened.
    """
    if audio_file_path is None:
        return "No audio input provided!"

    # Reject unknown languages up front instead of sending an empty
    # sourceLanguage to the translation service.
    lang = _LANGUAGE_CODES.get(language)
    if lang is None:
        return f"Unsupported language: {language}"

    # Convert audio to .wav format if not already
    if not audio_file_path.endswith(".wav"):
        wav_data, samplerate = sf.read(audio_file_path)
        sf.write("temp_audio.wav", wav_data, samplerate)
        audio_file_path = "temp_audio.wav"

    # ASR processing. The `with` block guarantees the file handle is closed,
    # and the JSON decoding happens inside the `try` so a non-JSON reply is
    # reported as an ASR error instead of crashing.
    try:
        with open(audio_file_path, "rb") as audio_file:
            files = {
                'file': audio_file,
                'language': (None, language),
                'vtt': (None, 'true'),
            }
            response = requests.post(
                'https://asr.iitm.ac.in/ssl_asr/decode',
                files=files,
                timeout=_REQUEST_TIMEOUT,
            )
        asr_payload = response.json()
        print(asr_payload)
        asr_output = asr_payload['transcript']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        return "Error in ASR processing"

    # Drop sentence terminators (Devanagari danda and full stop).
    asr_output = asr_output.replace("।", "")
    asr_output = asr_output.replace(".", "")

    # NOTE(review): the reason for this pause is not evident from the code;
    # kept to preserve the original request pacing.
    time.sleep(1)

    payload = {
        "pipelineTasks": [_translation_task(lang)],
        "pipelineRequestConfig": {
            "pipelineId": "64392f96daac500b55c543cd"
        }
    }
    # NOTE(security): these credentials are hard-coded in source. They should
    # be moved to configuration/secrets and rotated.
    headers = {
        "Content-Type": "application/json",
        "userID": "2aeef589f4584eb08aa0b9c49761aeb8",
        "ulcaApiKey": "02ed10445a-66b0-4061-9030-9b0b8b37a4f1"
    }

    # Step 1: ask ULCA which endpoint and API key to use for this pipeline.
    try:
        response = requests.post(
            'https://meity-auth.ulcacontrib.org/ulca/apis/v0/model/getModelsPipeline',
            json=payload,
            headers=headers,
            timeout=_REQUEST_TIMEOUT,
        )
    except requests.RequestException as exc:
        return f"Error in fetching model pipeline: {exc}"

    if response.status_code != 200:
        return f"Error in fetching model pipeline: status code {response.status_code}"

    try:
        response_data = response.json()
        print(response_data)
        endpoint = response_data["pipelineInferenceAPIEndPoint"]
        callback_url = endpoint["callbackUrl"]
        api_key = endpoint["inferenceApiKey"]
        # The service dictates both the header name and its value.
        headers2 = {
            "Content-Type": "application/json",
            api_key["name"]: api_key["value"],
        }
    except (ValueError, KeyError, TypeError):
        return "Translation failed"

    # Step 2: run the translation on the endpoint returned above.
    compute_payload = {
        "pipelineTasks": [_translation_task(lang)],
        "inputData": {"input": [{"source": asr_output}]},
    }
    try:
        compute_response = requests.post(
            callback_url,
            json=compute_payload,
            headers=headers2,
            timeout=_REQUEST_TIMEOUT,
        )
    except requests.RequestException as exc:
        return f"Error in translation: {exc}"

    if compute_response.status_code != 200:
        return f"Error in translation: status code {compute_response.status_code}"

    try:
        compute_response_data = compute_response.json()
        print(compute_response_data)
        return compute_response_data["pipelineResponse"][0]["output"][0]["target"]
    except (ValueError, KeyError, IndexError, TypeError):
        return "Translation failed"

# Streamlit UI
# Page header shown at the top of the app.
st.title("Speech Translation")
st.write("Record your speech and get the English translation.")

# Audio Recorder HTML
# Injects raw HTML/JS for an in-browser recorder: Start/Stop buttons, an
# <audio> preview element, and a script that captures the microphone with
# MediaRecorder, previews the clip, and POSTs it as a base64 data URL (JSON
# body) to '/upload-audio'.
# NOTE(review): no '/upload-audio' handler is defined in this file, and nothing
# here writes st.session_state['recorded_audio'] (read by the Translate handler
# below) -- confirm how the recording is meant to reach the Python side.
# NOTE(review): the Blob is labelled 'audio/wav', but that label does not
# transcode MediaRecorder's output -- verify the actual container format.
# NOTE(review): verify that <script> tags passed through
# st.markdown(unsafe_allow_html=True) are actually executed by the browser;
# if they are not, the buttons below are inert.
st.markdown("""
    <h3>Record Audio</h3>
    <button id="startButton">Start Recording</button>
    <button id="stopButton" disabled>Stop Recording</button>
    <audio id="recordedAudio" controls></audio>
    <script>
        let chunks = [];
        let recorder;
        let audioURL;
        let recordedAudio = document.getElementById('recordedAudio');
        
        document.getElementById('startButton').onclick = function() {
            navigator.mediaDevices.getUserMedia({ audio: true })
                .then(stream => {
                    recorder = new MediaRecorder(stream);
                    recorder.ondataavailable = e => chunks.push(e.data);
                    recorder.onstop = e => {
                        let blob = new Blob(chunks, { type: 'audio/wav' });
                        chunks = [];
                        audioURL = URL.createObjectURL(blob);
                        recordedAudio.src = audioURL;

                        // Send the recorded audio blob to Streamlit
                        var reader = new FileReader();
                        reader.readAsDataURL(blob); 
                        reader.onloadend = function() {
                            var base64data = reader.result;                
                            fetch('/upload-audio', {
                                method: 'POST',
                                body: JSON.stringify({ audio: base64data }),
                                headers: { 'Content-Type': 'application/json' }
                            }).then(response => response.json())
                              .then(data => console.log(data));
                        }
                    };
                    recorder.start();
                    document.getElementById('startButton').disabled = true;
                    document.getElementById('stopButton').disabled = false;
                });
        };
        
        document.getElementById('stopButton').onclick = function() {
            recorder.stop();
            document.getElementById('startButton').disabled = false;
            document.getElementById('stopButton').disabled = true;
        };
    </script>
""", unsafe_allow_html=True)

# Input widgets: an optional audio upload and the spoken (source) language.
uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3"])
language = st.selectbox("Select Language", ["telugu", "hindi", "marathi", "bengali"])

if st.button("Translate"):
    audio_path = None
    if uploaded_file is not None:
        # Keep the upload's real extension. Saving an MP3 as "*.wav" would make
        # speech_translation skip its WAV conversion and send MP3 bytes that
        # are mislabelled as WAV to the ASR service.
        _, dot, extension = uploaded_file.name.rpartition(".")
        suffix = f".{extension.lower()}" if dot else ".wav"
        audio_path = f"uploaded_audio{suffix}"
        with open(audio_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
    elif st.session_state.get('recorded_audio'):
        # NOTE(review): nothing visible in this file sets 'recorded_audio';
        # presumably it is expected to hold a path to the recorded clip.
        audio_path = st.session_state['recorded_audio']

    if audio_path is not None:
        result = speech_translation(audio_path, language)
        st.text_area("Translation", result)
    else:
        st.write("Please upload an audio file or record your speech and select a language.")