# File size: 4,131 Bytes
# 07829aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import streamlit as st
import speech_recognition as sr
from deep_translator import GoogleTranslator
from pydub import AudioSegment
from io import BytesIO
import tempfile

# Page heading
st.title("Speech-to-Text with Translation to English")

# One recognizer instance, reused by the microphone and file-upload paths
recognizer = sr.Recognizer()

# Where the audio comes from
input_method = st.radio(
    "Select Input Method",
    ("Record from Microphone", "Upload Audio File"),
)

# Display name -> language code handed to the recognizer
language_options = dict(English="en", Hindi="hi")
input_language = st.selectbox(
    "Select Input Language",
    options=language_options.keys(),
)
selected_lang_code = language_options[input_language]

# Function to convert audio chunk to text
def speech_to_text(audio_data, lang="en"):  # Default language to English
    """Transcribe audio with the Google recognizer and report failures in the UI.

    Args:
        audio_data: Audio object produced by ``recognizer.listen`` or
            ``recognizer.record``.
        lang: Language code forwarded to ``recognize_google``.

    Returns:
        The recognized text, or ``None`` when recognition failed. Failures are
        shown with ``st.error`` instead of being raised.
    """
    try:
        # Recognize speech
        st.info("Converting speech to text...")
        return recognizer.recognize_google(audio_data, language=lang)
    except sr.UnknownValueError:
        # Raised without a message when the speech is unintelligible, so
        # formatting the exception would leave the error detail empty.
        st.error("Could not understand the audio. Please try again.")
    except sr.RequestError as e:
        # The recognition service was unreachable or rejected the request.
        st.error(f"Could not request results from the service; {e}")
    except Exception as e:
        # UI boundary: surface anything unexpected rather than crash the app.
        st.error(f"Error in speech recognition: {e}")
    return None

# Handle recording from microphone: capture one utterance, transcribe it,
# then translate the transcript to English.
if input_method == "Record from Microphone":
    if st.button("Start Recording"):
        with st.spinner("Recording... Please speak into the microphone."):
            audio_data = None
            try:
                # Capture audio input from the microphone
                with sr.Microphone() as source:
                    st.info("Listening... Please speak now.")
                    recognizer.adjust_for_ambient_noise(source)  # Adjust for background noise
                    audio_data = recognizer.listen(source)
                    st.success("Recording complete!")
            except (OSError, AttributeError) as e:
                # sr.Microphone raises AttributeError when PyAudio is missing;
                # an OSError is expected when no input device is available
                # (e.g. on a hosted server).
                st.error(f"Could not access the microphone: {e}")

            if audio_data is not None:
                try:
                    # Process and convert speech to text
                    detected_text = speech_to_text(audio_data, lang=selected_lang_code)
                except sr.UnknownValueError:
                    st.error("Could not understand the audio. Please try again.")
                    detected_text = None
                except sr.RequestError as e:
                    st.error(f"Could not request results from the service; {e}")
                    detected_text = None

                if detected_text:
                    st.write("Detected Speech Text:", detected_text)

                    # Translate to English
                    translator = GoogleTranslator(source='auto', target='en')
                    try:
                        translated_text = translator.translate(detected_text)
                    except Exception as e:
                        # Translation is a network call; report the failure
                        # instead of letting it crash the page.
                        st.error(f"Error translating the text: {e}")
                    else:
                        st.write("Translated Text (English):", translated_text)

# Process uploaded audio file: decode it, transcribe it in 30-second chunks,
# then translate the combined transcript to English.
if input_method == "Upload Audio File":
    uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3", "ogg"])
    if uploaded_file:
        with st.spinner("Processing uploaded audio..."):
            try:
                # Decode the upload with pydub so every chunk can be re-exported as WAV
                audio = AudioSegment.from_file(BytesIO(uploaded_file.read()))
                # Split audio into 30-second chunks
                chunk_duration_ms = 30000
                recognized_parts = []

                for start_ms in range(0, len(audio), chunk_duration_ms):
                    chunk = audio[start_ms:start_ms + chunk_duration_ms]
                    # Export to an in-memory buffer: the previous
                    # NamedTemporaryFile(delete=False) left one WAV file on
                    # disk per chunk and was never cleaned up.
                    wav_buffer = BytesIO()
                    chunk.export(wav_buffer, format="wav")
                    wav_buffer.seek(0)
                    with sr.AudioFile(wav_buffer) as source:
                        audio_data = recognizer.record(source)
                    detected_text = speech_to_text(audio_data, lang=selected_lang_code)
                    if detected_text:
                        recognized_parts.append(detected_text)

                text_output = " ".join(recognized_parts)

                # Display detected text and translate
                if text_output:
                    st.write("Detected Speech Text:", text_output)
                    translator = GoogleTranslator(source='auto', target='en')
                    translated_text = translator.translate(text_output)
                    st.write("Translated Text (English):", translated_text)
                else:
                    st.warning("No speech could be recognized in the uploaded audio.")

            except Exception as e:
                # UI boundary: decoding, recognition setup and translation can
                # all fail; show the reason instead of a traceback.
                st.error(f"Error processing the audio file: {e}")