Kishorekumar7 committed on
Commit
4dffac9
·
verified ·
1 Parent(s): 5ce3342

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -11
app.py CHANGED
@@ -1,9 +1,10 @@
1
  import streamlit as st
2
- import torch
3
  import os
 
4
  import soundfile as sf
5
  from groq import Groq
6
  from diffusers import AutoPipelineForText2Image
 
7
 
8
  # Load API keys
9
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
@@ -22,7 +23,7 @@ def transcribe(audio_path):
22
  transcription = client.audio.transcriptions.create(
23
  file=(audio_path, file.read()),
24
  model="whisper-large-v3",
25
- language="ta", # Tamil
26
  response_format="verbose_json"
27
  )
28
  return transcription["text"]
@@ -54,21 +55,25 @@ st.title("Tamil Speech to Image & Story Generator")
54
  # Choose input method
55
  input_method = st.radio("Choose Input Method:", ("Record Audio", "Upload Audio"))
56
 
 
 
57
  if input_method == "Record Audio":
58
- recorded_audio = st.audio(st.file_uploader("Record your Tamil speech", type=["wav", "mp3"]))
 
 
 
 
 
 
59
  elif input_method == "Upload Audio":
60
  uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3"])
61
-
62
- if st.button("Generate"):
63
- if input_method == "Record Audio" and recorded_audio:
64
- audio_data, samplerate = sf.read(recorded_audio)
65
- audio_path = "recorded_audio.wav"
66
- sf.write(audio_path, audio_data, samplerate)
67
- elif input_method == "Upload Audio" and uploaded_file:
68
  audio_path = "uploaded_audio.wav"
69
  with open(audio_path, "wb") as f:
70
  f.write(uploaded_file.getbuffer())
71
- else:
 
 
72
  st.error("Please provide an audio file.")
73
  st.stop()
74
 
 
1
  import streamlit as st
 
2
  import os
3
+ import torch
4
  import soundfile as sf
5
  from groq import Groq
6
  from diffusers import AutoPipelineForText2Image
7
+ from streamlit_webrtc import webrtc_streamer, AudioRecorder
8
 
9
  # Load API keys
10
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 
23
  transcription = client.audio.transcriptions.create(
24
  file=(audio_path, file.read()),
25
  model="whisper-large-v3",
26
+ language="ta",
27
  response_format="verbose_json"
28
  )
29
  return transcription["text"]
 
55
  # Choose input method
56
  input_method = st.radio("Choose Input Method:", ("Record Audio", "Upload Audio"))
57
 
58
+ audio_path = None
59
+
60
  if input_method == "Record Audio":
61
+ st.subheader("Record your Tamil speech")
62
+ recorder = webrtc_streamer(key="record_audio", audio=True)
63
+
64
+ if recorder.audio_receiver:
65
+ audio_data = recorder.audio_receiver.get_frames() # Get recorded audio
66
+ audio_path = "recorded_audio.wav"
67
+ sf.write(audio_path, audio_data, 16000) # Save recorded audio
68
  elif input_method == "Upload Audio":
69
  uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3"])
70
+ if uploaded_file:
 
 
 
 
 
 
71
  audio_path = "uploaded_audio.wav"
72
  with open(audio_path, "wb") as f:
73
  f.write(uploaded_file.getbuffer())
74
+
75
+ if st.button("Generate"):
76
+ if not audio_path:
77
  st.error("Please provide an audio file.")
78
  st.stop()
79