Spaces:

navidved
/

gooya-asr

Running

App Files Files Community

navidved committed on May 12

Commit

5421e82

verified ·

1 Parent(s): cd130f1

Update app.py

Browse files

Files changed (1) hide show

app.py +116 -86

app.py CHANGED Viewed

@@ -1,100 +1,130 @@
-import os
-import time
-import requests
-import streamlit as st
-# ---------- Config ----------
-ASR_API_URL = os.getenv("ASR_API_URL", "")   # define these env vars in your space settings!
-AUTH_TOKEN  = os.getenv("AUTH_TOKEN", "")
-VIOLET_MAIN = "#7F3FBF"
-VIOLET_LIGHT = "#C3A6FF"
-st.set_page_config(page_title="Gooya ASR v1.4", page_icon="🎤")
-custom_css = f"""
-    <style>
-    .gooya-title {{
-        color:#fff;
-        background:linear-gradient(90deg,{VIOLET_MAIN} 0%,{VIOLET_LIGHT} 100%);
-        border-radius:12px;padding:20px 10px;margin-bottom:12px;text-align:center;
-        font-size: 1.6em; font-weight: bold;
-    }}
-    .gooya-badge {{
-        display:inline-block;background:{VIOLET_MAIN};color:#fff;
-        border-radius:16px;padding:6px 16px;font-size:.97rem;margin-top:4px;
-    }}
-    </style>
-"""
-st.markdown(custom_css, unsafe_allow_html=True)
-st.markdown('<div class="gooya-title">Gooya ASR v1.4</div>', unsafe_allow_html=True)
-# ---------- Upload Audio ----------
-col_input, col_output = st.columns([1, 1])
-with col_input:
-    audio_file = st.file_uploader(
-        "Audio Input (upload, mp3/wav, up to 30s)",
-        type=["mp3", "wav", "m4a", "ogg"]
-    )
-    # Microphone input (optional): Streamlit 1.26+ has st.audio_recorder
-    # audio_file = st.audio_recorder("Record audio (up to 30s)") # EXPERIMENTAL
-with col_output:
-    transcription = st.text_area("📝 Transcription", "", height=120, key="trans_tb")
-    processing_time = st.text_input("⏱️ Processing Time", "", key="ptime_tb")
-btn_col1, btn_col2 = st.columns([1,1])
-with btn_col1:
-    transcribe_btn = st.button("Transcribe", use_container_width=True, type="primary")
-with btn_col2:
-    clear_btn = st.button("Clear", use_container_width=True, type="secondary")
-st.markdown("""
-**Guidelines**
-- Maximum audio length: **30 seconds**
-- Audio content should be in Persian.
-- Both transcription and processing time are displayed immediately.
-See the [Persian ASR Leaderboard](https://huggingface.co/spaces/navidved/open_persian_asr_leaderboard) for benchmarks.
-""")
-# ---------- Transcribe Function ----------
-def transcribe_audio_streamlit(file_obj):
     if not ASR_API_URL or not AUTH_TOKEN:
-        return "❌ Error: ASR_API_URL or AUTH_TOKEN is not set.", ""
     headers = {
         "accept": "application/json",
         "Authorization": f"Bearer {AUTH_TOKEN}",
     }
     start = time.time()
     try:
-        files = {"file": (file_obj.name, file_obj, "audio/mpeg")}
-        resp = requests.post(ASR_API_URL, headers=headers, files=files, timeout=120)
     except Exception as e:
-        return f"❌ Error while calling ASR API: {e}", ""
     elapsed = time.time() - start
     if resp.status_code == 200:
-        data = resp.json()
-        text = data.get("transcription", "No transcription returned.")
-        return text, f"{data.get('time', elapsed):.2f} s"
-    return f"❌ Error: {resp.status_code}, {resp.text}", ""
-# ---------- Logic ----------
-if transcribe_btn and audio_file:
-    with st.spinner("Transcribing ..."):
-        text, ptime = transcribe_audio_streamlit(audio_file)
-        st.session_state["trans_tb"] = text
-        st.session_state["ptime_tb"] = ptime
-elif transcribe_btn and not audio_file:
-    st.warning("لطفاً فایل صوتی را انتخاب کنید.")
-if clear_btn:
-    st.session_state["trans_tb"] = ""
-    st.session_state["ptime_tb"] = ""
-# ---------- Audio Playback ----------
-if audio_file:
-    st.audio(audio_file, format="audio/mp3")

+import os, time, requests, gradio as gr
+# Log the installed Gradio version at startup.
+print("Gradio version:", gr.__version__)
+# ---------- Environment Variables ----------
+# ASR endpoint and bearer token, both read from the process environment.
+ASR_API_URL = os.environ.get("ASR_API_URL")
+AUTH_TOKEN = os.environ.get("AUTH_TOKEN")
+if not (ASR_API_URL and AUTH_TOKEN):
+    # Warn at import time instead of failing, so the UI can still start.
+    print("⚠️  ASR_API_URL or AUTH_TOKEN is not set; API calls will fail.")
+# ---------- Core Transcription Function ----------
+def transcribe_audio(file_path: str | None):
+    """Upload an audio file to the ASR API and return values for the UI outputs.
+
+    Args:
+        file_path: Path of the audio file to send, or None when the audio
+            input is empty.
+
+    Returns:
+        A ``(text, processing_time, file_path)`` tuple. ``text`` is the
+        transcription or an error message, ``processing_time`` is formatted
+        as "<seconds> s" (empty on failure), and ``file_path`` is echoed
+        back unchanged (None when no file was supplied).
+    """
+    # Nothing to transcribe when the audio input is empty.
+    if file_path is None:
+        return "Audio cleared.", "", None
     if not ASR_API_URL or not AUTH_TOKEN:
+        return "❌ Error: ASR_API_URL or AUTH_TOKEN is not set.", "", file_path
     headers = {
         "accept": "application/json",
         "Authorization": f"Bearer {AUTH_TOKEN}",
     }
     start = time.time()
     try:
+        with open(file_path, "rb") as f:
+            # Send only the file's base name as the upload filename.
+            file_name = os.path.basename(file_path)
+            # NOTE(review): the upload is always labelled audio/mpeg, whatever
+            # the real format is; confirm the API does not rely on this value.
+            files = {"file": (file_name, f, "audio/mpeg")}
+            resp = requests.post(ASR_API_URL, headers=headers, files=files, timeout=120)
+    except requests.exceptions.Timeout:
+        return "❌ Error: Request timed out after 120 seconds.", "", file_path
     except Exception as e:
+        # Boundary handler: covers unreadable files and any other request failure.
+        return f"❌ Error during API call or file handling: {e}", "", file_path
     elapsed = time.time() - start
     if resp.status_code == 200:
+        try:
+            data = resp.json()
+        except ValueError:
+            # ValueError is the common base of the JSON decode errors raised
+            # by resp.json(), so this works across requests versions.
+            return f"❌ Error: Could not decode JSON response. Status: {resp.status_code}, Response: {resp.text}", "", file_path
+        if not isinstance(data, dict):
+            # Valid JSON that is not an object has no fields to read.
+            return f"❌ Error: Unexpected JSON response. Status: {resp.status_code}, Response: {resp.text}", "", file_path
+        text = data.get("transcription", "No transcription returned.")
+        # Prefer the API-reported time; fall back to the measured round trip
+        # when the field is missing or not numeric.
+        try:
+            seconds = float(data.get("time", elapsed))
+        except (TypeError, ValueError):
+            seconds = elapsed
+        return text, f"{seconds:.2f} s", file_path
+    # Any other status: surface the API's own error details.
+    return f"❌ Error: API returned status {resp.status_code}. Response: {resp.text}", "", file_path
+# ---------- Styling ----------
+# Brand colours interpolated into the stylesheet below.
+VIOLET_MAIN = "#7F3FBF"
+VIOLET_LIGHT = "#C3A6FF"
+# Stylesheet passed to gr.Blocks(css=...). Inside the f-string, doubled braces
+# are literal CSS braces and single braces interpolate the colours above.
+# Defines the #gooya-title banner (gradient background) and the .gooya-badge pill.
+custom_css = f"""
+#gooya-title {{
+  color:#fff;
+  background:linear-gradient(90deg,{VIOLET_MAIN} 0%,{VIOLET_LIGHT} 100%);
+  border-radius:12px;padding:20px 10px;margin-bottom:12px;
+}}
+.gooya-badge {{
+  display:inline-block;background:{VIOLET_MAIN};color:#fff;
+  border-radius:16px;padding:6px 16px;font-size:.97rem;margin-top:4px;
+}}
+"""
+# ---------- UI ----------
+# Build the page: a title banner, an input/output row, an action-button row,
+# usage guidelines, and the two button callbacks.
+with gr.Blocks(css=custom_css, title="Gooya ASR v1.4") as demo:
+    # Title banner; elem_id matches the #gooya-title rule in custom_css.
+    gr.Markdown("# Gooya ASR v1.4 Transcription", elem_id="gooya-title")
+    with gr.Row():
+        with gr.Column():
+            # type="filepath": the callback receives a path string, which
+            # transcribe_audio opens itself.
+            audio_input = gr.Audio(
+                label="Audio Input (upload or record, up to 30 s)",
+                type="filepath",
+                sources=["upload", "microphone"],
+            )
+        with gr.Column():
+            processing_time_tb = gr.Textbox(
+                label="⏱️ Processing Time",
+                interactive=False,
+                elem_classes="gooya-badge", # matches the .gooya-badge rule in custom_css
+            )
+            transcription_tb = gr.Textbox(
+                label="📝 Transcription",
+                lines=5,
+                show_copy_button=True,
+                placeholder="The transcription will appear here...",
+                elem_id="gooya-textbox", # no matching rule in custom_css
+            )
+    with gr.Row():
+        btn_transcribe = gr.Button("Transcribe", variant="primary")
+        btn_clear      = gr.Button("Clear",      variant="secondary")
+    gr.Markdown(
+        """
+**Guidelines**
+- Maximum audio length: **30 seconds**
+- Audio content should be in Persian.
+- Both transcription and processing time are displayed upon completion.
+- See the [Persian ASR Leaderboard](https://huggingface.co/spaces/navidved/open_persian_asr_leaderboard) for benchmarks.
+"""
+    )
+    # ---------- Callbacks ----------
+    # transcribe_audio returns three values, mapped in order onto the outputs below.
+    btn_transcribe.click(
+        fn=transcribe_audio,
+        inputs=[audio_input],
+        outputs=[transcription_tb, processing_time_tb, audio_input], # third value is the input path echoed back
+    )
+    # Reset handler for the Clear button.
+    def clear_all():
+        """Return empty values for the transcription, time and audio outputs."""
+        return "", "", None # order matches the outputs list of btn_clear.click
+    btn_clear.click(
+        fn=clear_all, # takes no inputs
+        inputs=None,
+        outputs=[transcription_tb, processing_time_tb, audio_input],
+    )
+# ---------- Launch ----------
+if __name__ == "__main__":
+    # Start the app only when this file is run as a script.
+    # NOTE(review): share=True asks Gradio for a public share link. An earlier note here called
+    # this necessary on hosted platforms such as HF Spaces; Gradio's documentation describes share
+    # links as a tunnel for locally running apps, so confirm the flag is needed (or honoured) there.
+    demo.queue().launch(debug=True, share=True) # <-- Changed share=False to share=True