navidved committed on
Commit
5421e82
·
verified ·
1 Parent(s): cd130f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -86
app.py CHANGED
@@ -1,100 +1,130 @@
1
- import os
2
- import time
3
- import requests
4
- import streamlit as st
5
 
6
# ---------- Config ----------
# Endpoint and bearer token of the ASR backend; define both env vars in the
# Space settings.
ASR_API_URL = os.getenv("ASR_API_URL", "")
AUTH_TOKEN = os.getenv("AUTH_TOKEN", "")

VIOLET_MAIN = "#7F3FBF"
VIOLET_LIGHT = "#C3A6FF"

st.set_page_config(page_title="Gooya ASR v1.4", page_icon="🎤")

custom_css = f"""
<style>
.gooya-title {{
    color:#fff;
    background:linear-gradient(90deg,{VIOLET_MAIN} 0%,{VIOLET_LIGHT} 100%);
    border-radius:12px;padding:20px 10px;margin-bottom:12px;text-align:center;
    font-size: 1.6em; font-weight: bold;
}}
.gooya-badge {{
    display:inline-block;background:{VIOLET_MAIN};color:#fff;
    border-radius:16px;padding:6px 16px;font-size:.97rem;margin-top:4px;
}}
</style>
"""
st.markdown(custom_css, unsafe_allow_html=True)
st.markdown('<div class="gooya-title">Gooya ASR v1.4</div>', unsafe_allow_html=True)


# ---------- Transcribe Function ----------
def transcribe_audio_streamlit(file_obj):
    """Upload *file_obj* to the ASR API and return ``(text, processing_time)``.

    Args:
        file_obj: File-like object with a ``name`` attribute (the value
            returned by ``st.file_uploader``).

    Returns:
        A 2-tuple of strings. On success: the transcription and the processing
        time formatted as ``"<seconds> s"``. On any failure: an error message
        starting with "❌" and an empty string. The function never raises.
    """
    if not ASR_API_URL or not AUTH_TOKEN:
        return "❌ Error: ASR_API_URL or AUTH_TOKEN is not set.", ""

    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {AUTH_TOKEN}",
    }

    start = time.time()
    try:
        # NOTE(review): the part is always labelled audio/mpeg although wav,
        # m4a and ogg uploads are accepted -- confirm the backend ignores it.
        files = {"file": (file_obj.name, file_obj, "audio/mpeg")}
        resp = requests.post(ASR_API_URL, headers=headers, files=files, timeout=120)
    except Exception as e:
        return f"❌ Error while calling ASR API: {e}", ""
    elapsed = time.time() - start

    if resp.status_code != 200:
        return f"❌ Error: {resp.status_code}, {resp.text}", ""

    # A 200 response may still carry a non-JSON or unexpectedly shaped body.
    try:
        data = resp.json()
    except ValueError:
        return f"❌ Error: invalid JSON in ASR response: {resp.text}", ""
    if not isinstance(data, dict):
        return f"❌ Error: unexpected ASR response: {resp.text}", ""

    text = data.get("transcription", "No transcription returned.")
    # Prefer the server-reported time; fall back to the measured round trip
    # when the field is missing or not numeric.
    try:
        seconds = float(data.get("time", elapsed))
    except (TypeError, ValueError):
        seconds = elapsed
    return text, f"{seconds:.2f} s"


# ---------- Upload Audio ----------
col_input, col_output = st.columns([1, 1])

with col_input:
    audio_file = st.file_uploader(
        "Audio Input (upload, mp3/wav, up to 30s)",
        type=["mp3", "wav", "m4a", "ogg"],
    )

btn_col1, btn_col2 = st.columns([1, 1])

with btn_col1:
    transcribe_btn = st.button("Transcribe", use_container_width=True, type="primary")
with btn_col2:
    clear_btn = st.button("Clear", use_container_width=True, type="secondary")

# ---------- Logic ----------
# Streamlit forbids assigning st.session_state[<key>] once the widget that
# owns <key> has been created in the current run, so the state is updated
# here, before the output widgets below are instantiated.
st.session_state.setdefault("trans_tb", "")
st.session_state.setdefault("ptime_tb", "")

if transcribe_btn and audio_file:
    with st.spinner("Transcribing ..."):
        text, ptime = transcribe_audio_streamlit(audio_file)
    st.session_state["trans_tb"] = text
    st.session_state["ptime_tb"] = ptime
elif transcribe_btn:
    st.warning("لطفاً فایل صوتی را انتخاب کنید.")

if clear_btn:
    st.session_state["trans_tb"] = ""
    st.session_state["ptime_tb"] = ""

# The column container was created above, so these widgets still render next
# to the uploader even though they are instantiated after the buttons.
with col_output:
    transcription = st.text_area("📝 Transcription", height=120, key="trans_tb")
    processing_time = st.text_input("⏱️ Processing Time", key="ptime_tb")

st.markdown("""
**Guidelines**
- Maximum audio length: **30 seconds**
- Audio content should be in Persian.
- Both transcription and processing time are displayed immediately.

See the [Persian ASR Leaderboard](https://huggingface.co/spaces/navidved/open_persian_asr_leaderboard) for benchmarks.
""")

# ---------- Audio Playback ----------
if audio_file:
    st.audio(audio_file, format="audio/mp3")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, time, requests, gradio as gr
 
 
 
2
 
3
# Log the installed Gradio release at startup.
print("Gradio version:", gr.__version__)

# ---------- Environment Variables ----------
# Endpoint and bearer token of the ASR service, read from the environment.
ASR_API_URL = os.getenv("ASR_API_URL")
AUTH_TOKEN = os.getenv("AUTH_TOKEN")
if not (ASR_API_URL and AUTH_TOKEN):
    # Warn only; transcribe_audio reports the problem to the user per request.
    print("⚠️ ASR_API_URL or AUTH_TOKEN is not set; API calls will fail.")
10
 
11
+ # ---------- Core Transcription Function ----------
12
+ def transcribe_audio(file_path: str | None): # Added None type hint for clarity on clearing
13
+ # Handle case where audio is cleared (input might be None)
14
+ if file_path is None:
15
+ return "Audio cleared.", "", None # Return default/empty values
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
 
 
 
 
 
 
 
 
 
 
 
17
  if not ASR_API_URL or not AUTH_TOKEN:
18
+ return "❌ Error: ASR_API_URL or AUTH_TOKEN is not set.", "", file_path # Keep file path on error
19
+
20
  headers = {
21
  "accept": "application/json",
22
  "Authorization": f"Bearer {AUTH_TOKEN}",
23
  }
24
+
25
  start = time.time()
26
  try:
27
+ with open(file_path, "rb") as f:
28
+ # Ensure the filename is correctly extracted, especially for temp files
29
+ file_name = os.path.basename(file_path)
30
+ files = {"file": (file_name, f, "audio/mpeg")} # Use common mpeg, adjust if specific format needed
31
+ resp = requests.post(ASR_API_URL, headers=headers, files=files, timeout=120) # Increased timeout just in case
32
+ except requests.exceptions.Timeout:
33
+ return f"❌ Error: Request timed out after 120 seconds.", "", file_path
34
  except Exception as e:
35
+ # Provide more specific error context if possible
36
+ return f"❌ Error during API call or file handling: {e}", "", file_path
37
+
38
  elapsed = time.time() - start
39
  if resp.status_code == 200:
40
+ try:
41
+ data = resp.json()
42
+ text = data.get("transcription", "No transcription returned.")
43
+ # Use the 'time' field from response if available, otherwise use measured elapsed time
44
+ processing_time = data.get('time', elapsed)
45
+ return text, f"{processing_time:.2f} s", file_path # Return filepath to keep it in the input widget if needed
46
+ except requests.exceptions.JSONDecodeError:
47
+ return f"❌ Error: Could not decode JSON response. Status: {resp.status_code}, Response: {resp.text}", "", file_path
48
+ else:
49
+ # Return error details from the response
50
+ return f"❌ Error: API returned status {resp.status_code}. Response: {resp.text}", "", file_path
51
+
52
+
53
+ # ---------- Styling ----------
54
# Brand colours interpolated into the stylesheet below.
VIOLET_MAIN = "#7F3FBF"
VIOLET_LIGHT = "#C3A6FF"

# Stylesheet passed to gr.Blocks(css=...): "#gooya-title" styles the banner
# and ".gooya-badge" is a pill-shaped badge. Doubled braces are literal CSS
# braces inside the f-string.
custom_css = f"""
#gooya-title {{
    color:#fff;
    background:linear-gradient(90deg,{VIOLET_MAIN} 0%,{VIOLET_LIGHT} 100%);
    border-radius:12px;padding:20px 10px;margin-bottom:12px;
}}
.gooya-badge {{
    display:inline-block;background:{VIOLET_MAIN};color:#fff;
    border-radius:16px;padding:6px 16px;font-size:.97rem;margin-top:4px;
}}
"""
68
+
69
+ # ---------- UI ----------
70
with gr.Blocks(title="Gooya ASR v1.4", css=custom_css) as demo:
    # Page banner; elem_id ties it to the "#gooya-title" rule in custom_css.
    gr.Markdown("# Gooya ASR v1.4 Transcription", elem_id="gooya-title")

    # Two-column layout: audio input on the left, results on the right.
    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(
                sources=["upload", "microphone"],
                type="filepath",  # the callback receives a path on disk
                label="Audio Input (upload or record, up to 30 s)",
            )
        with gr.Column():
            processing_time_tb = gr.Textbox(
                label="⏱️ Processing Time",
                elem_classes="gooya-badge",
                interactive=False,
            )
            transcription_tb = gr.Textbox(
                label="📝 Transcription",
                placeholder="The transcription will appear here...",
                lines=5,
                show_copy_button=True,
                elem_id="gooya-textbox",
            )

    with gr.Row():
        btn_transcribe = gr.Button("Transcribe", variant="primary")
        btn_clear = gr.Button("Clear", variant="secondary")

    gr.Markdown(
        """
        **Guidelines**
        - Maximum audio length: **30 seconds**
        - Audio content should be in Persian.
        - Both transcription and processing time are displayed upon completion.
        - See the [Persian ASR Leaderboard](https://huggingface.co/spaces/navidved/open_persian_asr_leaderboard) for benchmarks.
        """
    )

    # ---------- Callbacks ----------
    def clear_all():
        """Return blank values for the transcription, time and audio widgets."""
        return "", "", None

    # transcribe_audio also returns the file path, so the audio widget is
    # listed as an output and keeps showing the clip after the call.
    btn_transcribe.click(
        fn=transcribe_audio,
        inputs=[audio_input],
        outputs=[transcription_tb, processing_time_tb, audio_input],
    )

    btn_clear.click(
        fn=clear_all,
        inputs=None,
        outputs=[transcription_tb, processing_time_tb, audio_input],
    )
125
+
126
+ # ---------- Launch ----------
127
if __name__ == "__main__":
    # Hugging Face Spaces serves the app on its own public URL, and Gradio
    # does not support share links there. Request a share tunnel only when
    # running elsewhere (e.g. a container whose localhost is unreachable).
    # SPACE_ID is one of the environment variables Spaces sets for the app.
    running_on_space = bool(os.getenv("SPACE_ID"))
    demo.queue().launch(debug=True, share=not running_on_space)