navidved committed on
Commit
5b1e694
·
verified ·
1 Parent(s): b27d0c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -28
app.py CHANGED
@@ -1,38 +1,127 @@
1
- import gradio as gr
2
- import os, time, requests
 
 
 
 
 
 
 
3
 
4
  # ---------- Environment Variables ----------
5
  ASR_API_URL = os.getenv("ASR_API_URL")
6
  AUTH_TOKEN = os.getenv("AUTH_TOKEN")
7
-
8
  if not ASR_API_URL or not AUTH_TOKEN:
9
  print("⚠️ ASR_API_URL or AUTH_TOKEN is not set; API calls will fail.")
10
 
11
- def transcribe_audio(audio_file):
 
 
 
 
12
  headers = {
 
13
  "Authorization": f"Bearer {AUTH_TOKEN}",
14
  }
15
- files = {'file': open(audio_file, 'rb')}
16
-
17
- response = requests.post(ASR_API_URL, headers=headers, files=files)
18
- if response.status_code == 200:
19
- return response.json().get('transcription', '')
20
- else:
21
- return f"Error: {response.text}"
22
-
23
- with gr.Blocks() as interface:
24
- gr.Markdown("# Whisper Large V3 Speech Recognition")
25
- gr.Markdown("Upload an audio file or use your microphone to transcribe speech to text.")
26
-
27
- # Create the input and output components
28
- audio_input = gr.Audio(type="filepath", label="Input Audio")
29
- output_text = gr.Textbox(label="Transcription")
30
-
31
- # Add a button to trigger the transcription
32
- transcribe_button = gr.Button("Transcribe")
33
-
34
- # Bind the transcribe_audio function to the button click
35
- transcribe_button.click(fn=transcribe_audio, inputs=audio_input, outputs=output_text)
36
-
37
- # Launch the Gradio app
38
- interface.queue().launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, time, requests, gradio as gr
2
+
3
+ # ---------- Hot-patch to bypass Gradio 4.44.0 JSON-schema bug ----------
4
import gradio.blocks as _blocks

# Replace Blocks.get_api_info with a stub that always returns an empty dict,
# so Gradio's own implementation is never invoked.
# The `_api_info_patched` marker attribute prevents the patch from being
# applied a second time if this module is executed again in the same process.
if not hasattr(_blocks.Blocks, "_api_info_patched"):
    _blocks.Blocks._api_info_patched = True
    # Accept and ignore any extra positional/keyword arguments: the method
    # being replaced can be called with options (e.g. `all_endpoints=True`
    # -- NOTE(review): confirm against the installed Gradio version), and a
    # strict `lambda self: {}` would raise TypeError on such a call.
    _blocks.Blocks.get_api_info = lambda self, *args, **kwargs: {}
8
+
9
+ print("Gradio version:", gr.__version__) # should be 4.44.0
10
 
11
  # ---------- Environment Variables ----------
12
  ASR_API_URL = os.getenv("ASR_API_URL")
13
  AUTH_TOKEN = os.getenv("AUTH_TOKEN")
 
14
  if not ASR_API_URL or not AUTH_TOKEN:
15
  print("⚠️ ASR_API_URL or AUTH_TOKEN is not set; API calls will fail.")
16
 
17
+ # ---------- Core Transcription Function ----------
18
def transcribe_audio(file_path: str):
    """Upload an audio file to the ASR API and return its transcription.

    Args:
        file_path: Path of the audio file to send. A falsy value (``None`` or
            ``""``, i.e. nothing was uploaded or recorded) is reported as an
            error instead of being passed to ``open``.

    Returns:
        A ``(text, processing_time)`` tuple of strings.

        * On success, ``text`` is the ``"transcription"`` field of the JSON
          response (or ``"No transcription returned."`` when it is missing or
          null) and ``processing_time`` is the response's ``"time"`` field --
          falling back to the locally measured round-trip time -- formatted
          as ``"<seconds> s"`` with two decimals.
        * On any failure, ``text`` is a message starting with ``"❌ Error"``
          and ``processing_time`` is ``""``.

    This function never raises for I/O, network, or malformed-response
    problems; they are all converted into the error tuple described above.
    """
    if not ASR_API_URL or not AUTH_TOKEN:
        return "❌ Error: ASR_API_URL or AUTH_TOKEN is not set.", ""
    if not file_path:
        # Without this guard, open(None) fails with an opaque TypeError text.
        return "❌ Error: no audio file provided.", ""

    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {AUTH_TOKEN}",
    }

    start = time.time()
    try:
        with open(file_path, "rb") as f:
            # NOTE(review): the content type is fixed to "audio/mpeg" whatever
            # the actual file format is; kept as-is because the API's
            # expectations are not visible here.
            files = {"file": (os.path.basename(file_path), f, "audio/mpeg")}
            resp = requests.post(
                ASR_API_URL, headers=headers, files=files, timeout=120
            )
    except Exception as e:
        # Deliberately broad: this is the UI callback boundary, and any
        # failure should be shown to the user rather than crash the handler.
        return f"❌ Error while calling ASR API: {e}", ""
    elapsed = time.time() - start

    if resp.status_code != 200:
        return f"❌ Error: {resp.status_code}, {resp.text}", ""

    try:
        data = resp.json()
    except ValueError as e:
        # A 200 response whose body is not valid JSON.
        return f"❌ Error: invalid JSON in ASR API response: {e}", ""
    if not isinstance(data, dict):
        return f"❌ Error: unexpected ASR API response: {resp.text}", ""

    text = data.get("transcription")
    if text is None:
        text = "No transcription returned."

    # The reported time may be absent, null, or a string; fall back to the
    # measured round-trip time when it cannot be read as a number.
    try:
        seconds = float(data.get("time", elapsed))
    except (TypeError, ValueError):
        seconds = elapsed
    return text, f"{seconds:.2f} s"
41
+
42
+ # ---------- Styling ----------
43
+ VIOLET_MAIN = "#7F3FBF" # primary violet
44
+ VIOLET_LIGHT = "#C3A6FF" # lighter violet for gradient/badge
45
+
46
+ custom_css = f"""
47
+ #gooya-title {{
48
+ color:#fff;
49
+ background:linear-gradient(90deg,{VIOLET_MAIN} 0%,{VIOLET_LIGHT} 100%);
50
+ border-radius:12px;padding:20px 10px;margin-bottom:12px;
51
+ }}
52
+ .gooya-badge {{
53
+ display:inline-block;background:{VIOLET_MAIN};color:#fff;
54
+ border-radius:16px;padding:6px 16px;font-size:.97rem;margin-top:4px;
55
+ }}
56
+ """
57
+
58
+ # ---------- UI ----------
59
+ with gr.Blocks(css=custom_css, title="Gooya ASR v1.4") as demo:
60
+ gr.HTML(
61
+ f"""
62
+ <div id="gooya-title">
63
+ <h1 style='margin-bottom:10px;font-weight:800;font-size:2rem;'>
64
+ Gooya ASR <span style="font-size:1.1rem;font-weight:400;opacity:.8;">v1.4</span>
65
+ </h1>
66
+ <p style='font-size:1.12rem;margin-bottom:2px;'>
67
+ High-performance Persian Speech-to-Text
68
+ </p>
69
+ <p style='font-size:.98rem;color:#e9dbff'>
70
+ Upload or record a Persian audio file (max&nbsp;30&nbsp;s) and instantly get the transcription.
71
+ </p>
72
+ </div>
73
+ """
74
+ )
75
+
76
+ with gr.Row():
77
+ with gr.Column():
78
+ audio_input = gr.Audio(
79
+ label="Audio Input (upload or record, up to 30 s)",
80
+ type="filepath",
81
+ sources=["upload", "microphone"],
82
+ )
83
+ with gr.Column():
84
+ processing_time_tb = gr.Textbox(
85
+ label="⏱️ Processing Time",
86
+ interactive=False,
87
+ elem_classes="gooya-badge",
88
+ )
89
+ transcription_tb = gr.Textbox(
90
+ label="📝 Transcription",
91
+ lines=5,
92
+ show_copy_button=True,
93
+ placeholder="The transcription will appear here...",
94
+ elem_id="gooya-textbox",
95
+ )
96
+
97
+ with gr.Row():
98
+ btn_transcribe = gr.Button("Transcribe", variant="primary")
99
+ btn_clear = gr.Button("Clear", variant="secondary")
100
+
101
+ gr.Markdown(
102
+ """
103
+ **Guidelines**
104
+ - Maximum audio length: **30 seconds**
105
+ - Audio content should be in Persian.
106
+ - Both transcription and processing time are displayed immediately.
107
+
108
+ See the [Persian ASR Leaderboard](https://huggingface.co/spaces/navidved/open_persian_asr_leaderboard) for benchmarks.
109
+ """
110
+ )
111
+
112
+ # ---------- Callbacks ----------
113
+ btn_transcribe.click(
114
+ transcribe_audio,
115
+ inputs=audio_input,
116
+ outputs=[transcription_tb, processing_time_tb],
117
+ )
118
+
119
+ btn_clear.click(
120
+ lambda: ("", "", None),
121
+ inputs=None,
122
+ outputs=[transcription_tb, processing_time_tb, audio_input],
123
+ )
124
+
125
+ # ---------- Launch ----------
126
+ if __name__ == "__main__":
127
+ demo.queue().launch()