navidved committed on
Commit
545c1fc
·
verified ·
1 Parent(s): 837ea3c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -117
app.py CHANGED
@@ -1,127 +1,38 @@
1
- import os, time, requests, gradio as gr
2
-
3
- # ---------- Hot-patch to bypass Gradio 4.44.0 JSON-schema bug ----------
4
- import gradio.blocks as _blocks
5
- if not hasattr(_blocks.Blocks, "_api_info_patched"):
6
- _blocks.Blocks._api_info_patched = True
7
- _blocks.Blocks.get_api_info = lambda self: {}
8
-
9
- print("Gradio version:", gr.__version__) # should be 4.44.0
10
 
11
  # ---------- Environment Variables ----------
12
  ASR_API_URL = os.getenv("ASR_API_URL")
13
  AUTH_TOKEN = os.getenv("AUTH_TOKEN")
 
14
  if not ASR_API_URL or not AUTH_TOKEN:
15
  print("⚠️ ASR_API_URL or AUTH_TOKEN is not set; API calls will fail.")
16
 
17
- # ---------- Core Transcription Function ----------
18
- def transcribe_audio(file_path: str):
19
- if not ASR_API_URL or not AUTH_TOKEN:
20
- return "❌ Error: ASR_API_URL or AUTH_TOKEN is not set.", ""
21
-
22
  headers = {
23
- "accept": "application/json",
24
  "Authorization": f"Bearer {AUTH_TOKEN}",
25
  }
26
-
27
- start = time.time()
28
- try:
29
- with open(file_path, "rb") as f:
30
- files = {"file": (os.path.basename(file_path), f, "audio/mpeg")}
31
- resp = requests.post(ASR_API_URL, headers=headers, files=files, timeout=120)
32
- except Exception as e:
33
- return f"❌ Error while calling ASR API: {e}", ""
34
-
35
- elapsed = time.time() - start
36
- if resp.status_code == 200:
37
- data = resp.json()
38
- text = data.get("transcription", "No transcription returned.")
39
- return text, f"{data.get('time', elapsed):.2f} s"
40
- return f"❌ Error: {resp.status_code}, {resp.text}", ""
41
-
42
- # ---------- Styling ----------
43
- VIOLET_MAIN = "#7F3FBF" # primary violet
44
- VIOLET_LIGHT = "#C3A6FF" # lighter violet for gradient/badge
45
-
46
- custom_css = f"""
47
- #gooya-title {{
48
- color:#fff;
49
- background:linear-gradient(90deg,{VIOLET_MAIN} 0%,{VIOLET_LIGHT} 100%);
50
- border-radius:12px;padding:20px 10px;margin-bottom:12px;
51
- }}
52
- .gooya-badge {{
53
- display:inline-block;background:{VIOLET_MAIN};color:#fff;
54
- border-radius:16px;padding:6px 16px;font-size:.97rem;margin-top:4px;
55
- }}
56
- """
57
-
58
- # ---------- UI ----------
59
- with gr.Blocks(css=custom_css, title="Gooya ASR v1.4") as demo:
60
- gr.HTML(
61
- f"""
62
- <div id="gooya-title">
63
- <h1 style='margin-bottom:10px;font-weight:800;font-size:2rem;'>
64
- Gooya ASR <span style="font-size:1.1rem;font-weight:400;opacity:.8;">v1.4</span>
65
- </h1>
66
- <p style='font-size:1.12rem;margin-bottom:2px;'>
67
- High-performance Persian Speech-to-Text
68
- </p>
69
- <p style='font-size:.98rem;color:#e9dbff'>
70
- Upload or record a Persian audio file (max&nbsp;30&nbsp;s) and instantly get the transcription.
71
- </p>
72
- </div>
73
- """
74
- )
75
-
76
- with gr.Row():
77
- with gr.Column():
78
- audio_input = gr.Audio(
79
- label="Audio Input (upload or record, up to 30 s)",
80
- type="filepath",
81
- sources=["upload", "microphone"],
82
- )
83
- with gr.Column():
84
- processing_time_tb = gr.Textbox(
85
- label="⏱️ Processing Time",
86
- interactive=False,
87
- elem_classes="gooya-badge",
88
- )
89
- transcription_tb = gr.Textbox(
90
- label="📝 Transcription",
91
- lines=5,
92
- show_copy_button=True,
93
- placeholder="The transcription will appear here...",
94
- elem_id="gooya-textbox",
95
- )
96
-
97
- with gr.Row():
98
- btn_transcribe = gr.Button("Transcribe", variant="primary")
99
- btn_clear = gr.Button("Clear", variant="secondary")
100
-
101
- gr.Markdown(
102
- """
103
- **Guidelines**
104
- - Maximum audio length: **30 seconds**
105
- - Audio content should be in Persian.
106
- - Both transcription and processing time are displayed immediately.
107
-
108
- See the [Persian ASR Leaderboard](https://huggingface.co/spaces/navidved/open_persian_asr_leaderboard) for benchmarks.
109
- """
110
- )
111
-
112
- # ---------- Callbacks ----------
113
- btn_transcribe.click(
114
- transcribe_audio,
115
- inputs=audio_input,
116
- outputs=[transcription_tb, processing_time_tb],
117
- )
118
-
119
- btn_clear.click(
120
- lambda: ("", "", None),
121
- inputs=None,
122
- outputs=[transcription_tb, processing_time_tb, audio_input],
123
- )
124
-
125
- # ---------- Launch ----------
126
- if __name__ == "__main__":
127
- demo.queue().launch(show_api=True, share=True, debug=True)
 
1
+ import gradio as gr
2
+ import os, time, requests
 
 
 
 
 
 
 
3
 
4
  # ---------- Environment Variables ----------
5
  ASR_API_URL = os.getenv("ASR_API_URL")
6
  AUTH_TOKEN = os.getenv("AUTH_TOKEN")
7
+
8
  if not ASR_API_URL or not AUTH_TOKEN:
9
  print("⚠️ ASR_API_URL or AUTH_TOKEN is not set; API calls will fail.")
10
 
11
+ def transcribe_audio(audio_file):
 
 
 
 
12
  headers = {
 
13
  "Authorization": f"Bearer {AUTH_TOKEN}",
14
  }
15
+ files = {'file': open(audio_file, 'rb')}
16
+
17
+ response = requests.post(ASR_API_URL, headers=headers, files=files)
18
+ if response.status_code == 200:
19
+ return response.json().get('transcription', '')
20
+ else:
21
+ return f"Error: {response.text}"
22
+
23
+ with gr.Blocks() as interface:
24
+ gr.Markdown("# Whisper Large V3 Speech Recognition")
25
+ gr.Markdown("Upload an audio file or use your microphone to transcribe speech to text.")
26
+
27
+ # Create the input and output components
28
+ audio_input = gr.Audio(type="filepath", label="Input Audio")
29
+ output_text = gr.Textbox(label="Transcription")
30
+
31
+ # Add a button to trigger the transcription
32
+ transcribe_button = gr.Button("Transcribe")
33
+
34
+ # Bind the transcribe_audio function to the button click
35
+ transcribe_button.click(fn=transcribe_audio, inputs=audio_input, outputs=output_text)
36
+
37
+ # Launch the Gradio app
38
+ interface.launch(share=True)