File size: 3,347 Bytes
f39366f
d726228
 
 
f118118
d726228
 
bda9ee3
d726228
5b1e694
55c61cc
1967b9f
d726228
 
1967b9f
d726228
 
 
 
5b1e694
d726228
5b1e694
d726228
 
1967b9f
f39366f
d726228
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55c61cc
f118118
 
1967b9f
 
 
 
d726228
1967b9f
f118118
d726228
1967b9f
 
 
 
 
d726228
1967b9f
f118118
 
d726228
 
f118118
d726228
 
 
 
 
99106b6
d726228
 
99106b6
f39366f
 
 
d726228
f118118
f39366f
d726228
f39366f
d726228
f118118
 
d726228
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import gradio as gr
import requests
import os
import time

# Runtime configuration pulled from the environment; each value is None when
# the corresponding variable is unset.
# ASR_API_URL: endpoint that transcribe_audio POSTs the audio file to.
ASR_API_URL = os.environ.get("ASR_API_URL")
# AUTH_TOKEN: credential sent as "Bearer <token>" in the Authorization header.
AUTH_TOKEN = os.environ.get("AUTH_TOKEN")

def transcribe_audio(file_path):
    """Send an audio file to the ASR service and return its transcription.

    Args:
        file_path: Path of the audio file to upload. May be ``None`` or empty
            when the UI is triggered without any audio selected.

    Returns:
        A ``(text, processing_time)`` tuple of strings. On success ``text`` is
        the transcription and ``processing_time`` is formatted as
        ``"<n.nn> seconds"``. On any failure ``text`` is a message starting
        with ``"❌ Error:"`` and ``processing_time`` is ``""``. The function
        reports problems through the return value rather than raising.
    """
    if not ASR_API_URL or not AUTH_TOKEN:
        return "❌ Error: ASR_API_URL or AUTH_TOKEN is not set.", ""
    # Guard against a missing path: otherwise open(None) would raise TypeError.
    if not file_path:
        return "❌ Error: No audio file provided.", ""

    headers = {
        'accept': 'application/json',
        'Authorization': f'Bearer {AUTH_TOKEN}',
    }
    # (connect, read) timeouts in seconds so a stalled server cannot hang the
    # request forever; requests waits indefinitely when no timeout is given.
    timeout = (10, 120)

    try:
        # The context manager guarantees the file handle is closed even when
        # the request fails.
        with open(file_path, 'rb') as audio_file:
            files = {
                # Send only the base name so the local temp path is not
                # disclosed to the remote service.
                # NOTE(review): the content type is hard-coded to audio/mpeg
                # regardless of the actual file format — confirm the server
                # does not depend on it.
                'file': (os.path.basename(file_path), audio_file, 'audio/mpeg'),
            }
            start_time = time.time()
            response = requests.post(
                ASR_API_URL, headers=headers, files=files, timeout=timeout
            )
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and every failure
        # (unreadable file, network error, timeout) is shown as a message.
        return f"❌ Error: {str(e)}", ""
    inference_time = time.time() - start_time

    if response.status_code != 200:
        return f"❌ Error: {response.status_code}, {response.text}", ""

    try:
        res = response.json()
    except ValueError:
        # A 200 response whose body is not valid JSON.
        return f"❌ Error: invalid JSON response: {response.text}", ""
    if not isinstance(res, dict):
        return f"❌ Error: unexpected response format: {response.text}", ""

    transcription = res.get("transcription", "No transcription returned.")
    # Prefer the server-reported time (presumably seconds, given the suffix);
    # fall back to the locally measured round trip when it is absent or not
    # numeric.
    try:
        elapsed = float(res.get('time', inference_time))
    except (TypeError, ValueError):
        elapsed = inference_time
    return transcription, f"{elapsed:.2f} seconds"

# Build the Gradio UI: a styled title banner, an audio input next to the
# result widgets, Transcribe/Clear buttons, and a short instructions panel.
with gr.Blocks(css="""
#gooya-title {color:white; background: linear-gradient(90deg, #224CA5 0%, #2CD8D5 100%); border-radius: 12px; padding:20px 10px;margin-bottom:12px;}
.gooya-badge {display:inline-block; background:#224CA5; color:#fff; border-radius:16px; padding:6px 16px; font-size:0.97rem; margin-top:4px;}
#gooya-box {background:#F7FAFF; border:1px solid #e7e9ef; border-radius:14px; padding:22px 18px; margin-top:12px;}
""") as demo:
    gr.HTML("""<div id="gooya-title">
    <h1 style='margin-bottom:10px;font-weight:800;font-size:2rem;'>Gooya ASR <span style="font-size:1.1rem; font-weight:400; opacity:0.8;">v1.4</span></h1>
    <p style='font-size:1.12rem; margin-bottom:2px;'>High-performance Persian Speech-to-Text</p>
    <p style='font-size:0.98rem; color:#c6e8fa'>Upload or record a Persian audio file (max 30s) and instantly receive the transcription.</p>
    </div>""")

    with gr.Row():
        with gr.Column():
            # type="filepath" makes the component hand transcribe_audio a path
            # on disk, which is what it opens and uploads.
            audio = gr.Audio(
                label="Audio Input (Upload or record, up to 30s)",
                type="filepath",
                show_label=True,
                sources=["upload", "microphone"]
            )
        with gr.Column():
            inference_time = gr.Label(label="⏱️ Processing Time", elem_classes="gooya-badge")
            transcription = gr.Textbox(
                # The label previously contained mojibake in place of the
                # emoji; restored to the intended character.
                label="📝 Transcription",
                lines=5,
                show_copy_button=True,
                placeholder="The transcription will appear here...",
                elem_id="gooya-textbox"
            )

    with gr.Row():
        submit_btn = gr.Button("Transcribe", variant="primary")
        clear_btn = gr.Button("Clear", variant="secondary")

    gr.Markdown("""
**Instructions:**  
- Maximum audio length: **30 seconds**
- Input audio should be in Persian.
- The transcription and processing time will be displayed instantly.

For performance benchmarks, visit: [Persian ASR Leaderboard](https://huggingface.co/spaces/navidved/open_persian_asr_leaderboard)
""")

    # transcribe_audio returns (text, processing_time), matching the two
    # output components in order.
    submit_btn.click(
        transcribe_audio,
        inputs=audio,
        outputs=[transcription, inference_time]
    )
    # The handler must return one value per output component: empty strings
    # reset the text widgets and None clears the audio input.
    clear_btn.click(
        lambda: ("", "", None),
        None,
        [transcription, inference_time, audio]
    )

# Start the web server; share=True additionally requests a public share link.
demo.launch(share=True)