File size: 2,130 Bytes
6c1f7df
 
a025dac
6c1f7df
 
a025dac
6c1f7df
 
 
 
 
 
a025dac
e592dcc
 
 
a025dac
e592dcc
 
 
 
 
a025dac
6c1f7df
 
 
 
 
a025dac
 
 
 
 
 
6c1f7df
 
 
a025dac
 
6c1f7df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import gradio as gr
import requests
import os

# Languages offered in the UI dropdown; names are sent lower-cased to the API.
LANGUAGES = [
    "malayalam",
    "tamil",
    "telugu",
    "hindi",
    "kannada",
]

def get_lang_name(lang_string):
    """Return the bare language name from a label such as ``"Kannada (kn)"``.

    Everything from the first ``"("`` onward is dropped, then surrounding
    whitespace is stripped and the result is lower-cased.
    """
    name, _, _ = lang_string.partition("(")
    return name.strip().lower()

def transcribe_api(audio_file, language, *, timeout=300.0):
    """Upload an audio file to the transcription API and return its JSON reply.

    The API base URL is read from the ``DWANI_AI_API_BASE_URL`` environment
    variable. The file is sent as a multipart POST to
    ``<base>/v1/transcribe/`` with the language as the ``language`` query
    parameter.

    Args:
        audio_file: Filesystem path of the audio file to upload. A falsy
            value (e.g. ``None`` when nothing was uploaded) yields an error
            dict instead of an exception.
        language: Language label; normalised with ``get_lang_name`` before
            being sent.
        timeout: Timeout in seconds handed to ``requests.post`` so a stalled
            server cannot block the caller forever.

    Returns:
        The decoded JSON response on success, otherwise a dict of the form
        ``{"error": "<message>"}``. This function does not raise for
        missing input, unreadable files, HTTP/network failures or a
        non-JSON response body.
    """
    base_url = os.getenv("DWANI_AI_API_BASE_URL")
    if not base_url:
        return {"error": "DWANI_AI_API_BASE_URL environment variable is not set"}

    # Validate inputs up front so the UI gets a readable message rather than
    # a TypeError/AttributeError from open() or get_lang_name().
    if not audio_file:
        return {"error": "No audio file provided"}
    if not language:
        return {"error": "No language selected"}

    url = f"{base_url.rstrip('/')}/v1/transcribe/"
    # Let requests build (and URL-encode) the query string.
    params = {"language": get_lang_name(language)}
    headers = {"accept": "application/json"}

    try:
        with open(audio_file, "rb") as f:
            # NOTE(review): the part is always labelled audio/x-wav regardless
            # of the actual file format — confirm the server expects this.
            files = {"file": (os.path.basename(audio_file), f, "audio/x-wav")}
            response = requests.post(
                url,
                params=params,
                headers=headers,
                files=files,
                timeout=timeout,
            )
        response.raise_for_status()
        return response.json()
    except (OSError, ValueError, requests.exceptions.RequestException) as e:
        # OSError: file missing/unreadable; ValueError: body was not valid
        # JSON (older requests versions raise a plain ValueError subclass).
        return {"error": str(e)}

# Assemble the Gradio UI: one row with an input column and an output column.
with gr.Blocks(title="Speech to Text API Interface") as demo:
    gr.Markdown("# Speech to Text API Interface")

    with gr.Row():
        # First column: audio upload, language picker and the trigger button.
        with gr.Column():
            audio_input = gr.Audio(label="Audio File", type="filepath", sources=["upload"])
            language_input = gr.Dropdown(label="Language", choices=LANGUAGES, value="kannada")
            submit_btn = gr.Button("Transcribe")

        # Second column: the API response (or error dict) rendered as JSON.
        with gr.Column():
            output = gr.JSON(label="Transcription Response")

    # Clicking the button forwards the uploaded file path and the selected
    # language to transcribe_api and shows whatever it returns.
    submit_btn.click(
        transcribe_api,
        inputs=[audio_input, language_input],
        outputs=output,
    )

# Start the app.
demo.launch()