import os
import time

import gradio as gr
import psutil
from huggingface_hub import InferenceClient
from transformers import pipeline
# import torch
# import numpy as np

# Ensure CUDA is available and set device accordingly
# device = 0 if torch.cuda.is_available() else -1

model_id = "openai/whisper-small"
client = InferenceClient(model_id, token=os.getenv('HF_TOKEN'))
pipe = pipeline("automatic-speech-recognition", model=model_id) #, device=device)
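# Note: `client` sends requests to the hosted Inference API (authenticated via the
# HF_TOKEN environment variable), while `pipe` loads the Whisper checkpoint locally
# and runs it on CPU unless the commented-out `device` argument above is enabled.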

print('The server is running!')

def transcribe(inputs, use_api):
    start = time.time()
    API_STATUS = ''

    # Baseline resident set size (RSS) of this process, used below to estimate how
    # much memory the transcription call adds.
    memory_before = psutil.Process(os.getpid()).memory_info().rss

    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    try:
        if use_api:
            print('Using API for transcription...')
            API_STATUS = 'Using API it took: '
            # Use InferenceClient (API) if checkbox is checked
            res = client.automatic_speech_recognition(inputs).text
        else:
            print('Using local pipeline for transcription...')
            # Use local pipeline if checkbox is unchecked
            API_STATUS = 'Using local pipeline it took: '
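            # chunk_length_s=30 splits long recordings into 30-second windows
            # (Whisper's native context size) so long-form audio can be transcribed.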
            res = pipe(inputs, chunk_length_s=30)["text"]
        
        end = time.time() - start

        # Measure memory after running the transcription process
        memory_after = psutil.Process(os.getpid()).memory_info().rss
        
        # Calculate the difference to see how much memory was used by the code
        memory_used = memory_after - memory_before  # Memory used in bytes
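        # When the API path is used, inference runs remotely, so this delta mostly
        # reflects request handling rather than model weights or activations.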
        memory_used_gb = round(memory_used / (1024 ** 3), 2)  # Convert memory used to GB
        total_memory_gb = round(psutil.virtual_memory().total / (1024 ** 3), 2)  # Total RAM in GB

        # Calculate the percentage of RAM used by this process
        memory_used_percent = round((memory_used / psutil.virtual_memory().total) * 100, 2)
        
        return (
            res,
            f"{API_STATUS}{round(end, 2)} seconds",
            f"RAM used by code: {memory_used_gb} GB ({memory_used_percent}%), total RAM: {total_memory_gb} GB",
        )

    except Exception as e:
        return f'Error: {e}', None, None

demo = gr.Blocks()

mf_transcribe = gr.Interface(
                fn=transcribe,
                inputs=[
                    gr.Audio(sources="microphone", type="filepath"),
                    # gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
                    gr.Checkbox(label="Use API", value=False)
                ],
                outputs=[gr.Textbox(label="Transcribed Text", type="text"),
                         gr.Textbox(label="Time taken", type="text"),
                         gr.Textbox(label="RAM Utilization", type="text")
                        ],  # Transcribed text, elapsed time, and RAM usage
                title="Welcome to QuickTranscribe",
                description=(
                    "Transcribe long-form microphone or audio inputs with the click of a button!"
                ),
                allow_flagging="never",
            )

file_transcribe = gr.Interface(
                fn=transcribe,
                inputs=[
                    gr.Audio(sources="upload", type="filepath", label="Audio file"),
                    # gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
                    gr.Checkbox(label="Use API", value=False)  # Checkbox for API usage
                ],
                outputs=[gr.Textbox(label="Transcribed Text", type="text"),
                         gr.Textbox(label="Time taken", type="text"),
                         gr.Textbox(label="RAM Utilization", type="text")
                        ],  # Transcribed text, elapsed time, and RAM usage
                title="Welcome to QuickTranscribe",
                description=(
                    "Transcribe long-form microphone or audio inputs with the click of a button!"
                ),
                allow_flagging="never",
            )

with demo:
    with gr.Row():
        # Group the microphone and file-upload interfaces into tabs
        gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])

if __name__ == "__main__":
    demo.queue().launch(server_name="0.0.0.0", server_port=5000)
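
# A minimal client-side usage sketch (assumptions: the `gradio_client` package is
# installed, the server is reachable on port 5000, and the microphone tab is exposed
# under the default "/predict" endpoint name; "sample.wav" is a hypothetical file):
#
#     from gradio_client import Client, handle_file
#
#     client = Client("http://127.0.0.1:5000/")
#     text, elapsed, ram = client.predict(
#         handle_file("sample.wav"),  # audio input
#         False,                      # "Use API" checkbox: False = local pipeline
#         api_name="/predict",
#     )
#     print(text, elapsed, ram)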