Pijush2023 committed on
Commit
aa156da
·
verified ·
1 Parent(s): e8f10e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -0
app.py CHANGED
@@ -46,6 +46,53 @@
46
 
47
  # demo.launch(show_error=True)
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  import gradio as gr
50
  import numpy as np
51
  import torch
 
46
 
47
  # demo.launch(show_error=True)
48
 
49
+ # import gradio as gr
50
+ # import numpy as np
51
+ # import torch
52
+ # from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
53
+
54
+ # model_id = 'openai/whisper-large-v3'
55
+ # device = "cuda:0" if torch.cuda.is_available() else "cpu"
56
+ # torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
57
+ # model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)
58
+ # processor = AutoProcessor.from_pretrained(model_id)
59
+
60
+ # pipe_asr = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, max_new_tokens=128, chunk_length_s=15, batch_size=16, torch_dtype=torch_dtype, device=device, return_timestamps=False)
61
+
62
+ # def transcribe_function(new_chunk, state):
63
+ # try:
64
+ # sr, y = new_chunk
65
+ # except TypeError:
66
+ # print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
67
+ # return state, "", None
68
+
69
+ # y = y.astype(np.float32) / np.max(np.abs(y))
70
+
71
+ # if state is not None:
72
+ # state = np.concatenate([state, y])
73
+ # else:
74
+ # state = y
75
+
76
+ # result = pipe_asr({"array": state, "sampling_rate": sr}, return_timestamps=False)
77
+
78
+ # full_text = result.get("text", "")
79
+
80
+ # return state, full_text
81
+
82
+ # with gr.Blocks() as demo:
83
+ # gr.Markdown("# Voice to Text Transcription")
84
+
85
+ # state = gr.State(None)
86
+
87
+ # with gr.Row():
88
+ # with gr.Column():
89
+ # audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy', label="Microphone Input")
90
+ # with gr.Column():
91
+ # output_text = gr.Textbox(label="Transcription")
92
+
93
+ # audio_input.stream(transcribe_function, inputs=[audio_input, state], outputs=[state, output_text], api_name="SAMLOne_real_time")
94
+
95
+ # demo.launch(show_error=True)
96
  import gradio as gr
97
  import numpy as np
98
  import torch