imansarraf commited on
Commit
68c7ea2
·
verified ·
1 Parent(s): a1f5663

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -0
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
# NOTE(review): wildcard import — presumably provides Segmenter and
# filter_output (and possibly torch/io); verify what sad_tf actually exports.
from sad_tf import *

from transformers import AutoProcessor, AutoModelForCTC
# Central Kurdish (Sorani) XLSR CTC model and its matching processor,
# downloaded from the Hugging Face Hub at startup.
processor = AutoProcessor.from_pretrained("Akashpb13/Central_kurdish_xlsr")
model = AutoModelForCTC.from_pretrained("Akashpb13/Central_kurdish_xlsr")

import soundfile as sf

# Right-to-left styling for the output textbox (Sorani is written RTL).
css = """
textarea { direction: rtl; text-align: right; font-family: Calibri, sans-serif; font-size: 16px;}
"""

# Speech-activity detector used to split input audio into speech segments.
# Runs on CPU; model weights loaded from the local .hdf5 file.
seg = Segmenter(ffmpeg_path="ffmpeg",model_path="keras_speech_music_noise_cnn.hdf5" , device="cpu",vad_type="vad")
def process_segment(args, sample_rate=16000):
    """Transcribe a single speech segment with the CTC model.

    Parameters
    ----------
    args : tuple
        ``(segment, wav)`` where ``segment`` is a ``(start, stop)`` pair in
        seconds and ``wav`` is the full 1-D waveform sampled at
        ``sample_rate``.
    sample_rate : int, optional
        Sample rate of ``wav`` in Hz. Default 16000 (what the XLSR model
        expects); made a parameter instead of a repeated magic number.

    Returns
    -------
    tuple
        ``(start, stop, transcription)`` — the segment bounds in seconds and
        the decoded text.
    """
    import torch  # explicit import; the original relied on `from sad_tf import *`

    segment, wav = args
    start, stop = segment
    # Convert second offsets to sample indices and slice out the segment.
    chunk = wav[int(start * sample_rate):int(stop * sample_rate)]
    input_values = processor(chunk, sampling_rate=sample_rate, return_tensors="pt").input_values
    with torch.no_grad():  # inference only — no gradients needed
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)[0]
    return start, stop, transcription
def pcm_to_flac(pcm_data, sample_rate=16000):
    """Encode raw PCM samples to FLAC and return the encoded bytes.

    Parameters
    ----------
    pcm_data : array-like
        1-D audio samples.
    sample_rate : int, optional
        Sample rate of the audio in Hz (default 16000).

    Returns
    -------
    bytes
        The FLAC-encoded audio data.
    """
    import io  # explicit import; the original relied on `from sad_tf import *`

    buffer = io.BytesIO()
    # soundfile infers the container from format='FLAC' when writing to a
    # file-like object.
    sf.write(buffer, pcm_data, sample_rate, format='FLAC')
    return buffer.getvalue()
def transcribe_audio(audio_file):
    """Segment an audio file with VAD and transcribe each speech segment.

    Parameters
    ----------
    audio_file : str
        Path to the input audio file (Gradio passes a temp-file path).

    Returns
    -------
    str
        The transcription, one segment per line, joined with CRLF.
    """
    isig, wav = seg(audio_file)
    # Clean up the raw VAD output: drop blips shorter than 0.1 s, bridge
    # silences shorter than 0.5 s, cap segments at 15 s and split anything
    # longer than 20 s.
    isig = filter_output(isig, max_silence=0.5, ignore_small_speech_segments=0.1,
                         max_speech_len=15, split_speech_bigger_than=20)
    # Keep only the (start, stop) pair from each 5-tuple the segmenter emits.
    isig = [(a, b) for x, a, b, _, _ in isig]
    print(isig)

    results = [process_segment((segment, wav)) for segment in isig]

    text = ""
    for start, stop, tr_beamsearch_lm in results:
        try:
            text += ' ' + tr_beamsearch_lm + '\r\n'
            print(start)
            print(stop)
            print(text)
        except Exception:
            # Best-effort: skip a segment whose transcription can't be
            # appended rather than abort the whole file.
            # (Narrowed from a bare `except:` which also swallowed
            # KeyboardInterrupt/SystemExit.)
            pass
    return text
# Build the Gradio UI: an audio input (file upload or microphone) wired to
# the transcription function, with an editable RTL-styled output textbox.
audio_input = gr.Audio(type="filepath")
text_output = gr.Textbox(label="Transcription", elem_id="output-text", interactive=True)

interface = gr.Interface(
    fn=transcribe_audio,
    inputs=audio_input,
    outputs=text_output,
    title="Soorani Audio Transcription",
    description="Upload an audio file or record audio to get the transcription.",
    css=css,
)

# Start the web app.
interface.launch()