maher13 commited on
Commit
b5f889a
·
1 Parent(s): e89c526

Upload app

Browse files
Files changed (1) hide show
  1. app.py +72 -0
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """2-Copy1.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1UkxKUpI5tPpdFrJIUFWSlk4LlD75Qgf6
8
+ """
9
+
10
# NOTE(review): the original lines here were IPython magics
# ("!pip install -q gradio" / "!pip install transformers"), which are a
# SyntaxError in a plain .py script and would crash this Hugging Face Space
# at import time. Declare the dependencies in requirements.txt instead:
#   gradio
#   transformers
12
+
13
+ import gradio as gr
14
+ import librosa
15
+ import torch
16
+ import torchaudio
17
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
18
+ import numpy as np
19
+
20
# Load the pretrained Arabic wav2vec2 CTC checkpoint and its processor once
# at module import, so every request reuses the same weights.
# .eval() switches off dropout/batch-norm updates for inference.
processor = Wav2Vec2Processor.from_pretrained("maher13/arabic-iti")
model = Wav2Vec2ForCTC.from_pretrained("maher13/arabic-iti").eval()
22
+
23
def asr_transcript(audio_file, audio_file2):
    """Transcribe up to two audio inputs with the Arabic wav2vec2 model.

    Parameters
    ----------
    audio_file, audio_file2 : file-like or None
        Gradio file objects (microphone recording / uploaded file); their
        ``.name`` attribute is the path on disk. Either may be falsy.

    Returns
    -------
    tuple[str, str]
        The transcription for each input, or ``"N/A"`` for a missing input.
    """

    def _transcribe(f):
        # One transcription pass; extracted because the original duplicated
        # this code verbatim for both inputs.
        # Resample to the 16 kHz rate the wav2vec2 checkpoint expects.
        wav, _sr = librosa.load(f.name, sr=16000)
        input_values = processor(
            wav, sampling_rate=16000, return_tensors="pt", padding=True
        ).input_values
        # Run the forward pass under no_grad: inference needs no autograd
        # graph. (The original wrapped only the argmax, so the expensive
        # model call still recorded gradients and wasted memory.)
        with torch.no_grad():
            logits = model(input_values).logits
        predicted_ids = torch.argmax(logits, dim=-1)
        # NOTE(review): argmax over vocab indices can never produce -100,
        # so this masking looks like a leftover from a label-loss recipe;
        # kept to preserve the original behavior exactly.
        predicted_ids[predicted_ids == -100] = processor.tokenizer.pad_token_id
        return processor.tokenizer.batch_decode(predicted_ids)[0]

    transcription1 = _transcribe(audio_file) if audio_file else "N/A"
    transcription2 = _transcribe(audio_file2) if audio_file2 else "N/A"

    return transcription1, transcription2
53
+
54
# Wire the model into a two-input Gradio demo: one microphone recording and
# one uploaded file, transcribed independently by asr_transcript.
gradio_ui = gr.Interface(
    fn=asr_transcript,
    title="Speech to Text Graduation project \n sponsored by TensorGraph",
    inputs=[
        gr.inputs.Audio(source='microphone', type="file", optional=True),
        gr.inputs.Audio(source='upload', type="file", optional=True),
    ],
    outputs=[
        # Distinct labels so users can tell the two transcripts apart —
        # both were labelled "Auto-Transcript" before, which made the
        # outputs indistinguishable in the UI.
        gr.outputs.Textbox(label="Microphone transcript"),
        gr.outputs.Textbox(label="Uploaded-file transcript"),
    ],
)

gradio_ui.launch(share=True)