Update app.py
Browse files
app.py
CHANGED
@@ -1,29 +1,53 @@
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
from gtts import gTTS
|
4 |
-
from transformers import pipeline
|
5 |
from openai import OpenAI
|
6 |
|
7 |
client = OpenAI()
|
8 |
|
|
|
|
|
|
|
|
|
9 |
pipe = pipeline(
|
10 |
"automatic-speech-recognition",
|
11 |
-
model=
|
|
|
|
|
12 |
chunk_length_s=30,
|
13 |
-
return_timestamps=False
|
|
|
14 |
)
|
15 |
|
16 |
def transcribe(audio):
|
17 |
try:
|
18 |
if audio is None:
|
19 |
return "No audio input received"
|
|
|
20 |
# Get the audio file path from the tuple if it exists
|
21 |
audio_path = audio if isinstance(audio, str) else audio[0]
|
22 |
if not os.path.exists(audio_path):
|
23 |
return "Audio file not found"
|
24 |
|
25 |
-
result = pipe(
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
except Exception as e:
|
28 |
print(f"Transcription error: {str(e)}")
|
29 |
return "Error in transcription. Please try again."
|
@@ -92,7 +116,7 @@ def process_audio_and_respond(audio):
|
|
92 |
demo = gr.Interface(
|
93 |
fn=process_audio_and_respond,
|
94 |
inputs=gr.Audio(
|
95 |
-
sources="microphone",
|
96 |
type="filepath",
|
97 |
label="Bonyeza kitufe cha kurekodi na uliza swali lako"
|
98 |
),
|
|
|
import os
import gradio as gr
from gtts import gTTS
from transformers import pipeline, AutoProcessor, WhisperForConditionalGeneration
from openai import OpenAI

# OpenAI client; reads OPENAI_API_KEY from the environment.
client = OpenAI()

# Load the processor and model separately for better control: the pipeline
# reuses the fine-tuned Swahili Whisper checkpoint's tokenizer and feature
# extractor instead of re-resolving them from the model id.
processor = AutoProcessor.from_pretrained("seeafricatz/kiaziboraasr")
model = WhisperForConditionalGeneration.from_pretrained("seeafricatz/kiaziboraasr")

pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    chunk_length_s=30,          # chunked long-form decoding in 30 s windows
    return_timestamps=False,
    # FIX: transformers accepts a language name ("swahili"), an ISO code
    # ("sw"), or a language token ("<|sw|>"). The previous "<|swahili|>"
    # mixes the name into token syntax and raises ValueError at generation.
    generate_kwargs={"language": "swahili", "task": "transcribe"}
)
def transcribe(audio):
    """Transcribe a recorded audio file to Swahili text.

    Parameters
    ----------
    audio : str | tuple | None
        Path to the recording (Gradio ``Audio(type="filepath")``), or
        ``None`` when nothing was recorded.

    Returns
    -------
    str
        The transcription text, or a human-readable error message.
    """
    try:
        if audio is None:
            return "No audio input received"

        # Gradio with type="filepath" passes a plain string; keep a
        # defensive fallback for tuple inputs.
        # NOTE(review): if the tuple were (sample_rate, data), audio[0]
        # would be the sample rate, not a path — confirm the Audio config.
        audio_path = audio if isinstance(audio, str) else audio[0]
        if not os.path.exists(audio_path):
            return "Audio file not found"

        result = pipe(
            audio_path,
            return_timestamps=False,
            generate_kwargs={
                # FIX: "<|swahili|>" is not a valid Whisper language
                # specifier (valid: "swahili", "sw", or "<|sw|>"); use the
                # plain language name.
                "language": "swahili",
                "task": "transcribe",
                "num_beams": 5,      # beam search for higher accuracy
                "temperature": 0     # deterministic decoding
            }
        )

        # The ASR pipeline normally returns {"text": ...}; tolerate a bare
        # string just in case.
        if isinstance(result, dict) and "text" in result:
            return result["text"]
        elif isinstance(result, str):
            return result
        else:
            return "Error in transcription format"

    except Exception as e:
        # Broad catch keeps the UI responsive; surface the cause in logs.
        print(f"Transcription error: {str(e)}")
        return "Error in transcription. Please try again."
|
|
116 |
demo = gr.Interface(
|
117 |
fn=process_audio_and_respond,
|
118 |
inputs=gr.Audio(
|
119 |
+
sources=["microphone"],
|
120 |
type="filepath",
|
121 |
label="Bonyeza kitufe cha kurekodi na uliza swali lako"
|
122 |
),
|