seeafrica committed on
Commit
8598440
·
verified ·
1 Parent(s): d389419

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -6
app.py CHANGED
@@ -1,29 +1,53 @@
1
  import os
2
  import gradio as gr
3
  from gtts import gTTS
4
- from transformers import pipeline
5
  from openai import OpenAI
6
 
7
  client = OpenAI()
8
 
 
 
 
 
9
  pipe = pipeline(
10
  "automatic-speech-recognition",
11
- model="seeafricatz/kiaziboraasr",
 
 
12
  chunk_length_s=30,
13
- return_timestamps=False
 
14
  )
15
 
16
  def transcribe(audio):
17
  try:
18
  if audio is None:
19
  return "No audio input received"
 
20
  # Get the audio file path from the tuple if it exists
21
  audio_path = audio if isinstance(audio, str) else audio[0]
22
  if not os.path.exists(audio_path):
23
  return "Audio file not found"
24
 
25
- result = pipe(audio_path)
26
- return result["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  except Exception as e:
28
  print(f"Transcription error: {str(e)}")
29
  return "Error in transcription. Please try again."
@@ -92,7 +116,7 @@ def process_audio_and_respond(audio):
92
  demo = gr.Interface(
93
  fn=process_audio_and_respond,
94
  inputs=gr.Audio(
95
- sources="microphone",
96
  type="filepath",
97
  label="Bonyeza kitufe cha kurekodi na uliza swali lako"
98
  ),
 
1
  import os
2
  import gradio as gr
3
  from gtts import gTTS
4
+ from transformers import pipeline, AutoProcessor, WhisperForConditionalGeneration
5
  from openai import OpenAI
6
 
7
  client = OpenAI()
8
 
9
+ # Initialize the processor and model separately for better control
10
+ processor = AutoProcessor.from_pretrained("seeafricatz/kiaziboraasr")
11
+ model = WhisperForConditionalGeneration.from_pretrained("seeafricatz/kiaziboraasr")
12
+
13
  pipe = pipeline(
14
  "automatic-speech-recognition",
15
+ model=model,
16
+ tokenizer=processor.tokenizer,
17
+ feature_extractor=processor.feature_extractor,
18
  chunk_length_s=30,
19
+ return_timestamps=False,
20
+ generate_kwargs={"language": "<|swahili|>", "task": "transcribe"}
21
  )
22
 
23
  def transcribe(audio):
24
  try:
25
  if audio is None:
26
  return "No audio input received"
27
+
28
  # Get the audio file path from the tuple if it exists
29
  audio_path = audio if isinstance(audio, str) else audio[0]
30
  if not os.path.exists(audio_path):
31
  return "Audio file not found"
32
 
33
+ result = pipe(
34
+ audio_path,
35
+ return_timestamps=False,
36
+ generate_kwargs={
37
+ "language": "<|swahili|>",
38
+ "task": "transcribe",
39
+ "num_beams": 5,
40
+ "temperature": 0
41
+ }
42
+ )
43
+
44
+ if isinstance(result, dict) and "text" in result:
45
+ return result["text"]
46
+ elif isinstance(result, str):
47
+ return result
48
+ else:
49
+ return "Error in transcription format"
50
+
51
  except Exception as e:
52
  print(f"Transcription error: {str(e)}")
53
  return "Error in transcription. Please try again."
 
116
  demo = gr.Interface(
117
  fn=process_audio_and_respond,
118
  inputs=gr.Audio(
119
+ sources=["microphone"],
120
  type="filepath",
121
  label="Bonyeza kitufe cha kurekodi na uliza swali lako"
122
  ),