techysanoj committed on
Commit
97549eb
·
verified ·
1 Parent(s): 500ff86

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -28
app.py CHANGED
@@ -2,54 +2,59 @@ import gradio as gr
2
  from transformers import pipeline
3
  from gtts import gTTS
4
  import tempfile
 
5
 
6
  # Initialize the speech-to-text transcriber
7
- from transformers import pipeline
8
  transcriber = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-large-xlsr-53-english")
9
 
10
- # Initialize the pre-trained question-answering model
11
- model_name = "AVISHKAARAM/avishkaarak-ekta-hindi"
12
- qa_model = pipeline("question-answering", model=model_name)
13
 
14
  def answer_question(context, question=None, audio=None):
15
- # Handle audio input
16
- if audio is not None:
17
- # Convert audio to text using transcriber
18
- transcription_result = transcriber(audio)["text"]
19
- question_text = transcription_result
20
- else:
21
- question_text = question
 
 
 
 
 
 
 
 
 
22
 
23
- # Generate the answer using the QA model
24
- qa_result = qa_model(question=question_text, context=context)
25
- answer = qa_result["answer"]
26
 
27
- # Convert the answer to speech using gTTS
28
- tts = gTTS(text=answer, lang='en')
29
- audio_path = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
30
- tts.save(audio_path)
31
 
32
- return answer, audio_path
33
 
34
  # Define the Gradio interface
35
- context_input = gr.Textbox(label="Context")
36
  question_input = gr.Textbox(label="Question")
37
- audio_input = gr.Audio(type="filepath", label="Question Audio")
38
 
39
  output_text = gr.Textbox(label="Answer")
40
- output_audio = gr.Audio(label="Answer Audio")
41
 
42
- inter = gr.Interface(
43
  fn=answer_question,
44
  inputs=[context_input, question_input, audio_input],
45
  outputs=[output_text, output_audio],
46
- title="Question Answering",
47
- description="Enter a context and a question to get an answer. You can also record audio with the question.",
48
  examples=[
49
  ["The capital of France is Paris.", "What is the capital of France?", None],
50
  ["OpenAI is famous for developing GPT-3.", "What is OpenAI known for?", None],
51
- ]
52
  )
53
 
54
- # Launch the Gradio interface
55
- inter.launch()
 
 
2
  from transformers import pipeline
3
  from gtts import gTTS
4
  import tempfile
5
+ import os
6
 
# Speech recognition pipeline: turns a recorded question into text.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="jonatasgrosman/wav2vec2-large-xlsr-53-english",
)

# Question-answering pipeline: picks the answer out of a context passage.
qa_model = pipeline(
    task="question-answering",
    model="AVISHKAARAM/avishkaarak-ekta-hindi",
)


def answer_question(context, question=None, audio=None):
    """Answer a question about ``context`` and read the answer aloud.

    The question is taken from ``audio`` when it is provided (transcribed by
    the module-level ``transcriber``); otherwise the typed ``question`` is
    used. The answer is extracted by the module-level ``qa_model`` and then
    converted to an MP3 file with gTTS.

    Args:
        context: Passage the answer is extracted from.
        question: The question as text; used only when ``audio`` is falsy.
        audio: Path to a recording of the question, or ``None``.

    Returns:
        A ``(text, audio_path)`` tuple. ``text`` is the answer, or a message
        explaining why no answer was produced. ``audio_path`` is the path of
        the MP3 with the spoken answer, or ``None`` when no speech is
        available (invalid input, model error, or speech synthesis failure).
    """
    # Validate up front so the user gets a clear message instead of the raw
    # exception text the models produce for empty input.
    if not context or not context.strip():
        return "Please provide a context.", None

    try:
        # A recorded question takes precedence over the typed one.
        question_text = transcriber(audio)["text"] if audio else question
        if not question_text or not question_text.strip():
            return "Please provide a question as text or audio.", None

        answer = qa_model(question=question_text, context=context)["answer"]
    except Exception as e:
        # UI boundary: show the error in the answer box rather than crashing.
        return str(e), None

    # gTTS refuses empty text, so there is nothing to speak in this case.
    if not answer or not answer.strip():
        return "No answer could be found in the context.", None

    audio_path = None
    try:
        # Close the handle before gTTS writes to the path; delete=False keeps
        # the file on disk so the caller can serve it afterwards.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            audio_path = tmp.name
        gTTS(text=answer, lang="en").save(audio_path)
    except Exception:
        import logging

        logging.getLogger(__name__).exception("Speech synthesis failed")
        # Do not leave an empty or partial MP3 behind.
        if audio_path is not None and os.path.exists(audio_path):
            os.remove(audio_path)
        # The text answer is still valid; return it without audio.
        return answer, None

    return answer, audio_path
 
 
36
 
 
37
 
# Input components, in the same order as answer_question's parameters
# (context, question, audio).
context_input = gr.Textbox(label="Context", lines=3)
question_input = gr.Textbox(label="Question")
audio_input = gr.Audio(type="filepath", label="Question (Audio Input)")

# Output components, in the same order as answer_question's return tuple
# (answer text, answer audio).
output_text = gr.Textbox(label="Answer")
output_audio = gr.Audio(label="Answer (Audio Output)")

interface = gr.Interface(
    title="Multimodal Question Answering",
    description="Provide a context and either a text question or an audio question to get an answer.",
    fn=answer_question,
    inputs=[context_input, question_input, audio_input],
    outputs=[output_text, output_audio],
    # Each example row is [context, question, audio]; the audio slot is unused.
    examples=[
        ["The capital of France is Paris.", "What is the capital of France?", None],
        ["OpenAI is famous for developing GPT-3.", "What is OpenAI known for?", None],
    ],
)

# Start the web app only when the file is run as a script.
if __name__ == "__main__":
    interface.launch()