import gradio as gr
from transformers import GPT2LMHeadModel, GPT2Tokenizer, pipeline
# Initialize the GPT2 model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Initialize the Whisper ASR pipeline once at module level so the large
# model is not reloaded on every call
asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-large-v2")
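# Note: whisper-large-v2 is a multi-gigabyte checkpoint; if startup time or
# memory is a concern, a smaller checkpoint such as "openai/whisper-base"
# can be substituted in the same pipeline call.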

# Geriatric Depression Scale (GDS-15) quiz questions
questions = [
    "Are you basically satisfied with your life?",
    "Have you dropped many of your activities and interests?",
    "Do you feel that your life is empty?",
    "Do you often get bored?",
    "Are you in good spirits most of the time?",
    "Are you afraid that something bad is going to happen to you?",
    "Do you feel happy most of the time?",
    "Do you often feel helpless?",
    "Do you prefer to stay at home, rather than going out and doing things?",
    "Do you feel that you have more problems with memory than most?",
    "Do you think it is wonderful to be alive now?",
    "Do you feel worthless the way you are now?",
    "Do you feel full of energy?",
    "Do you feel that your situation is hopeless?",
    "Do you think that most people are better off than you are?"
]


def ask_questions(answers):
    """Calculate the score: one point per "yes" answer."""
    score = 0
    for answer in answers:
        # Normalize transcribed speech ("Yes." -> "yes") before comparing.
        normalized = answer.strip().lower().rstrip(".!?")
        if normalized == "yes":
            score += 1
        elif normalized != "no":
            raise ValueError(f"Invalid answer: {answer}")
    return score
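
# Note: the scorer above counts every "yes" as one point. On the standard
# GDS-15, positively worded items (questions 1, 5, 7, 11, and 13 here) are
# reverse-scored, so a "no" earns the point instead. A sketch of that scheme,
# assuming the question order defined in `questions` above:
REVERSE_SCORED = {0, 4, 6, 10, 12}  # zero-based indices of positively worded items

def gds15_score(answers):
    """Illustrative GDS-15 scoring with reverse-scored items."""
    score = 0
    for i, answer in enumerate(answers):
        said_yes = answer.strip().lower().rstrip(".!?") == "yes"
        # A depressive response is "no" on reverse-scored items, "yes" elsewhere.
        if said_yes != (i in REVERSE_SCORED):
            score += 1
    return score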


def understand_answers(audio_answers):
    """Convert audio answers to text using the shared Whisper ASR pipeline."""
    text_answers = []
    for audio in audio_answers:
        # The ASR pipeline returns a dict of the form {"text": "..."}.
        transcript = asr_pipeline(audio)
        text_answers.append(transcript["text"])
    return text_answers
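
# Illustrative usage, assuming answers were recorded to local files (the
# filenames here are hypothetical):
#   understand_answers(["answer_01.wav", "answer_02.wav"])  # -> ["Yes.", "No."]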


def modified_summarize(answers):
    """Summarize the combined answers with GPT-2."""
    answers_str = " ".join(answers)
    inputs = tokenizer.encode("summarize: " + answers_str, return_tensors="pt")
    summary_ids = model.generate(inputs, max_length=150, num_beams=5, early_stopping=True,
                                 pad_token_id=tokenizer.eos_token_id)  # GPT-2 has no pad token
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
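
# Note: GPT-2 is not instruction-tuned, so the "summarize:" prefix is only a
# heuristic and the model may simply continue the text. A dedicated
# summarization model is one alternative; a minimal sketch, assuming the
# "facebook/bart-large-cnn" checkpoint is acceptable here:
#
#   summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
#   summary = summarizer(answers_str, max_length=150, min_length=20)[0]["summary_text"]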


def assistant(*audio_answers):
    """Transcribe the audio answers, summarize them, and compute the score."""
    text_answers = understand_answers(audio_answers)
    summarized_text = modified_summarize(text_answers)
    score = ask_questions(text_answers)
    return summarized_text, f"Your score is: {score}/{len(questions)}", text_answers


# Gradio callback: the button click passes the recorded audio values
# positionally (one per question, in the order of `inp`) and expects one
# return value per output component.
def update(*audio_answers):
    summarized_text, score_string, _text_answers = assistant(*audio_answers)
    return summarized_text, score_string


with gr.Blocks() as demo:
    gr.Markdown("Start recording your responses below and then click **Run** to see the summary and your score.")

    # One microphone input per question.
    inp = []
    with gr.Column(scale=1, min_width=600):
        for i, question in enumerate(questions):
            gr.Markdown(f"**Question {i+1}:** {question}")
            # type="filepath" hands the recording to the ASR pipeline as a file path.
            audio_input = gr.Audio(source="microphone", type="filepath")
            inp.append(audio_input)

    # Two output textboxes: one for the summary and one for the score.
    out_summary = gr.Textbox(label="Summary", placeholder="A summary of your answers will appear here.")
    out_score = gr.Textbox(label="Score", placeholder="Your score will appear here.")

    btn = gr.Button("Run")
    btn.click(fn=update, inputs=inp, outputs=[out_summary, out_score])

demo.launch()
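
# Note: demo.launch(share=True) is a standard Gradio option that also creates
# a temporary public link, which can be handy for quick testing.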