Spaces:

Vihang28
/

Audio_Recognition_QnA

Sleeping

File size: 2,837 Bytes

import speech_recognition as sr
from pydub import AudioSegment
import gradio as gr
from os import path
import requests
import openai
from openai import OpenAI

# Placeholder text displayed in the "User" textbox of the chat UI below.
prompt = "Type and press Enter"


def record_text(audio_file, api_key):
    """Transcribe an audio file with OpenAI's ``whisper-1`` model.

    Args:
        audio_file: Path of the audio file to upload for transcription.
        api_key: OpenAI API key used to construct the client.

    Returns:
        The transcription result as returned by the client for
        ``response_format="text"``.
    """
    client = OpenAI(api_key=api_key)
    # The context manager closes the file handle even when the request
    # raises, so repeated calls do not leak open descriptors.
    with open(audio_file, "rb") as audio_stream:
        return client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_stream,
            response_format="text",
        )


def api_calling(audio_file, prompt, api_key):
    """Transcribe an audio file and ask the chat model about the transcript.

    The audio is first transcribed with ``record_text``; the transcript is
    then sent together with ``prompt`` to the chat completions endpoint.

    Args:
        audio_file: Path of the audio file to transcribe.
        prompt: Instruction or question to apply to the transcript. When
            empty or ``None``, a default "fix punctuation and casing"
            instruction is used.
        api_key: OpenAI API key, used for both transcription and chat.

    Returns:
        The content of the first choice's message in the chat response.

    Raises:
        RuntimeError: If the chat completions endpoint returns a non-success
            status or a body without any choices.
    """
    audio_text = record_text(audio_file, api_key)
    # Truthiness check also covers None, not only the empty string.
    if not prompt:
        prompt = "Apply proper punctuations, upper case and lower case to the provided text."

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "text", "text": audio_text},
                ],
            }
        ],
        "max_tokens": 1500,
    }
    # Without a timeout, requests waits forever on a stalled connection and
    # the UI handler never returns.
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=120,
    )

    if not response.ok:
        # Surface the API's own error message instead of failing later with
        # an opaque KeyError on "choices".
        try:
            detail = response.json()["error"]["message"]
        except (ValueError, KeyError, TypeError):
            detail = response.text
        raise RuntimeError(
            f"Chat completion request failed ({response.status_code}): {detail}"
        )

    audio_text_res = response.json()
    choices = audio_text_res.get("choices")
    if not choices:
        raise RuntimeError("Chat completion response contained no choices.")
    return choices[0]["message"]["content"]



def message_and_history(audio_text, input, history, api_key):
    """Run one chat turn about the uploaded audio and record it in history.

    Args:
        audio_text: Value forwarded to ``api_calling`` as its audio file
            argument (the UI wires the audio component's file path here).
        input: The user's message; may be empty or ``None``.
        history: List of ``(user_text, reply)`` tuples from earlier turns,
            or ``None`` on the first turn.
        api_key: OpenAI API key forwarded to ``api_calling``.

    Returns:
        The updated history twice: once for the chatbot display and once
        for the session state.
    """
    history = history or []
    # Normalise a missing message to "" so neither this function nor
    # api_calling fails on None.
    input = input or ""
    output_text = api_calling(audio_text, input, api_key)

    # With no typed message, label the turn with a fixed caption.
    shown_input = input if input else "Speech from the video."
    history.append((shown_input, output_text))

    return history, history


# Build the Gradio interface: a row with an upload/API-key column and a chat
# column, then wire the button and textbox events to message_and_history.
block = gr.Blocks(theme=gr.themes.Glass(primary_hue="slate"))
with block:
    gr.Markdown("""<h1><center>Audio Recognition - Ask and Learn about an Audio</center></h1> """)
    with gr.Row():
        # NOTE(review): Gradio documents `scale` as an integer; confirm that
        # 0.5 gives the intended column width on the installed version.
        with gr.Column(scale=0.5):
            # type="filepath" means the handler receives a path string.
            aud_input = gr.Audio(type="filepath", label="Upload Audio", sources="upload")
            api_input = gr.Textbox(label="Enter Api-key")
            upload_button = gr.Button(value="Upload & Start Chat", interactive=True, variant="primary")
        with gr.Column():
            chatbot = gr.Chatbot(label="Ask questions about the audio")
            message = gr.Textbox(label="User", placeholder=prompt)
            # Holds the chat history between calls; starts out as None.
            state = gr.State()
            
    # Clicking the button and submitting the textbox run the same handler
    # with the same inputs and outputs.
    upload_button.click(message_and_history, inputs=[aud_input,message, state, api_input], outputs=[chatbot, state])
    message.submit(message_and_history, inputs=[aud_input,message, state, api_input], outputs=[chatbot, state])
    # Second submit handler writes None back to the textbox, which is meant to
    # clear the typed message; the button click has no such handler.
    message.submit(lambda: None, None, message, queue=False)
# Launches at import time; there is no `if __name__ == "__main__"` guard.
block.launch()