Spaces:
Sleeping
Sleeping
File size: 2,837 Bytes
1c66a80 ed0abb5 f67682f 678aa20 1c66a80 5a85225 93c16a2 f888789 97f56b5 ae21500 97f56b5 12d3854 246aa34 5a85225 12d3854 08f3107 12d3854 c1347d6 12d3854 d66b86d 12d3854 1c66a80 08f3107 1c66a80 08f3107 1c66a80 cefb089 e103d16 cefb089 1c66a80 cefb089 1c66a80 678aa20 1c66a80 89f0384 e687694 1c66a80 246aa34 1c66a80 b91edc3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import speech_recognition as sr
from pydub import AudioSegment
import gradio as gr
from os import path
import requests
import openai
from openai import OpenAI
# Placeholder text shown inside the "User" message textbox of the UI.
prompt = "Type and press Enter"
def record_text(audio_file, api_key):
    """Transcribe an audio file with OpenAI's ``whisper-1`` model.

    Args:
        audio_file: Path of the audio file to transcribe; it is opened in
            binary mode and uploaded to the transcription endpoint.
        api_key: OpenAI API key used to build the client.

    Returns:
        The transcription result, requested from the API in ``"text"``
        response format.
    """
    client = OpenAI(api_key=api_key)
    # Use a context manager so the file handle is released even when the
    # API call raises (the handle used to be left open).
    with open(audio_file, "rb") as audio_stream:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_stream,
            response_format="text",
        )
    return transcript
def api_calling(audio_file, prompt, api_key):
    """Transcribe an audio file and ask a chat model about the transcript.

    The audio is transcribed with ``record_text``; the user's prompt and the
    transcript are then sent as two text parts of a single user message to
    the OpenAI chat completions endpoint (model ``gpt-3.5-turbo``).

    Args:
        audio_file: Path of the audio file to transcribe.
        prompt: Instruction or question to apply to the transcript. When it
            is empty or ``None``, a default instruction asking for proper
            punctuation and letter casing is used instead.
        api_key: OpenAI API key, used for both the transcription and the
            chat completion request.

    Returns:
        The message content of the first choice in the API response.

    Raises:
        requests.HTTPError: If the chat completions endpoint answers with a
            non-success status (e.g. an invalid API key).
        requests.Timeout: If the endpoint does not answer in time.
    """
    audio_text = record_text(audio_file, api_key)

    # Truthiness check instead of len(): also covers a None prompt, which
    # would otherwise raise TypeError.
    if not prompt:
        prompt = "Apply proper punctuations, upper case and lower case to the provided text."

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "text", "text": audio_text},
                ],
            }
        ],
        "max_tokens": 1500,
    }

    # requests never times out by default; bound the wait so a stalled
    # connection cannot block the UI handler forever.
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=120,
    )
    # Surface API failures as an HTTP error instead of an opaque KeyError on
    # the missing "choices" field of an error payload.
    response.raise_for_status()

    audio_text_res = response.json()
    return audio_text_res["choices"][0]["message"]["content"]
def message_and_history(audio_text, input, history, api_key):
    """Answer one chat turn about the audio and append it to the history.

    Args:
        audio_text: Path of the uploaded audio file; despite the name it is
            forwarded to ``api_calling`` as the file to transcribe.
        input: Text the user typed. May be empty or ``None``.
        history: List of ``(user_text, answer)`` tuples from earlier turns,
            or ``None`` on the first call.
        api_key: OpenAI API key forwarded to ``api_calling``.

    Returns:
        The updated history twice: once for the chatbot display and once for
        the session state.
    """
    history = history or []

    # The UI clears the message box by writing None into it, so None may
    # arrive here; treat it like an empty string instead of crashing on len().
    user_text = input or ""
    output_text = api_calling(audio_text, user_text, api_key)

    # With no typed question the answer is about the raw transcript, so the
    # turn is labelled with a fixed caption.
    history.append((user_text or "Speech from the video.", output_text))
    return history, history
# Build the Gradio UI: the left column collects the audio file and API key,
# the right column hosts the chat about the audio.
block = gr.Blocks(theme=gr.themes.Glass(primary_hue="slate"))
with block:
    gr.Markdown("""<h1><center>Audio Recognition - Ask and Learn about an Audio</center></h1> """)
    with gr.Row():
        # Integer scales (1 : 2) keep the original 0.5 : 1 width ratio;
        # Gradio expects `scale` to be an integer.
        with gr.Column(scale=1):
            aud_input = gr.Audio(type="filepath", label="Upload Audio", sources="upload")
            # Mask the key so the secret is not displayed in clear text.
            api_input = gr.Textbox(label="Enter Api-key", type="password")
            upload_button = gr.Button(value="Upload & Start Chat", interactive=True, variant="primary")
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Ask questions about the audio")
            message = gr.Textbox(label="User", placeholder=prompt)
            # Holds the chat history between calls.
            state = gr.State()
    # Both the button and pressing Enter in the message box run one chat turn.
    upload_button.click(message_and_history, inputs=[aud_input, message, state, api_input], outputs=[chatbot, state])
    message.submit(message_and_history, inputs=[aud_input, message, state, api_input], outputs=[chatbot, state])
    # Clear the message box after a submit.
    message.submit(lambda: None, None, message, queue=False)
block.launch()