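"""Audio Recognition demo: upload an audio file, transcribe it with OpenAI's
Whisper API, and chat about the transcript through the Chat Completions API.

Assumes the v1.x OpenAI Python SDK; speech_recognition and pydub are only
needed for the commented-out offline fallback further down.
"""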
import requests
import gradio as gr
from openai import OpenAI

# Only needed by the commented-out offline fallback below:
# import speech_recognition as sr
# from pydub import AudioSegment
PLACEHOLDER = "Type and press Enter"
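
# Transcribe the uploaded audio with the hosted whisper-1 model. With the
# v1.x SDK, transcriptions.create returns a Transcription object whose
# .text attribute holds the plain transcript string.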
def record_text(audio_file, api_key):
    client = OpenAI(api_key=api_key)
    with open(audio_file, "rb") as f:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=f,
        )
    return transcript.text

# Offline fallback (currently unused): convert mp3 to wav with pydub, then
# transcribe locally with SpeechRecognition's Google recognizer.
# def record_text_offline(sound):
#     if sound.split(".")[-1] == "mp3":
#         # convert mp3 file to wav file
#         AudioSegment.from_mp3(sound).export("con_sound.wav", format="wav")
#         sound = "con_sound.wav"
#     r = sr.Recognizer()
#     with sr.AudioFile(sound) as source:
#         r.adjust_for_ambient_noise(source)
#         print("Converting audio file to text..")
#         audio = r.record(source, duration=None)  # record instead of listen
#     return r.recognize_google(audio, language="en-US").lower()
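
# Forward the transcript and the user's question to the Chat Completions
# endpoint over plain HTTP. The prompt and transcript are joined into one
# string message; the multi-part content array is meant for multimodal
# models and is unnecessary with gpt-3.5-turbo.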
def api_calling(audio_file, prompt, api_key):
    audio_text = record_text(audio_file, api_key)
    if len(prompt) == 0:
        prompt = "Apply proper punctuation, upper case and lower case to the provided text."
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {
                "role": "user",
                "content": f"{prompt}\n\nTranscript:\n{audio_text}",
            }
        ],
        "max_tokens": 1000,
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
    )
    response.raise_for_status()  # fail loudly on a bad key or request
    return response.json()["choices"][0]["message"]["content"]
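
# Gradio callback: run the transcribe-and-ask pipeline, then append the
# (question, answer) pair to the chat history kept in gr.State.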
def message_and_history(audio_file, input, history, api_key):
    history = history or []
    output_text = api_calling(audio_file, input, api_key)
    if len(input) == 0:
        input = "Speech from the audio."
    history.append((input, output_text))
    return history, history
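
# UI: audio upload and API key on the left, chatbot and question box on the
# right. The gr.State component carries the chat history between calls.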
block = gr.Blocks(theme=gr.themes.Glass(primary_hue="slate"))
with block:
    gr.Markdown("""<h1><center>Audio Recognition - Ask and Learn about an Audio</center></h1>""")
    with gr.Row():
        with gr.Column(scale=1):  # scale must be an integer in Gradio 4+
            aud_input = gr.Audio(type="filepath", label="Upload Audio")
            api_input = gr.Textbox(label="Enter API key")
            upload_button = gr.Button(value="Upload & Start Chat", interactive=True, variant="primary")
        with gr.Column():
            chatbot = gr.Chatbot(label="Ask questions about the audio")
            message = gr.Textbox(label="User", placeholder=PLACEHOLDER)
    state = gr.State()
    upload_button.click(message_and_history, inputs=[aud_input, message, state, api_input], outputs=[chatbot, state])
    message.submit(message_and_history, inputs=[aud_input, message, state, api_input], outputs=[chatbot, state])
    message.submit(lambda: None, None, message, queue=False)  # clear the textbox after submit

block.launch(share=True)  # share=True exposes a temporary public gradio.live URL