import speech_recognition as sr from pydub import AudioSegment import gradio as gr from os import path import requests import openai from openai import OpenAI import numpy as np prompt = "Type and press Enter" def record_text(audio_file,api_key): client = OpenAI(api_key = api_key) audio_file = open(audio_file, "rb") transcript = client.audio.transcriptions.create( model="whisper-1", file=audio_file, response_format="text" ) return transcript def api_calling(audio_file, prompt, api_key): audio_text = record_text(audio_file,api_key) if len(prompt) == 0: prompt = "Apply proper punctuations, upper case and lower case to the provided text." headers = { "Content-Type": "application/json", "Authorization": f"Bearer {api_key}" } payload = { "model": "gpt-3.5-turbo", "messages": [ { "role": "user", "content": [ { "type": "text", "text": prompt }, { "type": "text", "text": audio_text } ] } ], "max_tokens": 1500 } response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload) audio_text_res = response.json() return audio_text_res["choices"][0]["message"]["content"] def message_and_history(audio_text,input, history, api_key): history = history or [] output_text = api_calling(audio_text,input,api_key) if len(input) == 0: input = "Speech from the video." history.append((input, output_text)) else: history.append((input, output_text)) return history, history block = gr.Blocks(theme=gr.themes.Glass(primary_hue="slate")) with block: gr.Markdown("""