import requests
import gradio as gr
from openai import OpenAI
from pydub import AudioSegment
from moviepy.editor import VideoFileClip

# placeholder text shown in the chat textbox
prompt = "Type and press Enter"

def record_text(audio_file, api_key):
    """Re-encode the extracted audio to MP3 and transcribe it with Whisper."""
    client = OpenAI(api_key=api_key)

    # convert the intermediate WAV file to MP3 before uploading it
    output_file = "converted_sound.mp3"
    sound = AudioSegment.from_wav(audio_file)
    sound.export(output_file, format="mp3")

    # ask for an SRT transcript so timestamps are preserved
    with open(output_file, "rb") as mp3_file:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=mp3_file,
            response_format="srt",
        )
    return transcript

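# For reference, an illustrative (not produced here) SRT transcript looks like:
#
#   1
#   00:00:00,000 --> 00:00:04,000
#   Hello and welcome to the video.
#
#   2
#   00:00:04,000 --> 00:00:08,500
#   Today we will look at the quarterly results.
#
# Each block is: index, timestamp range, text, blank line. api_calling below
# relies on this 4-line layout when it slices the spoken text out of the transcript.
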
def api_calling(audio_file, prompt, api_key):
    """Transcribe the audio; if a prompt is given, answer it with GPT using the transcript."""
    audio_text = record_text(audio_file, api_key)

    # pull the spoken text out of every 4-line SRT block
    sp_txt = audio_text.split("\n")
    new_lst = ''
    for i in range(2, len(sp_txt), 4):
        new_lst = new_lst + ' ' + sp_txt[i]

    if len(prompt) == 0:
        # no question asked: return the plain transcript text
        return new_lst
    else:
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}"
        }
        payload = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "text", "text": audio_text}
                    ]
                }
            ],
            "max_tokens": 1000
        }
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload
        )
        audio_text_res = response.json()
        return audio_text_res["choices"][0]["message"]["content"]

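# A minimal alternative sketch (not used by the app above): the same chat request
# could go through the already-imported OpenAI client instead of raw requests.
# The helper name chat_with_transcript is an illustrative assumption.
def chat_with_transcript(prompt, transcript, api_key):
    client = OpenAI(api_key=api_key)
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": f"{prompt}\n\nTranscript:\n{transcript}"}],
        max_tokens=1000,
    )
    return completion.choices[0].message.content
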
def convert_to_wav(mp4_file, wav_file):
    """Extract the audio track from the uploaded .mp4 and save it as a WAV file."""
    video = VideoFileClip(mp4_file)
    video.audio.write_audiofile(wav_file)
    video.close()

def message_and_history(video_file, input, history, api_key):
    """Gradio callback: pull the audio out of the video, query the model, update the chat."""
    wav_file = "output.wav"
    convert_to_wav(video_file, wav_file)

    history = history or []
    output_text = api_calling(wav_file, input, api_key)

    # with an empty textbox, label the turn as the plain speech transcript
    if len(input) == 0:
        input = "Speech from the video."
    history.append((input, output_text))

    return history, history

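# Illustrative standalone call without the UI (file path and key are placeholders):
#   history, _ = message_and_history("sample.mp4", "Summarize the video", [], "sk-...")
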
block = gr.Blocks(theme=gr.themes.Soft(primary_hue="slate")) |
|
with block: |
|
gr.Markdown("""<h1><center>Stock-Analysis</center></h1> """) |
|
with gr.Row(): |
|
with gr.Column(scale=0.5): |
|
vid_input = gr.Video(format="mp4", label="Upload .mp4 file") |
|
api_input = gr.Textbox(label="Enter Api-key") |
|
upload_button = gr.Button(value="Upload & Start Chat", interactive=True, variant="primary") |
|
with gr.Column(): |
|
chatbot = gr.Chatbot(label="Ask questions about the Video") |
|
message = gr.Textbox(label="User", placeholder=prompt) |
|
state = gr.State() |
|
|
|
upload_button.click(message_and_history, inputs=[vid_input,message, state, api_input], outputs=[chatbot, state]) |
|
message.submit(message_and_history, inputs=[vid_input,message, state, api_input], outputs=[chatbot, state]) |
|
message.submit(lambda: None, None, message, queue=False) |
|
block.launch() |