import requests
import gradio as gr
from openai import OpenAI
from pydub import AudioSegment
from moviepy.editor import VideoFileClip

# placeholder text shown in the chat textbox
prompt = "Type and press Enter"

def record_text(audio_file, api_key):
    """Re-encode the extracted audio to MP3 and transcribe it with Whisper."""
    client = OpenAI(api_key=api_key)

    # convert the intermediate WAV file to MP3 before uploading it
    output_file = "converted_sound.mp3"
    sound = AudioSegment.from_wav(audio_file)
    sound.export(output_file, format="mp3")

    # ask for an SRT transcript so timestamps are preserved
    with open(output_file, "rb") as mp3_file:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=mp3_file,
            response_format="srt",
        )
    return transcript

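# For reference, an illustrative (not produced here) SRT transcript looks like:
#
#   1
#   00:00:00,000 --> 00:00:04,000
#   Hello and welcome to the video.
#
#   2
#   00:00:04,000 --> 00:00:08,500
#   Today we will look at the quarterly results.
#
# Each block is: index, timestamp range, text, blank line. api_calling below
# relies on this 4-line layout when it slices the spoken text out of the transcript.
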
def api_calling(audio_file, prompt, api_key):
    """Transcribe the audio; if a prompt is given, answer it with GPT using the transcript."""
    audio_text = record_text(audio_file, api_key)

    # pull the spoken text out of every 4-line SRT block
    sp_txt = audio_text.split("\n")
    new_lst = ''
    for i in range(2, len(sp_txt), 4):
        new_lst = new_lst + ' ' + sp_txt[i]

    if len(prompt) == 0:
        # no question asked: return the plain transcript text
        return new_lst
    else:
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}"
        }
        payload = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "text", "text": audio_text}
                    ]
                }
            ],
            "max_tokens": 1000
        }
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload
        )
        audio_text_res = response.json()
        return audio_text_res["choices"][0]["message"]["content"]

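# A minimal alternative sketch (not used by the app above): the same chat request
# could go through the already-imported OpenAI client instead of raw requests.
# The helper name chat_with_transcript is an illustrative assumption.
def chat_with_transcript(prompt, transcript, api_key):
    client = OpenAI(api_key=api_key)
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": f"{prompt}\n\nTranscript:\n{transcript}"}],
        max_tokens=1000,
    )
    return completion.choices[0].message.content
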
def convert_to_wav(mp4_file, wav_file):
    """Extract the audio track from the uploaded .mp4 and save it as a WAV file."""
    video = VideoFileClip(mp4_file)
    video.audio.write_audiofile(wav_file)
    video.close()

def message_and_history(video_file, input, history, api_key):
    """Gradio callback: pull the audio out of the video, query the model, update the chat."""
    wav_file = "output.wav"
    convert_to_wav(video_file, wav_file)

    history = history or []
    output_text = api_calling(wav_file, input, api_key)

    # with an empty textbox, label the turn as the plain speech transcript
    if len(input) == 0:
        input = "Speech from the video."
    history.append((input, output_text))

    return history, history

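# Illustrative standalone call without the UI (file path and key are placeholders):
#   history, _ = message_and_history("sample.mp4", "Summarize the video", [], "sk-...")
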
block = gr.Blocks(theme=gr.themes.Soft(primary_hue="slate")) |
|
with block: |
|
gr.Markdown("""<h1><center>Stock-Analysis</center></h1> """) |
|
with gr.Row(): |
|
with gr.Column(scale=0.5): |
|
vid_input = gr.Video(format="mp4", label="Upload .mp4 file") |
|
api_input = gr.Textbox(label="Enter Api-key") |
|
upload_button = gr.Button(value="Upload & Start Chat", interactive=True, variant="primary") |
|
with gr.Column(): |
|
chatbot = gr.Chatbot(label="Ask questions about the Video") |
|
message = gr.Textbox(label="User", placeholder=prompt) |
|
state = gr.State() |
|
|
|
upload_button.click(message_and_history, inputs=[vid_input,message, state, api_input], outputs=[chatbot, state]) |
|
message.submit(message_and_history, inputs=[vid_input,message, state, api_input], outputs=[chatbot, state]) |
|
message.submit(lambda: None, None, message, queue=False) |
|
block.launch() |