|
import streamlit as st |
|
import requests |
|
import io |
|
from gradio_client import Client, handle_file |
|
import tempfile |
|
import os |
|
from utils import clean_response, get_translation, get_image_prompts, generate_images, generate_video |
|
import constants |
|
|
|
|
|
|
|
# Create the Gradio client for the Whisper Space only once per session and
# keep it in session state so later runs of this script reuse it.
if "client" not in st.session_state:
    st.session_state.client = Client(
        "habib926653/openai-whisper-large-v3-turbo",
        hf_token=constants.HF_TOKEN,
    )

# Initial value for every other session key this script reads. A key is only
# written when it is missing, so values stored by earlier runs are preserved.
_SESSION_DEFAULTS = {
    "transcript_visible": False,
    "translation_visible": False,
    "uploaded_file_name": None,
    "converted_audio": None,
    "was_converted": False,
    "transcript": None,
    "translation": None,
    "generated_video": None,
    "image_prompts": None,
    "generated_images": None,
}

for _state_key, _initial_value in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
|
|
|
|
|
def convert_to_mp3(audio_file):
    """Return an MP3 version of *audio_file*, converting it remotely if needed.

    A file whose name already ends in ``.mp3`` (compared case-insensitively)
    is returned untouched. Any other file is posted to
    ``constants.AUDIO_CONVERTER_ENDPOINT`` and the response body is wrapped
    in an in-memory file named ``converted.mp3``.

    Args:
        audio_file: File-like object with a ``name`` attribute (in this
            script, the object returned by ``st.file_uploader``).

    Returns:
        A ``(file, was_converted)`` tuple:
        ``(audio_file, False)`` when no conversion was needed,
        ``(io.BytesIO, True)`` after a successful conversion, or
        ``(None, None)`` when the conversion request failed.
    """
    # Case-insensitive so that e.g. "SONG.MP3" is not sent for a needless
    # round trip through the converter.
    if audio_file.name.lower().endswith(".mp3"):
        return audio_file, False

    url = constants.AUDIO_CONVERTER_ENDPOINT
    # NOTE(review): the part is labelled "audio/mp3" although the payload is
    # not MP3 yet; presumably the converter ignores it -- confirm.
    files = {"file": (audio_file.name, audio_file, "audio/mp3")}

    with st.spinner("Converting audio to MP3... Please wait."):
        try:
            # (connect, read) timeout: without one, an unresponsive
            # converter would block this script indefinitely.
            response = requests.post(url, files=files, timeout=(10, 300))
        except requests.RequestException:
            st.error("❌ Conversion service could not be reached. Please try again later.")
            return None, None

    if response.status_code != 200:
        st.error("❌ Conversion failed. Please try another format.")
        return None, None

    converted_file = io.BytesIO(response.content)
    converted_file.name = "converted.mp3"
    # No success banner here: the calling script already shows
    # "File successfully converted" whenever was_converted is True, so
    # emitting it here as well displayed the message twice.
    return converted_file, True
|
|
|
|
|
# Page header: a centred HTML title followed by a development-status notice.
_title_html = "<h1 style='text-align: center;'>AI Video Generator</h1>"
st.markdown(_title_html, unsafe_allow_html=True)
st.info("Video Generation Feature Currently Under Development")
|
|
|
|
|
# Main flow: upload -> MP3 conversion -> transcript -> translation ->
# image prompts -> images. Each step stores its result in st.session_state
# and is skipped while that result is already present, so the expensive
# calls are not repeated when the script runs again for the same upload.
audio_file = st.file_uploader("🔼 Upload your audio file:", type=constants.SUPPORTED_FORMATS)

if audio_file:
    # A changed file name is treated as a new upload: convert it and discard
    # every result derived from the previous file.
    # NOTE(review): a different file uploaded under the same name is not
    # detected by this check.
    if st.session_state.uploaded_file_name != audio_file.name:
        st.session_state.uploaded_file_name = audio_file.name
        st.session_state.converted_audio, st.session_state.was_converted = convert_to_mp3(audio_file)
        st.session_state.transcript = None
        st.session_state.translation = None
        st.session_state.image_prompts = None
        st.session_state.generated_images = None

    st.info(f"Uploaded file: **{audio_file.name}**")

    # converted_audio is None when conversion failed; nothing below can run then.
    if st.session_state.converted_audio:
        if not st.session_state.was_converted:
            st.success("🎧 The uploaded file is already in MP3 format.")
        else:
            st.success("✅ File successfully converted to MP3!")

        # Transcription: the audio is written to a temporary .mp3 file whose
        # path is handed to the Gradio client.
        if st.session_state.transcript is None:
            # getvalue() returns the full buffer regardless of the current
            # stream position. read() left the cached object at EOF, so a
            # retry after a failed transcription wrote an empty file.
            audio_bytes = st.session_state.converted_audio.getvalue()
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
                tmp_file.write(audio_bytes)
                tmp_file_path = tmp_file.name

            try:
                with st.spinner("Transcribing audio... Please wait."):
                    result = st.session_state.client.predict(
                        param_0=handle_file(tmp_file_path),
                        api_name="/predict",
                    )
                    st.session_state.transcript = clean_response(result)
            finally:
                # delete=False above means the file must be removed by hand;
                # do it even when the transcription call raises.
                os.remove(tmp_file_path)

        # Translation of the transcript.
        if st.session_state.transcript and st.session_state.translation is None:
            with st.spinner("Generating translation... Please wait."):
                st.session_state.translation = get_translation(st.session_state.transcript)

        st.audio(st.session_state.converted_audio, format="audio/mp3")

        # Transcript visibility toggle; the checkbox value is mirrored into
        # session state and used as the checkbox's initial value next time.
        toggle_transcript = st.checkbox("Show Transcript", value=st.session_state.transcript_visible)
        st.session_state.transcript_visible = toggle_transcript

        if st.session_state.transcript_visible:
            st.write("### Transcription:")
            st.write(st.session_state.transcript)

        # Translation visibility toggle, handled the same way.
        toggle_translation = st.checkbox("Show Translation", value=st.session_state.translation_visible)
        st.session_state.translation_visible = toggle_translation

        if st.session_state.translation_visible:
            st.write("### Translation:")
            st.write(st.session_state.translation)

        # Image prompts: when the translation text contains
        # 'Already in English', the transcript is used as the prompt source.
        if st.session_state.translation and st.session_state.image_prompts is None:
            with st.spinner("Generating image prompts... Please wait."):
                if 'Already in English' in st.session_state.translation:
                    st.info("Audio is Already in English. Using Transcription to generate Image Prompts")
                    st.session_state.image_prompts = get_image_prompts(st.session_state.transcript)['image_prompts']
                else:
                    st.session_state.image_prompts = get_image_prompts(st.session_state.translation)['image_prompts']

        # The upload reset stores None; normalise to a list so the append
        # and iteration below always work.
        if 'generated_images' not in st.session_state or st.session_state.generated_images is None:
            st.session_state.generated_images = []

        if st.session_state.image_prompts and not st.session_state.generated_images:
            # First pass for this upload: show each image as it is produced
            # and remember it for later runs.
            with st.spinner("Generating images... Please wait."):
                for prompt, image_path in generate_images(st.session_state.image_prompts):
                    st.image(image_path, caption=f"{prompt}", use_container_width=True)
                    st.session_state.generated_images.append((prompt, image_path))
        else:
            # Images already stored (or no prompts): just redisplay them.
            for prompt, image_path in st.session_state.generated_images:
                st.image(image_path, caption=f"{prompt}", use_container_width=True)

else:
    st.warning("Please upload an audio file to proceed.")
|
|
|
|