File size: 6,917 Bytes
c14d84c 661f7c4 c14d84c e0f02e8 c14d84c 698fb1d c14d84c 698fb1d c14d84c 698fb1d c14d84c 698fb1d c14d84c e0f02e8 c14d84c 698fb1d c14d84c 698fb1d c14d84c 698fb1d c14d84c 698fb1d c14d84c 698fb1d c14d84c 698fb1d c14d84c 698fb1d c14d84c 698fb1d c14d84c 698fb1d c14d84c 698fb1d c14d84c 698fb1d c14d84c 698fb1d c14d84c 698fb1d c14d84c 698fb1d c14d84c 698fb1d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
import streamlit as st
import requests
import io
from gradio_client import Client, handle_file
import tempfile
import os
from utils import clean_response, get_translation, get_image_prompts, generate_images, generate_video
import constants
# Initialize the client only once: it is stored in session state and reused
# whenever the key is already present.
if "client" not in st.session_state:
    st.session_state.client = Client(
        "habib926653/openai-whisper-large-v3-turbo",
        hf_token=constants.HF_TOKEN,
    )

# Initial values for the remaining state variables. A key that already exists
# in session state is left untouched, so values set earlier are preserved.
_STATE_DEFAULTS = {
    "transcript_visible": False,
    "translation_visible": False,
    "uploaded_file_name": None,
    "converted_audio": None,
    "was_converted": False,
    "transcript": None,
    "translation": None,
    "generated_video": None,
    "image_prompts": None,
    "generated_images": None,
}
for _state_key, _initial_value in _STATE_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
# Function to convert the audio to MP3 using the external API
def convert_to_mp3(audio_file):
    """Return the upload as MP3, converting it through the external API if needed.

    Args:
        audio_file: File-like object exposing a ``name`` attribute (in this
            script, the object returned by ``st.file_uploader``).

    Returns:
        A ``(file, was_converted)`` tuple:

        * ``(audio_file, False)`` when the name already ends in ``.mp3``
          (compared case-insensitively); no request is made.
        * ``(converted, True)`` when the converter answers with HTTP 200;
          ``converted`` is an ``io.BytesIO`` named ``"converted.mp3"``
          holding the response body.
        * ``(None, None)`` when the request fails or the converter answers
          with any other status; an error is shown in the UI.
    """
    # Compare case-insensitively so a file such as "SONG.MP3" is not sent
    # to the converter needlessly.
    if audio_file.name.lower().endswith(".mp3"):
        return audio_file, False  # File is already MP3

    # Send to the external converter API
    url = constants.AUDIO_CONVERTER_ENDPOINT
    # NOTE(review): the part is labelled "audio/mp3" although the payload is
    # not MP3 yet; kept unchanged because the converter's expectations are not
    # visible from this file.
    files = {"file": (audio_file.name, audio_file, "audio/mp3")}

    try:
        with st.spinner("Converting audio to MP3... Please wait."):
            # (connect, read) timeouts: without them requests waits forever
            # on a stalled converter and the app appears frozen.
            response = requests.post(url, files=files, timeout=(10, 300))
    except requests.RequestException:
        # Connection errors and timeouts follow the same failure contract as
        # a non-200 answer instead of crashing the script.
        st.error("❌ Could not reach the audio converter. Please try again later.")
        return None, None

    if response.status_code != 200:
        st.error("❌ Conversion failed. Please try another format.")
        return None, None

    # If conversion is successful, wrap the returned bytes in a named buffer
    converted_file = io.BytesIO(response.content)
    converted_file.name = "converted.mp3"
    st.success("✅ File successfully converted to MP3!")
    return converted_file, True  # File was converted
# Streamlit UI
# The title is raw HTML so it can be centred, which is why
# unsafe_allow_html is enabled for this one call.
_TITLE_HTML = "<h1 style='text-align: center;'>AI Video Generator</h1>"
st.markdown(_TITLE_HTML, unsafe_allow_html=True)
st.info("Video Generation Feature Currently Under Development")

# Upload audio file; the accepted types come from constants.SUPPORTED_FORMATS.
audio_file = st.file_uploader(
    "🔼 Upload your audio file:",
    type=constants.SUPPORTED_FORMATS,
)
# Main flow: convert the upload, transcribe it, translate it, then derive image
# prompts and images. Each stage stores its result in session state and is
# skipped when that result is already present.
if audio_file:
    # Reset states only when a new file is uploaded (detected by file name)
    if st.session_state.uploaded_file_name != audio_file.name:
        st.session_state.uploaded_file_name = audio_file.name
        st.session_state.converted_audio, st.session_state.was_converted = convert_to_mp3(audio_file)
        st.session_state.transcript = None
        st.session_state.translation = None
        st.session_state.image_prompts = None
        st.session_state.generated_images = None  # Reset image generation state

    st.info(f"Uploaded file: **{audio_file.name}**")

    # Everything below needs usable audio; convert_to_mp3 yields None on failure.
    if st.session_state.converted_audio:
        if not st.session_state.was_converted:
            st.success("🎧 The uploaded file is already in MP3 format.")
        else:
            st.success("✅ File successfully converted to MP3!")

        # Transcription logic
        if st.session_state.transcript is None:
            tmp_file_path = None
            try:
                # delete=False: the file is closed before its path is handed
                # to handle_file, so it must outlive the `with` block.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
                    tmp_file_path = tmp_file.name
                    # Rewind first: if a previous attempt failed after reading
                    # the stream, read() would otherwise return no data.
                    st.session_state.converted_audio.seek(0)
                    tmp_file.write(st.session_state.converted_audio.read())

                with st.spinner("Transcribing audio... Please wait."):
                    result = st.session_state.client.predict(
                        param_0=handle_file(tmp_file_path),
                        api_name="/predict"
                    )
                    st.session_state.transcript = clean_response(result)
            finally:
                # Remove the temporary file even when transcription raises.
                if tmp_file_path is not None:
                    os.remove(tmp_file_path)

        # Translation logic
        if st.session_state.transcript and st.session_state.translation is None:
            with st.spinner("Generating translation... Please wait."):
                st.session_state.translation = get_translation(st.session_state.transcript)

        st.audio(st.session_state.converted_audio, format="audio/mp3")

        # Toggle transcript visibility
        toggle_transcript = st.checkbox("Show Transcript", value=st.session_state.transcript_visible)
        st.session_state.transcript_visible = toggle_transcript
        if st.session_state.transcript_visible:
            st.write("### Transcription:")
            st.write(st.session_state.transcript)

        # Toggle translation visibility
        toggle_translation = st.checkbox("Show Translation", value=st.session_state.translation_visible)
        st.session_state.translation_visible = toggle_translation
        if st.session_state.translation_visible:
            st.write("### Translation:")
            st.write(st.session_state.translation)

        # Image generation logic
        if st.session_state.translation and st.session_state.image_prompts is None:
            with st.spinner("Generating image prompts... Please wait."):
                # When the translation text carries the 'Already in English'
                # marker, the prompts are built from the transcript instead.
                if 'Already in English' in st.session_state.translation:
                    st.info("Audio is Already in English. Using Transcription to generate Image Prompts")
                    prompt_source = st.session_state.transcript
                else:
                    prompt_source = st.session_state.translation
                st.session_state.image_prompts = get_image_prompts(prompt_source)['image_prompts']

        # Ensure that generated_images is always a list
        if 'generated_images' not in st.session_state or st.session_state.generated_images is None:
            st.session_state.generated_images = []

        # Generate images only if they have not been generated already
        if st.session_state.image_prompts and not st.session_state.generated_images:
            with st.spinner("Generating images... Please wait."):
                for prompt, image_path in generate_images(st.session_state.image_prompts):
                    # Display each image as soon as it's generated
                    st.image(image_path, caption=f"{prompt}", use_container_width=True)
                    # Append the generated image to the session state
                    st.session_state.generated_images.append((prompt, image_path))
        # Display all previously generated images
        else:
            for prompt, image_path in st.session_state.generated_images:
                # Display each image
                st.image(image_path, caption=f"{prompt}", use_container_width=True)
else:
    st.warning("Please upload an audio file to proceed.")
|