Kishorekumar7's picture
Update app.py
f182d83 verified
raw
history blame
2.55 kB
import streamlit as st
import tempfile
import torch
from transformers import pipeline
from diffusers import StableDiffusionPipeline
from pydub import AudioSegment
import base64
# Configure the browser-tab title and use the centered page layout.
st.set_page_config(
    page_title="Tamil Audio to Story & Image",
    layout="centered",
)
# Build every model the app needs; cached so Streamlit reruns reuse them.
@st.cache_resource
def load_models():
    """Create the speech, translation, text and image pipelines.

    The function is wrapped in ``st.cache_resource`` so the pipelines are
    constructed once and shared between script reruns.

    Returns:
        A 4-tuple ``(whisper, translator, text_gen, image_gen)``:
        the speech-recognition pipeline, the Tamil-to-English translation
        pipeline, the text-generation pipeline and the Stable Diffusion
        pipeline (moved to CUDA when available, otherwise left on CPU).
    """
    # Pick the device for the diffusion pipeline: GPU if present, else CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    speech_to_text = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-tiny",
    )
    tamil_to_english = pipeline(
        "translation",
        model="Helsinki-NLP/opus-mt-ta-en",
    )
    story_writer = pipeline(
        "text-generation",
        model="sshleifer/tiny-gpt2",
    )

    diffusion = StableDiffusionPipeline.from_pretrained(
        "CompVis/stable-diffusion-v1-4"
    )
    diffusion.to(device)

    return speech_to_text, tamil_to_english, story_writer, diffusion
# ---------------------------------------------------------------------------
# App body: collect Tamil audio (upload or microphone), transcribe it,
# translate the transcript to English, then generate a short story and an
# image from the translation.
# ---------------------------------------------------------------------------
whisper, translator, text_gen, image_gen = load_models()

st.title("🎙️ Tamil Audio to Story & Image")
st.write("Upload or record Tamil audio to generate English story and image.")

input_mode = st.radio("Choose Input Mode", ["Upload Audio", "Record Live Audio"])

audio_bytes = None
# Extension of the incoming audio. Microphone recordings are WAV; an upload
# may switch this to ".mp3" so the conversion step below actually runs.
audio_suffix = ".wav"

if input_mode == "Upload Audio":
    uploaded_file = st.file_uploader(
        "Upload Tamil Audio (.wav, .mp3)", type=["wav", "mp3"], key="upload"
    )
    if uploaded_file is not None:
        # getvalue() returns the whole buffer regardless of the read cursor.
        audio_bytes = uploaded_file.getvalue()
        if uploaded_file.name.lower().endswith(".mp3"):
            audio_suffix = ".mp3"
else:
    # st.audio_input is Streamlit's built-in microphone widget (available in
    # recent Streamlit releases); it returns a file-like object or None.
    recording = st.audio_input("Record your audio", key="recorder")
    if recording is not None:
        audio_bytes = recording.getvalue()

if audio_bytes:
    # Advertise the real container type so the browser player can decode it.
    audio_mime = "audio/mpeg" if audio_suffix == ".mp3" else "audio/wav"
    st.audio(audio_bytes, format=audio_mime)

    # Everything written to disk lives in a temporary directory that is
    # removed as soon as transcription finishes, so reruns do not leak files.
    with st.spinner("Transcribing..."), tempfile.TemporaryDirectory() as work_dir:
        with tempfile.NamedTemporaryFile(
            dir=work_dir, suffix=audio_suffix, delete=False
        ) as tmp:
            tmp.write(audio_bytes)
            audio_path = tmp.name

        # Convert mp3 to wav if needed
        if audio_suffix == ".mp3":
            sound = AudioSegment.from_mp3(audio_path)
            # Swap only the trailing extension, never other parts of the path.
            audio_path = audio_path[: -len(audio_suffix)] + ".wav"
            sound.export(audio_path, format="wav")

        transcription = whisper(audio_path)["text"]

    if not transcription.strip():
        # Silent or unintelligible audio: nothing to translate or illustrate.
        st.error("No speech could be transcribed from the audio. Please try again.")
    else:
        st.text_area("Transcribed Tamil Text", transcription)

        with st.spinner("Translating..."):
            translation = translator(transcription)[0]["translation_text"]
        st.text_area("Translated English Text", translation)

        with st.spinner("Generating Story..."):
            # max_new_tokens bounds only the continuation; max_length would
            # also count the prompt and leave no room for long translations.
            story = text_gen(translation, max_new_tokens=100)[0]["generated_text"]
        st.text_area("Generated Story", story)

        with st.spinner("Generating Image..."):
            image = image_gen(prompt=translation).images[0]
        st.image(image, caption="Generated Image")
else:
    st.warning("Please upload or record an audio to proceed.")