# Page header residue: author Kishorekumar7, commit "Update app.py" (f182d83, verified).
import base64
import os
import tempfile

import streamlit as st
import torch
from diffusers import StableDiffusionPipeline
from pydub import AudioSegment
from transformers import pipeline
# Set the browser tab title and use Streamlit's centered page layout.
st.set_page_config(page_title="Tamil Audio to Story & Image", layout="centered")
# Load lightweight models
@st.cache_resource
def load_models():
    """Construct the speech, translation, text and image pipelines.

    The function is wrapped in ``st.cache_resource`` so the loaded objects
    are shared instead of being rebuilt every time the script reruns.

    Returns:
        tuple: ``(whisper, translator, text_gen, image_gen)`` in that order:
        a Whisper speech-recognition pipeline, a Tamil-to-English translation
        pipeline, a GPT-2 text-generation pipeline and a Stable Diffusion
        pipeline placed on CUDA when available, otherwise on the CPU.
    """
    speech_to_text = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-tiny",
    )
    tamil_to_english = pipeline(
        "translation",
        model="Helsinki-NLP/opus-mt-ta-en",
    )
    story_writer = pipeline(
        "text-generation",
        model="sshleifer/tiny-gpt2",
    )
    diffusion = StableDiffusionPipeline.from_pretrained(
        "CompVis/stable-diffusion-v1-4"
    )

    # Prefer the GPU for image generation; fall back to the CPU otherwise.
    if torch.cuda.is_available():
        target_device = "cuda"
    else:
        target_device = "cpu"
    diffusion.to(target_device)

    return speech_to_text, tamil_to_english, story_writer, diffusion
# ---------------------------------------------------------------------------
# Main page flow: collect Tamil audio (upload or microphone), transcribe it,
# translate the transcript to English, then generate a story and an image
# from the translation.
# ---------------------------------------------------------------------------
whisper, translator, text_gen, image_gen = load_models()

st.title("🎙️ Tamil Audio to Story & Image")
st.write("Upload or record Tamil audio to generate English story and image.")

input_mode = st.radio("Choose Input Mode", ["Upload Audio", "Record Live Audio"])

audio_bytes = None
# File extension of the captured audio. It drives both the temp-file suffix
# and the MP3 -> WAV conversion below.
audio_suffix = ".wav"

if input_mode == "Upload Audio":
    uploaded_file = st.file_uploader(
        "Upload Tamil Audio (.wav, .mp3)", type=["wav", "mp3"], key="upload"
    )
    if uploaded_file is not None:
        # getvalue() returns the whole buffer regardless of the current
        # stream position, so it stays correct across script reruns.
        audio_bytes = uploaded_file.getvalue()
        audio_suffix = os.path.splitext(uploaded_file.name)[1].lower() or ".wav"
else:
    # Streamlit has no `st.audio_recorder`; the built-in microphone widget is
    # `st.audio_input`, which returns a file-like object (or None).
    # NOTE(review): requires a Streamlit release that ships st.audio_input
    # (1.40+ per the docs) - confirm the deployed version.
    recording = st.audio_input("Record your audio", key="recorder")
    if recording is not None:
        audio_bytes = recording.getvalue()

if audio_bytes:
    # Tell the player the real container type so MP3 uploads preview properly.
    preview_mime = "audio/mpeg" if audio_suffix == ".mp3" else "audio/wav"
    st.audio(audio_bytes, format=preview_mime)

    # Every temp file created below is tracked here and removed in `finally`,
    # because delete=False means nothing else will clean them up.
    temp_paths = []
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=audio_suffix) as tmp:
            temp_paths.append(tmp.name)
            tmp.write(audio_bytes)
            tmp_path = tmp.name

        # Convert mp3 to wav if needed
        if audio_suffix == ".mp3":
            sound = AudioSegment.from_mp3(tmp_path)
            # Swap only the extension; str.replace would also rewrite any
            # ".mp3" that happened to appear earlier in the path.
            tmp_path = os.path.splitext(tmp_path)[0] + ".wav"
            temp_paths.append(tmp_path)
            sound.export(tmp_path, format="wav")

        with st.spinner("Transcribing..."):
            transcription = whisper(tmp_path)["text"]
    finally:
        for path in temp_paths:
            if os.path.exists(path):
                os.remove(path)

    st.text_area("Transcribed Tamil Text", transcription)

    # Nothing to translate or draw if the recogniser produced no text.
    if not transcription.strip():
        st.warning("No speech could be transcribed from the audio.")
        st.stop()

    with st.spinner("Translating..."):
        translation = translator(transcription)[0]['translation_text']
    st.text_area("Translated English Text", translation)

    with st.spinner("Generating Story..."):
        story = text_gen(translation, max_length=100)[0]['generated_text']
    st.text_area("Generated Story", story)

    with st.spinner("Generating Image..."):
        image = image_gen(prompt=translation).images[0]
    st.image(image, caption="Generated Image")
else:
    st.warning("Please upload or record an audio to proceed.")