File size: 2,545 Bytes
4a997af f182d83 4dffac9 f182d83 9c054fd f182d83 9c054fd f182d83 b0b5043 f182d83 9c054fd f182d83 9c054fd f182d83 b0b5043 f182d83 9c054fd f182d83 9c054fd f182d83 9c054fd f182d83 9c054fd f182d83 9c054fd f182d83 9c054fd f182d83 9c054fd f182d83 60b0b0c f182d83 9c054fd f182d83 60b0b0c f182d83 9c054fd f182d83 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
import base64
import os
import tempfile

import streamlit as st
import torch
from diffusers import StableDiffusionPipeline
from pydub import AudioSegment
from transformers import pipeline
# Configure the browser-tab title and the page layout for the app.
st.set_page_config(
    page_title="Tamil Audio to Story & Image",
    layout="centered",
)
# Build every model the app needs; cached so reruns reuse the same objects.
@st.cache_resource
def load_models():
    """Create the speech, translation, text and image pipelines.

    Returns:
        A 4-tuple ``(whisper, translator, text_gen, image_gen)``:
        the speech-recognition pipeline, the Tamil-to-English translation
        pipeline, the text-generation pipeline and the Stable Diffusion
        pipeline (moved to CUDA when available, otherwise CPU).
    """
    # Pick the device for the diffusion model up front.
    target_device = "cuda" if torch.cuda.is_available() else "cpu"

    speech_to_text = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-tiny",
    )
    ta_to_en = pipeline(
        "translation",
        model="Helsinki-NLP/opus-mt-ta-en",
    )
    story_writer = pipeline(
        "text-generation",
        model="sshleifer/tiny-gpt2",
    )

    diffusion = StableDiffusionPipeline.from_pretrained(
        "CompVis/stable-diffusion-v1-4"
    )
    diffusion.to(target_device)

    return speech_to_text, ta_to_en, story_writer, diffusion
# ---------------------------------------------------------------------------
# Main page flow: collect Tamil audio (upload or microphone), transcribe it,
# translate the transcript to English, then generate a short story and an
# image from the translation.
# ---------------------------------------------------------------------------
whisper, translator, text_gen, image_gen = load_models()

st.title("🎙️ Tamil Audio to Story & Image")
st.write("Upload or record Tamil audio to generate English story and image.")

input_mode = st.radio("Choose Input Mode", ["Upload Audio", "Record Live Audio"])

audio_bytes = None
# Extension of the audio container; microphone recordings are WAV, while an
# upload overrides this with the extension of the uploaded file.
audio_suffix = ".wav"

if input_mode == "Upload Audio":
    uploaded_file = st.file_uploader(
        "Upload Tamil Audio (.wav, .mp3)", type=["wav", "mp3"], key="upload"
    )
    if uploaded_file:
        audio_bytes = uploaded_file.getvalue()
        # Keep the real extension so MP3 uploads are recognised further down.
        audio_suffix = os.path.splitext(uploaded_file.name)[1].lower() or ".wav"
else:
    # st.audio_input is Streamlit's built-in microphone widget; it returns a
    # file-like object holding the recording (or None until one exists).
    recording = st.audio_input("Record your audio", key="recorder")
    if recording:
        audio_bytes = recording.getvalue()

if audio_bytes:
    # Advertise the correct MIME type so the browser player can decode MP3s.
    playback_format = "audio/mpeg" if audio_suffix == ".mp3" else "audio/wav"
    st.audio(audio_bytes, format=playback_format)

    # Every temporary file created below is tracked here and removed in the
    # `finally` clause, even when conversion or transcription fails.
    temp_paths = []
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=audio_suffix) as tmp:
            temp_paths.append(tmp.name)
            tmp.write(audio_bytes)
            tmp_path = tmp.name

        # Convert mp3 to wav if needed
        if tmp_path.endswith(".mp3"):
            sound = AudioSegment.from_mp3(tmp_path)
            # Swap only the extension; str.replace would also rewrite any
            # ".mp3" that happens to appear earlier in the path.
            tmp_path = os.path.splitext(tmp_path)[0] + ".wav"
            temp_paths.append(tmp_path)
            sound.export(tmp_path, format="wav")

        with st.spinner("Transcribing..."):
            transcription = whisper(tmp_path)["text"]
    finally:
        for stale_path in temp_paths:
            if os.path.exists(stale_path):
                os.remove(stale_path)

    st.text_area("Transcribed Tamil Text", transcription)

    if not transcription.strip():
        # Nothing was recognised: skip the expensive downstream models rather
        # than translating and illustrating an empty prompt.
        st.warning("No speech was recognised in the audio. Please try another recording.")
    else:
        with st.spinner("Translating..."):
            translation = translator(transcription)[0]['translation_text']
        st.text_area("Translated English Text", translation)

        with st.spinner("Generating Story..."):
            story = text_gen(translation, max_length=100)[0]['generated_text']
        st.text_area("Generated Story", story)

        with st.spinner("Generating Image..."):
            image = image_gen(prompt=translation).images[0]
        st.image(image, caption="Generated Image")
else:
    st.warning("Please upload or record an audio to proceed.")
|