Spaces:

Kishorekumar7
/

Voice_to_Text_and_Image

Sleeping

App Files Files Community

Voice_to_Text_and_Image / app.py

Kishorekumar7

Update app.py

f182d83 verified 3 months ago

raw

history blame contribute delete

2.55 kB

	import base64
	import os
	import tempfile

	import streamlit as st
	import torch
	from diffusers import StableDiffusionPipeline
	from pydub import AudioSegment
	from transformers import pipeline

	# Set the browser-tab title and use the centered page layout.
	st.set_page_config(
	    page_title="Tamil Audio to Story & Image",
	    layout="centered",
	)

	# Load lightweight models
	@st.cache_resource
	def load_models():
	    """Build the four inference pipelines used by the app.

	    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse
	    the loaded objects instead of rebuilding them on every script rerun.

	    Returns:
	        A ``(whisper, translator, text_gen, image_gen)`` tuple holding, in
	        order, the ``openai/whisper-tiny`` speech-recognition pipeline, the
	        ``Helsinki-NLP/opus-mt-ta-en`` translation pipeline, the
	        ``sshleifer/tiny-gpt2`` text-generation pipeline and the
	        ``CompVis/stable-diffusion-v1-4`` image pipeline. The image pipeline
	        is moved to CUDA when ``torch.cuda.is_available()`` is true and to
	        the CPU otherwise.
	    """
	    speech_to_text = pipeline(
	        "automatic-speech-recognition",
	        model="openai/whisper-tiny",
	    )
	    tamil_to_english = pipeline(
	        "translation",
	        model="Helsinki-NLP/opus-mt-ta-en",
	    )
	    story_writer = pipeline(
	        "text-generation",
	        model="sshleifer/tiny-gpt2",
	    )
	    diffusion = StableDiffusionPipeline.from_pretrained(
	        "CompVis/stable-diffusion-v1-4"
	    )

	    # Only the diffusion pipeline is placed on an explicit device.
	    if torch.cuda.is_available():
	        target_device = "cuda"
	    else:
	        target_device = "cpu"
	    diffusion.to(target_device)

	    return speech_to_text, tamil_to_english, story_writer, diffusion

	# Obtain the four pipelines (load_models is wrapped in st.cache_resource).
	whisper, translator, text_gen, image_gen = load_models()

	# Page heading and a one-line usage hint.
	st.title("🎙️ Tamil Audio to Story & Image")
	st.write("Upload or record Tamil audio to generate English story and image.")

	# Let the user choose between uploading a file and recording live audio.
	input_mode = st.radio(
	    "Choose Input Mode",
	    ["Upload Audio", "Record Live Audio"],
	)

	# Raw bytes of the chosen audio; stays None until the user supplies some.
	audio_bytes = None
	if input_mode == "Upload Audio":
	    uploaded_file = st.file_uploader(
	        "Upload Tamil Audio (.wav, .mp3)",
	        type=["wav", "mp3"],
	        key="upload",
	    )
	    if uploaded_file is not None:
	        # getvalue() returns the whole buffer regardless of the current
	        # read position, unlike read().
	        audio_bytes = uploaded_file.getvalue()
	else:
	    # Streamlit has no st.audio_recorder; the built-in microphone widget is
	    # st.audio_input, which returns an uploaded-file-like object (or None
	    # while nothing has been recorded) and takes no `format` argument.
	    # NOTE(review): needs a Streamlit release that ships st.audio_input.
	    recorded_audio = st.audio_input("Record your audio", key="recorder")
	    if recorded_audio is not None:
	        audio_bytes = recorded_audio.getvalue()

	def _guess_audio_suffix(data):
	    """Return the file suffix (".wav" or ".mp3") matching the audio bytes.

	    The container is sniffed from the leading bytes: a RIFF/WAVE header
	    means WAV, an ID3 tag or an MPEG frame-sync marker means MP3. Anything
	    unrecognised falls back to ".wav", the suffix this script previously
	    used for every input.
	    """
	    if data[:4] == b"RIFF" and data[8:12] == b"WAVE":
	        return ".wav"
	    has_id3_tag = data[:3] == b"ID3"
	    has_frame_sync = (
	        len(data) >= 2 and data[0] == 0xFF and (data[1] & 0xE0) == 0xE0
	    )
	    if has_id3_tag or has_frame_sync:
	        return ".mp3"
	    return ".wav"


	def _transcribe_audio(data, suffix):
	    """Write the audio into a scratch directory and return Whisper's text.

	    MP3 input is first converted to WAV with pydub. The scratch directory
	    and every file in it are removed when the function returns or raises,
	    so no temporary files are left behind between runs.
	    """
	    with tempfile.TemporaryDirectory() as work_dir:
	        audio_path = os.path.join(work_dir, "input" + suffix)
	        with open(audio_path, "wb") as audio_file:
	            audio_file.write(data)

	        # Convert mp3 to wav if needed
	        if suffix == ".mp3":
	            wav_path = os.path.join(work_dir, "input.wav")
	            AudioSegment.from_mp3(audio_path).export(wav_path, format="wav")
	            audio_path = wav_path

	        return whisper(audio_path)["text"]


	if not audio_bytes:
	    st.warning("Please upload or record an audio to proceed.")
	else:
	    audio_suffix = _guess_audio_suffix(audio_bytes)
	    # Tell the player the real container instead of always claiming WAV.
	    player_format = "audio/mpeg" if audio_suffix == ".mp3" else "audio/wav"
	    st.audio(audio_bytes, format=player_format)

	    with st.spinner("Transcribing..."):
	        transcription = _transcribe_audio(audio_bytes, audio_suffix)
	    st.text_area("Transcribed Tamil Text", transcription)

	    if not transcription.strip():
	        # Nothing was recognised, so there is no text to translate and no
	        # prompt to build a story or an image from.
	        st.warning("No speech was recognised in the audio.")
	    else:
	        with st.spinner("Translating..."):
	            translation = translator(transcription)[0]['translation_text']
	        st.text_area("Translated English Text", translation)

	        with st.spinner("Generating Story..."):
	            # max_new_tokens limits only the generated continuation;
	            # max_length also counts the prompt, so a long translation
	            # would leave no room for any new text.
	            story = text_gen(translation, max_new_tokens=100)[0]['generated_text']
	        st.text_area("Generated Story", story)

	        with st.spinner("Generating Image..."):
	            image = image_gen(prompt=translation).images[0]
	        st.image(image, caption="Generated Image")