"""Streamlit app: record or upload Tamil speech, transcribe it with Groq
Whisper, translate it to English, then generate a short story and an image
from the translation."""

import os

import numpy as np
import soundfile as sf
import streamlit as st
import torch
from diffusers import AutoPipelineForText2Image
from groq import Groq
# FIX: streamlit_webrtc exports no `AudioRecorder`; importing it raised
# ImportError at startup. WebRtcMode is what we actually need.
from streamlit_webrtc import WebRtcMode, webrtc_streamer

# API keys come from the environment. HF_API_KEY is read but not used below;
# kept for parity with the original script (the SDXL-Turbo checkpoint is
# public and needs no token).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
HF_API_KEY = os.getenv("HF_API_KEY")

# Groq client for transcription, translation, and story generation.
client = Groq(api_key=GROQ_API_KEY)

# Load the text-to-image pipeline once at startup; prefer GPU when available.
device = "cuda" if torch.cuda.is_available() else "cpu"
image_gen = AutoPipelineForText2Image.from_pretrained("stabilityai/sdxl-turbo").to(device)


def transcribe(audio_path):
    """Transcribe the Tamil audio file at *audio_path* and return its text.

    Uses Groq's whisper-large-v3 with language fixed to Tamil ("ta").
    """
    with open(audio_path, "rb") as file:
        transcription = client.audio.transcriptions.create(
            file=(audio_path, file.read()),
            model="whisper-large-v3",
            language="ta",
            response_format="verbose_json",
        )
    # FIX: the SDK returns a Transcription object, not a dict —
    # transcription["text"] raised TypeError.
    return transcription.text


def translate_text(tamil_text):
    """Translate *tamil_text* (Tamil) to English via a Groq chat model."""
    # NOTE(review): "gemma-7b-it" may be deprecated/unavailable on Groq —
    # confirm against the account's model list before deploying.
    response = client.chat.completions.create(
        model="gemma-7b-it",
        messages=[
            {
                "role": "user",
                "content": f"Translate this Tamil text to English: {tamil_text}",
            }
        ],
    )
    return response.choices[0].message.content


def generate_text(prompt):
    """Return a short story generated from *prompt* via a Groq chat model."""
    # NOTE(review): "deepseek-coder-r1-7b" does not look like a valid Groq
    # model ID — verify before deploying.
    response = client.chat.completions.create(
        model="deepseek-coder-r1-7b",
        messages=[
            {"role": "user", "content": f"Write a short story about: {prompt}"}
        ],
    )
    return response.choices[0].message.content


def generate_image(prompt):
    """Generate and return a PIL image for *prompt* using SDXL-Turbo."""
    img = image_gen(prompt=prompt).images[0]
    return img


# ---------------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------------
st.title("Tamil Speech to Image & Story Generator")

input_method = st.radio("Choose Input Method:", ("Record Audio", "Upload Audio"))

audio_path = None

if input_method == "Record Audio":
    st.subheader("Record your Tamil speech")
    # FIX: webrtc_streamer has no `audio=` keyword; audio-only capture is
    # requested via media_stream_constraints, and SENDONLY mode makes the
    # browser stream audio to the server without playback.
    ctx = webrtc_streamer(
        key="record_audio",
        mode=WebRtcMode.SENDONLY,
        media_stream_constraints={"audio": True, "video": False},
    )
    if ctx.audio_receiver:
        # FIX: get_frames() returns av.AudioFrame objects, which soundfile
        # cannot write directly; convert each frame to a numpy array and use
        # the frame's real sample rate instead of a hard-coded 16000.
        frames = ctx.audio_receiver.get_frames(timeout=1)
        if frames:
            sample_rate = frames[0].sample_rate
            # NOTE(review): assumes frames arrive as (channels, samples)
            # arrays from AudioFrame.to_ndarray() — confirm channel layout
            # against the av documentation for the negotiated format.
            samples = np.concatenate([f.to_ndarray().T for f in frames])
            audio_path = "recorded_audio.wav"
            sf.write(audio_path, samples, sample_rate)
elif input_method == "Upload Audio":
    uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3"])
    if uploaded_file:
        audio_path = "uploaded_audio.wav"
        with open(audio_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

if st.button("Generate"):
    if not audio_path:
        st.error("Please provide an audio file.")
        st.stop()

    # Pipeline: Tamil speech -> Tamil text -> English text -> story + image.
    tamil_text = transcribe(audio_path)
    english_text = translate_text(tamil_text)
    story = generate_text(english_text)
    image = generate_image(english_text)

    # Display results
    st.subheader("Tamil Transcription")
    st.write(tamil_text)

    st.subheader("English Translation")
    st.write(english_text)

    st.subheader("Generated Story")
    st.write(story)

    st.subheader("Generated Image")
    st.image(image, caption="Generated Image")