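"""Tamil Speech to Image & Story Generator (Streamlit app).

Pipeline: Tamil audio -> Whisper transcription (Groq) -> English translation
(Groq chat model) -> short story (Groq chat model) -> image (SDXL-Turbo via
diffusers). Run with: streamlit run app.py
Requires GROQ_API_KEY and HF_API_KEY in the environment.
"""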
import os
import streamlit as st
import torch
import tempfile
from groq import Groq
from diffusers import AutoPipelineForText2Image
# Load API keys
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
HF_API_KEY = os.getenv("HF_API_KEY")
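# Both keys are expected in the environment (e.g. as Hugging Face Space
# secrets); the Groq client below typically raises at construction if no
# API key can be found.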
# Initialize Groq client
client = Groq(api_key=GROQ_API_KEY)
# Load image generation model (SDXL-Turbo)
device = "cuda" if torch.cuda.is_available() else "cpu"
image_gen = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/sdxl-turbo",
    token=HF_API_KEY,  # `use_auth_token` is deprecated in recent diffusers releases
).to(device)
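# Note: generation on CPU will be slow. On a GPU, loading with
# torch_dtype=torch.float16 and variant="fp16" (both accepted by
# from_pretrained) roughly halves memory use.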
# Function to transcribe Tamil audio using Groq's Whisper
def transcribe(audio_bytes):
    if not audio_bytes:
        return "No audio provided."

    # Save the audio to a temporary file so it can be sent to the API
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        temp_audio.write(audio_bytes)
        temp_audio_path = temp_audio.name

    # Call the Whisper transcription endpoint
    with open(temp_audio_path, "rb") as file:
        transcription = client.audio.transcriptions.create(
            file=file,
            model="whisper-large-v3",
            language="ta",
            response_format="verbose_json",
        )

    # Clean up the temp file
    os.remove(temp_audio_path)
    # The SDK returns an object, not a dict, so use attribute access
    return transcription.text
# Function to translate Tamil to English using Groq's Gemma
def translate_text(tamil_text):
    response = client.chat.completions.create(
        model="gemma-7b-it",
        messages=[{"role": "user", "content": f"Translate this Tamil text to English: {tamil_text}"}]
    )
    return response.choices[0].message.content
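# Note: Groq has since retired gemma-7b-it; on current deployments a
# successor such as gemma2-9b-it may be needed instead.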
# Function to generate a short story using Groq's DeepSeek R1
def generate_text(prompt):
    response = client.chat.completions.create(
        # "deepseek-coder-r1-7b" is not a Groq model ID; the DeepSeek R1
        # model Groq serves is deepseek-r1-distill-llama-70b
        model="deepseek-r1-distill-llama-70b",
        messages=[{"role": "user", "content": f"Write a short story about: {prompt}"}]
    )
    return response.choices[0].message.content
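# Note: R1-style reasoning models may emit their chain of thought in
# <think>...</think> tags at the start of the content; you may want to
# strip that span before displaying the story.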
# Function to generate an image from the English prompt
def generate_image(prompt):
    img = image_gen(prompt=prompt).images[0]
    return img
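# Note: per the SDXL-Turbo model card, the intended usage is single-step
# inference, e.g. image_gen(prompt=prompt, num_inference_steps=1,
# guidance_scale=0.0); the pipeline defaults also work, just more slowly.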
# Streamlit UI
st.title("🎤 Tamil Speech to Image & Story Generator")

# Upload audio file
audio_file = st.file_uploader("Upload a Tamil audio file", type=["wav", "mp3"])

if st.button("Generate"):
    if audio_file is not None:
        # Read audio bytes
        audio_bytes = audio_file.read()

        # Process steps: transcribe -> translate -> story -> image
        tamil_text = transcribe(audio_bytes)
        english_text = translate_text(tamil_text)
        story = generate_text(english_text)
        image = generate_image(english_text)

        # Display outputs
        st.subheader("📝 Transcribed Tamil Text")
        st.write(tamil_text)

        st.subheader("🔠 Translated English Text")
        st.write(english_text)

        st.subheader("📖 Generated Story")
        st.write(story)

        st.subheader("🖼️ Generated Image")
        st.image(image, caption="Generated Image from Story")
    else:
        st.warning("⚠️ Please upload an audio file before generating.")