Spaces:

Kishorekumar7
/

gradio_voice_to_image_and_text

Sleeping

App Files Files Community

Kishorekumar7 committed on Apr 4

Commit

434fa6f

verified ·

1 Parent(s): 0239fd9

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -57

app.py CHANGED Viewed

@@ -1,63 +1,43 @@
 import os
 import gradio as gr
-from groq import Groq
 from diffusers import StableDiffusionPipeline
 import torch
 from pydub import AudioSegment
-# --- Groq Client ---
-client = Groq(api_key=os.getenv("GROQ_API_KEY"))
-# --- Image Generation Pipeline ---
-device = "cuda" if torch.cuda.is_available() else "cpu"
-image_model_id = "CompVis/stable-diffusion-v1-4"  # fallback to a lightweight stable model
-image_pipeline = StableDiffusionPipeline.from_pretrained(
-    image_model_id, torch_dtype=torch.float16 if device == "cuda" else torch.float32
-)
-image_pipeline.to(device)
-# --- Step 1: Transcribe Tamil Audio using Groq's whisper-large-v3 ---
 def transcribe_tamil_audio(audio):
-    if not audio or not os.path.exists(audio):
-        print("Audio path is invalid or file does not exist.")
-        return "⚠️ Failed to process audio. Please try uploading a different file."
-    filename = "temp_audio.m4a"
     try:
         audio_segment = AudioSegment.from_file(audio)
-        audio_segment.export(filename, format="m4a")
     except Exception as e:
-        print("Audio loading error:", str(e))
-        return "⚠️ Failed to process audio. Please try uploading a different file."
     try:
-        with open(filename, "rb") as file:
-            transcription = client.audio.transcriptions.create(
-                file=(filename, file.read()),
-                model="whisper-large-v3",
-                language="ta",
-                response_format="verbose_json",
-            )
-        return transcription.text
     except Exception as e:
-        print("Transcription error:", str(e))
-        return "⚠️ Failed to transcribe audio."
-# --- Step 2: Translate using gemma2-9b-it ---
-def translate_tamil_to_english(tamil_text):
-    prompt = f"Translate this Tamil sentence to English:\n\nTamil: {tamil_text}\nEnglish:"
-    completion = client.chat.completions.create(
-        model="gemma2-9b-it",
-        messages=[{"role": "user", "content": prompt}],
-        temperature=0.7,
-        max_completion_tokens=1024,
-        top_p=1,
-        stream=False,
-    )
-    return completion.choices[0].message.content.strip()
-# --- Step 3: Generate Image from Translated English Text ---
 def generate_image(prompt):
     try:
         result = image_pipeline(prompt)
@@ -66,18 +46,19 @@ def generate_image(prompt):
         print("Image generation error:", str(e))
         return None
-# --- Step 4: Generate Poem from Prompt using deepseek model ---
 def generate_poem(prompt):
-    poem_prompt = f"Write a short 25-word poem based on this: {prompt}"
-    completion = client.chat.completions.create(
-        model="deepseek-r1-distill-llama-70b",
-        messages=[{"role": "user", "content": poem_prompt}],
-        temperature=0.6,
-        max_completion_tokens=200,
-        top_p=0.95,
-        stream=False,
-    )
-    return completion.choices[0].message.content.strip()
 # --- Step 5: Master Function ---
 def process_audio(audio):
@@ -100,7 +81,7 @@ iface = gr.Interface(
         gr.Textbox(label="📜 Generated Poem (25 words)"),
     ],
     title="🎤 Tamil Voice to Image + Poem Generator",
-    description="Speak in Tamil → Translates to English → Creates an Image → Writes a Poem. Powered by Groq and Hugging Face.",
 )
-iface.launch()

 import os
 import gradio as gr
+from transformers import pipeline
 from diffusers import StableDiffusionPipeline
 import torch
 from pydub import AudioSegment
+import tempfile
+# --- Step 1: Transcribe Tamil Audio using Whisper tiny model ---
+asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=0 if torch.cuda.is_available() else -1)
 def transcribe_tamil_audio(audio):
+    """Transcribe a Tamil audio file to text with the module-level ASR pipeline.
+
+    The input is decoded with pydub, re-exported as a temporary WAV file and
+    passed to ``asr_pipeline`` with the Tamil language token.
+
+    Args:
+        audio: Path to the audio file; a falsy value means nothing was given.
+
+    Returns:
+        The transcribed text, or a user-facing warning string when no audio
+        was supplied or any step of the processing failed.
+    """
+    if not audio:
+        return "⚠️ No audio provided."
+    tmp_path = None
     try:
         audio_segment = AudioSegment.from_file(audio)
+        # Only reserve a unique path here and close the handle right away:
+        # exporting to a file that is still open fails on platforms that
+        # lock open files.
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+            tmp_path = tmp.name
+        audio_segment.export(tmp_path, format="wav")
+        result = asr_pipeline(tmp_path, generate_kwargs={"language": "<|ta|>"})
+        return result["text"]
     except Exception as e:
+        print("Transcription error:", e)
+        return "⚠️ Failed to process audio. Please upload a valid audio file."
+    finally:
+        # delete=False means nothing removes the file for us; clean it up so
+        # repeated requests do not pile up WAV files in the temp directory.
+        if tmp_path is not None:
+            try:
+                os.remove(tmp_path)
+            except OSError:
+                pass
+# --- Step 2: Translate Tamil to English using NLLB ---
+translator = pipeline("translation", model="facebook/nllb-200-distilled-600M", src_lang="tam_Taml", tgt_lang="eng_Latn")
+def translate_tamil_to_english(tamil_text):
+    """Translate Tamil text to English with the module-level ``translator``.
+
+    Args:
+        tamil_text: Tamil text to translate.
+
+    Returns:
+        The ``translation_text`` of the first translator result, or a
+        user-facing warning string when the input is empty or the
+        translation call raises.
+    """
+    # Do not send empty or whitespace-only input to the model.
+    if not tamil_text or not tamil_text.strip():
+        return "⚠️ No text to translate."
     try:
+        result = translator(tamil_text)
+        return result[0]['translation_text']
     except Exception as e:
+        print("Translation error:", e)
+        return "⚠️ Failed to translate."
+# --- Step 3: Generate Image using Stable Diffusion ---
+device = "cuda" if torch.cuda.is_available() else "cpu"
+image_pipeline = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
+image_pipeline.to(device)
 def generate_image(prompt):
     try:
         result = image_pipeline(prompt)
         print("Image generation error:", str(e))
         return None
+# --- Step 4: Generate Poem using TinyLlama ---
+from transformers import AutoModelForCausalLM, AutoTokenizer
+poem_model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+tokenizer = AutoTokenizer.from_pretrained(poem_model_id)
+model = AutoModelForCausalLM.from_pretrained(poem_model_id)
+model.to(device)
 def generate_poem(prompt):
+    """Generate a short poem about ``prompt`` with the module-level causal LM.
+
+    Args:
+        prompt: Text describing what the poem should be about.
+
+    Returns:
+        The text the model generated after the instruction prompt, stripped
+        of surrounding whitespace.
+    """
+    poem_prompt = f"Write a short 25-word poem about: {prompt}\n"
+    inputs = tokenizer(poem_prompt, return_tensors="pt").to(device)
+    outputs = model.generate(**inputs, max_new_tokens=60, do_sample=True, temperature=0.7)
+    # The generated sequence starts with the prompt tokens. Decode only what
+    # follows them, so the instruction is not echoed back and a multi-line
+    # poem is returned whole (taking the last "\n"-separated line dropped
+    # earlier lines and could yield an empty string).
+    prompt_len = inputs["input_ids"].shape[1]
+    return tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()
 # --- Step 5: Master Function ---
 def process_audio(audio):
         gr.Textbox(label="📜 Generated Poem (25 words)"),
     ],
     title="🎤 Tamil Voice to Image + Poem Generator",
+    description="Speak in Tamil → Translates to English → Creates an Image → Writes a Poem. Powered by Hugging Face lightweight models.",
 )
+iface.launch()