Kishorekumar7 committed on
Commit
434fa6f
·
verified ·
1 Parent(s): 0239fd9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -57
app.py CHANGED
@@ -1,63 +1,43 @@
1
  import os
2
  import gradio as gr
3
- from groq import Groq
4
  from diffusers import StableDiffusionPipeline
5
  import torch
6
  from pydub import AudioSegment
 
7
 
8
- # --- Groq Client ---
9
- client = Groq(api_key=os.getenv("GROQ_API_KEY"))
10
 
11
- # --- Image Generation Pipeline ---
12
- device = "cuda" if torch.cuda.is_available() else "cpu"
13
- image_model_id = "CompVis/stable-diffusion-v1-4" # fallback to a lightweight stable model
14
- image_pipeline = StableDiffusionPipeline.from_pretrained(
15
- image_model_id, torch_dtype=torch.float16 if device == "cuda" else torch.float32
16
- )
17
- image_pipeline.to(device)
18
-
19
- # --- Step 1: Transcribe Tamil Audio using Groq's whisper-large-v3 ---
20
  def transcribe_tamil_audio(audio):
21
- if not audio or not os.path.exists(audio):
22
- print("Audio path is invalid or file does not exist.")
23
- return "⚠️ Failed to process audio. Please try uploading a different file."
24
-
25
- filename = "temp_audio.m4a"
26
-
27
  try:
28
  audio_segment = AudioSegment.from_file(audio)
29
- audio_segment.export(filename, format="m4a")
 
 
 
30
  except Exception as e:
31
- print("Audio loading error:", str(e))
32
- return "⚠️ Failed to process audio. Please try uploading a different file."
 
 
 
33
 
 
34
  try:
35
- with open(filename, "rb") as file:
36
- transcription = client.audio.transcriptions.create(
37
- file=(filename, file.read()),
38
- model="whisper-large-v3",
39
- language="ta",
40
- response_format="verbose_json",
41
- )
42
- return transcription.text
43
  except Exception as e:
44
- print("Transcription error:", str(e))
45
- return "⚠️ Failed to transcribe audio."
46
 
47
- # --- Step 2: Translate using gemma2-9b-it ---
48
- def translate_tamil_to_english(tamil_text):
49
- prompt = f"Translate this Tamil sentence to English:\n\nTamil: {tamil_text}\nEnglish:"
50
- completion = client.chat.completions.create(
51
- model="gemma2-9b-it",
52
- messages=[{"role": "user", "content": prompt}],
53
- temperature=0.7,
54
- max_completion_tokens=1024,
55
- top_p=1,
56
- stream=False,
57
- )
58
- return completion.choices[0].message.content.strip()
59
 
60
- # --- Step 3: Generate Image from Translated English Text ---
61
  def generate_image(prompt):
62
  try:
63
  result = image_pipeline(prompt)
@@ -66,18 +46,19 @@ def generate_image(prompt):
66
  print("Image generation error:", str(e))
67
  return None
68
 
69
- # --- Step 4: Generate Poem from Prompt using deepseek model ---
 
 
 
 
 
 
 
70
  def generate_poem(prompt):
71
- poem_prompt = f"Write a short 25-word poem based on this: {prompt}"
72
- completion = client.chat.completions.create(
73
- model="deepseek-r1-distill-llama-70b",
74
- messages=[{"role": "user", "content": poem_prompt}],
75
- temperature=0.6,
76
- max_completion_tokens=200,
77
- top_p=0.95,
78
- stream=False,
79
- )
80
- return completion.choices[0].message.content.strip()
81
 
82
  # --- Step 5: Master Function ---
83
  def process_audio(audio):
@@ -100,7 +81,7 @@ iface = gr.Interface(
100
  gr.Textbox(label="📜 Generated Poem (25 words)"),
101
  ],
102
  title="🎀 Tamil Voice to Image + Poem Generator",
103
- description="Speak in Tamil → Translates to English → Creates an Image → Writes a Poem. Powered by Groq and Hugging Face.",
104
  )
105
 
106
- iface.launch()
 
1
  import os
2
  import gradio as gr
3
+ from transformers import pipeline
4
  from diffusers import StableDiffusionPipeline
5
  import torch
6
  from pydub import AudioSegment
7
+ import tempfile
8
 
9
+ # --- Step 1: Transcribe Tamil Audio using Whisper tiny model ---
10
+ asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=0 if torch.cuda.is_available() else -1)
11
 
 
 
 
 
 
 
 
 
 
12
  def transcribe_tamil_audio(audio):
13
+ if not audio:
14
+ return "⚠️ No audio provided."
 
 
 
 
15
  try:
16
  audio_segment = AudioSegment.from_file(audio)
17
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
18
+ audio_segment.export(tmp.name, format="wav")
19
+ result = asr_pipeline(tmp.name, generate_kwargs={"language": "<|ta|>"})
20
+ return result["text"]
21
  except Exception as e:
22
+ print("Transcription error:", e)
23
+ return "⚠️ Failed to process audio. Please upload a valid audio file."
24
+
25
+ # --- Step 2: Translate Tamil to English using NLLB ---
26
+ translator = pipeline("translation", model="facebook/nllb-200-distilled-600M", src_lang="tam_Taml", tgt_lang="eng_Latn")
27
 
28
+ def translate_tamil_to_english(tamil_text):
29
  try:
30
+ result = translator(tamil_text)
31
+ return result[0]['translation_text']
 
 
 
 
 
 
32
  except Exception as e:
33
+ print("Translation error:", e)
34
+ return "⚠️ Failed to translate."
35
 
36
+ # --- Step 3: Generate Image using Stable Diffusion ---
37
+ device = "cuda" if torch.cuda.is_available() else "cpu"
38
+ image_pipeline = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
39
+ image_pipeline.to(device)
 
 
 
 
 
 
 
 
40
 
 
41
  def generate_image(prompt):
42
  try:
43
  result = image_pipeline(prompt)
 
46
  print("Image generation error:", str(e))
47
  return None
48
 
49
+ # --- Step 4: Generate Poem using TinyLlama ---
50
+ from transformers import AutoModelForCausalLM, AutoTokenizer
51
+
52
+ poem_model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
53
+ tokenizer = AutoTokenizer.from_pretrained(poem_model_id)
54
+ model = AutoModelForCausalLM.from_pretrained(poem_model_id)
55
+ model.to(device)
56
+
57
  def generate_poem(prompt):
58
+ poem_prompt = f"Write a short 25-word poem about: {prompt}\n"
59
+ inputs = tokenizer(poem_prompt, return_tensors="pt").to(device)
60
+ outputs = model.generate(**inputs, max_new_tokens=60, do_sample=True, temperature=0.7)
61
+ return tokenizer.decode(outputs[0], skip_special_tokens=True).split("\n")[-1]
 
 
 
 
 
 
62
 
63
  # --- Step 5: Master Function ---
64
  def process_audio(audio):
 
81
  gr.Textbox(label="📜 Generated Poem (25 words)"),
82
  ],
83
  title="🎀 Tamil Voice to Image + Poem Generator",
84
+ description="Speak in Tamil → Translates to English → Creates an Image → Writes a Poem. Powered by Hugging Face lightweight models.",
85
  )
86
 
87
+ iface.launch()