Kishorekumar7 committed on
Commit
b0b5043
·
verified ·
1 Parent(s): f44e4eb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -0
app.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ import torch
4
+ import tempfile
5
+ from groq import Groq
6
+ from diffusers import AutoPipelineForText2Image
7
+ from io import BytesIO
8
+
9
+ # Load API keys
10
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
11
+ HF_API_KEY = os.getenv("HF_API_KEY")
12
+
13
+ # Initialize Groq client
14
+ client = Groq(api_key=GROQ_API_KEY)
15
+
16
+ # Load image generation model
17
+ device = "cuda" if torch.cuda.is_available() else "cpu"
18
+ image_gen = AutoPipelineForText2Image.from_pretrained("stabilityai/sdxl-turbo", use_auth_token=HF_API_KEY).to(device)
19
+
20
+ # Function to transcribe Tamil audio using Groq's Whisper
21
+ def transcribe(audio_bytes):
22
+ if not audio_bytes:
23
+ return "No audio provided."
24
+
25
+ # Save the audio file temporarily
26
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
27
+ temp_audio.write(audio_bytes)
28
+ temp_audio_path = temp_audio.name
29
+
30
+ # Call Whisper API
31
+ with open(temp_audio_path, "rb") as file:
32
+ transcription = client.audio.transcriptions.create(
33
+ file=file,
34
+ model="whisper-large-v3",
35
+ language="ta",
36
+ response_format="verbose_json"
37
+ )
38
+
39
+ # Cleanup temp file
40
+ os.remove(temp_audio_path)
41
+
42
+ return transcription["text"]
43
+
44
+ # Function to translate Tamil to English using Groq's Gemma
45
+ def translate_text(tamil_text):
46
+ response = client.chat.completions.create(
47
+ model="gemma-7b-it",
48
+ messages=[{"role": "user", "content": f"Translate this Tamil text to English: {tamil_text}"}]
49
+ )
50
+ return response.choices[0].message.content
51
+
52
+ # Function to generate text using Groq's DeepSeek R1
53
+ def generate_text(prompt):
54
+ response = client.chat.completions.create(
55
+ model="deepseek-coder-r1-7b",
56
+ messages=[{"role": "user", "content": f"Write a short story about: {prompt}"}]
57
+ )
58
+ return response.choices[0].message.content
59
+
60
+ # Function to generate an image
61
+ def generate_image(prompt):
62
+ img = image_gen(prompt=prompt).images[0]
63
+ return img
64
+
65
+ # Streamlit UI
66
+ st.title("๐ŸŽค Tamil Speech to Image & Story Generator")
67
+
68
+ # Upload audio file
69
+ audio_file = st.file_uploader("Upload a Tamil audio file", type=["wav", "mp3"])
70
+
71
+ if st.button("Generate"):
72
+ if audio_file is not None:
73
+ # Read audio bytes
74
+ audio_bytes = audio_file.read()
75
+
76
+ # Process Steps
77
+ tamil_text = transcribe(audio_bytes)
78
+ english_text = translate_text(tamil_text)
79
+ story = generate_text(english_text)
80
+ image = generate_image(english_text)
81
+
82
+ # Display Outputs
83
+ st.subheader("๐Ÿ“ Transcribed Tamil Text")
84
+ st.write(tamil_text)
85
+
86
+ st.subheader("๐Ÿ”  Translated English Text")
87
+ st.write(english_text)
88
+
89
+ st.subheader("๐Ÿ“– Generated Story")
90
+ st.write(story)
91
+
92
+ st.subheader("๐Ÿ–ผ๏ธ Generated Image")
93
+ st.image(image, caption="Generated Image from Story")
94
+
95
+ else:
96
+ st.warning("โš ๏ธ Please upload an audio file before generating.")
97
+