Kishorekumar7 committed on
Commit
4a997af
·
verified ·
1 Parent(s): 3a8f5e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -53
app.py CHANGED
@@ -1,44 +1,35 @@
1
  import os
2
- import streamlit as st
3
  import torch
4
- import tempfile
5
  from groq import Groq
6
  from diffusers import AutoPipelineForText2Image
7
- from io import BytesIO
8
 
9
  # Load API keys
10
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
11
  HF_API_KEY = os.getenv("HF_API_KEY")
12
 
13
- # Initialize Groq client
14
  client = Groq(api_key=GROQ_API_KEY)
15
 
16
- # Load image generation model
17
  device = "cuda" if torch.cuda.is_available() else "cpu"
18
- image_gen = AutoPipelineForText2Image.from_pretrained("stabilityai/sdxl-turbo", use_auth_token=HF_API_KEY).to(device)
19
-
20
- # Function to transcribe Tamil audio using Groq's Whisper
21
- def transcribe(audio_bytes):
22
- if not audio_bytes:
23
- return "No audio provided."
24
 
25
- # Save the audio file temporarily
26
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
27
- temp_audio.write(audio_bytes)
28
- temp_audio_path = temp_audio.name
 
29
 
30
- # Call Whisper API
31
- with open(temp_audio_path, "rb") as file:
 
32
  transcription = client.audio.transcriptions.create(
33
- file=file,
34
  model="whisper-large-v3",
35
- language="ta",
36
  response_format="verbose_json"
37
  )
38
-
39
- # Cleanup temp file
40
- os.remove(temp_audio_path)
41
-
42
  return transcription["text"]
43
 
44
  # Function to translate Tamil to English using Groq's Gemma
@@ -63,35 +54,31 @@ def generate_image(prompt):
63
  return img
64
 
65
  # Streamlit UI
66
- st.title("🎤 Tamil Speech to Image & Story Generator")
67
-
68
- # Upload audio file
69
- audio_file = st.file_uploader("Upload a Tamil audio file", type=["wav", "mp3"])
70
 
71
- if st.button("Generate"):
72
- if audio_file is not None:
73
- # Read audio bytes
74
- audio_bytes = audio_file.read()
75
-
76
- # Process Steps
77
- tamil_text = transcribe(audio_bytes)
78
- english_text = translate_text(tamil_text)
79
- story = generate_text(english_text)
80
- image = generate_image(english_text)
81
-
82
- # Display Outputs
83
- st.subheader("📝 Transcribed Tamil Text")
84
- st.write(tamil_text)
85
-
86
- st.subheader("🔠 Translated English Text")
87
- st.write(english_text)
88
-
89
- st.subheader("📖 Generated Story")
90
- st.write(story)
91
-
92
- st.subheader("🖼️ Generated Image")
93
- st.image(image, caption="Generated Image from Story")
94
-
95
- else:
96
- st.warning("⚠️ Please upload an audio file before generating.")
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
 
2
  import torch
3
+ import streamlit as st
4
  from groq import Groq
5
  from diffusers import AutoPipelineForText2Image
 
6
 
7
  # Load API keys
8
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
9
  HF_API_KEY = os.getenv("HF_API_KEY")
10
 
11
+ # Initialize Groq client with API key
12
  client = Groq(api_key=GROQ_API_KEY)
13
 
14
+ # Select device (GPU if available, else CPU)
15
  device = "cuda" if torch.cuda.is_available() else "cpu"
16
+ st.write(f"Using device: {device}") # Display device info
 
 
 
 
 
17
 
18
+ # Load lightweight Hugging Face image generation model
19
+ image_gen = AutoPipelineForText2Image.from_pretrained(
20
+ "stabilityai/sdxl-turbo", use_auth_token=HF_API_KEY
21
+ )
22
+ image_gen.to(device)
23
 
24
+ # Function to transcribe Tamil audio using Groq's Whisper
25
+ def transcribe(audio_file):
26
+ with open(audio_file, "rb") as file:
27
  transcription = client.audio.transcriptions.create(
28
+ file=(audio_file, file.read()),
29
  model="whisper-large-v3",
30
+ language="ta", # Tamil
31
  response_format="verbose_json"
32
  )
 
 
 
 
33
  return transcription["text"]
34
 
35
  # Function to translate Tamil to English using Groq's Gemma
 
54
  return img
55
 
56
  # Streamlit UI
57
+ st.title("Tamil Speech to Image & Story Generator")
 
 
 
58
 
59
+ # File uploader for audio
60
+ uploaded_audio = st.file_uploader("Upload your Tamil speech", type=["wav", "mp3", "m4a"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
+ if uploaded_audio is not None:
63
+ st.audio(uploaded_audio, format="audio/wav")
64
+
65
+ if st.button("Generate"):
66
+ with st.spinner("Transcribing..."):
67
+ tamil_text = transcribe(uploaded_audio)
68
+ st.success("Transcription complete!")
69
+ st.text_area("Tamil Text Output", tamil_text)
70
+
71
+ with st.spinner("Translating to English..."):
72
+ english_text = translate_text(tamil_text)
73
+ st.success("Translation complete!")
74
+ st.text_area("Translated English Text", english_text)
75
+
76
+ with st.spinner("Generating story..."):
77
+ story = generate_text(english_text)
78
+ st.success("Story generation complete!")
79
+ st.text_area("Generated Story", story)
80
+
81
+ with st.spinner("Generating image..."):
82
+ image = generate_image(english_text)
83
+ st.success("Image generation complete!")
84
+ st.image(image, caption="Generated Image")