Kishorekumar7 committed on
Commit
5ce3342
·
verified ·
1 Parent(s): 60b0b0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -41
app.py CHANGED
@@ -1,25 +1,22 @@
1
- import os
2
- import torch
3
  import streamlit as st
 
 
 
4
  from groq import Groq
5
  from diffusers import AutoPipelineForText2Image
6
- import tempfile
7
- import soundfile as sf
8
 
9
  # Load API keys
10
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
11
  HF_API_KEY = os.getenv("HF_API_KEY")
12
 
13
- # Initialize Groq client with API key
14
  client = Groq(api_key=GROQ_API_KEY)
15
 
16
- # Load lightweight Hugging Face image generation model
17
- image_gen = AutoPipelineForText2Image.from_pretrained(
18
- "stabilityai/sdxl-turbo", use_auth_token=HF_API_KEY
19
- )
20
- image_gen.to("cuda" if torch.cuda.is_available() else "cpu")
21
 
22
- # Function to transcribe Tamil audio using Groq's Whisper
23
  def transcribe(audio_path):
24
  with open(audio_path, "rb") as file:
25
  transcription = client.audio.transcriptions.create(
@@ -30,21 +27,21 @@ def transcribe(audio_path):
30
  )
31
  return transcription["text"]
32
 
33
- # Function to translate Tamil to English using Groq's Gemma
34
  def translate_text(tamil_text):
35
  response = client.chat.completions.create(
36
  model="gemma-7b-it",
37
  messages=[{"role": "user", "content": f"Translate this Tamil text to English: {tamil_text}"}]
38
  )
39
- return response.choices[0].delta.content
40
 
41
- # Function to generate text using Groq's DeepSeek R1
42
  def generate_text(prompt):
43
  response = client.chat.completions.create(
44
  model="deepseek-coder-r1-7b",
45
  messages=[{"role": "user", "content": f"Write a short story about: {prompt}"}]
46
  )
47
- return response.choices[0].delta.content
48
 
49
  # Function to generate an image
50
  def generate_image(prompt):
@@ -54,36 +51,37 @@ def generate_image(prompt):
54
  # Streamlit UI
55
  st.title("Tamil Speech to Image & Story Generator")
56
 
57
- # Audio input - Recording or Uploading
58
- st.subheader("Upload or Record Audio")
59
- recorded_audio = st.audio("", format='audio/wav', start_time=0)
60
- uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3", "m4a"])
61
 
62
- audio_path = None
 
 
 
63
 
64
- if uploaded_file is not None:
65
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
66
- temp_audio.write(uploaded_file.read())
67
- audio_path = temp_audio.name
68
- elif recorded_audio:
69
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
70
  audio_data, samplerate = sf.read(recorded_audio)
71
- sf.write(temp_audio.name, audio_data, samplerate)
72
- audio_path = temp_audio.name
73
-
74
- if st.button("Generate") and audio_path:
75
- with st.spinner("Transcribing Tamil speech..."):
76
- tamil_text = transcribe(audio_path)
77
- with st.spinner("Translating to English..."):
78
- english_text = translate_text(tamil_text)
79
- with st.spinner("Generating story..."):
80
- story = generate_text(english_text)
81
- with st.spinner("Generating image..."):
82
- image = generate_image(english_text)
83
-
 
 
 
 
84
  st.subheader("Tamil Transcription")
85
  st.write(tamil_text)
86
-
87
  st.subheader("English Translation")
88
  st.write(english_text)
89
 
@@ -91,4 +89,5 @@ if st.button("Generate") and audio_path:
91
  st.write(story)
92
 
93
  st.subheader("Generated Image")
94
- st.image(image)
 
 
 
 
1
  import streamlit as st
2
+ import torch
3
+ import os
4
+ import soundfile as sf
5
  from groq import Groq
6
  from diffusers import AutoPipelineForText2Image
 
 
7
 
8
# Load API keys from the environment (set in the deployment's secrets/config).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
HF_API_KEY = os.getenv("HF_API_KEY")

# Initialize Groq client
client = Groq(api_key=GROQ_API_KEY)

# Load image generation model on GPU when available, else fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Fix: HF_API_KEY was loaded but never used after `use_auth_token` was dropped.
# Pass it via `token` (the current diffusers parameter name); a None token is
# harmless for public checkpoints such as sdxl-turbo.
image_gen = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/sdxl-turbo", token=HF_API_KEY
).to(device)
 
 
18
 
19
+ # Function to transcribe audio
20
  def transcribe(audio_path):
21
  with open(audio_path, "rb") as file:
22
  transcription = client.audio.transcriptions.create(
 
27
  )
28
  return transcription["text"]
29
 
30
# Translate Tamil text to English with Groq's Gemma chat model.
def translate_text(tamil_text):
    """Return the English translation of *tamil_text* produced by gemma-7b-it."""
    completion = client.chat.completions.create(
        model="gemma-7b-it",
        messages=[
            {"role": "user", "content": f"Translate this Tamil text to English: {tamil_text}"}
        ],
    )
    # First (and only) choice carries the assistant message.
    return completion.choices[0].message.content
37
 
38
# Generate a short story from an English prompt via Groq's DeepSeek model.
def generate_text(prompt):
    """Return a short story about *prompt* from the chat completion endpoint."""
    completion = client.chat.completions.create(
        model="deepseek-coder-r1-7b",
        messages=[
            {"role": "user", "content": f"Write a short story about: {prompt}"}
        ],
    )
    # First (and only) choice carries the assistant message.
    return completion.choices[0].message.content
45
 
46
  # Function to generate an image
47
  def generate_image(prompt):
 
51
# Streamlit UI
st.title("Tamil Speech to Image & Story Generator")

# Choose input method
input_method = st.radio("Choose Input Method:", ("Record Audio", "Upload Audio"))

# Bind both widget results up front so the Generate branch below can test
# them without risking a NameError (each branch only creates one of them).
recorded_audio = None
uploaded_file = None

if input_method == "Record Audio":
    # Fix: `st.audio(...)` only *renders* a player and does not return audio
    # data, so the original `st.audio(st.file_uploader(...))` could never
    # yield a usable file. Use the uploader's return value directly.
    recorded_audio = st.file_uploader("Record your Tamil speech", type=["wav", "mp3"])
elif input_method == "Upload Audio":
    uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3"])

if st.button("Generate"):
    if input_method == "Record Audio" and recorded_audio:
        # soundfile accepts file-like objects; re-encode to WAV on disk so
        # transcribe() can open it by path.
        audio_data, samplerate = sf.read(recorded_audio)
        audio_path = "recorded_audio.wav"
        sf.write(audio_path, audio_data, samplerate)
    elif input_method == "Upload Audio" and uploaded_file:
        audio_path = "uploaded_audio.wav"
        with open(audio_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
    else:
        st.error("Please provide an audio file.")
        st.stop()

    # Pipeline: Tamil speech -> Tamil text -> English -> story + image.
    tamil_text = transcribe(audio_path)
    english_text = translate_text(tamil_text)
    story = generate_text(english_text)
    image = generate_image(english_text)

    # Display results
    st.subheader("Tamil Transcription")
    st.write(tamil_text)

    st.subheader("English Translation")
    st.write(english_text)

    st.subheader("Generated Story")
    st.write(story)

    st.subheader("Generated Image")
    st.image(image, caption="Generated Image")