Kishorekumar7 committed on
Commit
f182d83
·
verified ·
1 Parent(s): 9c054fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -74
app.py CHANGED
@@ -1,94 +1,68 @@
1
  import streamlit as st
2
- import torchaudio
3
  import torch
4
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
5
  from diffusers import StableDiffusionPipeline
6
- from io import BytesIO
7
- import tempfile
8
- import os
9
 
10
- st.set_page_config(page_title="Tamil Voice to Story & Image Generator", layout="wide")
11
- st.title("🎤 Tamil Voice to Story & Image Generator")
12
 
13
- # Load models only once
14
  @st.cache_resource
15
  def load_models():
16
- # 1. Whisper small for speech recognition
17
- whisper_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-small", device=0 if torch.cuda.is_available() else -1)
18
-
19
- # 2. NLLB for Tamil to English translation
20
- tokenizer_trans = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
21
- model_trans = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
22
-
23
- # 3. Tiny Story Generator
24
- story_gen = pipeline("text-generation", model="sshleifer/tiny-gpt2", device=0 if torch.cuda.is_available() else -1)
25
-
26
- # 4. Image Generator
27
- image_pipe = StableDiffusionPipeline.from_pretrained(
28
- "CompVis/stable-diffusion-v1-4",
29
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
30
- )
31
- if torch.cuda.is_available():
32
- image_pipe.to("cuda")
33
 
34
- return whisper_pipe, tokenizer_trans, model_trans, story_gen, image_pipe
35
 
36
- whisper_pipe, tokenizer_trans, model_trans, story_gen, image_pipe = load_models()
 
37
 
38
- # Function: Translate Tamil to English
39
- def translate_ta_to_en(text):
40
- inputs = tokenizer_trans(text, return_tensors="pt", padding=True)
41
- translated = model_trans.generate(**inputs, forced_bos_token_id=tokenizer_trans.lang_code_to_id["eng_Latn"])
42
- return tokenizer_trans.batch_decode(translated, skip_special_tokens=True)[0]
43
 
44
- # Function: Generate story
45
- def generate_story(prompt):
46
- story = story_gen(prompt, max_length=100, num_return_sequences=1)
47
- return story[0]['generated_text']
48
-
49
- # Function: Generate image
50
- def generate_image(prompt):
51
- image = image_pipe(prompt).images[0]
52
- return image
53
-
54
- # Upload or Record
55
- input_method = st.radio("Select Input Method", ["Upload Audio", "Record Live"])
56
-
57
- if input_method == "Upload Audio":
58
- audio_file = st.file_uploader("Upload Tamil Audio", type=["wav", "mp3", "m4a"])
59
  else:
60
- audio_bytes = st.audio("Record or Upload Audio Below", format='audio/wav')
61
- audio_file = None
62
- if audio_bytes:
63
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile:
64
- tmpfile.write(audio_bytes.read())
65
- audio_file = tmpfile.name
66
 
67
- # Process Button
68
- if st.button("Generate from Audio") and audio_file:
69
- with st.spinner("🔄 Transcribing Tamil audio..."):
70
- result = whisper_pipe(audio_file)
71
- tamil_text = result['text']
72
 
73
- st.success("✅ Tamil Transcription")
74
- st.write(tamil_text)
 
75
 
76
- with st.spinner("🌐 Translating to English..."):
77
- english_text = translate_ta_to_en(tamil_text)
 
 
 
78
 
79
- st.success("✅ English Translation")
80
- st.write(english_text)
 
81
 
82
- with st.spinner("✍️ Generating Story..."):
83
- story = generate_story(english_text)
 
84
 
85
- st.success("✅ Story Generated")
86
- st.write(story)
 
87
 
88
- with st.spinner("🎨 Generating Image..."):
89
- image = generate_image(english_text)
 
90
 
91
- st.image(image, caption="Generated Image")
92
-
93
- elif st.button("Generate from Audio") and not audio_file:
94
- st.warning("Please upload or record an audio file.")
 
1
  import streamlit as st
2
+ import tempfile
3
  import torch
4
+ from transformers import pipeline
5
  from diffusers import StableDiffusionPipeline
6
+ from pydub import AudioSegment
7
+ import base64
 
8
 
9
+ st.set_page_config(page_title="Tamil Audio to Story & Image", layout="centered")
 
10
 
11
+ # Load lightweight models
12
  @st.cache_resource
13
  def load_models():
14
+ whisper = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
15
+ translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ta-en")
16
+ text_gen = pipeline("text-generation", model="sshleifer/tiny-gpt2")
17
+ image_gen = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
18
+ image_gen.to("cuda" if torch.cuda.is_available() else "cpu")
19
+ return whisper, translator, text_gen, image_gen
 
 
 
 
 
 
 
 
 
 
 
20
 
21
+ whisper, translator, text_gen, image_gen = load_models()
22
 
23
+ st.title("🎙️ Tamil Audio to Story & Image")
24
+ st.write("Upload or record Tamil audio to generate English story and image.")
25
 
26
+ input_mode = st.radio("Choose Input Mode", ["Upload Audio", "Record Live Audio"])
 
 
 
 
27
 
28
+ audio_bytes = None
29
+ if input_mode == "Upload Audio":
30
+ uploaded_file = st.file_uploader("Upload Tamil Audio (.wav, .mp3)", type=["wav", "mp3"], key="upload")
31
+ if uploaded_file:
32
+ audio_bytes = uploaded_file.read()
 
 
 
 
 
 
 
 
 
 
33
  else:
34
+ audio_recorder = st.audio_recorder("Record your audio", format="audio/wav", key="recorder")
35
+ if audio_recorder:
36
+ audio_bytes = audio_recorder
 
 
 
37
 
38
+ if audio_bytes:
39
+ st.audio(audio_bytes, format="audio/wav")
 
 
 
40
 
41
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
42
+ tmp.write(audio_bytes)
43
+ tmp_path = tmp.name
44
 
45
+ # Convert mp3 to wav if needed
46
+ if tmp_path.endswith(".mp3"):
47
+ sound = AudioSegment.from_mp3(tmp_path)
48
+ tmp_path = tmp_path.replace(".mp3", ".wav")
49
+ sound.export(tmp_path, format="wav")
50
 
51
+ with st.spinner("Transcribing..."):
52
+ transcription = whisper(tmp_path)["text"]
53
+ st.text_area("Transcribed Tamil Text", transcription)
54
 
55
+ with st.spinner("Translating..."):
56
+ translation = translator(transcription)[0]['translation_text']
57
+ st.text_area("Translated English Text", translation)
58
 
59
+ with st.spinner("Generating Story..."):
60
+ story = text_gen(translation, max_length=100)[0]['generated_text']
61
+ st.text_area("Generated Story", story)
62
 
63
+ with st.spinner("Generating Image..."):
64
+ image = image_gen(prompt=translation).images[0]
65
+ st.image(image, caption="Generated Image")
66
 
67
+ else:
68
+ st.warning("Please upload or record an audio to proceed.")