frankai98 committed on
Commit
7352851
·
verified ·
1 Parent(s): 9c3b3a9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -0
app.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import hashlib
import io
import os
import tempfile
import time

import streamlit as st
import torch
from gtts import gTTS
from transformers import pipeline
7
+
8
+ # Initialize session state
9
# Seed the per-session containers the first time the script runs; on later
# reruns the keys already exist and their current values are left untouched.
if 'processed_data' not in st.session_state:
    # Outputs of the three pipeline stages (caption, story, synthesized audio).
    st.session_state['processed_data'] = dict(
        scenario=None,
        story=None,
        audio=None,
    )

if 'progress' not in st.session_state:
    # Bookkeeping for the status line: stage label, percent done, start timestamp.
    st.session_state['progress'] = dict(
        current_stage='',
        percent=0,
        start_time=None,
    )
22
+
23
+ # Page setup
24
# Configure the browser tab (title and icon), then render the page heading.
st.set_page_config(
    page_icon="🦜",
    page_title="Your Image to Audio Story",
)
st.header("Turn Your Image to a Short Audio Story for Children")
26
+
27
+ # Helper functions
28
def update_progress(stage, percentage):
    """Record the current pipeline stage in ``st.session_state.progress``.

    Args:
        stage: Label describing the stage that is about to run.
        percentage: Completion percentage. A value of 0 marks the start of a
            run and resets the stored start timestamp to the current time;
            any other value keeps the existing timestamp.
    """
    tracker = st.session_state.progress
    if percentage == 0:
        started_at = time.time()
    else:
        started_at = tracker['start_time']
    tracker.update(
        current_stage=stage,
        percent=percentage,
        start_time=started_at,
    )
34
+
35
def get_elapsed_time():
    """Return the time elapsed since the current run started, as ``MM:SS``.

    Reads ``st.session_state.progress['start_time']``, which
    ``update_progress`` sets when it is called with a percentage of 0.

    Returns:
        str: Zero-padded minutes and seconds. ``"00:00"`` is returned when no
        start time has been recorded yet.
    """
    start_time = st.session_state.progress['start_time']
    if start_time is None:
        # The progress dict is initialised with start_time=None; subtracting
        # it from a float would raise TypeError.
        return "00:00"
    # Clamp at zero so a backwards wall-clock adjustment cannot produce a
    # negative, oddly formatted duration.
    elapsed = max(0.0, time.time() - start_time)
    minutes, seconds = divmod(int(elapsed), 60)
    return f"{minutes:02d}:{seconds:02d}"
38
+
39
+ # Model loading
40
@st.cache_resource
def load_models():
    """Create the two Hugging Face pipelines the app relies on.

    Wrapped in ``st.cache_resource`` so the result is cached by Streamlit.

    Returns:
        dict: ``"img_model"`` maps to the image-to-text pipeline and
        ``"story_model"`` to the text-generation pipeline.
    """
    captioner = pipeline(
        task="image-to-text",
        model="cnmoro/tiny-image-captioning",
    )
    storyteller = pipeline(
        task="text-generation",
        model="Qwen/Qwen2.5-0.5B-Instruct",
    )
    return {"img_model": captioner, "story_model": storyteller}


# Module-level handle used by the processing functions below.
models = load_models()
48
+
49
+ # Processing functions
50
def img2text(url):
    """Caption an image with the image-to-text pipeline.

    Args:
        url: Image reference passed straight through to the pipeline.

    Returns:
        The ``"generated_text"`` field of the pipeline's first result.
    """
    captioner = models["img_model"]
    first_result = captioner(url)[0]
    return first_result["generated_text"]
52
+
53
def text2story(text):
    """Generate a short story about ``text`` with the text-generation pipeline.

    Args:
        text: Subject of the story (the app passes the image caption).

    Returns:
        The ``"content"`` of the final message in the generated conversation.
    """
    prompt = f"Generate a brief 100-word story about: {text}"
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    outputs = models["story_model"](
        messages,
        max_new_tokens=100,
        do_sample=True,
        temperature=0.7,
    )
    conversation = outputs[0]["generated_text"]
    # The generated conversation holds the input messages followed by the
    # model's reply, so take the last entry rather than a fixed index that
    # silently breaks if the prompt gains or loses a message.
    # NOTE(review): max_new_tokens=100 may cut a "100-word" story short,
    # since a word is often more than one token -- confirm the intended budget.
    return conversation[-1]["content"]
66
+
67
def text2audio(story_text):
    """Synthesize English speech for ``story_text`` with gTTS, in memory.

    Args:
        story_text: Text to be spoken.

    Returns:
        dict: ``'audio'`` is a ``BytesIO`` holding the gTTS output, rewound
        to the start; ``'sampling_rate'`` is the constant 16000.
    """
    buffer = io.BytesIO()
    speech = gTTS(text=story_text, lang='en', slow=False)
    speech.write_to_fp(buffer)
    # Rewind so consumers read from the beginning of the stream.
    buffer.seek(0)
    return dict(audio=buffer, sampling_rate=16000)
73
+
74
+ # UI components
75
def _show_stage(placeholder, stage, percent):
    """Record the stage in session state and render it with the elapsed time."""
    update_progress(stage, percent)
    placeholder.markdown(f"""
**{st.session_state.progress['current_stage']}**
⏱️ Elapsed: {get_elapsed_time()}
""")


def _run_pipeline(image_bytes, filename, status_text, progress_bar):
    """Run caption -> story -> audio for one image and store the results.

    Results are written into ``st.session_state.processed_data``. Any
    exception from a stage propagates to the caller.
    """
    results = st.session_state.processed_data
    # Drop the previous outputs first so a failure part-way through cannot
    # leave an old story/audio paired with the new image.
    results.update(scenario=None, story=None, audio=None)

    # The captioning model is handed a file path. Write the upload to a
    # private temporary file instead of the working directory under the
    # client-supplied name, which could overwrite application files and was
    # never cleaned up. Only the extension of the original name is reused.
    suffix = os.path.splitext(filename)[1]
    tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    try:
        with tmp:
            tmp.write(image_bytes)

        # Stage 1: Image to Text
        _show_stage(status_text, "🖼️ Analyzing image...", 0)
        results['scenario'] = img2text(tmp.name)
    finally:
        os.remove(tmp.name)
    progress_bar.progress(33)

    # Stage 2: Text to Story
    _show_stage(status_text, "📖 Generating story...", 33)
    results['story'] = text2story(results['scenario'])
    progress_bar.progress(66)

    # Stage 3: Story to Audio
    _show_stage(status_text, "🔊 Synthesizing audio...", 66)
    results['audio'] = text2audio(results['story'])
    progress_bar.progress(100)


uploaded_file = st.file_uploader("Select an Image After the Models are Loaded...")

if uploaded_file is not None:
    # Placeholders for the status line and progress bar, created before the
    # image so they appear above it.
    status_text = st.empty()
    progress_bar = st.progress(0)

    try:
        bytes_data = uploaded_file.getvalue()
        # Identify the upload by name and content so a different image that
        # happens to share a filename is still processed.
        file_key = f"{uploaded_file.name}:{hashlib.sha256(bytes_data).hexdigest()}"

        # Display image
        st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

        # Streamlit re-executes this script on every interaction, including a
        # click on the playback button. Only run the (slow, sampled) pipeline
        # when the upload changed or no finished result exists; otherwise the
        # click would regenerate a different story before playing it.
        already_done = (
            st.session_state.get('current_file') == file_key
            and st.session_state.processed_data.get('audio')
        )
        if already_done:
            progress_bar.progress(100)
        else:
            _run_pipeline(bytes_data, uploaded_file.name, status_text, progress_bar)
            # Freeze the duration now; recomputing it on a later rerun would
            # keep growing with wall-clock time.
            st.session_state.total_time = get_elapsed_time()
            st.session_state.current_file = file_key

        # Final status
        status_text.success(f"""
✅ Generation complete!
Total time: {st.session_state.total_time}
""")

        # Show results
        st.subheader("Results")
        st.write("**Caption:**", st.session_state.processed_data['scenario'])
        st.write("**Story:**", st.session_state.processed_data['story'])

    except Exception as e:
        status_text.error(f"❌ Error: {str(e)}")
        progress_bar.empty()
        # Bare raise keeps the original traceback intact.
        raise

# Audio playback
if st.button("Play Audio of the Story Generated"):
    audio_data = st.session_state.processed_data.get('audio')
    if audio_data:
        st.audio(
            audio_data['audio'].getvalue(),
            format="audio/mp3"
        )
    else:
        st.warning("Please generate a story first!")