CR7CAD committed on
Commit
1ebc71c
·
verified ·
1 Parent(s): a4fc174

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -30
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # import part
2
  import streamlit as st
3
  from transformers import pipeline
4
 
@@ -55,7 +55,7 @@ def text2story(text):
55
 
56
  return story_text
57
 
58
- # text2audio - Simplified without numpy/scipy
59
  def text2audio(story_text):
60
  try:
61
  # Use the HelpingAI TTS model as requested
@@ -71,32 +71,17 @@ def text2audio(story_text):
71
  story_text = story_text[:max_chars]
72
 
73
  # Generate speech
74
- st.write("Generating audio...")
75
  speech = synthesizer(story_text)
 
 
76
  st.write(f"Speech output keys: {list(speech.keys())}")
77
 
78
- # We'll pass the audio data directly to Streamlit instead of saving to a file
79
- # This works because Streamlit's st.audio() can take raw audio data
80
  return speech
81
 
82
  except Exception as e:
83
  st.error(f"Error generating audio: {str(e)}")
84
- import traceback
85
- st.error(traceback.format_exc())
86
  return None
87
 
88
- # Function to save temporary image file
89
- def save_uploaded_image(uploaded_file):
90
- if not os.path.exists("temp"):
91
- os.makedirs("temp")
92
-
93
- image_path = os.path.join("temp", uploaded_file.name)
94
-
95
- with open(image_path, "wb") as f:
96
- f.write(uploaded_file.getvalue())
97
-
98
- return image_path
99
-
100
  # main part
101
  st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
102
  st.header("Turn Your Image to Audio Story")
@@ -106,12 +91,12 @@ if uploaded_file is not None:
106
  # Display the uploaded image
107
  st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)
108
 
109
- # Save the image temporarily
110
- image_path = save_uploaded_image(uploaded_file)
111
 
112
  # Stage 1: Image to Text
113
  st.text('Processing img2text...')
114
- caption = img2text(image_path)
115
  st.write(caption)
116
 
117
  # Stage 2: Text to Story
@@ -135,14 +120,21 @@ if uploaded_file is not None:
135
  elif 'waveform' in speech_output and 'sample_rate' in speech_output:
136
  st.audio(speech_output['waveform'], sample_rate=speech_output['sample_rate'])
137
  else:
138
- st.error(f"Could not find compatible audio format in: {list(speech_output.keys())}")
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  except Exception as e:
140
  st.error(f"Error playing audio: {str(e)}")
141
  else:
142
- st.error("Audio generation failed. Please try again.")
143
-
144
- # Clean up the temporary files
145
- try:
146
- os.remove(image_path)
147
- except:
148
- pass
 
1
+ # import part - only using the two requested imports
2
  import streamlit as st
3
  from transformers import pipeline
4
 
 
55
 
56
  return story_text
57
 
58
+ # text2audio - Using HelpingAI-TTS-v1 model
59
  def text2audio(story_text):
60
  try:
61
  # Use the HelpingAI TTS model as requested
 
71
  story_text = story_text[:max_chars]
72
 
73
  # Generate speech
 
74
  speech = synthesizer(story_text)
75
+
76
+ # Get output information
77
  st.write(f"Speech output keys: {list(speech.keys())}")
78
 
 
 
79
  return speech
80
 
81
  except Exception as e:
82
  st.error(f"Error generating audio: {str(e)}")
 
 
83
  return None
84
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  # main part
86
  st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
87
  st.header("Turn Your Image to Audio Story")
 
91
  # Display the uploaded image
92
  st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)
93
 
94
+ # Create a temporary file in memory from the uploaded file
95
+ image_bytes = uploaded_file.getvalue()
96
 
97
  # Stage 1: Image to Text
98
  st.text('Processing img2text...')
99
+ caption = img2text(image_bytes) # Pass bytes directly to pipeline
100
  st.write(caption)
101
 
102
  # Stage 2: Text to Story
 
120
  elif 'waveform' in speech_output and 'sample_rate' in speech_output:
121
  st.audio(speech_output['waveform'], sample_rate=speech_output['sample_rate'])
122
  else:
123
+ # Try the first array-like value as audio data
124
+ for key, value in speech_output.items():
125
+ if hasattr(value, '__len__') and len(value) > 1000:
126
+ if 'rate' in speech_output:
127
+ st.audio(value, sample_rate=speech_output['rate'])
128
+ elif 'sample_rate' in speech_output:
129
+ st.audio(value, sample_rate=speech_output['sample_rate'])
130
+ elif 'sampling_rate' in speech_output:
131
+ st.audio(value, sample_rate=speech_output['sampling_rate'])
132
+ else:
133
+ st.audio(value, sample_rate=24000) # Default sample rate
134
+ break
135
+ else:
136
+ st.error(f"Could not find compatible audio format in: {list(speech_output.keys())}")
137
  except Exception as e:
138
  st.error(f"Error playing audio: {str(e)}")
139
  else:
140
+ st.error("Audio generation failed. Please try again.")