CR7CAD committed on
Commit
76abf5e
·
verified ·
1 Parent(s): 3fd88eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -43
app.py CHANGED
@@ -2,8 +2,7 @@
2
  import streamlit as st
3
  from transformers import pipeline
4
  import os
5
- import numpy as np
6
- import io
7
 
8
  # function part
9
  # img2text
@@ -43,52 +42,37 @@ def text2story(text):
43
 
44
  return story_text
45
 
# text2audio - encodes the synthesized waveform as real WAV bytes (no scipy)
def text2audio(story_text, max_length=500):
    """Convert story text to speech and return it as in-memory WAV data.

    Args:
        story_text: The text to synthesize.
        max_length: Maximum number of characters sent to the model. Longer
            text is cut at the last period inside the limit, or hard-cut at
            the limit when no period is found.

    Returns:
        A dict with ``"audio"`` (bytes of a complete 16-bit PCM WAV file) and
        ``"sampling_rate"`` (as reported by the pipeline), or ``None`` when
        anything fails; the failure is reported through ``st.error``.
    """
    # Local imports keep this block self-contained regardless of which
    # module-level imports the file carries.
    import io
    import wave

    import numpy as np

    try:
        synthesizer = pipeline("text-to-speech", model="facebook/mms-tts-eng")

        # Limit the text length to avoid timeouts, preferring a sentence
        # boundary over a cut in the middle of a word.
        if len(story_text) > max_length:
            last_period = story_text[:max_length].rfind('.')
            if last_period > 0:
                story_text = story_text[:last_period + 1]
            else:
                story_text = story_text[:max_length]

        # Generate speech
        speech = synthesizer(story_text)

        # NOTE(review): assumes mono float samples in [-1, 1], possibly
        # shaped (1, n) -- confirm against the model output.
        samples = np.asarray(speech["audio"], dtype=np.float32).reshape(-1)
        pcm = (np.clip(samples, -1.0, 1.0) * 32767.0).astype("<i2")

        # np.save() would emit NumPy's .npy serialization, which no audio
        # player understands; write an actual RIFF/WAV container instead.
        # Encoding in memory also avoids a shared, fixed-name temp file.
        wav_buffer = io.BytesIO()
        with wave.open(wav_buffer, "wb") as wav_out:
            wav_out.setnchannels(1)
            wav_out.setsampwidth(2)  # 16-bit PCM
            wav_out.setframerate(int(speech["sampling_rate"]))
            wav_out.writeframes(pcm.tobytes())

        return {
            "audio": wav_buffer.getvalue(),
            "sampling_rate": speech["sampling_rate"]
        }

    except Exception as e:
        st.error(f"Error generating audio: {str(e)}")
        # No fallback - just return None
        return None
93
 
94
  # Function to save temporary image file
@@ -127,22 +111,19 @@ if uploaded_file is not None:
127
 
128
  # Stage 3: Story to Audio data
129
  st.text('Generating audio data...')
130
- audio_data = text2audio(story)
131
 
132
  # Play button
133
  if st.button("Play Audio"):
134
- if audio_data:
135
- st.audio(
136
- audio_data["audio"],
137
- format="audio/wav",
138
- start_time=0,
139
- sample_rate=audio_data["sampling_rate"]
140
- )
141
  else:
142
- st.error("Failed to generate audio. Please try again.")
143
 
144
- # Clean up the temporary file
145
  try:
146
  os.remove(image_path)
 
147
  except:
148
  pass
 
2
  import streamlit as st
3
  from transformers import pipeline
4
  import os
5
+ import tempfile
 
6
 
7
  # function part
8
  # img2text
 
42
 
43
  return story_text
44
 
# text2audio - writes the synthesized waveform to a temporary WAV file
def text2audio(story_text, max_chars=500):
    """Convert story text to speech and save it as a temporary WAV file.

    Args:
        story_text: The text to synthesize.
        max_chars: Maximum number of characters sent to the model. Longer
            text is cut at the last period inside the limit, or hard-cut at
            the limit when no period is found.

    Returns:
        Path of a ``.wav`` file containing the speech, or ``None`` when
        anything fails; the failure is reported through ``st.error``. The
        file is created with ``delete=False``, so the caller owns it and is
        responsible for removing it once playback is finished.
    """
    # Local imports keep this block self-contained regardless of which
    # module-level imports the file carries.
    import wave

    import numpy as np

    temp_filename = None
    try:
        # Use a simple, reliable TTS model
        synthesizer = pipeline("text-to-speech", model="facebook/mms-tts-eng")

        # Limit text length to avoid timeouts, preferring a sentence
        # boundary over a cut in the middle of a word.
        if len(story_text) > max_chars:
            last_period = story_text[:max_chars].rfind('.')
            if last_period > 0:
                story_text = story_text[:last_period + 1]
            else:
                story_text = story_text[:max_chars]

        # Generate speech
        speech = synthesizer(story_text)

        # The pipeline result carries the waveform under "audio" and its rate
        # under "sampling_rate"; there is no "bytes" entry to write directly.
        # NOTE(review): assumes mono float samples in [-1, 1], possibly
        # shaped (1, n) -- confirm against the model output.
        samples = np.asarray(speech["audio"], dtype=np.float32).reshape(-1)
        pcm = (np.clip(samples, -1.0, 1.0) * 32767.0).astype("<i2")

        # Reserve a unique .wav path, then close the handle so the file can
        # be reopened by name for writing on every platform.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_file:
            temp_filename = temp_file.name

        with wave.open(temp_filename, "wb") as wav_out:
            wav_out.setnchannels(1)
            wav_out.setsampwidth(2)  # 16-bit PCM
            wav_out.setframerate(int(speech["sampling_rate"]))
            wav_out.writeframes(pcm.tobytes())

        return temp_filename

    except Exception as e:
        # Do not leave a half-written temp file behind on failure.
        if temp_filename is not None:
            try:
                os.remove(temp_filename)
            except OSError:
                pass
        st.error(f"Error generating audio: {str(e)}")
        return None
77
 
78
  # Function to save temporary image file
 
111
 
112
  # Stage 3: Story to Audio data
113
  st.text('Generating audio data...')
114
+ audio_file = text2audio(story)
115
 
116
  # Play button
117
  if st.button("Play Audio"):
118
+ if audio_file and os.path.exists(audio_file):
119
+ # Play the audio file
120
+ st.audio(audio_file)
 
 
 
 
121
  else:
122
+ st.error("Audio generation failed. Please try again.")
123
 
124
+ # Clean up the temporary files
125
  try:
126
  os.remove(image_path)
127
+ # Don't delete audio file immediately as it might still be playing
128
  except:
129
  pass