CR7CAD committed on
Commit
ce9aea5
·
verified ·
1 Parent(s): 862568a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -56
app.py CHANGED
@@ -5,17 +5,53 @@ from PIL import Image
5
  import torch
6
  import os
7
  import tempfile
8
- import sys
9
- import subprocess
10
 
11
- # Try to import gTTS, install if missing
12
  try:
 
13
  from gtts import gTTS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  except ImportError:
15
- st.warning("Installing required package: gTTS...")
16
- subprocess.check_call([sys.executable, "-m", "pip", "install", "gTTS"])
17
- from gtts import gTTS
18
- st.success("gTTS installed successfully!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  # Simple image-to-text function
21
  def img2text(image):
@@ -66,26 +102,6 @@ def text2story(text):
66
  # If no good ending is found, return as is
67
  return story_text
68
 
69
- # Updated text-to-audio function using gTTS
70
- def text2audio(story_text):
71
- # Create a temporary file
72
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
73
- temp_filename = temp_file.name
74
- temp_file.close()
75
-
76
- # Use gTTS to convert text to speech
77
- tts = gTTS(text=story_text, lang='en', slow=False)
78
- tts.save(temp_filename)
79
-
80
- # Read the audio file
81
- with open(temp_filename, 'rb') as audio_file:
82
- audio_bytes = audio_file.read()
83
-
84
- # Clean up the temporary file
85
- os.unlink(temp_filename)
86
-
87
- return audio_bytes
88
-
89
  # Basic Streamlit interface
90
  st.title("Image to Audio Story")
91
  uploaded_file = st.file_uploader("Upload an image")
@@ -110,36 +126,13 @@ if uploaded_file is not None:
110
  # Text to Audio
111
  with st.spinner("Generating audio..."):
112
  try:
113
- audio_bytes = text2audio(story)
114
 
115
  # Play audio
116
- st.audio(audio_bytes, format='audio/mp3')
 
 
 
117
  except Exception as e:
118
  st.error(f"Error generating or playing audio: {e}")
119
- st.info("If you're having issues with gTTS, you might need to manually install it with: pip install gTTS")
120
-
121
- # Fallback to a simple TTS if gTTS fails
122
- try:
123
- st.write("Attempting fallback to pyttsx3...")
124
- import pyttsx3
125
- engine = pyttsx3.init()
126
-
127
- # Create a temporary file for the fallback audio
128
- temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
129
- temp_wav_filename = temp_wav.name
130
- temp_wav.close()
131
-
132
- # Generate and save speech
133
- engine.save_to_file(story, temp_wav_filename)
134
- engine.runAndWait()
135
-
136
- # Read the audio file
137
- with open(temp_wav_filename, 'rb') as audio_file:
138
- fallback_audio = audio_file.read()
139
-
140
- # Clean up
141
- os.unlink(temp_wav_filename)
142
-
143
- st.audio(fallback_audio, format='audio/wav')
144
- except:
145
- st.error("Both TTS methods failed. Please install gTTS manually.")
 
5
  import torch
6
  import os
7
  import tempfile
 
 
8
 
9
+ # For TTS, try multiple options in order of preference
10
  try:
11
+ # Try gTTS first
12
  from gtts import gTTS
13
+
14
+ def text2audio(story_text):
15
+ # Create a temporary file
16
+ temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
17
+ temp_filename = temp_file.name
18
+ temp_file.close()
19
+
20
+ # Use gTTS to convert text to speech
21
+ tts = gTTS(text=story_text, lang='en', slow=False)
22
+ tts.save(temp_filename)
23
+
24
+ # Read the audio file
25
+ with open(temp_filename, 'rb') as audio_file:
26
+ audio_bytes = audio_file.read()
27
+
28
+ # Clean up the temporary file
29
+ os.unlink(temp_filename)
30
+
31
+ return audio_bytes, 'audio/mp3'
32
+
33
  except ImportError:
34
+ st.warning("gTTS not available. Using alternative text-to-speech method.")
35
+
36
+ # Define alternative TTS using built-in transformers pipeline
37
+ def text2audio(story_text):
38
+ # Use a different TTS method
39
+ from transformers import pipeline
40
+
41
+ # Try a simple TTS model that should work with base transformers
42
+ synthesizer = pipeline("text-to-speech", model="facebook/mms-tts-eng")
43
+
44
+ # Generate speech
45
+ speech = synthesizer(story_text)
46
+
47
+ # Return the audio data
48
+ if 'audio' in speech:
49
+ return speech['audio'], speech.get('sampling_rate', 16000)
50
+ elif 'audio_array' in speech:
51
+ return speech['audio_array'], speech.get('sampling_rate', 16000)
52
+ else:
53
+ # In case of failure, raise an error instead of returning audio
54
+ raise Exception("Failed to generate audio with any available method")
55
 
56
  # Simple image-to-text function
57
  def img2text(image):
 
102
  # If no good ending is found, return as is
103
  return story_text
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  # Basic Streamlit interface
106
  st.title("Image to Audio Story")
107
  uploaded_file = st.file_uploader("Upload an image")
 
126
  # Text to Audio
127
  with st.spinner("Generating audio..."):
128
  try:
129
+ audio_data, audio_format = text2audio(story)
130
 
131
  # Play audio
132
+ if isinstance(audio_format, str) and audio_format.startswith('audio/'):
133
+ st.audio(audio_data, format=audio_format)
134
+ else:
135
+ st.audio(audio_data, sample_rate=audio_format)
136
  except Exception as e:
137
  st.error(f"Error generating or playing audio: {e}")
138
+ st.info("There was an issue with the text-to-speech conversion.")