CR7CAD committed on
Commit
4e37056
·
verified ·
1 Parent(s): 118cd25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -56
app.py CHANGED
@@ -2,54 +2,24 @@ import streamlit as st
2
  from PIL import Image
3
  import os
4
  import tempfile
5
- import subprocess
6
  import sys
7
 
8
- # Check for required dependencies and install if missing
9
- def check_and_install_dependencies():
10
- required_packages = {
11
- "transformers": "transformers",
12
- "sentencepiece": "sentencepiece",
13
- "gtts": "gTTS"
14
- }
15
-
16
- missing_packages = []
17
- for package, pip_name in required_packages.items():
18
- try:
19
- __import__(package)
20
- except ImportError:
21
- missing_packages.append((package, pip_name))
22
-
23
- if missing_packages:
24
- st.warning("Missing required dependencies. Please install them before continuing.")
25
- for package, pip_name in missing_packages:
26
- st.code(f"pip install {pip_name}", language="bash")
27
-
28
- if st.button("Install Dependencies Automatically"):
29
- with st.spinner("Installing dependencies..."):
30
- for package, pip_name in missing_packages:
31
- try:
32
- subprocess.check_call([sys.executable, "-m", "pip", "install", pip_name])
33
- st.success(f"Successfully installed {pip_name}")
34
- except Exception as e:
35
- st.error(f"Failed to install {pip_name}: {str(e)}")
36
- st.info("Please restart the application after installing dependencies.")
37
- return False
38
- return True
39
-
40
  # function part
41
- # img2text
42
  def img2text(image_path):
43
  try:
44
- # Import here to ensure dependencies are checked first
45
  from transformers import pipeline
46
 
47
- # Load the image-to-text model
48
- image_to_text_model = pipeline("image-to-text", model="naver-clova-ix/donut-base")
 
 
49
  # Open the image file
50
  image = Image.open(image_path)
 
51
  # Extract text from the image
52
  result = image_to_text_model(image)
 
53
  # Get the generated text
54
  text = result[0]["generated_text"] if result else "No text detected"
55
  return text
@@ -69,7 +39,7 @@ def text2audio(story_text):
69
  from gtts import gTTS
70
 
71
  # Create a temporary file
72
- temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
73
  temp_audio_path = temp_audio.name
74
  temp_audio.close()
75
 
@@ -88,26 +58,58 @@ def text2audio(story_text):
88
  st.set_page_config(page_title="Your Image to Audio Story",
89
  page_icon="🦜")
90
  st.header("Turn Your Image to Audio Story")
91
- st.subheader("Using Donut model for text extraction")
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
- # Check dependencies before proceeding
94
- dependencies_ok = check_and_install_dependencies()
 
 
 
95
 
96
- if dependencies_ok:
97
- uploaded_file = st.file_uploader("Select an Image...", type=['png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'])
 
 
 
98
 
99
- if uploaded_file is not None:
100
- # Save the uploaded file temporarily
101
- bytes_data = uploaded_file.getvalue()
102
- image_temp_path = os.path.join(tempfile.gettempdir(), uploaded_file.name)
103
- with open(image_temp_path, "wb") as file:
104
- file.write(bytes_data)
105
 
106
- # Display the uploaded image
107
- st.image(uploaded_file, caption="Uploaded Image",
108
- use_column_width=True)
109
 
110
- # Stage 1: Image to Text
111
- with st.spinner('Processing img2text...'):
112
- extracted_text = img2text(image_temp_path)
113
- st.subheader("Extracted Text:")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from PIL import Image
3
  import os
4
  import tempfile
 
5
  import sys
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  # function part
8
+ # img2text with a model that doesn't require sentencepiece
9
  def img2text(image_path):
10
  try:
 
11
  from transformers import pipeline
12
 
13
+ # Use the Salesforce model instead of Donut to avoid sentencepiece issues
14
+ st.info("Using Salesforce/blip-image-captioning-base model for image-to-text")
15
+ image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
16
+
17
  # Open the image file
18
  image = Image.open(image_path)
19
+
20
  # Extract text from the image
21
  result = image_to_text_model(image)
22
+
23
  # Get the generated text
24
  text = result[0]["generated_text"] if result else "No text detected"
25
  return text
 
39
  from gtts import gTTS
40
 
41
  # Create a temporary file
42
+ temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
43
  temp_audio_path = temp_audio.name
44
  temp_audio.close()
45
 
 
58
  st.set_page_config(page_title="Your Image to Audio Story",
59
  page_icon="🦜")
60
  st.header("Turn Your Image to Audio Story")
61
+ st.subheader("Image to Text to Audio Conversion")
62
+
63
+ uploaded_file = st.file_uploader("Select an Image...", type=['png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'])
64
+
65
+ if uploaded_file is not None:
66
+ # Save the uploaded file temporarily
67
+ bytes_data = uploaded_file.getvalue()
68
+ image_temp_path = os.path.join(tempfile.gettempdir(), uploaded_file.name)
69
+ with open(image_temp_path, "wb") as file:
70
+ file.write(bytes_data)
71
+
72
+ # Display the uploaded image
73
+ st.image(uploaded_file, caption="Uploaded Image",
74
+ use_column_width=True)
75
 
76
+ # Stage 1: Image to Text
77
+ with st.spinner('Processing img2text...'):
78
+ extracted_text = img2text(image_temp_path)
79
+ st.subheader("Extracted Text:")
80
+ st.write(extracted_text)
81
 
82
+ # Stage 2: Text to Story
83
+ with st.spinner('Generating a story...'):
84
+ story = text2story(extracted_text)
85
+ st.subheader("Generated Story:")
86
+ st.write(story)
87
 
88
+ # Stage 3: Story to Audio data
89
+ audio_file_path = None
90
+ with st.spinner('Generating audio data...'):
91
+ audio_file_path = text2audio(story)
 
 
92
 
93
+ # Remove the temporary image file
94
+ if os.path.exists(image_temp_path):
95
+ os.remove(image_temp_path)
96
 
97
+ # Play button
98
+ if st.button("Play Audio"):
99
+ if audio_file_path and os.path.exists(audio_file_path):
100
+ # Play the generated audio
101
+ with open(audio_file_path, "rb") as audio_file:
102
+ audio_bytes = audio_file.read()
103
+ st.audio(audio_bytes, format="audio/mp3")
104
+
105
+ # Clean up the audio file after playing
106
+ try:
107
+ os.remove(audio_file_path)
108
+ except:
109
+ pass
110
+ else:
111
+ st.warning("Audio generation failed. Playing a placeholder audio.")
112
+ try:
113
+ st.audio("kids_playing_audio.wav")
114
+ except FileNotFoundError:
115
+ st.error("Placeholder audio file not found. Audio playback is unavailable.")