Charleshhh committed on
Commit
83803c4
·
verified ·
1 Parent(s): dbe8f63

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -36
app.py CHANGED
@@ -1,28 +1,52 @@
1
  import streamlit as st
2
- from transformers import pipeline
3
- from gtts import gTTS
4
- import io
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  # function part
7
  # img2text
8
  def img2text(url):
9
- image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
10
- text = image_to_text_model(url)[0]["generated_text"]
11
- return text
 
 
 
 
12
 
13
  # text2story
14
  def text2story(text):
15
- story_generator = pipeline("text-generation", model="deepseek-ai/DeepSeek-R1", trust_remote_code=True)
16
- story = story_generator(text, max_length=200, num_return_sequences=1)[0]['generated_text']
17
- return story
 
 
 
 
18
 
19
  # text2audio
20
  def text2audio(story_text):
21
- tts = gTTS(text=story_text, lang='en')
22
- audio_file = io.BytesIO()
23
- tts.write_to_fp(audio_file)
24
- audio_file.seek(0)
25
- return audio_file
 
 
 
 
26
 
27
  st.set_page_config(page_title="Your Image to Audio Story",
28
  page_icon="🦜")
@@ -31,29 +55,35 @@ uploaded_file = st.file_uploader("Select an Image...")
31
 
32
  if uploaded_file is not None:
33
  print(uploaded_file)
34
- bytes_data = uploaded_file.getvalue()
35
- with open(uploaded_file.name, "wb") as file:
36
- file.write(bytes_data)
 
 
37
  st.image(uploaded_file, caption="Uploaded Image",
38
- use_column_width=True)
39
 
40
  #Stage 1: Image to Text
41
  st.text('Processing img2text...')
42
- scenario = img2text(uploaded_file.name)
43
- st.write(scenario)
44
-
45
- #Stage 2: Text to Story
46
- st.text('Generating a story...')
47
- story = text2story(scenario)
48
- st.write(story)
49
-
50
- #Stage 3: Story to Audio data
51
- st.text('Generating audio data...')
52
- audio_data = text2audio(story)
53
-
54
- # Play button
55
- if st.button("Play Audio"):
56
- st.audio(audio_data,
57
- format="audio/mpeg",
58
- start_time=0)
59
-
 
 
 
 
 
1
  import streamlit as st
2
+ import importlib.util
3
+ try:
4
+ # Check whether the accelerate library is installed
5
+ spec = importlib.util.find_spec("accelerate")
6
+ if spec is None:
7
+ st.error("缺少 'accelerate' 库,请安装该库以加载 FP8 量化模型。可以使用 'pip install accelerate' 进行安装。")
8
+ st.stop()
9
+ from transformers import pipeline
10
+ from gtts import gTTS
11
+ import io
12
+ import tempfile
13
+ import os
14
+ except ImportError as e:
15
+ st.error(f"导入库时出错: {e}")
16
+ st.stop()
17
 
18
  # function part
19
  # img2text
20
  def img2text(url):
21
+ try:
22
+ image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
23
+ text = image_to_text_model(url)[0]["generated_text"]
24
+ return text
25
+ except Exception as e:
26
+ st.error(f"图像描述生成出错: {e}")
27
+ return None
28
 
29
  # text2story
30
  def text2story(text):
31
+ try:
32
+ story_generator = pipeline("text-generation", model="deepseek-ai/DeepSeek-R1", trust_remote_code=True)
33
+ story = story_generator(text, max_length=200, num_return_sequences=1)[0]['generated_text']
34
+ return story
35
+ except Exception as e:
36
+ st.error(f"故事生成出错: {e}")
37
+ return None
38
 
39
  # text2audio
40
  def text2audio(story_text):
41
+ try:
42
+ tts = gTTS(text=story_text, lang='en')
43
+ audio_file = io.BytesIO()
44
+ tts.write_to_fp(audio_file)
45
+ audio_file.seek(0)
46
+ return audio_file
47
+ except Exception as e:
48
+ st.error(f"文本转语音出错: {e}")
49
+ return None
50
 
51
  st.set_page_config(page_title="Your Image to Audio Story",
52
  page_icon="🦜")
 
55
 
56
  if uploaded_file is not None:
57
  print(uploaded_file)
58
+ # 使用临时文件处理上传的图像
59
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
60
+ temp_file.write(uploaded_file.getvalue())
61
+ temp_file_path = temp_file.name
62
+
63
  st.image(uploaded_file, caption="Uploaded Image",
64
+ use_container_width=True) # 修改为 use_container_width
65
 
66
  #Stage 1: Image to Text
67
  st.text('Processing img2text...')
68
+ scenario = img2text(temp_file_path)
69
+ if scenario:
70
+ st.write(scenario)
71
+
72
+ #Stage 2: Text to Story
73
+ st.text('Generating a story...')
74
+ story = text2story(scenario)
75
+ if story:
76
+ st.write(story)
77
+
78
+ #Stage 3: Story to Audio data
79
+ st.text('Generating audio data...')
80
+ audio_data = text2audio(story)
81
+ if audio_data:
82
+ # Play button
83
+ if st.button("Play Audio"):
84
+ st.audio(audio_data,
85
+ format="audio/mpeg",
86
+ start_time=0)
87
+
88
+ # 删除临时文件
89
+ os.remove(temp_file_path)