czhaobt committed on
Commit
221ba92
·
verified ·
1 Parent(s): 6b0b027

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -50
app.py CHANGED
@@ -1,21 +1,9 @@
1
- import streamlit as st
2
- import importlib.util
3
- import langdetect
4
 
5
- try:
6
- # 检查 accelerate 库是否安装
7
- spec = importlib.util.find_spec("accelerate")
8
- if spec is None:
9
- st.error("缺少 'accelerate' 库,请安装该库以加载 FP8 量化模型。可以使用 'pip install accelerate' 进行安装。")
10
- st.stop()
11
- from transformers import pipeline
12
- from gtts import gTTS
13
- import io
14
- import tempfile
15
- import os
16
- except ImportError as e:
17
- st.error(f"导入库时出错: {e}")
18
- st.stop()
19
 
20
  # function part
21
  # img2text
@@ -25,38 +13,35 @@ def img2text(url):
25
  text = image_to_text_model(url)[0]["generated_text"]
26
  return text
27
  except Exception as e:
28
- st.error(f"图像描述生成出错: {e}")
29
  return None
30
 
31
- # text2story
32
  def text2story(text):
33
  try:
34
- story_generator = pipeline("text-generation", model="perplexity-ai/r1-1776", trust_remote_code=True)
35
- story = story_generator(text, max_length=200, num_return_sequences=1)[0]['generated_text']
 
 
36
  return story
37
  except Exception as e:
38
- st.error(f"故事生成出错: {e}")
39
  return None
40
 
41
  # text2audio
42
  def text2audio(story_text):
43
  try:
44
- # 检测故事的语言
45
- detected_lang = langdetect.detect(story_text)
46
- tts = gTTS(text=story_text, lang=detected_lang)
47
- audio_file = io.BytesIO()
48
- tts.write_to_fp(audio_file)
49
- audio_file.seek(0)
50
- return audio_file
51
- except langdetect.LangDetectException:
52
- st.error("无法检测故事的语言,默认使用英语进行语音合成。")
53
  tts = gTTS(text=story_text, lang='en')
 
54
  audio_file = io.BytesIO()
 
55
  tts.write_to_fp(audio_file)
 
56
  audio_file.seek(0)
57
  return audio_file
58
  except Exception as e:
59
- st.error(f"文本转语音出错: {e}")
60
  return None
61
 
62
  st.set_page_config(page_title="Your Image to Audio Story",
@@ -65,39 +50,35 @@ st.header("Turn Your Image to Audio Story")
65
  uploaded_file = st.file_uploader("Select an Image...")
66
 
67
  if uploaded_file is not None:
68
- print(uploaded_file)
69
- # 使用临时文件处理上传的图像
70
- with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
71
- temp_file.write(uploaded_file.getvalue())
72
- temp_file_path = temp_file.name
73
 
74
- st.image(uploaded_file, caption="Uploaded Image",
75
- use_container_width=True) # 修改为 use_container_width
76
 
77
- #Stage 1: Image to Text
78
  st.text('Processing img2text...')
79
  scenario = img2text(temp_file_path)
80
  if scenario:
81
  st.write(scenario)
82
 
83
- #Stage 2: Text to Story
84
  st.text('Generating a story...')
85
  story = text2story(scenario)
86
  if story:
87
  st.write(story)
88
 
89
- #Stage 3: Story to Audio data
90
  st.text('Generating audio data...')
91
  audio_data = text2audio(story)
92
  if audio_data:
93
  # Play button
94
  if st.button("Play Audio"):
95
- st.audio(audio_data,
96
- format="audio/mpeg",
97
- start_time=0)
98
 
99
- # 删除临时文件并进行异常处理
100
- try:
101
- os.remove(temp_file_path)
102
- except Exception as e:
103
- st.error(f"删除临时文件时出错: {e}")
 
 
 
 
1
 
2
+ import streamlit as st
3
+ from transformers import pipeline
4
+ from gtts import gTTS
5
+ import io
6
+ import os
 
 
 
 
 
 
 
 
 
7
 
8
  # function part
9
  # img2text
 
13
  text = image_to_text_model(url)[0]["generated_text"]
14
  return text
15
  except Exception as e:
16
+ st.error(f"图像转文本出错: {e}")
17
  return None
18
 
19
+ # text2story
20
  def text2story(text):
21
  try:
22
+ story_generator = pipeline("text-generation", model="Qwen/QwQ-32B-Preview")
23
+ # 生成故事文本
24
+ result = story_generator(text, max_length=200, num_return_sequences=1)
25
+ story = result[0]['generated_text']
26
  return story
27
  except Exception as e:
28
+ st.error(f"文本生成故事出错: {e}")
29
  return None
30
 
31
  # text2audio
32
  def text2audio(story_text):
33
  try:
34
+ # 创建 gTTS 对象,将文本转换为语音
 
 
 
 
 
 
 
 
35
  tts = gTTS(text=story_text, lang='en')
36
+ # 创建字节流对象用于存储音频数据
37
  audio_file = io.BytesIO()
38
+ # 将音频数据写入字节流
39
  tts.write_to_fp(audio_file)
40
+ # 移动文件指针到开头
41
  audio_file.seek(0)
42
  return audio_file
43
  except Exception as e:
44
+ st.error(f"文本转音频出错: {e}")
45
  return None
46
 
47
  st.set_page_config(page_title="Your Image to Audio Story",
 
50
  uploaded_file = st.file_uploader("Select an Image...")
51
 
52
  if uploaded_file is not None:
53
+ # 使用临时文件存储上传的图像
54
+ temp_file_path = "temp_image.jpg"
55
+ bytes_data = uploaded_file.getvalue()
56
+ with open(temp_file_path, "wb") as file:
57
+ file.write(bytes_data)
58
 
59
+ # 显示上传的图像,使用 use_container_width 避免弃用警告
60
+ st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)
61
 
62
+ # Stage 1: Image to Text
63
  st.text('Processing img2text...')
64
  scenario = img2text(temp_file_path)
65
  if scenario:
66
  st.write(scenario)
67
 
68
+ # Stage 2: Text to Story
69
  st.text('Generating a story...')
70
  story = text2story(scenario)
71
  if story:
72
  st.write(story)
73
 
74
+ # Stage 3: Story to Audio data
75
  st.text('Generating audio data...')
76
  audio_data = text2audio(story)
77
  if audio_data:
78
  # Play button
79
  if st.button("Play Audio"):
80
+ st.audio(audio_data, format="audio/mpeg", start_time=0)
 
 
81
 
82
+ # 删除临时文件
83
+ if os.path.exists(temp_file_path):
84
+ os.remove(temp_file_path)