czhaobt committed on
Commit
4fcc6b4
·
verified ·
1 Parent(s): 28b471c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -33
app.py CHANGED
@@ -3,31 +3,53 @@ from transformers import pipeline
3
  from gtts import gTTS
4
  import io
5
  import os
 
6
 
7
  # function part
8
  # img2text
9
  def img2text(url):
10
- image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
11
- text = image_to_text_model(url)[0]["generated_text"]
12
- return text
 
 
 
 
13
 
14
  # text2story
15
  def text2story(text):
16
- story_generator = pipeline("text-generation", model="Qwen/QwQ-32B")
17
- story = story_generator(text, max_length=200, num_return_sequences=1)[0]["generated_text"]
18
- return story
 
 
 
 
19
 
20
  # text2audio
21
  def text2audio(story_text):
22
- # 创建 gTTS 对象
23
- tts = gTTS(text=story_text, lang='en')
24
- # 创建一个字节流对象用于存储音频数据
25
- audio_file = io.BytesIO()
26
- # 将音频数据写入字节流
27
- tts.write_to_fp(audio_file)
28
- # 将文件指针移到开头
29
- audio_file.seek(0)
30
- return audio_file
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  st.set_page_config(page_title="Your Image to Audio Story",
33
  page_icon="🦜")
@@ -41,28 +63,30 @@ if uploaded_file is not None:
41
  with open(temp_file_path, "wb") as file:
42
  file.write(bytes_data)
43
  st.image(uploaded_file, caption="Uploaded Image",
44
- use_column_width=True)
45
 
46
  # Stage 1: Image to Text
47
  st.text('Processing img2text...')
48
  scenario = img2text(temp_file_path)
49
- st.write(scenario)
 
50
 
51
- # 删除临时文件
52
- if os.path.exists(temp_file_path):
53
- os.remove(temp_file_path)
54
 
55
- # Stage 2: Text to Story
56
- st.text('Generating a story...')
57
- story = text2story(scenario)
58
- st.write(story)
 
59
 
60
- # Stage 3: Story to Audio data
61
- st.text('Generating audio data...')
62
- audio_data = text2audio(story)
63
-
64
- # Play button
65
- if st.button("Play Audio"):
66
- st.audio(audio_data,
67
- format="audio/mpeg",
68
- start_time=0)
 
3
  from gtts import gTTS
4
  import io
5
  import os
6
+ import langdetect
7
 
8
  # function part
9
  # img2text
10
  def img2text(url):
11
+ try:
12
+ image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
13
+ text = image_to_text_model(url)[0]["generated_text"]
14
+ return text
15
+ except Exception as e:
16
+ st.error(f"图像转文本出错: {e}")
17
+ return None
18
 
19
  # text2story
20
  def text2story(text):
21
+ try:
22
+ story_generator = pipeline("text-generation", model="Qwen/QwQ-32B")
23
+ story = story_generator(text, max_length=200, num_return_sequences=1)[0]["generated_text"]
24
+ return story
25
+ except Exception as e:
26
+ st.error(f"文本生成故事出错: {e}")
27
+ return None
28
 
29
  # text2audio
30
  def text2audio(story_text):
31
+ try:
32
+ # 检测故事的语言
33
+ detected_lang = langdetect.detect(story_text)
34
+ # 根据检测到的语言创建 gTTS 对象
35
+ tts = gTTS(text=story_text, lang=detected_lang)
36
+ # 创建一个字节流对象用于存储音频数据
37
+ audio_file = io.BytesIO()
38
+ # 将音频数据写入字节流
39
+ tts.write_to_fp(audio_file)
40
+ # 将文件指针移到开头
41
+ audio_file.seek(0)
42
+ return audio_file
43
+ except langdetect.LangDetectException:
44
+ st.error("无法检测故事的语言,默认使用英语进行语音合成。")
45
+ tts = gTTS(text=story_text, lang='en')
46
+ audio_file = io.BytesIO()
47
+ tts.write_to_fp(audio_file)
48
+ audio_file.seek(0)
49
+ return audio_file
50
+ except Exception as e:
51
+ st.error(f"文本转音频出错: {e}")
52
+ return None
53
 
54
  st.set_page_config(page_title="Your Image to Audio Story",
55
  page_icon="🦜")
 
63
  with open(temp_file_path, "wb") as file:
64
  file.write(bytes_data)
65
  st.image(uploaded_file, caption="Uploaded Image",
66
+ use_container_width=True)
67
 
68
  # Stage 1: Image to Text
69
  st.text('Processing img2text...')
70
  scenario = img2text(temp_file_path)
71
+ if scenario:
72
+ st.write(scenario)
73
 
74
+ # 删除临时文件
75
+ if os.path.exists(temp_file_path):
76
+ os.remove(temp_file_path)
77
 
78
+ # Stage 2: Text to Story
79
+ st.text('Generating a story...')
80
+ story = text2story(scenario)
81
+ if story:
82
+ st.write(story)
83
 
84
+ # Stage 3: Story to Audio data
85
+ st.text('Generating audio data...')
86
+ audio_data = text2audio(story)
87
+ if audio_data:
88
+ # Play button
89
+ if st.button("Play Audio"):
90
+ st.audio(audio_data,
91
+ format="audio/mpeg",
92
+ start_time=0)