File size: 3,111 Bytes
9e85403
f288107
 
 
 
 
 
9e85403
 
 
 
83803c4
 
 
 
 
f288107
83803c4
9e85403
f288107
9e85403
83803c4
f288107
 
83803c4
 
f288107
83803c4
9e85403
 
 
83803c4
f288107
 
 
 
 
 
 
 
 
 
 
 
 
83803c4
 
 
 
 
 
f288107
83803c4
9e85403
 
 
 
 
 
 
 
83803c4
 
 
 
 
f288107
9e85403
f288107
9e85403
83803c4
 
 
 
f288107
83803c4
 
 
 
 
f288107
83803c4
 
 
 
 
 
 
 
 
f288107
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import streamlit as st
from transformers import pipeline
from gtts import gTTS
import io
import os
import langdetect
import tempfile

# function part
# img2text
@st.cache_resource
def _load_image_captioner():
    """Build the BLIP image-captioning pipeline once and reuse it across reruns."""
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


def img2text(url):
    """Generate a caption for an image.

    Args:
        url: Image reference handed straight to the captioning pipeline; the
            caller in this file passes the path of a temporary image file.

    Returns:
        The generated caption string, or ``None`` if loading the model or
        running inference failed (the error is reported via ``st.error``).
    """
    try:
        # Streamlit re-executes the whole script on every interaction, so the
        # pipeline comes from a cached loader instead of being rebuilt per call.
        image_to_text_model = _load_image_captioner()
        return image_to_text_model(url)[0]["generated_text"]
    except Exception as e:
        # UI boundary: surface the failure to the user rather than crash the app.
        st.error(f"图像转文本出错: {e}")
        return None

# text2story
@st.cache_resource
def _load_story_generator():
    """Build the text-generation pipeline once and reuse it across reruns."""
    return pipeline("text-generation", model="Qwen/QwQ-32B")


def text2story(text):
    """Expand a short prompt (the image caption) into a story.

    Args:
        text: Prompt passed to the text-generation pipeline.

    Returns:
        The generated text of the first returned sequence, or ``None`` if
        loading the model or generating failed (the error is reported via
        ``st.error``).
    """
    try:
        # Streamlit re-executes the whole script on every interaction; reloading
        # a checkpoint of this size per call would dominate the run time, so the
        # pipeline comes from a cached loader.
        story_generator = _load_story_generator()
        # NOTE(review): the pipeline's "generated_text" presumably includes the
        # prompt itself, and max_length presumably counts prompt tokens too —
        # confirm against the transformers text-generation documentation.
        outputs = story_generator(text, max_length=200, num_return_sequences=1)
        return outputs[0]["generated_text"]
    except Exception as e:
        # UI boundary: surface the failure to the user rather than crash the app.
        st.error(f"文本生成故事出错: {e}")
        return None

# text2audio
def text2audio(story_text):
    """Synthesise speech for a story with gTTS.

    The story language is detected with ``langdetect``; when detection fails,
    English is used instead and the user is told so.

    Args:
        story_text: The text to speak.

    Returns:
        An ``io.BytesIO`` holding the audio data, positioned at the start, or
        ``None`` if synthesis failed (the error is reported via ``st.error``).
    """
    try:
        # Resolve the language first, in its own narrow try. Doing the English
        # fallback synthesis inside the except handler would leave that
        # synthesis outside the general handler below, so a gTTS failure there
        # would escape this function.
        try:
            detected_lang = langdetect.detect(story_text)
        except langdetect.LangDetectException:
            st.error("无法检测故事的语言,默认使用英语进行语音合成。")
            detected_lang = 'en'

        tts = gTTS(text=story_text, lang=detected_lang)
        # Keep the audio in memory instead of writing a file to disk.
        audio_file = io.BytesIO()
        tts.write_to_fp(audio_file)
        # Rewind so the consumer reads from the beginning of the stream.
        audio_file.seek(0)
        return audio_file
    except Exception as e:
        # UI boundary: surface the failure to the user rather than crash the app.
        st.error(f"文本转音频出错: {e}")
        return None

# Page setup and upload widget.
st.set_page_config(page_title="Your Image to Audio Story",
                   page_icon="🦜")
st.header("Turn Your Image to Audio Story")
uploaded_file = st.file_uploader("Select an Image...")

if uploaded_file is not None:
    print(uploaded_file)
    # Persist the upload to a temporary file so the captioning stage can be
    # given a file path. delete=False keeps the file after the `with` closes
    # it; it is removed explicitly in the `finally` below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
        temp_file.write(uploaded_file.getvalue())
        temp_file_path = temp_file.name

    try:
        st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

        # Stage 1: Image to Text
        st.text('Processing img2text...')
        scenario = img2text(temp_file_path)
        if scenario:
            st.write(scenario)

            # Stage 2: Text to Story
            st.text('Generating a story...')
            story = text2story(scenario)
            if story:
                st.write(story)

                # Stage 3: Story to Audio data
                st.text('Generating audio data...')
                audio_data = text2audio(story)
                if audio_data:
                    # Play button
                    # NOTE(review): a button click makes Streamlit rerun the
                    # script, so the stages above presumably execute again
                    # before the player appears — confirm this is intended.
                    if st.button("Play Audio"):
                        st.audio(audio_data,
                                 format="audio/mpeg",
                                 start_time=0)
    finally:
        # Always remove the temporary file, even if rendering or a stage above
        # raised; otherwise every failed run would leave a file behind.
        try:
            os.remove(temp_file_path)
        except OSError as e:
            st.error(f"删除临时文件出错: {e}")