def _paginate_text(text, max_chars_per_line, max_lines_per_page):
    """Greedily wrap *text* into lines and group those lines into pages.

    Words are split on whitespace. A word that is longer than
    ``max_chars_per_line`` is kept intact on a line of its own rather than
    being broken. Returns a list of pages, each page being its lines joined
    with ``"\\n"``; an empty or whitespace-only *text* yields an empty list.
    """
    pages = []
    lines = []
    current_line = ""
    for word in text.split():
        # Only wrap when there is already something on the line; otherwise an
        # over-long first word would push an empty line onto the page.
        if current_line and len(current_line) + len(word) + 1 > max_chars_per_line:
            lines.append(current_line)
            current_line = word
            if len(lines) == max_lines_per_page:
                pages.append("\n".join(lines))
                lines = []
        else:
            current_line = f"{current_line} {word}".strip()
    if current_line:
        lines.append(current_line)
    if lines:
        pages.append("\n".join(lines))
    return pages


def text_to_video(text, voice, rate, pitch, video_width, video_height,
                  bg_color, text_color, text_font, text_size):
    """Render *text* as a paged video in which every page is read aloud.

    The text is wrapped into pages that fit the frame, one audio track is
    synthesised per page with ``text_to_speech``, each page is drawn to a PNG
    with Wand, and the resulting still clips are concatenated into an MP4.

    Args:
        text: The text to render and narrate.
        voice, rate, pitch: Forwarded unchanged to ``text_to_speech``.
        video_width, video_height: Frame size used for the page images.
        bg_color, text_color: Colour specifications passed to Wand's ``Color``.
        text_font: Path to the font file; resolved to an absolute path.
        text_size: Font size used for drawing and for the layout estimate.

    Returns:
        ``(video_path, None)`` on success, or ``(None, warning)`` when the
        text is empty or ``text_to_speech`` reports a warning.
    """
    if not text or not text.strip():
        # Nothing to narrate: bail out before touching TTS or the encoder.
        return None, "Please enter some text to convert."

    # Font file path.
    font_path = os.path.abspath(text_font)

    # Layout estimate: the average glyph width is assumed to be half the font
    # size, and each line is budgeted text_size + 15 px (lines are drawn
    # text_size + 10 px apart, so this leaves a little slack at the bottom).
    # Both values are clamped to at least 1 so tiny or zero sizes cannot cause
    # a division by zero or a page that never fills up.
    max_chars_per_line = max(1, video_width // max(1, text_size // 2))
    max_lines_per_page = max(1, video_height // max(1, text_size + 15))

    pages = _paginate_text(text, max_chars_per_line, max_lines_per_page)

    # A private working directory keeps page images and the output video of
    # concurrent or repeated calls from overwriting each other.
    work_dir = tempfile.mkdtemp(prefix="text_to_video_")
    audio_clips = []
    video_clips = []
    image_paths = []
    final_video = None
    succeeded = False
    try:
        for i, page in enumerate(pages):
            # Read the whole page as one utterance; newlines are replaced by
            # spaces so the TTS engine does not pause at line breaks.
            audio_text = page.replace("\n", " ")
            audio, warning = asyncio.run(
                text_to_speech(audio_text, voice, rate, pitch)
            )
            if warning:
                return None, warning
            audio_clip = AudioFileClip(audio)
            audio_clips.append(audio_clip)

            # Draw the page with Wand and save it as a PNG.
            img_path = os.path.join(work_dir, f"page_{i}.png")
            with Drawing() as draw:
                draw.font = font_path
                draw.font_size = text_size
                draw.fill_color = Color(text_color)
                draw.text_alignment = 'center'
                draw.text_interline_spacing = 10
                with Image(width=video_width, height=video_height,
                           background=Color(bg_color)) as img:
                    for j, line in enumerate(page.split("\n")):
                        draw.text(int(video_width / 2),
                                  (j + 1) * (text_size + 10), line)
                    # Apply the queued drawing operations to the image.
                    draw(img)
                    img.format = 'png'
                    img.save(filename=img_path)
            image_paths.append(img_path)

            # The still image is shown for exactly as long as its narration.
            text_clip = (
                ImageClip(img_path)
                .set_duration(audio_clip.duration)
                .set_audio(audio_clip)
            )
            video_clips.append(text_clip)

        # Concatenate all page clips into the final video.
        final_video = concatenate_videoclips(video_clips)
        final_video_path = os.path.join(work_dir, "output_video.mp4")
        final_video.write_videofile(final_video_path, fps=24, codec="libx264")
        succeeded = True
        return final_video_path, None
    finally:
        # Release readers/handles held by the clips on every exit path.
        for clip in [final_video, *video_clips, *audio_clips]:
            if clip is not None:
                clip.close()
        # Best-effort cleanup of intermediates; failing to delete a temp file
        # must not mask the real result or error.
        for path in image_paths:
            try:
                os.remove(path)
            except OSError:
                pass
        if not succeeded:
            try:
                os.rmdir(work_dir)
            except OSError:
                pass
# Start the app object `demo` (defined outside this excerpt; presumably a
# Gradio interface -- confirm) with the `share` option enabled.
demo.launch(share=True)