Spaces:

MonkeyDLLLLLLuffy
/

ISOM5240_Assignment_1

Sleeping

App Files Files Community

Leo Liu committed on Mar 9

Commit

15e4fc0

verified ·

1 Parent(s): fae883c

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -23

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import streamlit as st
 from transformers import pipeline
 from gtts import gTTS
 import io
 # function part
@@ -15,14 +16,20 @@ def img2text(url):
 # text2story
 def text2story(text):
-    # 定义提示词模板（包含变量占位符）
-    prompt_template = f"""Write a children's story for ages 3-10 based on: {text}
-    Requirements:
-    - Use simple words
-    - Include a happy ending
-    """
-    # 填充模板中的变量
     full_prompt = prompt_template.format(text=text)
     # 初始化生成管道
@@ -31,29 +38,29 @@ def text2story(text):
         model="pranavpsv/genre-story-generator-v2",
         max_new_tokens=180,
         min_new_tokens=130,
-        temperature=0.7
     )
     # 生成原始文本
     raw_output = pipe(full_prompt, return_full_text=False)[0]['generated_text']
-    # 增强版提示词移除功能
-    def clean_output(generated_text, prompt):
-        # 方法1：精确匹配移除
-        if generated_text.startswith(prompt):
-            return generated_text[len(prompt):].strip()
-        # 方法2：正则表达式模糊匹配
-        import re
-        pattern = re.compile(r'Write a children\'s story.*?based on:.*?\n', re.DOTALL)
-        cleaned = re.sub(pattern, '', generated_text, count=1)
-        # 移除残留的提示词片段
-        cleaned = cleaned.split("Requirements:")[0].strip()
-        return cleaned
-    # 返回处理后的干净文本
-    return clean_output(raw_output, full_prompt)
 # text2audio

 from transformers import pipeline
 from gtts import gTTS
 import io
+import re
 # function part
 # text2story
 def text2story(text):
+    # 优化提示词模板（使用统一的分隔符）
+    prompt_template = """[PROMPT_START]
+Write a children's story for ages 3-10 based on: {text}
+Requirements:
+1. Use simple words (1st-3rd grade level)
+2. Main character must be an animal
+3. Include magic elements
+4. Have a happy ending
+5. Story length: 100-120 words
+[PROMPT_END]
+"""
+    # 生成完整提示词（避免重复插入）
     full_prompt = prompt_template.format(text=text)
     # 初始化生成管道
         model="pranavpsv/genre-story-generator-v2",
         max_new_tokens=180,
         min_new_tokens=130,
+        temperature=0.7,
+        pad_token_id=50256
     )
     # 生成原始文本
     raw_output = pipe(full_prompt, return_full_text=False)[0]['generated_text']
+    # 增强版清洗逻辑
+    def clean_output(generated_text):
+        # 使用正则表达式匹配提示词块
+        prompt_pattern = re.compile(
+            r'\[PROMPT_START\].*?\[PROMPT_END\]',
+            re.DOTALL  # 匹配多行内容
+        )
+        # 移除整个提示词块
+        cleaned = re.sub(prompt_pattern, '', generated_text)
+        # 二次清理残留内容
+        cleaned = re.sub(r'^Write a children.*?\n', '', cleaned)  # 处理可能的开头残留
+        cleaned = re.sub(r'Requirements:.*?\n', '', cleaned)      # 移除要求残留
+        return cleaned.strip()
+    return clean_output(raw_output)
 # text2audio