Spaces:

JunyiAcademy
/

vaitor2

Sleeping

App Files Files Community

youngtsai committed on Feb 13, 2024

Commit

5fde70f

1 Parent(s): c12f160

summary_json

Browse files

Files changed (1) hide show

app.py +49 -30

app.py CHANGED Viewed

@@ -392,6 +392,48 @@ def process_web_link(link):
     soup = BeautifulSoup(response.content, 'html.parser')
     return soup.get_text()
 # get video_id_summary.json content
 def get_video_id_summary(video_id, df_string):
@@ -406,9 +448,8 @@ def get_video_id_summary(video_id, df_string):
         exists, file_id = check_file_exists(service, folder_id, file_name)
         if not exists:
             summary = generate_summarise(df_string)
-            processed_summary = str(summary)
-            summary_json = {"summary": processed_summary}
-            summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
             file_id = upload_content_directly(service, file_name, folder_id, summary_text)
             print("summary已上傳到Google Drive")
         else:
@@ -444,24 +485,6 @@ def generate_summarise(df_string):
         4. 關鍵時刻（段落摘要）
         5. 結論反思（為什麼我們要學這個？）
         6. 延伸小問題
-        JSON format 為 {{
-            "content_type": "資料",
-            "summary": "整體摘要",
-            "key_points": "條列式重點",
-            "key_moments": [
-                {{
-                    "start": "開始時間",
-                    "end": "結束時間",
-                    "text": "段落摘要"
-                }},
-                ...
-            ],
-            ]",
-            "conclusion_reflection": "結論反思（為什麼我們要學這個？）",
-            "extension_questions": "延伸小問題 LIST"
-        }}
     """
     # 🗂️ 1. 內容類型：？
@@ -484,16 +507,12 @@ def generate_summarise(df_string):
     }
     response = client.chat.completions.create(**request_payload)
-    print("====response====")
-    print(response)
-    print("====response====")
-    summarise_json = response.choices[0].message.content
-    print("=====summarise_json=====")
-    print(summarise_json)
-    print("=====summarise_json=====")
-    return summarise_json
 def generate_questions(df_string):

     soup = BeautifulSoup(response.content, 'html.parser')
     return soup.get_text()
def processed_video_summary_to_json(summary):
    """Split a numbered plain-text video summary into a dict of sections.

    The summary is expected to contain up to six numbered headings, in
    this order:

        1. 內容類型
        2. 整體摘要
        3. 條列式重點
        4. 關鍵時刻（段落摘要）
        5. 結論反思（為什麼我們要學這個？）
        6. 延伸小問題

    Each heading is located with a regular expression, and the text
    between one heading and the next becomes that section's value.  The
    body may follow the heading on the same line (after an optional
    full-width or half-width colon), on the next line, or after a blank
    line, and may span several paragraphs.  Example input::

        1. 內容類型：影片類型

        2. 整體摘要
        本段影片……

        3. 條列式重點
        - ……

    Args:
        summary: The raw summary text.  ``None`` is treated as an empty
            string; any other value is converted with ``str()``.

    Returns:
        A dict with the keys ``content_type``, ``overall_summary``,
        ``key_points``, ``key_moments``, ``conclusion_reflection`` and
        ``extension_questions``.  Every value is the stripped section
        text, or ``""`` when the heading was not found.
    """
    import re  # local import so the block does not depend on file-level imports

    # (output key, heading title) in the order the headings are numbered.
    sections = (
        ("content_type", "內容類型"),
        ("overall_summary", "整體摘要"),
        ("key_points", "條列式重點"),
        ("key_moments", "關鍵時刻"),
        ("conclusion_reflection", "結論反思"),
        ("extension_questions", "延伸小問題"),
    )

    text = "" if summary is None else str(summary)

    # Locate each heading, scanning forward from the previous heading so
    # that a title quoted inside an earlier section body is not picked up.
    found = []  # (key, heading_start, body_start)
    cursor = 0
    for number, (key, title) in enumerate(sections, start=1):
        heading = re.compile(
            # Line start, optional decoration (emoji, markdown marks),
            # "<n>." then the title, an optional parenthesised suffix
            # such as "（段落摘要）", and an optional colon.
            rf"^[^\w\n]*{number}\s*[.．、]\s*{re.escape(title)}"
            rf"(?:[ \t]*[（(][^）)\n]*[）)])?[ \t]*[：:]?",
            re.MULTILINE,
        )
        match = heading.search(text, cursor)
        if match is None:
            continue  # section absent: its value stays ""
        found.append((key, match.start(), match.end()))
        cursor = match.end()

    summary_json = {key: "" for key, _ in sections}
    for index, (key, _heading_start, body_start) in enumerate(found):
        # A section body runs until the next located heading (or the end).
        if index + 1 < len(found):
            body_end = found[index + 1][1]
        else:
            body_end = len(text)
        summary_json[key] = text[body_start:body_end].strip()

    print("===processed_video_summary_to_json===")
    print(summary_json)
    print("===processed_video_summary_to_json===")

    return summary_json
 # get video_id_summary.json content
 def get_video_id_summary(video_id, df_string):
         exists, file_id = check_file_exists(service, folder_id, file_name)
         if not exists:
             summary = generate_summarise(df_string)
+            processed_summary = processed_video_summary_to_json(summary)
+            summary_text = json.dumps(processed_summary, ensure_ascii=False, indent=2)
             file_id = upload_content_directly(service, file_name, folder_id, summary_text)
             print("summary已上傳到Google Drive")
         else:
         4. 關鍵時刻（段落摘要）
         5. 結論反思（為什麼我們要學這個？）
         6. 延伸小問題
     """
     # 🗂️ 1. 內容類型：？
     }
     response = client.chat.completions.create(**request_payload)
+    df_summarise = response.choices[0].message.content.strip()
+    print("=====df_summarise=====")
+    print(df_summarise)
+    print("=====df_summarise=====")
+    return df_summarise
 def generate_questions(df_string):