Hyeonseo's picture
Update app.py
423add2
raw
history blame
10.3 kB
import env_set
env_set.env_set()
import streamlit as st
import time
import numpy as np
import pandas as pd
import PyPDF2
from pptx import Presentation
import openai
import subprocess
# Builds the prompt used by text2ppt.
def generate_text2ppt_input_prompt(input_type, input_value, input_pages):
    """Assemble the ChatGPT prompt that requests a Marp slide deck.

    The prompt is the concatenation of three parts: a header that states the
    requested page count, the source material, and a fixed rule/example
    section that repeats the page count.

    Args:
        input_type: Kind of source material. Recognised values are the link
            label, ``"text"`` (or the text label used by the Text2PPT radio
            button) and ``"PDF"``. Any other value prints an error and yields
            a prompt without source material.
        input_value: The URL or raw text. For ``"PDF"`` either a path to the
            PDF file or an already open binary file-like object.
        input_pages: Number of slides to request; interpolated with ``%s``.

    Returns:
        The complete prompt string.
    """
    header = """
λ„ˆκ°€ Marp λ¬Έλ²•μœΌλ‘œ PPTλ₯Ό μ œμž‘ν•˜λŠ” λ””μžμ΄λ„ˆλΌκ³  κ°€μ •ν•˜κ³ , %sμž₯의 PPTλ₯Ό μž‘μ„±ν•΄.
+++ μ•„λž˜ λ‚΄μš© λ˜λŠ” 링크λ₯Ό μš”μ•½ν•΄μ„œ λ§ˆν¬λ‹€μš΄ μ–Έμ–΄λ‘œ μž‘μ„±ν•˜λŠ”λ°, === μ•„λž˜ κ·œμΉ™κ³Ό 지킀고, ~~~ μ•„λž˜ μŠ¬λΌμ΄λ“œ μ˜ˆμ‹œλ₯Ό μ°Έκ³ ν•΄.
+++
""" % input_pages

    # The Text2PPT radio button hands over its own label for the text option,
    # so that label is accepted alongside the original "text" key.
    text_types = ("text", "ν…μŠ€νŠΈ")

    def _pdf_text(stream):
        # Concatenate the extracted text of every page, in page order.
        reader = PyPDF2.PdfReader(stream)
        return "".join(page.extract_text() or "" for page in reader.pages)

    summary_value = ""
    if input_type == "링크":
        # NOTE(review): the link is followed by this literal rather than a
        # newline; kept exactly as in the original - confirm the intent.
        summary_value = input_value + "ν…μŠ€νŠΈ"
    elif input_type in text_types:
        summary_value = input_value + "\n"
    elif input_type == "PDF":
        if hasattr(input_value, "read"):
            # Already a binary stream (e.g. an uploaded file object).
            text = _pdf_text(input_value)
        else:
            with open(input_value, 'rb') as pdf_file:
                text = _pdf_text(pdf_file)
        summary_value = text + "\n"
    else:
        print("ERROR: 잘λͺ»λœ μž…λ ₯")

    rule_value = """
===
- μ œμ‹œν•œ λ‚΄μš© λ˜λŠ” 링크의 λ‚΄μš©μ— λŒ€ν•΄μ„œλ§Œ μ‚¬μ‹€μ μœΌλ‘œ μž‘μ„±ν•΄μ€˜.
- μŠ¬λΌμ΄λ“œ κ΅¬λΆ„μžλ‘œ ---λ₯Ό 무쑰건 μ‚¬μš©ν•΄μ€˜.
- μ£Όμ œμ— μ μ ˆν•œ λ„ν˜•, 이미지(![이미지](이미지링크), https://unsplash.com/ko/images/stock/non-copyrighted μ—μ„œ μ‹€μ œλ‘œ μ‚¬μš© κ°€λŠ₯ν•œ), ν‘œ(|-|), 인용(>), κ°•μ‘°(bold, ``), 이λͺ¨μ§€(https://kr.piliapp.com/twitter-symbols/), μ•„μ΄μ½˜ (https://kr.piliapp.com/symbol/#popular) 등이 λ‹€μ–‘ν•˜κ²Œ μŠ¬λΌμ΄λ“œλ₯Ό λ””μžμΈν•˜κ³  λ°°μΉ˜ν•΄μ€˜.
- 이λͺ¨μ§€λŠ” μ΅œλŒ€ 2 νŽ˜μ΄μ§€μ— ν•œ 번만 μ‚¬μš©ν•˜κ³ , λ‹€λ₯Έ λ””μžμΈμ„ λ‹€μ–‘ν•˜κ²Œ μ‚¬μš©ν•΄μ€˜.
- 이미지와 ν‘œλ₯Ό μ‚¬μš©ν•  λ•Œ, νŽ˜μ΄μ§€ 크기와 κ³ λ €ν•΄μ„œ κΈ€ λ‚΄μš©μ΄ λͺ¨λ‘ λ‚˜νƒ€λ‚˜λ„λ‘ 크기λ₯Ό μ§€μ •ν•΄μ€˜.
- Slide 1λ₯Ό 제λͺ©μœΌλ‘œ ν•΄μ„œ 총 %sμž₯이야.
- PPT의 λ‚΄μš©μ„ ν’λΆ€ν•˜κ²Œ λ§ˆν¬λ‹€μš΄μœΌλ‘œ μž‘μ„±ν•΄μ€˜.
- μŠ¬λΌμ΄λ“œ λ³„λ‘œ μ„€λͺ…ν•˜μ§€λ§κ³ , μ½”λ“œλ§Œ μž‘μ„±ν•΄μ€˜.
- μ˜ˆμ‹œμ˜ λ‚΄μš©μ„ μ‚¬μš©ν•΄μ„œ μž‘μ„±ν•˜μ§€λ§κ³ , ν˜•μ‹λ§Œ μ°Έκ³ ν•΄.
~~~
<!-- Slide 0. μŠ¬λΌμ΄λ“œ 주제 -->
# μŠ¬λΌμ΄λ“œ 제λͺ©
![이미지링크](https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo-with-title.png)
- ChatGPTλ₯Ό ν™œμš©ν•œ πŸ€—**TEXT2PPT μ„œλΉ„μŠ€ PA!**μž…λ‹ˆλ‹€.
- `링크`,`ν…μŠ€νŠΈ`, `PDF`λ₯Ό μž…λ ₯ λ˜λŠ” μ—…λ‘œλ“œν•˜λ©΄, PPT둜 λ³€ν™˜ν•©λ‹ˆλ‹€.
""" % input_pages
    return header + summary_value + rule_value
# Runs the text2ppt pipeline.
def text2ppt(token_key, input_prompt, input_theme):
    """Ask ChatGPT for Marp markdown and render it to ``output.pdf``.

    The model reply is prefixed with a Marp front-matter header, written to
    ``text2ppt_test.md`` in the current working directory, and converted by
    the pandoc binary at ``./pandoc-2.14.2/bin/pandoc``.

    Args:
        token_key: OpenAI API key; stored on ``openai.api_key``.
        input_prompt: Prompt text sent as the user message.
        input_theme: Marp theme name inserted into the front matter.

    Raises:
        ValueError: If ``input_prompt`` is empty.
        subprocess.CalledProcessError: If pandoc exits with a non-zero status.
    """
    if not input_prompt:
        raise ValueError("input_prompt must not be empty")

    openai.api_key = token_key
    messages = [
        {"role": "system", "content": "You are a kind helpful PPT designer."},
        {"role": "user", "content": input_prompt},
    ]
    chat = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-0301", messages=messages
    )
    reply = chat.choices[0].message.content

    # The header below already opens the document with "---", so a leading
    # separator (and the character after it) is dropped from the reply.
    revised_reply = reply[4:] if reply[:3] == "---" else reply
    marp_header = """---
marp: true
theme: %s
footer: 'PA!(Presentation Assistant)'
paginate: true
---
""" % input_theme
    md_text = marp_header + revised_reply

    # Explicit UTF-8 so non-ASCII slide text does not depend on the platform
    # default encoding.
    with open("text2ppt_test.md", 'w', encoding="utf-8") as md_file:
        md_file.write(md_text + "\n")

    # An argument list must be run without shell=True: with a shell on POSIX
    # only the first item is executed and the rest never reach pandoc.
    subprocess.run(
        ["./pandoc-2.14.2/bin/pandoc", "text2ppt_test.md", "-t", "pdf", "-o", "output.pdf"],
        check=True,
    )
    # subprocess.run(
    # f"npx @marp-team/marp-cli@latest --pdf-fonts-dir=/usr/share/fonts/truetype/nanum --pdf-default-font=NanumGothic -o output.pdf text2ppt_test.md --chrome-path=/usr/bin/google-chrome-stable",
    # shell=True)
def ppt2script(token_key, input_file, input_type):
    """Generate a presentation script for a PDF or PowerPoint file.

    The text of every page/slide is extracted, each one prefixed with a
    ``[PAGE_NUM n]`` marker (1-based), appended to a fixed instruction header
    and sent to ChatGPT.

    Args:
        token_key: OpenAI API key; stored on ``openai.api_key``.
        input_file: Path of the file to read.
        input_type: ``"PDF"`` reads the file with PyPDF2; any other value
            reads it as a PowerPoint presentation.

    Returns:
        The content of the model's reply.
    """
    openai.api_key = token_key

    parts = []
    if input_type == "PDF":
        with open(input_file, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            for page_num, page in enumerate(pdf_reader.pages, start=1):
                parts.append("[PAGE_NUM " + str(page_num) + "]")
                parts.append(page.extract_text() or "")
    else:
        # The original referenced an undefined name here; the presentation
        # to open is the file passed in by the caller.
        prs = Presentation(input_file)
        for page_num, slide in enumerate(prs.slides, start=1):
            parts.append("[PAGE_NUM " + str(page_num) + "]")
            for shape in slide.shapes:
                if not shape.has_text_frame:
                    continue
                for paragraph in shape.text_frame.paragraphs:
                    parts.extend(run.text for run in paragraph.runs)
    text = "".join(parts)

    header = """
λ„ˆλŠ”λŠ” PPT λ°œν‘œμ— 도움을 μ£ΌλŠ” μ‘°λ ₯μžμ•Ό.
~~~μ•„λž˜ κ·œμΉ™μ„ 지킀고, --- μ•„λž˜ PPT λ‚΄μš©μ— λŒ€ν•΄ λ°œν‘œ 슀크립트λ₯Ό ν•œκΈ€λ‘œ μž‘μ„±ν•΄.
~~~
- [PAGE_NUM 1] 일 λ•Œ, μ—¬κΈ°μ„œ 1은 νŽ˜μ΄μ§€ 번호인데, νŽ˜μ΄μ§€ λ²ˆν˜Έλ§ˆλ‹€ λ°œν‘œ 슀크립트λ₯Ό μž‘μ„±ν•΄.
- λ§ˆν¬λ‹€μš΄ μ–Έμ–΄λ₯Ό μ“°μ§€μ•Šκ³ , ν…μŠ€νŠΈλ‘œλ§Œ μž‘μ„±ν•΄.
- PPT λ‚΄μš©μ— 좔가적인 μ„€λͺ…μ΄λ‚˜ 사둀λ₯Ό λ§λΆ™μ—¬μ€˜.
---
"""
    # The header is never empty, so the user message is always sent.
    messages = [
        {"role": "system", "content": "You are a kind helpful PPT Assistant."},
        {"role": "user", "content": header + text},
    ]
    chat = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-0301", messages=messages
    )
    return chat.choices[0].message.content
# ------- UI layout starts here -------
tab1, tab2, tab3 = st.tabs(['PA!λž€?', 'Text2PPT', 'PPT2Script'])

# Markdown bodies for the introduction tab, declared up front so the widget
# calls below read as a plain top-to-bottom page outline.
intro_md = """
: μ‚¬μš©μžκ°€ μž…λ ₯ν•œ λ‚΄μš©μ„ 기반으둜 PPTλ₯Ό :blue[μžλ™ μ œμž‘]ν•˜κ³ ,
ν”„λ ˆμ  ν…Œμ΄μ…˜ :red[슀크립트λ₯Ό 제곡]ν•˜μ—¬ ν”„λ ˆμ  ν…Œμ΄μ…˜ μ—­λŸ‰μ„ ν–₯μƒμ‹œν‚΅λ‹ˆλ‹€!"""
text2ppt_md = """
: μ‚¬μš©μžμ—κ²Œ λ§ν¬λ‚˜ νŒŒμΌμ„ μ „λ‹¬λ°›μœΌλ©΄ κ·Έ λ‚΄μš©μœΌλ‘œ :blue[λ°œν‘œ 자료λ₯Ό μ œμž‘]ν•΄ λ“œλ¦½λ‹ˆλ‹€!
μ‚¬μš©μžλŠ” μ›ν•˜λŠ” ν…Œλ§ˆ(ν…œν”Œλ¦Ώ) μ’…λ₯˜μ™€ νŽ˜μ΄μ§€ 수만 μ„ νƒν•˜μ„Έμš”!"""
ppt2script_md = """
: ppt λ˜λŠ” pdf λ°œν‘œ 자료λ₯Ό μ‚¬μš©μžλ‘œλΆ€ν„° μ œκ³΅λ°›μœΌλ©΄ μžλ™μœΌλ‘œ :blue[λ°œν‘œ λŒ€λ³Έ]을 λ§Œλ“€μ–΄λ“œλ¦½λ‹ˆλ‹€!"""

with tab1:
    # Introduction section.
    st.header('μ†Œκ°œ')
    st.title('PA!(Presentation Assistant):sparkles:')
    st.markdown(intro_md)
    st.markdown('-------------------------')

    # Usage section: one sub-heading and blurb per feature tab.
    st.header('μ‚¬μš©λ²•')
    st.subheader('Text2PPT')
    st.markdown(text2ppt_md)
    st.subheader('PPT2Script')
    st.markdown(ppt2script_md)
# Text2PPT tab: collects the API token, theme, page count and source
# material, then generates the deck and offers it for download.
with tab2:
    st.header('Text2PPT')
    gpt_token = st.text_input('μ±— gpt토큰을 μž…λ ₯ν•΄ μ£Όμ„Έμš”.', key="<Text2PPT_token>")
    st.markdown('-------------------------')
    st.subheader(':computer:λ¬Έμ„œ ppt μžλ™ 생성기:computer:')
    thema_select = st.selectbox(
        'μ›ν•˜λŠ” ν…Œλ§ˆλ₯Ό μ„ νƒν•˜μ„Έμš”',
        ['default', 'gaia', 'uncover'])
    st.markdown('-------------------------')
    page_choice = st.slider('ppt νŽ˜μ΄μ§€ μž₯수', min_value=2, max_value=10, step=1, value=5)
    st.markdown('-------------------------')
    my_order = ['ν…μŠ€νŠΈ', '링크', 'PDF']
    status = st.radio('파일 μ’…λ₯˜λ₯Ό μ„ νƒν•˜κ³  λ‚΄μš©μ„ μž…λ ₯ν•˜μ„Έμš”! :smile: ', my_order)
    # The input widget depends on the selected kind of source material.
    if status == my_order[0]:
        input_text = st.text_area('textλ₯Ό μž…λ ₯ν•˜μ„Έμš”', height=5)
    elif status == my_order[1]:
        input_text = st.text_area('urlλ₯Ό μž…λ ₯ν•˜μ„Έμš”', height=5)
    elif status == my_order[2]:
        input_text = st.file_uploader('νŒŒμΌμ„ μ—…λ‘œλ“œ ν•˜μ„Έμš”', type=['pdf'])
    # Pressing this button hands the collected input to the generator.
    input_text_check = st.button('확인', key="<Text2PPT_start>")
    st.markdown('-------------------------')
    if input_text_check:
        if not input_text:
            # Nothing typed or uploaded yet: there is nothing to summarise.
            st.warning('Please provide the input before pressing the button.')
        else:
            # The prompt builder matches the plain "text" key, not the radio
            # label shown for the text option.
            input_kind = "text" if status == my_order[0] else status
            prompt_source = input_text
            if status == my_order[2]:
                # The prompt builder open()s its PDF argument as a path, so
                # the upload is written to disk first and the path is passed.
                prompt_source = "text2ppt_input.pdf"
                with open(prompt_source, mode='wb') as pdf_out:
                    pdf_out.write(input_text.getvalue())
            with st.spinner('Wait for it...'):
                text2ppt(gpt_token, generate_text2ppt_input_prompt(input_kind, prompt_source, page_choice), thema_select)
            with open("output.pdf", "rb") as pdf_file:
                PDFbyte = pdf_file.read()
            st.success('Done!')
            st.download_button(label="Download PPT",
                               data=PDFbyte,
                               file_name="export_output.pdf",
                               mime='application/octet-stream', key="<Text2PPT_download>")
# PPT2Script tab: takes an uploaded PDF or PPTX deck and offers the generated
# presentation script for download.
with tab3:
    st.header('PPT2Script')
    gpt_token = st.text_input('μ±—gpt토큰을 μž…λ ₯ν•΄μ£Όμ„Έμš”.', key="<PPT2Script_token>")
    st.markdown('-------------------------')
    st.subheader(':bookmark_tabs:λ°œν‘œ λŒ€λ³Έ 생성기')
    file_order = ['PDF', 'PPT']
    choose = st.radio('λ°œν‘œ 자료의 파일 ν˜•μ‹μ„ 선택해 μ£Όμ„Έμš”', file_order)
    # The accepted upload extension follows the selected file format.
    if choose == file_order[0]:
        uploaded_file = st.file_uploader('Choose File!', type='pdf')
    elif choose == file_order[1]:
        uploaded_file = st.file_uploader('Choose File!', type='pptx')
    # Pressing this button hands the uploaded file to the script generator.
    input_file_check = st.button('확인', key="<PPT2Script_start>")
    st.markdown('-------------------------')
    if input_file_check:
        if uploaded_file is None:
            # The button was pressed before any file was uploaded.
            st.warning('Please upload a file before pressing the button.')
        else:
            with st.spinner('Wait for it...'):
                # ppt2script reads from a path, so the upload is written to
                # disk under its own file name first.
                with open(uploaded_file.name, mode='wb') as w:
                    w.write(uploaded_file.getvalue())
                script = ppt2script(gpt_token, uploaded_file.name, choose)
            st.success('Done!')
            st.download_button('Download Script',
                               data=script, file_name="script_output.txt", key="<PPT2Script_download>")