# Hyeonseo's picture
# Update presentation_assistant/presentation_assistant.py
# 1b9fc54
# raw
# history blame
# 6.06 kB
import os
import PyPDF2
from pptx import Presentation
import openai
import subprocess
from io import BytesIO
import sys
import requests
# Hugging Face access token, read from the environment at import time;
# a missing MY_HF_TOKEN variable raises KeyError right here.
hf_token = os.environ['MY_HF_TOKEN']
# Hosted inference endpoint that query() posts to (Llama-2-70b chat model).
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-70b-chat-hf"
# Bearer-token authorization header passed along by query().
headers = {"Authorization": "Bearer "+hf_token}
def query(payload, timeout=120):
    """POST *payload* to the inference endpoint and return the decoded JSON reply.

    Args:
        payload: JSON-serializable request body; it is sent as the ``json``
            argument of ``requests.post``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a stalled endpoint
            would hang the whole app. Pass ``None`` to restore the old
            wait-forever behaviour.

    Returns:
        Whatever ``response.json()`` yields for the reply body.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=timeout
    )
    return response.json()
# Add /home/user/app to the module search path at import time
# (presumably the deployed app's root directory -- TODO confirm).
sys.path.append("/home/user/app")
# Build the prompt that asks the model for a markdown slide deck
def generate_text2ppt_input_prompt(input_type, input_value, input_pages):
    """Assemble the text2ppt prompt: instructions, source material, then rules.

    Args:
        input_type: "Link", "Text" or "PDF". Any other value prints an error
            message and contributes no source material to the prompt.
        input_value: The link or text itself, or the path of a PDF file whose
            page text is extracted.
        input_pages: Requested page count, interpolated into the instructions
            and into the rules.

    Returns:
        The complete prompt as a single string.
    """
    intro = """
Assume you are a designer creating a PPT using markdown syntax, and write a PPT of %s pages.
+++ Summarize the content or link below in markdown language, adhering to the rules in ===, and refer to the slide examples in ~~~.
+++
""" % input_pages

    if input_type in ("Link", "Text"):
        # Links and plain text are passed through as-is.
        material = input_value + "\n"
    elif input_type == "PDF":
        with open(input_value, 'rb') as pdf_stream:
            pdf_pages = PyPDF2.PdfReader(pdf_stream).pages
            # Concatenate the extracted text of every page, in page order.
            material = "".join(page.extract_text() for page in pdf_pages) + "\n"
    else:
        print("ERROR: Invalid input")
        material = ""

    rules = """
===
- Always use '---' as a slide divider.
- Write factually only about the content or link provided.
- Design and arrange the slides diversely with appropriate shapes, images(![Image](Image link), https://unsplash.com/ko/images/stock/non-copyrighted for actual use), tables(|-|), quotes(>), emphasis(bold, ``), emojis(https://kr.piliapp.com/twitter-symbols/), icons (https://kr.piliapp.com/symbol/#popular).
- Use emojis only once in every two pages, and use various other designs.
- When using images and tables, specify the size considering the page size so that all the text content appears.
- Make Slide 1 the title, for a total of %s pages.
- Write the content of the PPT richly in markdown.
- Don't explain slide by slide, just write the code.
- Don't write using the content of the example, just refer to the format.
~~~
<!-- Slide 0. Slide Topic -->
# Slide Title
![Image link](https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo-with-title.png)
- This is 🤗**TEXT2PPT service PA!** using llama2.
- Converts `link`,`text`, `PDF` input or upload into PPT.
""" % input_pages

    return intro + material + rules
# Function to execute text2ppt
def text2ppt(token_key, input_prompt, input_theme):
    """Generate slide markdown with the model and convert it to .pptx via pandoc.

    Args:
        token_key: Not used by this function; kept so existing callers keep
            working.
        input_prompt: Prompt text sent to the model after a fixed
            "PPT designer" preamble.
        input_theme: 'default' to run pandoc without a reference document,
            otherwise the base name of a .pptx template under
            /home/user/app/template/ that is passed as --reference-doc.

    Side effects:
        Writes /home/user/app/text2ppt_input.md, runs pandoc to produce
        /home/user/app/text2ppt_output.pptx, and prints the raw model reply
        and the split markdown lines. A non-zero pandoc exit status is
        reported with a printed error instead of being silently dropped.
    """
    app_dir = "/home/user/app"
    # Write the markdown to the same absolute path pandoc reads from, so the
    # conversion does not depend on the current working directory.
    md_path = app_dir + "/text2ppt_input.md"
    pptx_path = app_dir + "/text2ppt_output.pptx"

    output = query({"inputs": "You are a kind helpful PPT designer. "+input_prompt,
                    "parameters": {
                        "return_full_text": False,
                        "max_new_tokens": 200}})
    reply = output[0]['generated_text']
    print(reply)

    # A reply that opens with a slide divider would start with an empty
    # slide; drop the divider together with the character that follows it.
    md_text = reply[4:] if reply[:3] == "---" else reply
    md_text_list = md_text.split('\n')
    print(md_text_list)

    # pandoc expects UTF-8 input and the prompt asks for emoji, so the
    # encoding is fixed rather than inherited from the locale. The context
    # manager closes the file even if a write fails.
    with open(md_path, 'w', encoding="utf-8") as md_file:
        md_file.writelines(line.strip() + "\n" for line in md_text_list)

    command = [app_dir + "/pandoc-2.14.2/bin/pandoc", md_path, "-t", "pptx"]
    if input_theme != 'default':
        command.append("--reference-doc=" + app_dir + "/template/" + input_theme + ".pptx")
    command += ["-o", pptx_path]

    result = subprocess.run(command, capture_output=True)
    if result.returncode != 0:
        # Output is captured, so without this a failed conversion is invisible.
        print("ERROR: pandoc failed: " + result.stderr.decode("utf-8", errors="replace"))
def _pdf_text_with_page_markers(pdf_path):
    """Return the PDF's extracted text with "[PAGE_NUM n]" before each page."""
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        return "".join(
            "[PAGE_NUM " + str(page_num) + "]" + page.extract_text()
            for page_num, page in enumerate(pdf_reader.pages, start=1)
        )


def _pptx_text_with_page_markers(pptx_path):
    """Return the deck's text with "[PAGE_NUM n]" before each slide."""
    parts = []
    for page_num, slide in enumerate(Presentation(pptx_path).slides, start=1):
        parts.append("[PAGE_NUM " + str(page_num) + "]")
        for shape in slide.shapes:
            if not shape.has_text_frame:
                continue
            for paragraph in shape.text_frame.paragraphs:
                paragraph_text = "".join(run.text for run in paragraph.runs)
                if paragraph_text:
                    # End each paragraph with a newline; otherwise titles and
                    # bullets from different paragraphs and shapes run
                    # together into one unbroken word stream in the prompt.
                    parts.append(paragraph_text + "\n")
    return "".join(parts)


def ppt2script(token_key, input_file, input_type):
    """Ask the model for a presentation script covering a PDF or PPTX file.

    Args:
        token_key: Stored in ``openai.api_key``; the request itself is sent
            through ``query()``.
        input_file: Path of the PDF or PowerPoint file to read.
        input_type: "PDF" to read ``input_file`` with PyPDF2; any other value
            reads it as a PowerPoint presentation.

    Returns:
        The ``generated_text`` field of the first item in the model's reply.
    """
    openai.api_key = token_key

    if input_type == "PDF":
        text = _pdf_text_with_page_markers(input_file)
    else:
        text = _pptx_text_with_page_markers(input_file)

    header = """
You are an assistant helping with PPT presentations.
~~~Follow the rules below and write a presentation script for the PPT content below.
~~~
- When [PAGE_NUM 1], where 1 is the page number, write a presentation script for each page number.
- Write only in text without using markdown language.
- Add additional explanations or examples to the PPT content.
---
"""
    input_prompt = header + text
    output = query({"inputs": "You are a kind helpful PPT Assistant."+input_prompt,
                    "parameters": {
                        "return_full_text": False,
                        "max_new_tokens": 200}})
    return output[0]['generated_text']