Witold Wydmański
fix: specify tokenizer
cab993a
raw
history blame
4.19 kB
import gradio as gr
import openai
import tiktoken
from multiprocessing.pool import ThreadPool
# Tokenizer for the "cl100k_base" encoding; the functions below use it to
# measure text length in tokens before deciding whether to chunk/shorten.
enc = tiktoken.get_encoding("cl100k_base")
# Maps each entry of the "Command" dropdown to the instruction that is appended
# to the system prompt. "Custom" is empty because its instruction is taken from
# the separate custom-command textbox instead.
# NOTE(review): "Succintly" in the first prompt is misspelled ("Succinctly");
# it is a runtime string sent to the model, so it is left untouched here.
MODES = {
"Short summary": "Succintly summarize the following meeting transcript in a single paragraph.",
"Detailed summary": "Summarize the following meeting transcript. The summary should include all the important points discussed in the meeting.",
"Action points": "Summarize the following meeting transcript in form of action points.",
"Further actions": "Who and what should be done next? Summarize the following meeting transcript in form of action points.",
"Custom": "",
}
# Instruction used for the intermediate per-chunk summaries produced while
# shortening a transcript that is too long to process in one request.
SUMMARY_PROMPT = "Summarize the following meeting in very great detail. The summary should include all the important points discussed in the meeting."
def summarize_part(text, api_key):
    """Summarize one piece of a meeting transcript with a single chat request.

    Args:
        text: The transcript fragment, sent as the user message.
        api_key: OpenAI API key forwarded to the request.

    Returns:
        The message content of the first choice in the API response.
    """
    # The system message carries the fixed role description plus the
    # module-level detailed-summary instruction.
    system_message = {
        "role": "system",
        "content": f"You are a meeting organizer. You want to summarize a meeting. You are given the following transcript of the meeting. {SUMMARY_PROMPT}",
    }
    user_message = {"role": "user", "content": text}

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
        api_key=api_key,
    )
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]
def shorten_text(text, api_key):
    """Condense a long text by summarizing overlapping chunks in parallel.

    The text is split on whitespace. A new chunk starts every 1500 words and
    grows word by word until it reaches about 4000 tokens (or the text ends),
    so neighbouring chunks overlap. Each chunk is summarized with
    ``summarize_part`` on a pool of 4 threads and the partial summaries are
    combined in order.

    Args:
        text: The text to shorten.
        api_key: OpenAI API key forwarded to every ``summarize_part`` call.

    Returns:
        The partial summaries joined by newlines; an empty string when
        ``text`` contains no words.
    """
    words = text.split()
    total = len(words)
    chunks = []
    for start in range(0, total, 1500):
        chunk = ""
        end = start
        # NOTE: the growing chunk is re-tokenized after every appended word;
        # kept as-is so chunk boundaries stay exactly where they were.
        while end < total and len(enc.encode(chunk)) < 4000:
            chunk += words[end] + " "
            end += 1
        chunks.append(chunk)
        if end >= total:
            # This chunk already runs to the end of the text. Any later start
            # would only yield a suffix of it: an extra API call whose output
            # duplicates content and bloats the combined summary.
            break

    if not chunks:
        return ""

    with ThreadPool(4) as pool:
        shortened = pool.starmap(
            summarize_part, [(chunk, api_key) for chunk in chunks]
        )
    # Separate the partial summaries so the last word of one is not fused
    # with the first word of the next.
    return "\n".join(shortened)
def modify_text(text, api_key, command, custom_command=None):
    """Apply the selected command to a meeting transcript via the chat API.

    Texts shorter than 4096 tokens are sent directly. Longer texts are first
    condensed with ``shorten_text`` until they are at most 4096 tokens, and
    the instruction is reworded to refer to "meeting parts".

    Args:
        text: The meeting transcript.
        api_key: OpenAI API key forwarded to every request.
        command: A key of ``MODES``; "Custom" selects ``custom_command``.
        custom_command: Instruction used when ``command`` is "Custom". A
            missing (``None``) value is treated as an empty instruction.

    Returns:
        The message content of the first choice of the final chat completion.

    Raises:
        KeyError: If ``command`` is not a key of ``MODES``.
    """
    if command == "Custom":
        # Without the fallback a None custom command would put the text "None"
        # into the prompt, or crash on ``.replace`` in the long-text branch.
        prompt = custom_command or ""
    else:
        prompt = MODES[command]

    if len(enc.encode(text)) < 4096:
        source_description = "transcript of the meeting"
        content = text
    else:
        prompt = prompt.replace("meeting transcript", "meeting parts")
        source_description = "summary of the meeting parts"
        content = text
        remaining = len(enc.encode(content))
        while remaining > 4096:
            content = shorten_text(content, api_key)
            previous, remaining = remaining, len(enc.encode(content))
            if remaining >= previous:
                # The pass made no progress; stop rather than loop (and issue
                # API requests) forever. The last result is sent as-is.
                break

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": f"You are a meeting organizer. You want to summarize a meeting. You are given the following {source_description}. {prompt}",
            },
            {"role": "user", "content": content},
        ],
        api_key=api_key,
    )
    return response["choices"][0]["message"]["content"]
# Gradio UI: API key and transcript inputs on the left; command selection,
# optional custom instruction and the result on the right.
# NOTE(review): the original nesting of the components was not recoverable
# from the flattened source; confirm the layout below matches the intent.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            api_key = gr.Textbox(lines=1, label="OpenAI API Key")
            input_text = gr.Textbox(lines=15, label="Meeting Transcript")
        with gr.Column():
            command = gr.Dropdown(list(MODES.keys()), label="Command", value="Short summary")
            # Hidden until the "Custom" command is selected.
            custom_command = gr.Textbox(lines=2, label="Custom command", visible=False, value="Summarize the following meeting transcript in a single paragraph. The summary should include all the important points discussed in the meeting.")
            output_text = gr.Textbox(lines=10, label="Summary")

    def show_command(command):
        """Show the custom-command textbox only for the "Custom" command."""
        if command == "Custom":
            return {custom_command: gr.update(visible=True)}
        else:
            return {custom_command: gr.update(visible=False)}

    command.change(show_command, command, custom_command)

    # The caption of a button is its value (first argument); ``label`` does
    # not set the displayed text.
    button = gr.Button("Process")
    button.click(modify_text, [input_text, api_key, command, custom_command], output_text)

demo.title = "Meeting Summary"
demo.launch()