Spaces:
Sleeping
Sleeping
fracapuano
commited on
add files via upload
Browse files- .Dockerfile +34 -0
- app.py +26 -0
- meeting_notes.py +202 -0
- poetry.lock +0 -0
- pyproject.toml +18 -0
.Dockerfile
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Use an official Python runtime as a base image
FROM python:3.12

# Set the HOME environment variable and make /home directory world-writable
ENV HOME=/home
RUN mkdir -p $HOME && chmod 777 $HOME

# Set the working directory in the container
WORKDIR /usr/src/app

# Copy only the dependency manifests first, so the dependency layers below are
# rebuilt only when pyproject.toml or poetry.lock change
COPY pyproject.toml poetry.lock* /usr/src/app/

# Install Poetry
RUN pip install -U pip
RUN pip install poetry

# Configure Poetry: Do not create a virtual environment
RUN poetry config virtualenvs.create false

# Install project dependencies only. --no-root skips installing the project
# itself: at this point the image holds just the two manifests, and the
# repository ships no `transcript2notes` package for Poetry to install.
RUN poetry install --no-root --no-interaction

# Copy the rest of your app's source code from your host to your image filesystem.
COPY . /usr/src/app

# This is the port exposed by the container
EXPOSE 7860

# Checking the container is still working
HEALTHCHECK CMD curl --fail http://localhost:7860/_stcore/health

# The command to run the app
ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
|
app.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from meeting_notes import transcript_to_notes
|
3 |
+
|
4 |
+
# Streamlit front-end: upload a transcript, optionally name the speakers,
# and download the generated meeting notes.
st.title('Meeting Transcript to Notes Converter')

uploaded_file = st.file_uploader("Choose a file", type=["txt"])
if uploaded_file is not None:
    transcript = str(uploaded_file.read(), "utf-8")  # Read and decode file
    speakers_input = st.text_input("Enter the list of speakers, separated by commas (optional)")

    if st.button("Generate Notes"):
        # Drop blank entries (e.g. from a trailing comma) so the notes never
        # contain empty speaker names; fall back to None when nothing usable is left.
        speakers_list = [
            speaker.strip()
            for speaker in speakers_input.split(',')
            if speaker.strip()
        ] or None
        notes = transcript_to_notes(transcript, speakers_list)

        # Keep a copy of the notes in the working directory. The path is fixed,
        # so every run overwrites the previous file. UTF-8 is set explicitly
        # because the notes may contain non-ASCII characters.
        with open("meeting_notes.txt", "w", encoding="utf-8") as file:
            file.write(notes)

        st.success("Meeting notes generated successfully!")
        st.download_button(
            label="Download Meeting Notes",
            data=notes,
            file_name="meeting_notes.txt",
            mime="text/plain"
        )
|
26 |
+
|
meeting_notes.py
ADDED
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from openai import OpenAI
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
from typing import List, Optional
|
4 |
+
from tqdm import tqdm
|
5 |
+
|
6 |
+
# Load settings from a local .env file before the client below is created
# (presumably this is where the OpenAI API key comes from — confirm against deployment).
load_dotenv()
|
7 |
+
# Module-level OpenAI client shared by every helper in this file.
client = OpenAI()
|
8 |
+
|
9 |
+
def extract_topics(meeting_transcript):
    """Ask the model for the main topics discussed in a meeting.

    Args:
        meeting_transcript: Full text of the meeting transcript.

    Returns:
        A list of topic names parsed from the model's bracketed,
        comma-separated answer. Empty when the answer holds no usable item.
    """
    prompt_text = f"""
## Transcript
<transcript>
{meeting_transcript}
</transcript>

You are a topic extractor whose main task is to identify and list the top 5 most important topics discussed
in a meeting, to whom you have access thanks to the provided meeting transcript.
Provided the transcript available under the <transcript> tags, analyze it and extract the main topics discussed.
Format your output as a list of topics I could iterate on.

An ideal output for a meeting covering budgeting budget concers, project handling and staffing looks like
[Budgeting, Project Deadlines, Staffing, New Policies, Client Feedback]
"""

    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[{"role": "user", "content": prompt_text}],
        max_tokens=100,  # short list of topics discussed
        temperature=0.0,
        stop=["\n", "]"]  # Stops to help ensure the list format is respected
    )

    # The message content may be missing; treat that as an empty answer.
    raw_answer = response.choices[0].message.content or ""

    # Tolerate surrounding whitespace, brackets, quoted items and commas
    # without a following space, and drop empty fragments.
    cleaned_items = (
        item.strip().strip("\"'")
        for item in raw_answer.strip().strip("][").split(",")
    )
    topics = [item for item in cleaned_items if item]
    return topics
|
35 |
+
|
36 |
+
def generate_bullet_point_summary(topic, meeting_transcript):
    """Summarize, as bullet points, what the meeting said about one topic.

    Args:
        topic: The topic the summary should focus on.
        meeting_transcript: Full text of the meeting transcript.

    Returns:
        The model's answer with surrounding whitespace removed.
    """
    prompt = f"""
<topic>
{topic}
</topic>
<transcript>
{meeting_transcript}
</transcript>

You are an AI assistant tasked with assisting in summarizing meeting discussions.
Below is the transcript of a meeting, and a specific topic to focus on.
Please provide a summary of all the discussions related to this topic in bullet points. Be very
concise and to the point. Each bullet point must contain one concept only.
"""
    user_message = {"role": "user", "content": prompt}

    completion = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[user_message],
        max_tokens=400,  # Increase if more detailed summaries are needed
        stop=["\n\n"],  # A double newline to signify the end of the summary list
    )
    return completion.choices[0].message.content.strip()
|
59 |
+
|
60 |
+
def summarize_topics(topics, meeting_transcript):
    """Build a bullet-point summary for every topic.

    Args:
        topics: Iterable of topic names.
        meeting_transcript: Full text of the meeting transcript.

    Returns:
        A dict mapping each topic to the summary produced by
        ``generate_bullet_point_summary`` for that topic.
    """
    return {
        name: generate_bullet_point_summary(name, meeting_transcript)
        for name in topics
    }
|
67 |
+
|
68 |
+
def extract_actionable_items(meeting_transcript):
    """Extract the action items assigned to each speaker in a meeting.

    Args:
        meeting_transcript: Full text of the meeting transcript.

    Returns:
        The per-speaker action-item list as a string. When the model wraps
        its answer in the requested <result> tags, only the text inside the
        tags is returned; otherwise the whole answer is returned. Surrounding
        whitespace is stripped in both cases.
    """
    prompt_text = f"""
Please carefully analyze the following meeting transcript, which will be provided between XML tags:

<meeting_transcript>
{meeting_transcript}
</meeting_transcript>

First, identify each unique speaker who participated in the meeting.

Then, for each speaker you identified, carefully extract any concrete action items, tasks, or next
steps that were assigned to them during the meeting. Use the full context of the meeting to
determine what the key next steps are for each person.

Format your response as a bulleted list, with each speaker's full name followed by a sublist of the
specific action items you identified for them. Here is an example of the desired format:

## John Smith:
- Follow up with the client by next Wednesday.
- Prepare a detailed budget proposal for the next meeting.
## Jane Doe:
- Coordinate with the marketing team to draft the new campaign outline.
- Send updated staffing requirements to HR by Friday.

Omit any speakers for whom no clear action items or next steps were specified in the meeting. Focus
on extracting the most concrete and actionable items for each speaker.

Write your full list of speakers and action items inside <result> tags.
If you are unable to identify the speakers' names, please write "Speaker 1", "Speaker 2", etc.
"""
    # No stop sequence here: the answer covers several speakers, and stopping
    # at the first blank line would cut it off before the closing </result> tag.
    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[{"role": "user", "content": prompt_text}],
    )
    action_items = response.choices[0].message.content or ""

    # Keep only the text inside the <result> wrapper the prompt asks for.
    _, opening_tag, after_opening = action_items.partition("<result>")
    if opening_tag:
        action_items = after_opening.partition("</result>")[0]

    return action_items.strip()
|
105 |
+
|
106 |
+
def cleanup_meeting_notes(meeting_notes, speakers_list=None):
    """Have the model reformat draft meeting notes into the final markdown layout.

    Args:
        meeting_notes: Draft notes to reformat.
        speakers_list: Optional speaker names; when empty or None the prompt
            states that no speakers list was provided.

    Returns:
        The model's answer, unmodified.
    """
    speakers_block = speakers_list or "No speakers list provided"

    prompt = f"""
<meeting_notes_draft>
{meeting_notes}
</meeting_notes_draft>
<speakers_list>
{speakers_block}
</speakers_list>

You are a meeting notes editor who has been tasked with cleaning up the draft of a meeting notes document.
You must not modify the content you receive in any way or form, your task is simply to reformat the text to make it adhere to
the following guidelines:
- Production-ready meeting notes are always formatted in markdown. Ensure that the text is properly formatted in markdown.
- Production-ready meeting notes always have 3 sections: "Speakers", "Meeting Summary", "Action Items". These sections are always H1 in markdown (#Speakers, #Meeting Summary, #Action Items).
- Production-ready meeting notes always have a horizontal rule (---) between each section.
- Production-ready meeting notes always present the topics discussed in the #Meeting Summary section, with each topic being a toggle subheading (> ##Topic).
- Production-ready meeting notes always present the bullet points under each topic as markdown bullet points points.
- Production-ready meeting notes always have each speaker's name in bold.
- Production-ready meeting notes always have the action items in a bulleted list.
- Production-ready meeting notes always have the action items grouped by the speaker who is responsible for them.
- Production-ready meetings always presents speakers mapped to the name in the <speakers_list> tag, if available, in the same order. This means that for ["Francesco", "Carlo", "Antonio"]
you would have that "Francesco" is the "Speaker 0", "Carlo" is the "Speaker 1", and "Antonio" is the "Speaker 2".

Your output must exactly match the format described above. You must not modify the content of the meeting notes in any way, only the formatting. You will be
penalized if you change the content of the meeting notes.
An example template for the meeting notes is as follows:
# Speakers
- **Speaker 0**
- **Speaker 1**
...

---
# Meeting Summary
> ## Topic 1
- Bullet point 1
- Bullet point 2
...
> ## Topic 2
- Bullet point 1
- Bullet point 2
...

---
# Action Items
## <Speaker 0's name> to own
- Action item 1
- Action item 2
## <Speaker 1's name> to own
- Action item 1
- Action item 2
"""
    user_message = {"role": "user", "content": prompt}

    completion = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[user_message],
    )
    return completion.choices[0].message.content
|
163 |
+
|
164 |
+
def transcript_to_notes(meeting_transcript: str, speakers_list:Optional[List[str]]=None) -> str:
    """Converts a meeting transcript into formatted meeting notes.

    Runs three extraction steps (topics, per-topic summaries, per-speaker
    action items), assembles them into a plain-text draft, and passes that
    draft to ``cleanup_meeting_notes`` for final formatting.

    Args:
        meeting_transcript (str): The text of the meeting transcript
        speakers_list (Optional[List[str]]): A list of speakers in the meeting

    Returns:
        str: The formatted meeting notes
    """
    # The context manager closes the progress bar even if one of the steps raises.
    with tqdm(total=3) as pbar:
        topics = extract_topics(meeting_transcript)
        pbar.update(1)
        by_topic_summaries = summarize_topics(topics, meeting_transcript)
        pbar.update(1)
        actions_by_speaker = extract_actionable_items(meeting_transcript)
        pbar.update(1)

    draft_notes = f"""
topics: {topics}
summaries: {by_topic_summaries}
actions: {actions_by_speaker}
"""

    meeting_notes = cleanup_meeting_notes(draft_notes, speakers_list)
    return meeting_notes
|
190 |
+
|
191 |
+
# Example usage
|
192 |
+
if __name__ == "__main__":
    # Read the sample transcript. UTF-8 is set explicitly so the result does
    # not depend on the platform's default encoding.
    with open("tanguy-off-boarding-meeting.txt", "r", encoding="utf-8") as file:
        meeting_transcript = file.read()

    speakers_list = ["Tanguy", "Francesco"]

    notes = transcript_to_notes(meeting_transcript, speakers_list)
    # The generated notes may contain non-ASCII characters, so write them as UTF-8.
    with open("meeting_notes.md", "w", encoding="utf-8") as file:
        file.write(notes)

    print("Meeting notes generated successfully!")
|
poetry.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pyproject.toml
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool.poetry]
|
2 |
+
name = "transcript2notes"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = "An LLM-based chain made of agents to make my life at Alan easier."
|
5 |
+
authors = ["fracapuano <[email protected]>"]
|
6 |
+
readme = "README.md"
|
7 |
+
|
8 |
+
[tool.poetry.dependencies]
|
9 |
+
python = "^3.12"
|
10 |
+
openai = "^1.30.3"
|
11 |
+
anthropic = "^0.26.1"
|
12 |
+
python-dotenv = "^1.0.1"
|
13 |
+
streamlit = "^1.35.0"
|
14 |
+
|
15 |
+
|
16 |
+
[build-system]
|
17 |
+
requires = ["poetry-core"]
|
18 |
+
build-backend = "poetry.core.masonry.api"
|